Re: [Xen-devel] [PATCH RFC v2 11/12] x86: modify interrupt handlers to support stack switching

2018-02-02 Thread Juergen Gross
On 31/01/18 11:36, Jan Beulich wrote:
 On 30.01.18 at 18:19,  wrote:
>> On 30/01/18 17:07, Jan Beulich wrote:
>> On 22.01.18 at 13:32,  wrote:
 --- a/xen/arch/x86/x86_64/asm-offsets.c
 +++ b/xen/arch/x86/x86_64/asm-offsets.c
 @@ -137,6 +137,10 @@ void __dummy__(void)
  OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id);
  OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
  OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
 +OFFSET(CPUINFO_stack_bottom_cpu, struct cpu_info, stack_bottom_cpu);
 +OFFSET(CPUINFO_flags, struct cpu_info, flags);
 +DEFINE(ASM_ON_VCPUSTACK, ON_VCPUSTACK);
 +DEFINE(ASM_VCPUSTACK_ACTIVE, VCPUSTACK_ACTIVE);
>>>
>>> Seeing their uses in asm_defns.h it's not really clear to me why
>>> you can't use the C constants there, the more that those uses
>>> are inside C macros (which perhaps would better be assembler
>>> ones). The latter doesn't even appear to be used in assembly
>>> code.
>>
>> I tried using the C constants but this led to rather nasty include
>> dependencies.
> 
> Hmm, I can imagine this to be the case, but I'd like to have more
> detail for justification. current.h itself doesn't have that many
> dependencies, and, if half-way reasonable, disentangling our
> headers may be the better choice.

Some #ifndef __ASSEMBLY__ made it work.

I think I had the defines in another header in the beginning and just
didn't switch back after moving them to current.h.

> 
>> ASM_VCPUSTACK_ACTIVE will be used when %cr3 switching is being added.
> 
> Please introduce it when needed.
> 
 --- a/xen/common/wait.c
 +++ b/xen/common/wait.c
 @@ -122,10 +122,10 @@ void wake_up_all(struct waitqueue_head *wq)
  
  static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
  {
 -struct cpu_info *cpu_info = get_cpu_info();
 +struct cpu_user_regs *user_regs = guest_cpu_user_regs();
  struct vcpu *curr = current;
  unsigned long dummy;
 -u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;
 +u32 entry_vector = user_regs->entry_vector;
  
  ASSERT(wqv->esp == 0);
  
 @@ -160,7 +160,7 @@ static void __prepare_to_wait(struct waitqueue_vcpu 
 *wqv)
  "pop %%r11; pop %%r10; pop %%r9;  pop %%r8;"
  "pop %%rbp; pop %%rdx; pop %%rbx; pop %%rax"
  : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy)
 -: "i" (PAGE_SIZE), "0" (0), "1" (cpu_info), "2" (wqv->stack)
 +: "i" (PAGE_SIZE), "0" (0), "1" (user_regs), "2" (wqv->stack)
  : "memory" );
  
  if ( unlikely(wqv->esp == 0) )
 @@ -169,7 +169,7 @@ static void __prepare_to_wait(struct waitqueue_vcpu 
 *wqv)
  domain_crash_synchronous();
  }
  
 -cpu_info->guest_cpu_user_regs.entry_vector = entry_vector;
 +user_regs->entry_vector = entry_vector;
  }
>>>
>>> I don't see how this change is related to the purpose of this patch,
>>> or why the change is needed. All you do is utilize that
>>> guest_cpu_user_regs is the first field of struct cpu_info afaics.
>>
>> guest_cpu_user_regs() might point to either stack, while get_cpu_info()
>> will always reference the Xen stack and never the per-vcpu one.
> 
> Then the description should say so for justification.

Okay, added.


Juergen

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH RFC v2 11/12] x86: modify interrupt handlers to support stack switching

2018-01-31 Thread Jan Beulich
>>> On 30.01.18 at 18:19,  wrote:
> On 30/01/18 17:07, Jan Beulich wrote:
> On 22.01.18 at 13:32,  wrote:
>>> --- a/xen/arch/x86/x86_64/asm-offsets.c
>>> +++ b/xen/arch/x86/x86_64/asm-offsets.c
>>> @@ -137,6 +137,10 @@ void __dummy__(void)
>>>  OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id);
>>>  OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
>>>  OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
>>> +OFFSET(CPUINFO_stack_bottom_cpu, struct cpu_info, stack_bottom_cpu);
>>> +OFFSET(CPUINFO_flags, struct cpu_info, flags);
>>> +DEFINE(ASM_ON_VCPUSTACK, ON_VCPUSTACK);
>>> +DEFINE(ASM_VCPUSTACK_ACTIVE, VCPUSTACK_ACTIVE);
>> 
>> Seeing their uses in asm_defns.h it's not really clear to me why
>> you can't use the C constants there, the more that those uses
>> are inside C macros (which perhaps would better be assembler
>> ones). The latter doesn't even appear to be used in assembly
>> code.
> 
> I tried using the C constants but this led to rather nasty include
> dependencies.

Hmm, I can imagine this to be the case, but I'd like to have more
detail for justification. current.h itself doesn't have that many
dependencies, and, if half-way reasonable, disentangling our
headers may be the better choice.

> ASM_VCPUSTACK_ACTIVE will be used when %cr3 switching is being added.

Please introduce it when needed.

>>> --- a/xen/common/wait.c
>>> +++ b/xen/common/wait.c
>>> @@ -122,10 +122,10 @@ void wake_up_all(struct waitqueue_head *wq)
>>>  
>>>  static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
>>>  {
>>> -struct cpu_info *cpu_info = get_cpu_info();
>>> +struct cpu_user_regs *user_regs = guest_cpu_user_regs();
>>>  struct vcpu *curr = current;
>>>  unsigned long dummy;
>>> -u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;
>>> +u32 entry_vector = user_regs->entry_vector;
>>>  
>>>  ASSERT(wqv->esp == 0);
>>>  
>>> @@ -160,7 +160,7 @@ static void __prepare_to_wait(struct waitqueue_vcpu 
>>> *wqv)
>>>  "pop %%r11; pop %%r10; pop %%r9;  pop %%r8;"
>>>  "pop %%rbp; pop %%rdx; pop %%rbx; pop %%rax"
>>>  : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy)
>>> -: "i" (PAGE_SIZE), "0" (0), "1" (cpu_info), "2" (wqv->stack)
>>> +: "i" (PAGE_SIZE), "0" (0), "1" (user_regs), "2" (wqv->stack)
>>>  : "memory" );
>>>  
>>>  if ( unlikely(wqv->esp == 0) )
>>> @@ -169,7 +169,7 @@ static void __prepare_to_wait(struct waitqueue_vcpu 
>>> *wqv)
>>>  domain_crash_synchronous();
>>>  }
>>>  
>>> -cpu_info->guest_cpu_user_regs.entry_vector = entry_vector;
>>> +user_regs->entry_vector = entry_vector;
>>>  }
>> 
>> I don't see how this change is related to the purpose of this patch,
>> or why the change is needed. All you do is utilize that
>> guest_cpu_user_regs is the first field of struct cpu_info afaics.
> 
> guest_cpu_user_regs() might point to either stack, while get_cpu_info()
> will always reference the Xen stack and never the per-vcpu one.

Then the description should say so for justification.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH RFC v2 11/12] x86: modify interrupt handlers to support stack switching

2018-01-30 Thread Juergen Gross
On 30/01/18 17:07, Jan Beulich wrote:
 On 22.01.18 at 13:32,  wrote:
>> --- a/xen/arch/x86/x86_64/asm-offsets.c
>> +++ b/xen/arch/x86/x86_64/asm-offsets.c
>> @@ -137,6 +137,10 @@ void __dummy__(void)
>>  OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id);
>>  OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
>>  OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
>> +OFFSET(CPUINFO_stack_bottom_cpu, struct cpu_info, stack_bottom_cpu);
>> +OFFSET(CPUINFO_flags, struct cpu_info, flags);
>> +DEFINE(ASM_ON_VCPUSTACK, ON_VCPUSTACK);
>> +DEFINE(ASM_VCPUSTACK_ACTIVE, VCPUSTACK_ACTIVE);
> 
> Seeing their uses in asm_defns.h it's not really clear to me why
> you can't use the C constants there, the more that those uses
> are inside C macros (which perhaps would better be assembler
> ones). The latter doesn't even appear to be used in assembly
> code.

I tried using the C constants but this led to rather nasty include
dependencies.

ASM_VCPUSTACK_ACTIVE will be used when %cr3 switching is being added.

> 
>> --- a/xen/arch/x86/x86_64/compat/entry.S
>> +++ b/xen/arch/x86/x86_64/compat/entry.S
>> @@ -19,6 +19,7 @@ ENTRY(entry_int82)
>>  movl  $HYPERCALL_VECTOR, 4(%rsp)
>>  SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. 
>> */
>>  mov   %rsp, %rdi
>> +SWITCH_FROM_VCPU_STACK
>>  CR4_PV32_RESTORE
> 
> Once again - why for compat mode guests?
> 
>> @@ -615,7 +623,9 @@ ENTRY(early_page_fault)
>>  movl  $TRAP_page_fault,4(%rsp)
>>  SAVE_ALL
>>  movq  %rsp,%rdi
>> +SWITCH_FROM_VCPU_STACK
> 
> Why, in this context?

Same as before: consistency. I can remove this.

> 
>>  call  do_early_page_fault
>> +movq  %rsp, %rdi
>>  jmp   restore_all_xen
> 
> Doesn't this belong in an earlier patch?

I have cleaned this up already.

> 
>> --- a/xen/common/wait.c
>> +++ b/xen/common/wait.c
>> @@ -122,10 +122,10 @@ void wake_up_all(struct waitqueue_head *wq)
>>  
>>  static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
>>  {
>> -struct cpu_info *cpu_info = get_cpu_info();
>> +struct cpu_user_regs *user_regs = guest_cpu_user_regs();
>>  struct vcpu *curr = current;
>>  unsigned long dummy;
>> -u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;
>> +u32 entry_vector = user_regs->entry_vector;
>>  
>>  ASSERT(wqv->esp == 0);
>>  
>> @@ -160,7 +160,7 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
>>  "pop %%r11; pop %%r10; pop %%r9;  pop %%r8;"
>>  "pop %%rbp; pop %%rdx; pop %%rbx; pop %%rax"
>>  : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy)
>> -: "i" (PAGE_SIZE), "0" (0), "1" (cpu_info), "2" (wqv->stack)
>> +: "i" (PAGE_SIZE), "0" (0), "1" (user_regs), "2" (wqv->stack)
>>  : "memory" );
>>  
>>  if ( unlikely(wqv->esp == 0) )
>> @@ -169,7 +169,7 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
>>  domain_crash_synchronous();
>>  }
>>  
>> -cpu_info->guest_cpu_user_regs.entry_vector = entry_vector;
>> +user_regs->entry_vector = entry_vector;
>>  }
> 
> I don't see how this change is related to the purpose of this patch,
> or why the change is needed. All you do is utilize that
> guest_cpu_user_regs is the first field of struct cpu_info afaics.

guest_cpu_user_regs() might point to either stack, while get_cpu_info()
will always reference the Xen stack and never the per-vcpu one.

> 
>> --- a/xen/include/asm-x86/asm_defns.h
>> +++ b/xen/include/asm-x86/asm_defns.h
>> @@ -116,6 +116,25 @@ void ret_from_intr(void);
>>  GET_STACK_END(reg);   \
>>  __GET_CURRENT(reg)
>>  
>> +#define SWITCH_FROM_VCPU_STACK   \
>> +GET_STACK_END(ax);   \
>> +testb $ASM_ON_VCPUSTACK, STACK_CPUINFO_FIELD(flags)(%rax);   \
>> +jz1f;\
>> +movq  STACK_CPUINFO_FIELD(stack_bottom_cpu)(%rax), %rsp; \
>> +1:
>> +
>> +#define SWITCH_FROM_VCPU_STACK_IST   \
>> +GET_STACK_END(ax);   \
>> +testb $ASM_ON_VCPUSTACK, STACK_CPUINFO_FIELD(flags)(%rax);   \
>> +jz1f;\
>> +subq  $(CPUINFO_sizeof - 1), %rax;   \
>> +addq  CPUINFO_stack_bottom_cpu(%rax), %rsp;  \
>> +subq  %rax, %rsp;\
> 
> If I'm not mistaken, %rsp is complete rubbish for one instruction
> here. While quite likely not a problem in practice, it would still
> feel better if you went through an intermediate register. I also
> think the calculation might then end up easier to follow. It'll also
> make analysis of a crash eas

Re: [Xen-devel] [PATCH RFC v2 11/12] x86: modify interrupt handlers to support stack switching

2018-01-30 Thread Jan Beulich
>>> On 22.01.18 at 13:32,  wrote:
> --- a/xen/arch/x86/x86_64/asm-offsets.c
> +++ b/xen/arch/x86/x86_64/asm-offsets.c
> @@ -137,6 +137,10 @@ void __dummy__(void)
>  OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id);
>  OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
>  OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
> +OFFSET(CPUINFO_stack_bottom_cpu, struct cpu_info, stack_bottom_cpu);
> +OFFSET(CPUINFO_flags, struct cpu_info, flags);
> +DEFINE(ASM_ON_VCPUSTACK, ON_VCPUSTACK);
> +DEFINE(ASM_VCPUSTACK_ACTIVE, VCPUSTACK_ACTIVE);

Seeing their uses in asm_defns.h it's not really clear to me why
you can't use the C constants there, all the more so since those uses
are inside C macros (which perhaps would better be assembler
ones). The latter doesn't even appear to be used in assembly
code.

> --- a/xen/arch/x86/x86_64/compat/entry.S
> +++ b/xen/arch/x86/x86_64/compat/entry.S
> @@ -19,6 +19,7 @@ ENTRY(entry_int82)
>  movl  $HYPERCALL_VECTOR, 4(%rsp)
>  SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. 
> */
>  mov   %rsp, %rdi
> +SWITCH_FROM_VCPU_STACK
>  CR4_PV32_RESTORE

Once again - why for compat mode guests?

> @@ -615,7 +623,9 @@ ENTRY(early_page_fault)
>  movl  $TRAP_page_fault,4(%rsp)
>  SAVE_ALL
>  movq  %rsp,%rdi
> +SWITCH_FROM_VCPU_STACK

Why, in this context?

>  call  do_early_page_fault
> +movq  %rsp, %rdi
>  jmp   restore_all_xen

Doesn't this belong in an earlier patch?

> --- a/xen/common/wait.c
> +++ b/xen/common/wait.c
> @@ -122,10 +122,10 @@ void wake_up_all(struct waitqueue_head *wq)
>  
>  static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
>  {
> -struct cpu_info *cpu_info = get_cpu_info();
> +struct cpu_user_regs *user_regs = guest_cpu_user_regs();
>  struct vcpu *curr = current;
>  unsigned long dummy;
> -u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;
> +u32 entry_vector = user_regs->entry_vector;
>  
>  ASSERT(wqv->esp == 0);
>  
> @@ -160,7 +160,7 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
>  "pop %%r11; pop %%r10; pop %%r9;  pop %%r8;"
>  "pop %%rbp; pop %%rdx; pop %%rbx; pop %%rax"
>  : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy)
> -: "i" (PAGE_SIZE), "0" (0), "1" (cpu_info), "2" (wqv->stack)
> +: "i" (PAGE_SIZE), "0" (0), "1" (user_regs), "2" (wqv->stack)
>  : "memory" );
>  
>  if ( unlikely(wqv->esp == 0) )
> @@ -169,7 +169,7 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
>  domain_crash_synchronous();
>  }
>  
> -cpu_info->guest_cpu_user_regs.entry_vector = entry_vector;
> +user_regs->entry_vector = entry_vector;
>  }

I don't see how this change is related to the purpose of this patch,
or why the change is needed. All you do is utilize that
guest_cpu_user_regs is the first field of struct cpu_info afaics.

> --- a/xen/include/asm-x86/asm_defns.h
> +++ b/xen/include/asm-x86/asm_defns.h
> @@ -116,6 +116,25 @@ void ret_from_intr(void);
>  GET_STACK_END(reg);   \
>  __GET_CURRENT(reg)
>  
> +#define SWITCH_FROM_VCPU_STACK   \
> +GET_STACK_END(ax);   \
> +testb $ASM_ON_VCPUSTACK, STACK_CPUINFO_FIELD(flags)(%rax);   \
> +jz1f;\
> +movq  STACK_CPUINFO_FIELD(stack_bottom_cpu)(%rax), %rsp; \
> +1:
> +
> +#define SWITCH_FROM_VCPU_STACK_IST   \
> +GET_STACK_END(ax);   \
> +testb $ASM_ON_VCPUSTACK, STACK_CPUINFO_FIELD(flags)(%rax);   \
> +jz1f;\
> +subq  $(CPUINFO_sizeof - 1), %rax;   \
> +addq  CPUINFO_stack_bottom_cpu(%rax), %rsp;  \
> +subq  %rax, %rsp;\

If I'm not mistaken, %rsp is complete rubbish for one instruction
here. While quite likely not a problem in practice, it would still
feel better if you went through an intermediate register. I also
think the calculation might then end up easier to follow. It'll also
make analysis of a crash easier if an NMI or #MC hits exactly at
this boundary.

> +1:
> +
> +#define SWITCH_TO_VCPU_STACK \
> +movq  %rdi, %rsp

For these additions as a whole: At least in new pieces of code
please avoid insn suffixes when they're redundant with registers
used.

> @@ -94,9 +95,16 @@ static inline struct cpu_info *get_cpu_info(void)
>  #define set_processor_id(id)  do {  \
>  struct cpu_info *ci__ = get_cpu_info(); \
>  ci__->per_

[Xen-devel] [PATCH RFC v2 11/12] x86: modify interrupt handlers to support stack switching

2018-01-22 Thread Juergen Gross
Modify the interrupt handlers to switch stacks on interrupt entry in
case they are running on a per-vcpu stack. The same applies to returning
to the guest: in case the context to be loaded is located on a
per-vcpu stack, switch to that stack before returning to the guest.

Signed-off-by: Juergen Gross 
---
 xen/arch/x86/x86_64/asm-offsets.c  |  4 
 xen/arch/x86/x86_64/compat/entry.S |  5 -
 xen/arch/x86/x86_64/entry.S| 15 +--
 xen/common/wait.c  |  8 
 xen/include/asm-x86/asm_defns.h| 19 +++
 xen/include/asm-x86/current.h  | 10 +-
 6 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/xen/arch/x86/x86_64/asm-offsets.c 
b/xen/arch/x86/x86_64/asm-offsets.c
index e136af6b99..0da756e7af 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -137,6 +137,10 @@ void __dummy__(void)
 OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id);
 OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
 OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
+OFFSET(CPUINFO_stack_bottom_cpu, struct cpu_info, stack_bottom_cpu);
+OFFSET(CPUINFO_flags, struct cpu_info, flags);
+DEFINE(ASM_ON_VCPUSTACK, ON_VCPUSTACK);
+DEFINE(ASM_VCPUSTACK_ACTIVE, VCPUSTACK_ACTIVE);
 DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
 BLANK();
 
diff --git a/xen/arch/x86/x86_64/compat/entry.S 
b/xen/arch/x86/x86_64/compat/entry.S
index abf3fcae48..b8d74e83db 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -19,6 +19,7 @@ ENTRY(entry_int82)
 movl  $HYPERCALL_VECTOR, 4(%rsp)
 SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */
 mov   %rsp, %rdi
+SWITCH_FROM_VCPU_STACK
 CR4_PV32_RESTORE
 
 GET_CURRENT(bx)
@@ -109,6 +110,7 @@ compat_process_trap:
 /* %rbx: struct vcpu, interrupts disabled */
 ENTRY(compat_restore_all_guest)
 ASSERT_INTERRUPTS_DISABLED
+SWITCH_TO_VCPU_STACK
 mov   $~(X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_VM),%r11d
 and   UREGS_eflags(%rsp),%r11d
 .Lcr4_orig:
@@ -195,7 +197,6 @@ ENTRY(compat_post_handle_exception)
 
 /* See lstar_enter for entry register state. */
 ENTRY(cstar_enter)
-sti
 CR4_PV32_RESTORE
 movq  8(%rsp),%rax /* Restore %rax. */
 movq  $FLAT_KERNEL_SS,8(%rsp)
@@ -206,6 +207,8 @@ ENTRY(cstar_enter)
 movl  $TRAP_syscall, 4(%rsp)
 SAVE_ALL
 movq  %rsp, %rdi
+SWITCH_FROM_VCPU_STACK
+sti
 GET_CURRENT(bx)
 movq  VCPU_domain(%rbx),%rcx
 cmpb  $0,DOMAIN_is_32bit_pv(%rcx)
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index f7412b87c2..991a8799a9 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -37,6 +37,7 @@ ENTRY(switch_to_kernel)
 /* %rbx: struct vcpu, interrupts disabled */
 restore_all_guest:
 ASSERT_INTERRUPTS_DISABLED
+SWITCH_TO_VCPU_STACK
 RESTORE_ALL
 testw $TRAP_syscall,4(%rsp)
 jziret_exit_to_guest
@@ -71,6 +72,7 @@ iret_exit_to_guest:
 ALIGN
 /* No special register assumptions. */
 restore_all_xen:
+SWITCH_TO_VCPU_STACK
 RESTORE_ALL adj=8
 iretq
 
@@ -91,7 +93,6 @@ restore_all_xen:
  * %ss must be saved into the space left by the trampoline.
  */
 ENTRY(lstar_enter)
-sti
 movq  8(%rsp),%rax /* Restore %rax. */
 movq  $FLAT_KERNEL_SS,8(%rsp)
 pushq %r11
@@ -101,6 +102,8 @@ ENTRY(lstar_enter)
 movl  $TRAP_syscall, 4(%rsp)
 SAVE_ALL
 mov   %rsp, %rdi
+SWITCH_FROM_VCPU_STACK
+sti
 GET_CURRENT(bx)
 testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
 jzswitch_to_kernel
@@ -189,7 +192,6 @@ process_trap:
 jmp  test_all_events
 
 ENTRY(sysenter_entry)
-sti
 pushq $FLAT_USER_SS
 pushq $0
 pushfq
@@ -201,6 +203,8 @@ GLOBAL(sysenter_eflags_saved)
 movl  $TRAP_syscall, 4(%rsp)
 SAVE_ALL
 movq  %rsp, %rdi
+SWITCH_FROM_VCPU_STACK
+sti
 GET_CURRENT(bx)
 cmpb  $0,VCPU_sysenter_disables_events(%rbx)
 movq  VCPU_sysenter_addr(%rbx),%rax
@@ -237,6 +241,7 @@ ENTRY(int80_direct_trap)
 movl  $0x80, 4(%rsp)
 SAVE_ALL
 mov   %rsp, %rdi
+SWITCH_FROM_VCPU_STACK
 
 cmpb  $0,untrusted_msi(%rip)
 UNLIKELY_START(ne, msi_check)
@@ -408,6 +413,7 @@ ENTRY(dom_crash_sync_extable)
 ENTRY(common_interrupt)
 SAVE_ALL CLAC
 movq %rsp,%rdi
+SWITCH_FROM_VCPU_STACK
 CR4_PV32_RESTORE
 pushq %rdi
 callq do_IRQ
@@ -430,6 +436,7 @@ ENTRY(page_fault)
 GLOBAL(handle_exception)
 SAVE_ALL CLAC
 movq  %rsp, %rdi
+SWITCH_FROM_VCPU_STACK
 handle_exception_saved:
 GET_CURRENT(bx)
 testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rdi)
@@ -60