On Sat, Dec 22, 2007 at 09:13:44PM +0200, Avi Kivity wrote:
> Unfortunately, this fails badly on Intel i386:
Hmm, OK, there's a definite bug: I forgot that an int1 kernel->kernel
switch on x86 has no special debug stack like it does on x86-64. This
version should have a better chance of working; I hope I got all the
offsets right from memory... At least the offset "32" in the leal, and
the argument landing in eax thanks to fastcall, should be right, or I
doubt it could have survived the double dereferencing (sketched below,
after my sign-off). The extra dereference probably didn't oops there
because you likely have >=1G of RAM, so there was a 25% chance of
crashing due to the missing sched-in and a 75% chance of crashing in
the extra dereference in a more meaningful way.
Signed-off-by: Andrea Arcangeli <[EMAIL PROTECTED]>
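---

For reference, the stack layout the handler relies on, as I understand
it (reconstructed from memory, struct names purely illustrative; this
assumes SAVE_REGS pushes 8 general registers on i386, 32 bytes, and 15
on x86-64, 120 bytes, and that fastcall means regparm so the first
argument travels in %eax on i386):

/* i386: an int1 taken kernel->kernel switches no stacks and pushes
 * no SS/ESP, so the trap frame sits on the interrupted stack. */
struct int1_frame_i386 {
	void *eip;	/* ip[0]: breakpoint address that fired */
	void *cs;	/* ip[1] */
	void *eflags;	/* ip[2] */
	void *stack[];	/* ip + 3: top of the interrupted stack; on entry
			 * to schedule() this word is its return address,
			 * hence sched_in_addr = (void *) *(ip + 3) */
};

/* x86-64: int1 runs on the IST debug stack, so the CPU also pushes
 * the old SS:RSP -- that's where the double dereference comes from. */
struct int1_frame_x86_64 {
	void *rip;	/* ip[0] */
	void *cs;	/* ip[1] */
	void *rflags;	/* ip[2] */
	void **rsp;	/* ip[3]: interrupted stack pointer; sched_in_addr
			 * sits at its top, hence **(ip + 3) */
	void *ss;	/* ip[4] */
};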
diff --git a/kernel/hack-module.awk b/kernel/hack-module.awk
index 7993aa2..5187c96 100644
--- a/kernel/hack-module.awk
+++ b/kernel/hack-module.awk
@@ -24,32 +24,6 @@
printf("MODULE_INFO(version, \"%s\");\n", version)
}
-/^static unsigned long vmcs_readl/ {
- in_vmcs_read = 1
-}
-
-/ASM_VMX_VMREAD_RDX_RAX/ && in_vmcs_read {
- printf("\tstart_special_insn();\n")
-}
-
-/return/ && in_vmcs_read {
- printf("\tend_special_insn();\n");
- in_vmcs_read = 0
-}
-
-/^static void vmcs_writel/ {
- in_vmcs_write = 1
-}
-
-/ASM_VMX_VMWRITE_RAX_RDX/ && in_vmcs_write {
- printf("\tstart_special_insn();\n")
-}
-
-/if/ && in_vmcs_write {
- printf("\tend_special_insn();\n");
- in_vmcs_write = 0
-}
-
/^static void vmx_load_host_state/ {
vmx_load_host_state = 1
}
@@ -74,15 +48,6 @@
print "\tspecial_reload_dr7();"
}
-/static void vcpu_put|static int __vcpu_run|static struct kvm_vcpu \*vmx_create_vcpu/ {
- in_tricky_func = 1
-}
-
-/preempt_disable|get_cpu/ && in_tricky_func {
- printf("\tin_special_section();\n");
- in_tricky_func = 0
-}
-
/unsigned long flags;/ && vmx_load_host_state {
print "\tunsigned long gsbase;"
}
@@ -90,4 +55,3 @@
/local_irq_save/ && vmx_load_host_state {
print "\t\tgsbase = vmcs_readl(HOST_GS_BASE);"
}
-
diff --git a/kernel/preempt.c b/kernel/preempt.c
index 8bb0405..fd6f8dc 100644
--- a/kernel/preempt.c
+++ b/kernel/preempt.c
@@ -6,8 +6,6 @@
static DEFINE_SPINLOCK(pn_lock);
static LIST_HEAD(pn_list);
-static DEFINE_PER_CPU(int, notifier_enabled);
-static DEFINE_PER_CPU(struct task_struct *, last_tsk);
#define dprintk(fmt) do { \
if (0) \
@@ -15,59 +13,95 @@ static DEFINE_PER_CPU(struct task_struct *, last_tsk);
current->pid, raw_smp_processor_id()); \
} while (0)
-static void preempt_enable_notifiers(void)
+static void preempt_enable_sched_out_notifiers(void)
{
- int cpu = raw_smp_processor_id();
-
- if (per_cpu(notifier_enabled, cpu))
- return;
-
- dprintk("\n");
- per_cpu(notifier_enabled, cpu) = 1;
asm volatile ("mov %0, %%db0" : : "r"(schedule));
- asm volatile ("mov %0, %%db7" : : "r"(0x702ul));
+ asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
+#ifdef CONFIG_X86_64
+ current->thread.debugreg7 = 0ul;
+#else
+ current->thread.debugreg[7] = 0ul;
+#endif
+#ifdef TIF_DEBUG
+ clear_tsk_thread_flag(current, TIF_DEBUG);
+#endif
+}
+
+static void preempt_enable_sched_in_notifiers(void * addr)
+{
+ asm volatile ("mov %0, %%db0" : : "r"(addr));
+ asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
+#ifdef CONFIG_X86_64
+ current->thread.debugreg0 = (unsigned long) addr;
+ current->thread.debugreg7 = 0x701ul;
+#else
+ current->thread.debugreg[0] = (unsigned long) addr;
+ current->thread.debugreg[7] = 0x701ul;
+#endif
+#ifdef TIF_DEBUG
+ set_tsk_thread_flag(current, TIF_DEBUG);
+#endif
}
void special_reload_dr7(void)
{
- asm volatile ("mov %0, %%db7" : : "r"(0x702ul));
+ asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
}
EXPORT_SYMBOL_GPL(special_reload_dr7);
-static void preempt_disable_notifiers(void)
+static void __preempt_disable_notifiers(void)
{
- int cpu = raw_smp_processor_id();
-
- if (!per_cpu(notifier_enabled, cpu))
- return;
+ asm volatile ("mov %0, %%db7" : : "r"(0ul));
+}
- dprintk("\n");
- per_cpu(notifier_enabled, cpu) = 0;
- asm volatile ("mov %0, %%db7" : : "r"(0x400ul));
+static void preempt_disable_notifiers(void)
+{
+ __preempt_disable_notifiers();
+#ifdef CONFIG_X86_64
+ current->thread.debugreg7 = 0ul;
+#else
+ current->thread.debugreg[7] = 0ul;
+#endif
+#ifdef TIF_DEBUG
+ clear_tsk_thread_flag(current, TIF_DEBUG);
+#endif
}
-static void __attribute__((used)) preempt_notifier_trigger(void)
+static void fastcall __attribute__((used)) preempt_notifier_trigger(void *** ip)
{
struct preempt_notifier *pn;
int cpu = raw_smp_processor_id();
int found = 0;
- unsigned long flags;
dprintk(" - in\n");
//dump_stack();
- spin_lock_irqsave(&pn_lock, flags);
+ spin_lock(&pn_lock);
list_for_each_entry(pn, &pn_list, link)
if (pn->tsk == current) {
found = 1;
break;
}
- spin_unlock_irqrestore(&pn_lock, flags);
- preempt_disable_notifiers();
+ spin_unlock(&pn_lock);
+
if (found) {
- dprintk("sched_out\n");
- pn->ops->sched_out(pn, NULL);
- per_cpu(last_tsk, cpu) = NULL;
- }
+ if ((void *) *ip != schedule) {
+ dprintk("sched_in\n");
+ preempt_enable_sched_out_notifiers();
+ pn->ops->sched_in(pn, cpu);
+ } else {
+ void * sched_in_addr;
+ dprintk("sched_out\n");
+#ifdef CONFIG_X86_64
+ sched_in_addr = **(ip+3);
+#else
+ /* no special debug stack switch on x86 */
+ sched_in_addr = (void *) *(ip+3);
+#endif
+ preempt_enable_sched_in_notifiers(sched_in_addr);
+ pn->ops->sched_out(pn, NULL);
+ }
+ } else
+ __preempt_disable_notifiers();
dprintk(" - out\n");
}
@@ -104,6 +138,11 @@ asm ("pn_int1_handler: \n\t"
"pop " TMP " \n\t"
"jz .Lnotme \n\t"
SAVE_REGS "\n\t"
+#ifdef CONFIG_X86_64
+ "leaq 120(%rsp),%rdi\n\t"
+#else
+ "leal 32(%esp),%eax\n\t"
+#endif
"call preempt_notifier_trigger \n\t"
RESTORE_REGS "\n\t"
#ifdef CONFIG_X86_64
@@ -121,75 +160,28 @@ asm ("pn_int1_handler: \n\t"
#endif
);
-void in_special_section(void)
-{
- struct preempt_notifier *pn;
- int cpu = raw_smp_processor_id();
- int found = 0;
- unsigned long flags;
-
- if (per_cpu(last_tsk, cpu) == current)
- return;
-
- dprintk(" - in\n");
- spin_lock_irqsave(&pn_lock, flags);
- list_for_each_entry(pn, &pn_list, link)
- if (pn->tsk == current) {
- found = 1;
- break;
- }
- spin_unlock_irqrestore(&pn_lock, flags);
- if (found) {
- dprintk("\n");
- per_cpu(last_tsk, cpu) = current;
- pn->ops->sched_in(pn, cpu);
- preempt_enable_notifiers();
- }
- dprintk(" - out\n");
-}
-EXPORT_SYMBOL_GPL(in_special_section);
-
-void start_special_insn(void)
-{
- preempt_disable();
- in_special_section();
-}
-EXPORT_SYMBOL_GPL(start_special_insn);
-
-void end_special_insn(void)
-{
- preempt_enable();
-}
-EXPORT_SYMBOL_GPL(end_special_insn);
-
void preempt_notifier_register(struct preempt_notifier *notifier)
{
- int cpu = get_cpu();
unsigned long flags;
dprintk(" - in\n");
spin_lock_irqsave(&pn_lock, flags);
- preempt_enable_notifiers();
+ preempt_enable_sched_out_notifiers();
notifier->tsk = current;
list_add(&notifier->link, &pn_list);
spin_unlock_irqrestore(&pn_lock, flags);
- per_cpu(last_tsk, cpu) = current;
- put_cpu();
dprintk(" - out\n");
}
void preempt_notifier_unregister(struct preempt_notifier *notifier)
{
- int cpu = get_cpu();
unsigned long flags;
dprintk(" - in\n");
spin_lock_irqsave(&pn_lock, flags);
list_del(&notifier->link);
spin_unlock_irqrestore(&pn_lock, flags);
- per_cpu(last_tsk, cpu) = NULL;
preempt_disable_notifiers();
- put_cpu();
dprintk(" - out\n");
}
@@ -238,7 +230,16 @@ void preempt_notifier_sys_init(void)
static void do_disable(void *blah)
{
- preempt_disable_notifiers();
+#ifdef TIF_DEBUG
+ if (!test_tsk_thread_flag(current, TIF_DEBUG))
+#else
+#ifdef CONFIG_X86_64
+ if (!current->thread.debugreg7)
+#else
+ if (!current->thread.debugreg[7])
+#endif
+#endif
+ __preempt_disable_notifiers();
}
void preempt_notifier_sys_exit(void)
>
> > kvm: emulating preempt notifiers; do not benchmark on this machine
> > loaded kvm module (kvm-56-127-g433be51)
> > vmwrite error: reg c08 value d8 (err 3080)
> > [<f8baf9e2>] vmx_save_host_state+0x4f/0x162 [kvm_intel]
> > [<c0425803>] __cond_resched+0x25/0x3c
> > [<f91a22a4>] kvm_arch_vcpu_ioctl_run+0x16f/0x3a7 [kvm]
> > [<f919f244>] kvm_vcpu_ioctl+0xcb/0x28f [kvm]
> > [<c0421987>] enqueue_entity+0x2c0/0x2ea
> > [<c05a8340>] skb_dequeue+0x39/0x3f
> > [<c0604b6d>] unix_stream_recvmsg+0x3a2/0x4c3
> > [<c0425c82>] scheduler_tick+0x1a1/0x274
> > [<c0487329>] core_sys_select+0x21f/0x2fa
> > [<c043e9e6>] clockevents_program_event+0xb5/0xbc
> > [<c04c6853>] avc_has_perm+0x4e/0x58
> > [<c04c7174>] inode_has_perm+0x66/0x6e
> > [<c0430bed>] recalc_sigpending+0xb/0x1d
> > [<c043231d>] dequeue_signal+0xa9/0x12a
> > [<c043cb95>] getnstimeofday+0x30/0xbf
> > [<c04c7205>] file_has_perm+0x89/0x91
> > [<f919f179>] kvm_vcpu_ioctl+0x0/0x28f [kvm]
> > [<c04861b9>] do_ioctl+0x21/0xa0
> > [<c048646f>] vfs_ioctl+0x237/0x249
> > [<c04864cd>] sys_ioctl+0x4c/0x67
> > [<c0404f26>] sysenter_past_esp+0x5f/0x85
> > =======================
>
> vmwrite error means the vmcs pointer was not loaded, probably because
> the sched_in event did not fire after a vcpu migration.
>
> --
> Do not meddle in the internals of kernels, for they are subtle and quick to
> panic.
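
On the sched_in not firing after a vcpu migration: that's what the
mirroring of db0/db7 into current->thread.debugreg* plus TIF_DEBUG in
this patch is meant to handle, presumably so the stock context-switch
path reloads the breakpoint on whatever CPU the task lands on. I
believe that's also why 0x702 became 0x701 (the DR7 bit layout below
is architectural, the kernel-convention part is my assumption): it
flips breakpoint 0 from the global-enable bit G0 to the local-enable
bit L0, the form the thread-switch debug register reload works with.
A quick userspace decode of the two values:

#include <stdio.h>

/* Decode the DR7 breakpoint-0 fields for the two values used in the
 * patch. Architecturally: bit 0 = L0 (local enable), bit 1 = G0
 * (global enable), bits 8/9 = LE/GE, bits 16-17 = R/W0 (0 means
 * execute), bits 18-19 = LEN0 (0 means 1 byte). */
static void decode_dr7(unsigned long dr7)
{
	printf("dr7=%#05lx: L0=%lu G0=%lu LE=%lu GE=%lu rw0=%lu len0=%lu\n",
	       dr7,
	       (dr7 >> 0) & 1, (dr7 >> 1) & 1,
	       (dr7 >> 8) & 1, (dr7 >> 9) & 1,
	       (dr7 >> 16) & 3, (dr7 >> 18) & 3);
}

int main(void)
{
	decode_dr7(0x702ul);	/* before: G0, a global execute breakpoint */
	decode_dr7(0x701ul);	/* after:  L0, a local execute breakpoint */
	return 0;
}

Everything but the L0/G0 flip is identical: an execute breakpoint on
the address in db0.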