On Sat, Dec 22, 2007 at 09:13:44PM +0200, Avi Kivity wrote: > Unfortunately, this fails badly on Intel i386:
Hmm, ok — there's a definite bug: I forgot that an int1 kernel->kernel switch on x86 has no special debug stack like on x86-64. This will have a better chance of working; I hope I got all the offsets right from memory.... At least the offset "32" in the leal and eax + fastcall should all be right, or I doubt it could survive the double dereferencing. Likely the one-more-dereference didn't oops there because you likely have >=1g of ram, and there was a 25% chance of crashing due to the lack of sched-in and a 75% chance of crashing in the one-more-dereference in a more meaningful way. Signed-off-by: Andrea Arcangeli <[EMAIL PROTECTED]> diff --git a/kernel/hack-module.awk b/kernel/hack-module.awk index 7993aa2..5187c96 100644 --- a/kernel/hack-module.awk +++ b/kernel/hack-module.awk @@ -24,32 +24,6 @@ printf("MODULE_INFO(version, \"%s\");\n", version) } -/^static unsigned long vmcs_readl/ { - in_vmcs_read = 1 -} - -/ASM_VMX_VMREAD_RDX_RAX/ && in_vmcs_read { - printf("\tstart_special_insn();\n") -} - -/return/ && in_vmcs_read { - printf("\tend_special_insn();\n"); - in_vmcs_read = 0 -} - -/^static void vmcs_writel/ { - in_vmcs_write = 1 -} - -/ASM_VMX_VMWRITE_RAX_RDX/ && in_vmcs_write { - printf("\tstart_special_insn();\n") -} - -/if/ && in_vmcs_write { - printf("\tend_special_insn();\n"); - in_vmcs_write = 0 -} - /^static void vmx_load_host_state/ { vmx_load_host_state = 1 } @@ -74,15 +48,6 @@ print "\tspecial_reload_dr7();" } -/static void vcpu_put|static int __vcpu_run|static struct kvm_vcpu \*vmx_create_vcpu/ { - in_tricky_func = 1 -} - -/preempt_disable|get_cpu/ && in_tricky_func { - printf("\tin_special_section();\n"); - in_tricky_func = 0 -} - /unsigned long flags;/ && vmx_load_host_state { print "\tunsigned long gsbase;" } @@ -90,4 +55,3 @@ /local_irq_save/ && vmx_load_host_state { print "\t\tgsbase = vmcs_readl(HOST_GS_BASE);" } - diff --git a/kernel/preempt.c b/kernel/preempt.c index 8bb0405..fd6f8dc 100644 --- a/kernel/preempt.c +++ b/kernel/preempt.c @@ -6,8 +6,6 @@ static 
DEFINE_SPINLOCK(pn_lock); static LIST_HEAD(pn_list); -static DEFINE_PER_CPU(int, notifier_enabled); -static DEFINE_PER_CPU(struct task_struct *, last_tsk); #define dprintk(fmt) do { \ if (0) \ @@ -15,59 +13,95 @@ static DEFINE_PER_CPU(struct task_struct *, last_tsk); current->pid, raw_smp_processor_id()); \ } while (0) -static void preempt_enable_notifiers(void) +static void preempt_enable_sched_out_notifiers(void) { - int cpu = raw_smp_processor_id(); - - if (per_cpu(notifier_enabled, cpu)) - return; - - dprintk("\n"); - per_cpu(notifier_enabled, cpu) = 1; asm volatile ("mov %0, %%db0" : : "r"(schedule)); - asm volatile ("mov %0, %%db7" : : "r"(0x702ul)); + asm volatile ("mov %0, %%db7" : : "r"(0x701ul)); +#ifdef CONFIG_X86_64 + current->thread.debugreg7 = 0ul; +#else + current->thread.debugreg[7] = 0ul; +#endif +#ifdef TIF_DEBUG + clear_tsk_thread_flag(current, TIF_DEBUG); +#endif +} + +static void preempt_enable_sched_in_notifiers(void * addr) +{ + asm volatile ("mov %0, %%db0" : : "r"(addr)); + asm volatile ("mov %0, %%db7" : : "r"(0x701ul)); +#ifdef CONFIG_X86_64 + current->thread.debugreg0 = (unsigned long) addr; + current->thread.debugreg7 = 0x701ul; +#else + current->thread.debugreg[0] = (unsigned long) addr; + current->thread.debugreg[7] = 0x701ul; +#endif +#ifdef TIF_DEBUG + set_tsk_thread_flag(current, TIF_DEBUG); +#endif } void special_reload_dr7(void) { - asm volatile ("mov %0, %%db7" : : "r"(0x702ul)); + asm volatile ("mov %0, %%db7" : : "r"(0x701ul)); } EXPORT_SYMBOL_GPL(special_reload_dr7); -static void preempt_disable_notifiers(void) +static void __preempt_disable_notifiers(void) { - int cpu = raw_smp_processor_id(); - - if (!per_cpu(notifier_enabled, cpu)) - return; + asm volatile ("mov %0, %%db7" : : "r"(0ul)); +} - dprintk("\n"); - per_cpu(notifier_enabled, cpu) = 0; - asm volatile ("mov %0, %%db7" : : "r"(0x400ul)); +static void preempt_disable_notifiers(void) +{ + __preempt_disable_notifiers(); +#ifdef CONFIG_X86_64 + current->thread.debugreg7 
= 0ul; +#else + current->thread.debugreg[7] = 0ul; +#endif +#ifdef TIF_DEBUG + clear_tsk_thread_flag(current, TIF_DEBUG); +#endif } -static void __attribute__((used)) preempt_notifier_trigger(void) +static void fastcall __attribute__((used)) preempt_notifier_trigger(void *** ip) { struct preempt_notifier *pn; int cpu = raw_smp_processor_id(); int found = 0; - unsigned long flags; dprintk(" - in\n"); //dump_stack(); - spin_lock_irqsave(&pn_lock, flags); + spin_lock(&pn_lock); list_for_each_entry(pn, &pn_list, link) if (pn->tsk == current) { found = 1; break; } - spin_unlock_irqrestore(&pn_lock, flags); - preempt_disable_notifiers(); + spin_unlock(&pn_lock); + if (found) { - dprintk("sched_out\n"); - pn->ops->sched_out(pn, NULL); - per_cpu(last_tsk, cpu) = NULL; - } + if ((void *) *ip != schedule) { + dprintk("sched_in\n"); + preempt_enable_sched_out_notifiers(); + pn->ops->sched_in(pn, cpu); + } else { + void * sched_in_addr; + dprintk("sched_out\n"); +#ifdef CONFIG_X86_64 + sched_in_addr = **(ip+3); +#else + /* no special debug stack switch on x86 */ + sched_in_addr = (void *) *(ip+3); +#endif + preempt_enable_sched_in_notifiers(sched_in_addr); + pn->ops->sched_out(pn, NULL); + } + } else + __preempt_disable_notifiers(); dprintk(" - out\n"); } @@ -104,6 +138,11 @@ asm ("pn_int1_handler: \n\t" "pop " TMP " \n\t" "jz .Lnotme \n\t" SAVE_REGS "\n\t" +#ifdef CONFIG_X86_64 + "leaq 120(%rsp),%rdi\n\t" +#else + "leal 32(%esp),%eax\n\t" +#endif "call preempt_notifier_trigger \n\t" RESTORE_REGS "\n\t" #ifdef CONFIG_X86_64 @@ -121,75 +160,28 @@ asm ("pn_int1_handler: \n\t" #endif ); -void in_special_section(void) -{ - struct preempt_notifier *pn; - int cpu = raw_smp_processor_id(); - int found = 0; - unsigned long flags; - - if (per_cpu(last_tsk, cpu) == current) - return; - - dprintk(" - in\n"); - spin_lock_irqsave(&pn_lock, flags); - list_for_each_entry(pn, &pn_list, link) - if (pn->tsk == current) { - found = 1; - break; - } - spin_unlock_irqrestore(&pn_lock, flags); - if 
(found) { - dprintk("\n"); - per_cpu(last_tsk, cpu) = current; - pn->ops->sched_in(pn, cpu); - preempt_enable_notifiers(); - } - dprintk(" - out\n"); -} -EXPORT_SYMBOL_GPL(in_special_section); - -void start_special_insn(void) -{ - preempt_disable(); - in_special_section(); -} -EXPORT_SYMBOL_GPL(start_special_insn); - -void end_special_insn(void) -{ - preempt_enable(); -} -EXPORT_SYMBOL_GPL(end_special_insn); - void preempt_notifier_register(struct preempt_notifier *notifier) { - int cpu = get_cpu(); unsigned long flags; dprintk(" - in\n"); spin_lock_irqsave(&pn_lock, flags); - preempt_enable_notifiers(); + preempt_enable_sched_out_notifiers(); notifier->tsk = current; list_add(¬ifier->link, &pn_list); spin_unlock_irqrestore(&pn_lock, flags); - per_cpu(last_tsk, cpu) = current; - put_cpu(); dprintk(" - out\n"); } void preempt_notifier_unregister(struct preempt_notifier *notifier) { - int cpu = get_cpu(); unsigned long flags; dprintk(" - in\n"); spin_lock_irqsave(&pn_lock, flags); list_del(¬ifier->link); spin_unlock_irqrestore(&pn_lock, flags); - per_cpu(last_tsk, cpu) = NULL; preempt_disable_notifiers(); - put_cpu(); dprintk(" - out\n"); } @@ -238,7 +230,16 @@ void preempt_notifier_sys_init(void) static void do_disable(void *blah) { - preempt_disable_notifiers(); +#ifdef TIF_DEBUG + if (!test_tsk_thread_flag(current, TIF_DEBUG)) +#else +#ifdef CONFIG_X86_64 + if (!current->thread.debugreg7) +#else + if (!current->thread.debugreg[7]) +#endif +#endif + __preempt_disable_notifiers(); } void preempt_notifier_sys_exit(void) > > > kvm: emulating preempt notifiers; do not benchmark on this machine > > loaded kvm module (kvm-56-127-g433be51) > > vmwrite error: reg c08 value d8 (err 3080) > > [<f8baf9e2>] vmx_save_host_state+0x4f/0x162 [kvm_intel] > > [<c0425803>] __cond_resched+0x25/0x3c > > [<f91a22a4>] kvm_arch_vcpu_ioctl_run+0x16f/0x3a7 [kvm] > > [<f919f244>] kvm_vcpu_ioctl+0xcb/0x28f [kvm] > > [<c0421987>] enqueue_entity+0x2c0/0x2ea > > [<c05a8340>] 
skb_dequeue+0x39/0x3f > > [<c0604b6d>] unix_stream_recvmsg+0x3a2/0x4c3 > > [<c0425c82>] scheduler_tick+0x1a1/0x274 > > [<c0487329>] core_sys_select+0x21f/0x2fa > > [<c043e9e6>] clockevents_program_event+0xb5/0xbc > > [<c04c6853>] avc_has_perm+0x4e/0x58 > > [<c04c7174>] inode_has_perm+0x66/0x6e > > [<c0430bed>] recalc_sigpending+0xb/0x1d > > [<c043231d>] dequeue_signal+0xa9/0x12a > > [<c043cb95>] getnstimeofday+0x30/0xbf > > [<c04c7205>] file_has_perm+0x89/0x91 > > [<f919f179>] kvm_vcpu_ioctl+0x0/0x28f [kvm] > > [<c04861b9>] do_ioctl+0x21/0xa0 > > [<c048646f>] vfs_ioctl+0x237/0x249 > > [<c04864cd>] sys_ioctl+0x4c/0x67 > > [<c0404f26>] sysenter_past_esp+0x5f/0x85 > > ======================= > > vmwrite error means the vmcs pointer was not loaded, probably because > the sched_in event did not fire after a vcpu migration. > > -- > Do not meddle in the internals of kernels, for they are subtle and quick to > panic. ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2005. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel