[PATCH 2/3] KVM: dynamise halt_poll_ns adjustment
There are two new kernel parameters for changing the halt_poll_ns: halt_poll_ns_grow and halt_poll_ns_shrink. halt_poll_ns_grow affects halt_poll_ns when an interrupt arrives and halt_poll_ns_shrink does it when idle VCPU is detected. halt_poll_ns_shrink/ | halt_poll_ns_grow| interrupt arrives| idle VCPU is detected -+--+--- 1 | = halt_poll_ns | = 0 halt_poll_ns | *= halt_poll_ns_grow | /= halt_poll_ns_shrink otherwise| += halt_poll_ns_grow | -= halt_poll_ns_shrink A third new parameter, halt_poll_ns_max, controls the maximal halt_poll_ns; it is internally rounded down to a closest multiple of halt_poll_ns_grow. Signed-off-by: Wanpeng Li wanpeng...@hotmail.com --- virt/kvm/kvm_main.c | 81 - 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a122b52..bcfbd35 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -66,9 +66,28 @@ MODULE_AUTHOR(Qumranet); MODULE_LICENSE(GPL); -static unsigned int halt_poll_ns; +#define KVM_HALT_POLL_NS 50 +#define KVM_HALT_POLL_NS_GROW 2 +#define KVM_HALT_POLL_NS_SHRINK 0 +#define KVM_HALT_POLL_NS_MAX \ + INT_MAX / KVM_HALT_POLL_NS_GROW + +static unsigned int halt_poll_ns = KVM_HALT_POLL_NS; module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); +/* Default doubles per-vcpu halt_poll_ns. */ +static int halt_poll_ns_grow = KVM_HALT_POLL_NS_GROW; +module_param(halt_poll_ns_grow, int, S_IRUGO); + +/* Default resets per-vcpu halt_poll_ns . */ +int halt_poll_ns_shrink = KVM_HALT_POLL_NS_SHRINK; +module_param(halt_poll_ns_shrink, int, S_IRUGO); + +/* Default is to compute the maximum so we can never overflow. 
*/ +unsigned int halt_poll_ns_actual_max = KVM_HALT_POLL_NS_MAX; +unsigned int halt_poll_ns_max = KVM_HALT_POLL_NS_MAX; +module_param(halt_poll_ns_max, int, S_IRUGO); + /* * Ordering of locks: * @@ -1907,6 +1926,62 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); +static unsigned int __grow_halt_poll_ns(unsigned int val) +{ + if (halt_poll_ns_grow 1) + return halt_poll_ns; + + val = min(val, halt_poll_ns_actual_max); + + if (val == 0) + return halt_poll_ns; + + if (halt_poll_ns_grow halt_poll_ns) + val *= halt_poll_ns_grow; + else + val += halt_poll_ns_grow; + + return val; +} + +static unsigned int __shrink_halt_poll_ns(int val, int modifier, int minimum) +{ + if (modifier 1) + return 0; + + if (modifier halt_poll_ns) + val /= modifier; + else + val -= modifier; + + return val; +} + +static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) +{ + vcpu-halt_poll_ns = __grow_halt_poll_ns(vcpu-halt_poll_ns); +} + +static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) +{ + vcpu-halt_poll_ns = __shrink_halt_poll_ns(vcpu-halt_poll_ns, + halt_poll_ns_shrink, halt_poll_ns); +} + +/* + * halt_poll_ns_actual_max is computed to be one grow_halt_poll_ns() below + * halt_poll_ns_max. (See __grow_halt_poll_ns for the reason.) + * This prevents overflows, because ple_halt_poll_ns is int. + * halt_poll_ns_max effectively rounded down to a multiple of halt_poll_ns_grow in + * this process. 
+ */ +static void update_halt_poll_ns_actual_max(void) +{ + halt_poll_ns_actual_max = + __shrink_halt_poll_ns(max(halt_poll_ns_max, halt_poll_ns), + halt_poll_ns_grow, INT_MIN); +} + static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) { if (kvm_arch_vcpu_runnable(vcpu)) { @@ -1941,6 +2016,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) */ if (kvm_vcpu_check_block(vcpu) 0) { ++vcpu-stat.halt_successful_poll; + grow_halt_poll_ns(vcpu); goto out; } cur = ktime_get(); @@ -1954,6 +2030,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) break; waited = true; + if (halt_poll_ns) + shrink_halt_poll_ns(vcpu); schedule(); } @@ -2950,6 +3028,7 @@ static void hardware_enable_nolock(void *junk) cpumask_set_cpu(cpu, cpus_hardware_enabled); r = kvm_arch_hardware_enable(); + update_halt_poll_ns_actual_max(); if (r) { cpumask_clear_cpu(cpu, cpus_hardware_enabled); -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/3] KVM: Dynamic halt_poll_ns
There is a downside of halt_poll_ns since poll is still happen for idle VCPU which can waste cpu usage. This patchset add the ability to adjust halt_poll_ns dynamically, grows halt_poll_ns if an interrupt arrives and shrinks halt_poll_ns when idle VCPU is detected. There are two new kernel parameters for changing the halt_poll_ns: halt_poll_ns_grow and halt_poll_ns_shrink. halt_poll_ns_grow affects halt_poll_ns when an interrupt arrives and halt_poll_ns_shrink does it when idle VCPU is detected. halt_poll_ns_shrink/ | halt_poll_ns_grow| interrupt arrives| idle VCPU is detected -+--+--- 1 | = halt_poll_ns | = 0 halt_poll_ns | *= halt_poll_ns_grow | /= halt_poll_ns_shrink otherwise| += halt_poll_ns_grow | -= halt_poll_ns_shrink A third new parameter, halt_poll_ns_max, controls the maximal halt_poll_ns; it is internally rounded down to a closest multiple of halt_poll_ns_grow. Wanpeng Li (3): KVM: make halt_poll_ns per-VCPU KVM: dynamise halt_poll_ns adjustment KVM: trace kvm_halt_poll_ns grow/shrink include/linux/kvm_host.h | 1 + include/trace/events/kvm.h | 30 virt/kvm/kvm_main.c| 90 +- 3 files changed, 120 insertions(+), 1 deletion(-) -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/3] KVM: trace kvm_halt_poll_ns grow/shrink
Tracepoint for dynamic halt_pool_ns, fired on every potential change. Signed-off-by: Wanpeng Li wanpeng...@hotmail.com --- include/trace/events/kvm.h | 30 ++ virt/kvm/kvm_main.c| 8 2 files changed, 38 insertions(+) diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index a44062d..75ddf80 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -356,6 +356,36 @@ TRACE_EVENT( __entry-address) ); +TRACE_EVENT(kvm_halt_poll_ns, + TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), + TP_ARGS(grow, vcpu_id, new, old), + + TP_STRUCT__entry( + __field(bool, grow) + __field(unsigned int, vcpu_id) + __field(int, new) + __field(int, old) + ), + + TP_fast_assign( + __entry-grow = grow; + __entry-vcpu_id= vcpu_id; + __entry-new= new; + __entry-old= old; + ), + + TP_printk(vcpu %u: halt_pool_ns %d (%s %d), + __entry-vcpu_id, + __entry-new, + __entry-grow ? grow : shrink, + __entry-old) +); + +#define trace_kvm_halt_poll_ns_grow(vcpu_id, new, old) \ + trace_kvm_halt_poll_ns(true, vcpu_id, new, old) +#define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \ + trace_kvm_halt_poll_ns(false, vcpu_id, new, old) + #endif #endif /* _TRACE_KVM_MAIN_H */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bcfbd35..65e3631 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1959,13 +1959,21 @@ static unsigned int __shrink_halt_poll_ns(int val, int modifier, int minimum) static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) { + int old = vcpu-halt_poll_ns; + vcpu-halt_poll_ns = __grow_halt_poll_ns(vcpu-halt_poll_ns); + + trace_kvm_halt_poll_ns_grow(vcpu-vcpu_id, vcpu-halt_poll_ns, old); } static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) { + int old = vcpu-halt_poll_ns; + vcpu-halt_poll_ns = __shrink_halt_poll_ns(vcpu-halt_poll_ns, halt_poll_ns_shrink, halt_poll_ns); + + trace_kvm_halt_poll_ns_shrink(vcpu-vcpu_id, vcpu-halt_poll_ns, old); } /* -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in 
the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/3] KVM: make halt_poll_ns per-VCPU
Change halt_poll_ns into per-VCPU variable, seeded from module parameter, to allow greater flexibility. Signed-off-by: Wanpeng Li wanpeng...@hotmail.com --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 81089cf..1bef9e2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -242,6 +242,7 @@ struct kvm_vcpu { int sigset_active; sigset_t sigset; struct kvm_vcpu_stat stat; + unsigned int halt_poll_ns; #ifdef CONFIG_HAS_IOMEM int mmio_needed; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d8db2f8f..a122b52 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -217,6 +217,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu-kvm = kvm; vcpu-vcpu_id = id; vcpu-pid = NULL; + vcpu-halt_poll_ns = halt_poll_ns; init_waitqueue_head(vcpu-wq); kvm_async_pf_vcpu_init(vcpu); -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] kvm: don't register wildcard MMIO EVENTFD on two buses
On Mon, 24 Aug 2015 11:29:29 +0800 Jason Wang jasow...@redhat.com wrote: On 08/21/2015 05:29 PM, Cornelia Huck wrote: On Fri, 21 Aug 2015 16:03:52 +0800 Jason Wang jasow...@redhat.com wrote: @@ -850,9 +845,15 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) Unfortunately snipped by diff, but the check here is on !len !PIO, which only does the desired thing as VIRTIO_CCW always uses len == 8. Should the check be for !len MMIO instead? I think the answer depends on whether len == 0 is valid for ccw. If not we can fail the assign earlier. Since even without this patch, if userspace tries to register a dev with len equals to zero, it will also be registered to KVM_FAST_MMIO_BUS. If yes, we need check as you suggested here. I don't think len != 8 makes much sense for the way ioeventfd is defined for ccw (we handle hypercalls with a payload specifying the device), but we currently don't actively fence it. But regardless, I'd prefer to decide directly upon whether userspace actually tried to register for the mmio bus. ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS, p-addr, 0, p-dev); if (ret 0) - goto register_fail; + goto unlock_fail; + } else { + ret = kvm_io_bus_register_dev(kvm, bus_idx, p-addr, p-length, +p-dev); + if (ret 0) + goto unlock_fail; } Hm... maybe the following would be more obvious: my_bus = (p-length == 0) (bus_idx == KVM_MMIO_BUS) ? KVM_FAST_MMIO_BUS : bus_idx; ret = kvm_io_bus_register_dev(kvm, my_bus, p-addr, p-length, pdev-dev); + kvm-buses[bus_idx]-ioeventfd_count++; list_add_tail(p-list, kvm-ioeventfds); (...) @@ -900,10 +899,11 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (!p-wildcard p-datamatch != args-datamatch) continue; - kvm_io_bus_unregister_dev(kvm, bus_idx, p-dev); if (!p-length) { kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS, p-dev); + } else { + kvm_io_bus_unregister_dev(kvm, bus_idx, p-dev); } Similar comments here... do you want to check for bus_idx == KVM_MMIO_BUS as well? 
Good catch. I think keep the original code as is will be also ok to solve this. (with changing the bus_idx to KVM_FAST_MMIO_BUS during registering if it was an wildcard mmio). Do you need to handle the ioeventfd_count changes on the fast mmio bus as well? kvm-buses[bus_idx]-ioeventfd_count--; ioeventfd_release(p); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM slow LAMP guest
On 24-8-2015 1:26, Wanpeng Li wrote: On 8/24/15 3:18 AM, Hansa wrote: On 16-7-2015 13:27, Paolo Bonzini wrote: On 15/07/2015 22:02, C. Bröcker wrote: What OS is this? Is it RHEL/CentOS? If so, halt_poll_ns will be in 6.7 which will be out in a few days/weeks. Paolo OK. As said CentOS 6.6. But where do I put this parameter? You can add kvm.halt_poll_ns=50 to the kernel command line. If you have the parameter, you have the /sys/module/kvm/parameters/halt_poll_ns file. Hi, I upgraded to the CentOS 6.7 release which came out last month and as promised the halt_poll_ns parameter was available. Last week I tested the availability status every 5 minutes on my Wordpress VM's with the halt_poll_ns kernel param set on DOM0. I'm pleased to announce that it solves the problem! How much seconds to load your Wordpress site this time? Regards, Wanpeng Li I've only tested availability using the webmin's Remote HTTP Service monitoring tool. Currently it only times out when doing backups. I will do some tests with curl and let you know the result in a day or so. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 14/15] KVM: arm64: implement MSI injection in ITS emulation
Hi, On 03/08/15 18:06, Marc Zyngier wrote: On 03/08/15 16:37, Eric Auger wrote: Andre, Pavel, On 08/03/2015 11:16 AM, Pavel Fedin wrote: Hello! Again the case that leaves me uncomfortable is the one where the userspace does not provide the devid whereas it must (GICv3 ITS case). Hypothetical broken userland which does not exist for now ? Yes but that's the rule to be not confident in *any* userspace, isn't it? Well, that's only regarding safety, not regarding functionality, right? So if we could break the kernel by not providing the flag and/or devid, this needs to be fixed. But if it just doesn't work, that's OK. As of now I prefer keeping the flags at uapi level and propagate it downto the kernel, as long as I don't have any answer for the unset devid discrimination question. Please apologize for my stubbornness ;-) I think this flag should be kept, as it really indicates what is valid in the MSI structure. It also has other benefits such as making obvious what userspace expects, which can then be checked against the kernel's own expectations. I agree on this. Usually this kind of redundancy leads to strange code, but this does not seem to apply here, since we can at least still guard the assignments to demonstrate that the devid field needs to go along with the flag. Cheers, Andre. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
linux-kvm.org wiki
To whom it may concern: I'm trying to make an edit to this page: http://www.linux-kvm.org/index.php?title=Windows7Install I have successfully created an account. When I Save the edit to the page, it looks like everything worked okay. However, when I refresh the page, my edit is gone, and no history of my edit is visible in the page history or my contributions page. Does anyone have any idea what's going on? I've edited lots of MediaWiki sites before, so I'm *pretty* sure I'm not doing anything wrong. Thanks, Jonathon -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Bug 103141] Host-triggerable NULL pointer oops
https://bugzilla.kernel.org/show_bug.cgi?id=103141 --- Comment #2 from felix felix.vo...@posteo.de --- Created attachment 185681 -- https://bugzilla.kernel.org/attachment.cgi?id=185681action=edit Test program 2 (C99) You mean can as in I think it does or it did for me? And anyway, it seems to only fix the most proximate cause of the crash. My biggest worry is that KVM_SET_USER_MEMORY_REGION ioctls with guest_phys_addr around the 0xfff0 to 0x range seem not to register; starting the VM looks like as if the region wasn't placed there. I attach test program 2. Running that on my system with 0x44000 as an argument outputs halted (as expected), but 0x45000 and larger multiples of 0x1000 give internal error, subcode 1. -- You are receiving this mail because: You are watching the assignee of the bug. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Bug 103141] Host-triggerable NULL pointer oops
https://bugzilla.kernel.org/show_bug.cgi?id=103141 felix felix.vo...@posteo.de changed: What|Removed |Added Attachment #185681|0 |1 is obsolete|| --- Comment #3 from felix felix.vo...@posteo.de --- Created attachment 185691 -- https://bugzilla.kernel.org/attachment.cgi?id=185691action=edit Test program 2 (C99) [non-oopsing version] -- You are receiving this mail because: You are watching the assignee of the bug. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 01/15] KVM: arm/arm64: VGIC: don't track used LRs in the distributor
Hi Eric, On 12/08/15 10:01, Eric Auger wrote: diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index bc40137..394622c 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -79,7 +79,6 @@ #include vgic.h static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); -static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); @@ -647,6 +646,17 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, return false; } +static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr vlr) +{ +vgic_ops-sync_lr_elrsr(vcpu, lr, vlr); +} why not renaming this into vgic_set_elrsr. This would be homogeneous with other virtual interface control register setters? But that would involve renaming the vgic_ops members as well to be consistent, right? As there is no change in the behaviour, a naming change sounds unmotivated to me. And _set_ wouldn't be exact, as this function deals only with only one bit at a time and allows to clear it as well. + +static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) +{ +return vgic_ops-get_elrsr(vcpu); +} If I am not wrong, each time you manipulate the elrsr you handle the bitmap. why not directly returning an unsigned long * then (elrsr_ptr)? Because the pointer needs to point somewhere, and that storage is currently located on the caller's stack. Directly returning a pointer would require the caller to provide some memory for the u64, which does not save you so much in terms on LOC: - u64 elrsr = vgic_get_elrsr(vcpu); - unsigned long *elrsr_ptr = u64_to_bitmask(elrsr); + u64 elrsr; + unsigned long *elrsr_ptr = vgic_get_elrsr_bm(vcpu, elrsr); Also we need u64_to_bitmask() in one case when converting the EISR value, so we cannot get lost of that function. 
+ /** * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs @@ -658,9 +668,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = vcpu-arch.vgic_cpu; +u64 elrsr = vgic_get_elrsr(vcpu); +unsigned long *elrsr_ptr = u64_to_bitmask(elrsr); int i; -for_each_set_bit(i, vgic_cpu-lr_used, vgic_cpu-nr_lr) { +for_each_clear_bit(i, elrsr_ptr, vgic_cpu-nr_lr) { struct vgic_lr lr = vgic_get_lr(vcpu, i); /* @@ -703,7 +715,7 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * Mark the LR as free for other use. */ BUG_ON(lr.state LR_STATE_MASK); -vgic_retire_lr(i, lr.irq, vcpu); +vgic_sync_lr_elrsr(vcpu, i, lr); vgic_irq_clear_queued(vcpu, lr.irq); /* Finally update the VGIC state. */ @@ -1011,17 +1023,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, vgic_ops-set_lr(vcpu, lr, vlr); } -static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr vlr) -{ -vgic_ops-sync_lr_elrsr(vcpu, lr, vlr); -} - -static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) -{ -return vgic_ops-get_elrsr(vcpu); -} - static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) { return vgic_ops-get_eisr(vcpu); @@ -1062,18 +1063,6 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu) vgic_ops-enable(vcpu); } -static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) -{ -struct vgic_cpu *vgic_cpu = vcpu-arch.vgic_cpu; -struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); - -vlr.state = 0; -vgic_set_lr(vcpu, lr_nr, vlr); -clear_bit(lr_nr, vgic_cpu-lr_used); -vgic_cpu-vgic_irq_lr_map[irq] = LR_EMPTY; -vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); -} - /* * An interrupt may have been disabled after being made pending on the * CPU interface (the classic case is a timer running while we're @@ -1085,23 +1074,32 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) */ static void 
vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) { -struct vgic_cpu *vgic_cpu = vcpu-arch.vgic_cpu; +u64 elrsr = vgic_get_elrsr(vcpu); +unsigned long *elrsr_ptr = u64_to_bitmask(elrsr); if you agree with above modif I would simply rename elrsr_ptr into elrsr. int lr; +struct vgic_lr vlr; why moving this declaration here. I think this can remain in the block. Possibly. Don't remember the reason of this move, I think it was due to some other changes I later removed. I will revert it. -for_each_set_bit(lr, vgic_cpu-lr_used, vgic-nr_lr) { -struct vgic_lr vlr = vgic_get_lr(vcpu, lr); +for_each_clear_bit(lr, elrsr_ptr,
Re: [PATCH 1/3] KVM: make halt_poll_ns per-VCPU
On Mon, Aug 24, 2015 at 5:53 AM, Wanpeng Li wanpeng...@hotmail.com wrote: Change halt_poll_ns into per-VCPU variable, seeded from module parameter, to allow greater flexibility. You should also change kvm_vcpu_block to read halt_poll_ns from the vcpu instead of the module parameter. Signed-off-by: Wanpeng Li wanpeng...@hotmail.com --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 81089cf..1bef9e2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -242,6 +242,7 @@ struct kvm_vcpu { int sigset_active; sigset_t sigset; struct kvm_vcpu_stat stat; + unsigned int halt_poll_ns; #ifdef CONFIG_HAS_IOMEM int mmio_needed; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d8db2f8f..a122b52 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -217,6 +217,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu-kvm = kvm; vcpu-vcpu_id = id; vcpu-pid = NULL; + vcpu-halt_poll_ns = halt_poll_ns; init_waitqueue_head(vcpu-wq); kvm_async_pf_vcpu_init(vcpu); -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] KVM: dynamise halt_poll_ns adjustment
On Mon, Aug 24, 2015 at 5:53 AM, Wanpeng Li wanpeng...@hotmail.com wrote: There are two new kernel parameters for changing the halt_poll_ns: halt_poll_ns_grow and halt_poll_ns_shrink. halt_poll_ns_grow affects halt_poll_ns when an interrupt arrives and halt_poll_ns_shrink does it when idle VCPU is detected. halt_poll_ns_shrink/ | halt_poll_ns_grow| interrupt arrives| idle VCPU is detected -+--+--- 1 | = halt_poll_ns | = 0 halt_poll_ns | *= halt_poll_ns_grow | /= halt_poll_ns_shrink otherwise| += halt_poll_ns_grow | -= halt_poll_ns_shrink A third new parameter, halt_poll_ns_max, controls the maximal halt_poll_ns; it is internally rounded down to a closest multiple of halt_poll_ns_grow. I like the idea of growing and shrinking halt_poll_ns, but I'm not sure we grow and shrink in the right places here. For example, if vcpu-halt_poll_ns gets down to 0, I don't see how it can then grow back up. This might work better: if (poll successfully for interrupt): stay the same else if (length of kvm_vcpu_block is longer than halt_poll_ns_max): shrink else if (length of kvm_vcpu_block is less than halt_poll_ns_max): grow where halt_poll_ns_max is something reasonable, like 2 millisecond. You get diminishing returns from halt polling as the length of the halt gets longer (halt polling only reduces halt latency by 10-15 us). So there's little benefit to polling longer than a few milliseconds. 
Signed-off-by: Wanpeng Li wanpeng...@hotmail.com --- virt/kvm/kvm_main.c | 81 - 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a122b52..bcfbd35 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -66,9 +66,28 @@ MODULE_AUTHOR(Qumranet); MODULE_LICENSE(GPL); -static unsigned int halt_poll_ns; +#define KVM_HALT_POLL_NS 50 +#define KVM_HALT_POLL_NS_GROW 2 +#define KVM_HALT_POLL_NS_SHRINK 0 +#define KVM_HALT_POLL_NS_MAX \ + INT_MAX / KVM_HALT_POLL_NS_GROW + +static unsigned int halt_poll_ns = KVM_HALT_POLL_NS; module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); +/* Default doubles per-vcpu halt_poll_ns. */ +static int halt_poll_ns_grow = KVM_HALT_POLL_NS_GROW; +module_param(halt_poll_ns_grow, int, S_IRUGO); + +/* Default resets per-vcpu halt_poll_ns . */ +int halt_poll_ns_shrink = KVM_HALT_POLL_NS_SHRINK; +module_param(halt_poll_ns_shrink, int, S_IRUGO); + +/* Default is to compute the maximum so we can never overflow. 
*/ +unsigned int halt_poll_ns_actual_max = KVM_HALT_POLL_NS_MAX; +unsigned int halt_poll_ns_max = KVM_HALT_POLL_NS_MAX; +module_param(halt_poll_ns_max, int, S_IRUGO); + /* * Ordering of locks: * @@ -1907,6 +1926,62 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); +static unsigned int __grow_halt_poll_ns(unsigned int val) +{ + if (halt_poll_ns_grow 1) + return halt_poll_ns; + + val = min(val, halt_poll_ns_actual_max); + + if (val == 0) + return halt_poll_ns; + + if (halt_poll_ns_grow halt_poll_ns) + val *= halt_poll_ns_grow; + else + val += halt_poll_ns_grow; + + return val; +} + +static unsigned int __shrink_halt_poll_ns(int val, int modifier, int minimum) +{ + if (modifier 1) + return 0; + + if (modifier halt_poll_ns) + val /= modifier; + else + val -= modifier; + + return val; +} + +static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) +{ + vcpu-halt_poll_ns = __grow_halt_poll_ns(vcpu-halt_poll_ns); +} + +static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) +{ + vcpu-halt_poll_ns = __shrink_halt_poll_ns(vcpu-halt_poll_ns, + halt_poll_ns_shrink, halt_poll_ns); +} + +/* + * halt_poll_ns_actual_max is computed to be one grow_halt_poll_ns() below + * halt_poll_ns_max. (See __grow_halt_poll_ns for the reason.) + * This prevents overflows, because ple_halt_poll_ns is int. + * halt_poll_ns_max effectively rounded down to a multiple of halt_poll_ns_grow in + * this process. + */ +static void update_halt_poll_ns_actual_max(void) +{ + halt_poll_ns_actual_max = + __shrink_halt_poll_ns(max(halt_poll_ns_max, halt_poll_ns), + halt_poll_ns_grow, INT_MIN); +} + static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) { if (kvm_arch_vcpu_runnable(vcpu)) { @@ -1941,6 +2016,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) */ if (kvm_vcpu_check_block(vcpu) 0) { ++vcpu-stat.halt_successful_poll; +
Re: [PATCH v2 05/15] KVM: arm/arm64: make GIC frame address initialization model specific
Hi, On 12/08/15 14:02, Eric Auger wrote: On 07/10/2015 04:21 PM, Andre Przywara wrote: Currently we initialize all the possible GIC frame addresses in one function, without looking at the specific GIC model we instantiate for the guest. As this gets confusing when adding another VGIC model later, lets move these initializations into the respective model's init nit: tobe more precise the init emulation function (not the vgic_v2/v3_init_model model's init function). pfouh?! ;-) functions. OK, will try to find a wording that is not completely confusing. Signed-off-by: Andre Przywara andre.przyw...@arm.com --- virt/kvm/arm/vgic-v2-emul.c | 3 +++ virt/kvm/arm/vgic-v3-emul.c | 3 +++ virt/kvm/arm/vgic.c | 3 --- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index 1390797..8faa28c 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -567,6 +567,9 @@ void vgic_v2_init_emulation(struct kvm *kvm) dist-vm_ops.init_model = vgic_v2_init_model; dist-vm_ops.map_resources = vgic_v2_map_resources; +dist-vgic_cpu_base = VGIC_ADDR_UNDEF; +dist-vgic_dist_base = VGIC_ADDR_UNDEF; Looks strange to see the common dist_base here. Why don't you leave it in common part, kvm_vgic_create; all the more so you left kvm-arch.vgic.vctrl_base = vgic-vctrl_base in kvm_vgic_create. The idea behind this is that dist_base refers to similar, but not identical distributors (v2 vs. v3), so I found it a good idea to initialize it in here. Also vctrl_base is host facing and not set by userland, so this doesn't really compare here. Cheers, Andre. 
+ kvm-arch.max_vcpus = VGIC_V2_MAX_CPUS; } diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index d2eeb20..1f42348 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -885,6 +885,9 @@ void vgic_v3_init_emulation(struct kvm *kvm) dist-vm_ops.destroy_model = vgic_v3_destroy_model; dist-vm_ops.map_resources = vgic_v3_map_resources; +dist-vgic_dist_base = VGIC_ADDR_UNDEF; +dist-vgic_redist_base = VGIC_ADDR_UNDEF; + kvm-arch.max_vcpus = KVM_MAX_VCPUS; } diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index cc8f5ed..59f1801 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1830,9 +1830,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm-arch.vgic.in_kernel = true; kvm-arch.vgic.vgic_model = type; kvm-arch.vgic.vctrl_base = vgic-vctrl_base; -kvm-arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; -kvm-arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; -kvm-arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; out_unlock: for (; vcpu_lock_idx = 0; vcpu_lock_idx--) { -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 07/15] KVM: arm64: handle ITS related GICv3 redistributor registers
Hi Eric, On 13/08/15 13:17, Eric Auger wrote: On 07/10/2015 04:21 PM, Andre Przywara wrote: In the GICv3 redistributor there are the PENDBASER and PROPBASER registers which we did not emulate so far, as they only make sense when having an ITS. In preparation for that emulate those MMIO accesses by storing the 64-bit data written into it into a variable which we later read in the ITS emulation. Signed-off-by: Andre Przywara andre.przyw...@arm.com --- include/kvm/arm_vgic.h | 8 virt/kvm/arm/vgic-v3-emul.c | 44 virt/kvm/arm/vgic.c | 35 +++ virt/kvm/arm/vgic.h | 4 4 files changed, 91 insertions(+) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 3ee063b..8c6cb0e 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -256,6 +256,14 @@ struct vgic_dist { struct vgic_vm_ops vm_ops; struct vgic_io_device dist_iodev; struct vgic_io_device *redist_iodevs; + +/* Address of LPI configuration table shared by all redistributors */ +u64 propbaser; + +/* Addresses of LPI pending tables per redistributor */ +u64 *pendbaser; + +boollpis_enabled; }; struct vgic_v2_cpu_if { diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index a8cf669..5269ad1 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -651,6 +651,38 @@ static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu, return vgic_handle_cfg_reg(reg, mmio, offset); } +/* We don't trigger any actions here, just store the register value */ +static bool handle_mmio_propbaser_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ +struct vgic_dist *dist = vcpu-kvm-arch.vgic; +int mode = ACCESS_READ_VALUE; + +/* Storing a value with LPIs already enabled is undefined */ +mode |= dist-lpis_enabled ? 
ACCESS_WRITE_IGNORED : ACCESS_WRITE_VALUE; +vgic_handle_base_register(vcpu, mmio, offset, dist-propbaser, mode); + +return false; +} + +/* We don't trigger any actions here, just store the register value */ +static bool handle_mmio_pendbaser_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ +struct kvm_vcpu *rdvcpu = mmio-private; +struct vgic_dist *dist = vcpu-kvm-arch.vgic; +int mode = ACCESS_READ_VALUE; + +/* Storing a value with LPIs already enabled is undefined */ +mode |= dist-lpis_enabled ? ACCESS_WRITE_IGNORED : ACCESS_WRITE_VALUE; +vgic_handle_base_register(vcpu, mmio, offset, + dist-pendbaser[rdvcpu-vcpu_id], mode); + +return false; +} + #define SGI_base(x) ((x) + SZ_64K) static const struct vgic_io_range vgic_redist_ranges[] = { @@ -679,6 +711,18 @@ static const struct vgic_io_range vgic_redist_ranges[] = { .handle_mmio= handle_mmio_raz_wi, }, { +.base = GICR_PENDBASER, +.len= 0x08, +.bits_per_irq = 0, +.handle_mmio= handle_mmio_pendbaser_redist, +}, +{ +.base = GICR_PROPBASER, +.len= 0x08, +.bits_per_irq = 0, +.handle_mmio= handle_mmio_propbaser_redist, +}, +{ .base = GICR_IDREGS, .len= 0x30, .bits_per_irq = 0, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 15e447f..49ee92b 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -446,6 +446,41 @@ void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, } } +/* handle a 64-bit register access */ +void vgic_handle_base_register(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, u64 *basereg, + int mode) +{ why do we have vcpu in the proto? I don't see it used. Also if it were can't we fetch it from mmio-private? why not renaming this into something like vgic_reg64_access as par vgic_reg_access 32b flavor above. vgic_handle* usually is the name of the region handler returning bool? Makes sense, I both renamed the function and removed the vcpu parameter. 
I need to check whether we need the vcpu to do some endianness checks in the future, though. Using mmio-private would be a hack, then. Cheers, Andre. +u32 reg; +u64 breg; + +switch (offset ~3) { +case 0x00: +breg = *basereg; +reg = lower_32_bits(breg); +vgic_reg_access(mmio, reg, offset 3, mode); +
Re: Build regressions/improvements in v4.2-rc8
On Mon, Aug 24, 2015 at 10:34 AM, Geert Uytterhoeven ge...@linux-m68k.org wrote: JFYI, when comparing v4.2-rc8[1] to v4.2-rc7[3], the summaries are: - build errors: +4/-7 4 regressions: + /home/kisskb/slave/src/include/linux/kvm_host.h: error: array subscript is above array bounds [-Werror=array-bounds]: = 430:19 (arch/powerpc/kvm/book3s_64_mmu.c: In function 'kvmppc_mmu_book3s_64_tlbie':) powerpc-randconfig (seen before in a v3.15-rc1 build?) + error: initramfs.c: undefined reference to `__stack_chk_guard': = .init.text+0x1cb0) (init/built-in.o: In function `parse_header':) x86_64-randconfig + error: pci.c: undefined reference to `pci_ioremap_io': = .init.text+0x3c4) (arch/arm/mach-versatile/built-in.o: In function `pci_versatile_setup') arm-randconfig [1] http://kisskb.ellerman.id.au/kisskb/head/9289/ (253 out of 254 configs) [3] http://kisskb.ellerman.id.au/kisskb/head/9260/ (253 out of 254 configs) Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say programmer or something like that. -- Linus Torvalds -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PULL 00/12] ppc patch queue 2015-08-22
On 24/08/2015 06:49, Alexander Graf wrote: Hi Paolo, This is my current patch queue for ppc. Please pull. Done, but this queue has not been in linux-next. Please push to kvm-ppc-next on your github Linux tree as well; please keep an eye on Ah, sorry. I pushed to kvm-ppc-next in parallel to sending the request. No problem, and Linus in the end did do an rc8 so I can wait till I'm back for sending the PPC/ARM pull request. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PULL 00/12] ppc patch queue 2015-08-22
On 24/08/2015 06:49, Alexander Graf wrote: Hi Paolo, This is my current patch queue for ppc. Please pull. Done, but this queue has not been in linux-next. Please push to kvm-ppc-next on your github Linux tree as well; please keep an eye on Ah, sorry. I pushed to kvm-ppc-next in parallel to sending the request. No problem, and Linus in the end did do an rc8 so I can wait till I'm back for sending the PPC/ARM pull request. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 4/5] KVM: x86: enable unhandled MSR exits for vmx
Peter Hornyack peterhorny...@google.com writes: Set the vm's unhandled_msr_exits flag when user space calls the KVM_ENABLE_CAP ioctl with KVM_CAP_UNHANDLED_MSR_EXITS. After kvm fails to handle a guest rdmsr or wrmsr, check this flag and exit to user space with KVM_EXIT_MSR rather than immediately injecting a GP fault. On reentry into kvm, use the complete_userspace_io callback path to call vmx_complete_userspace_msr. Complete the MSR access if user space was able to handle it successfully, or fail the MSR access and inject a GP fault if user space could not handle the access. ... +static int vmx_complete_userspace_msr(struct kvm_vcpu *vcpu) +{ + struct msr_data msr; + + if (vcpu-run-msr.index != vcpu-arch.regs[VCPU_REGS_RCX]) { + pr_debug(msr.index 0x%x changed, does not match ecx 0x%lx\n, + vcpu-run-msr.index, + vcpu-arch.regs[VCPU_REGS_RCX]); + goto err_out; + } + msr.index = vcpu-run-msr.index; + msr.data = vcpu-run-msr.data; + msr.host_initiated = false; + + switch (vcpu-run-msr.direction) { + case KVM_EXIT_MSR_RDMSR: + if (vcpu-run-msr.handled == KVM_EXIT_MSR_HANDLED) + complete_rdmsr(vcpu, msr); + else + fail_rdmsr(vcpu, msr); + break; Should we check for MSR_UNHANDLED ? Could be debugging relief if userspace is filling it up with random crap! I think it should be ok if the trace function catches that too. + case KVM_EXIT_MSR_WRMSR: + if (vcpu-run-msr.handled == KVM_EXIT_MSR_HANDLED) + complete_wrmsr(vcpu, msr); + else + fail_wrmsr(vcpu, msr); + break; + default: + pr_debug(bad msr.direction %u\n, vcpu-run-msr.direction); + goto err_out; + } + + return 1; +err_out: + vcpu-run-exit_reason = KVM_EXIT_MSR; + vcpu-run-msr.direction = KVM_EXIT_MSR_COMPLETION_FAILED; + return 0; +} + +/* + * Returns 1 if the rdmsr handling is complete; returns 0 if kvm should exit to + * user space to handle this rdmsr. 
+ */ static int handle_rdmsr(struct kvm_vcpu *vcpu) { u32 ecx = vcpu-arch.regs[VCPU_REGS_RCX]; @@ -5499,6 +5547,16 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu) msr_info.index = ecx; msr_info.host_initiated = false; if (vmx_get_msr(vcpu, msr_info)) { + if (vcpu-kvm-arch.unhandled_msr_exits) { + vcpu-run-exit_reason = KVM_EXIT_MSR; + vcpu-run-msr.direction = KVM_EXIT_MSR_RDMSR; + vcpu-run-msr.index = msr_info.index; + vcpu-run-msr.data = 0; + vcpu-run-msr.handled = 0; + vcpu-arch.complete_userspace_io = + vmx_complete_userspace_msr; + return 0; + } fail_rdmsr(vcpu, msr_info); return 1; } @@ -5508,6 +5566,10 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu) return 1; } +/* + * Returns 1 if the wrmsr handling is complete; returns 0 if kvm should exit to + * user space to handle this wrmsr. + */ static int handle_wrmsr(struct kvm_vcpu *vcpu) { struct msr_data msr; @@ -5519,6 +5581,16 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu) msr.index = ecx; msr.host_initiated = false; if (kvm_set_msr(vcpu, msr) != 0) { + if (vcpu-kvm-arch.unhandled_msr_exits) { I think kvm should ultimately decide which msrs it can let userspace handle even if userspace has explicitly enabled the ioctl. 
Bandan + vcpu-run-exit_reason = KVM_EXIT_MSR; + vcpu-run-msr.direction = KVM_EXIT_MSR_WRMSR; + vcpu-run-msr.index = msr.index; + vcpu-run-msr.data = msr.data; + vcpu-run-msr.handled = 0; + vcpu-arch.complete_userspace_io = + vmx_complete_userspace_msr; + return 0; + } fail_wrmsr(vcpu, msr); return 1; } @@ -8163,7 +8235,7 @@ static bool vmx_xsaves_supported(void) static bool vmx_msr_exits_supported(void) { - return false; + return true; } static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4bbc2a1676c9..5c22f4655741 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2461,6 +2461,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_DISABLE_QUIRKS: case KVM_CAP_SET_BOOT_CPU_ID: + case KVM_CAP_UNHANDLED_MSR_EXITS: #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: @@ -3568,6
Re: [RFC PATCH 3/5] KVM: x86: add msr_exits_supported to kvm_x86_ops
Peter Hornyack peterhorny...@google.com writes: msr_exits_supported will be checked when user space attempts to enable the KVM_CAP_UNHANDLED_MSR_EXITS capability for the vm. This is needed because MSR exit support will be implemented for vmx but not svm later in this patchset. Is svm future work ? :) Are there any such svm msrs ? Signed-off-by: Peter Hornyack peterhorny...@google.com --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 6 ++ arch/x86/kvm/vmx.c | 6 ++ 3 files changed, 13 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c12e845f59e6..a6e145b1e271 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -854,6 +854,7 @@ struct kvm_x86_ops { void (*handle_external_intr)(struct kvm_vcpu *vcpu); bool (*mpx_supported)(void); bool (*xsaves_supported)(void); + bool (*msr_exits_supported)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 74d825716f4f..bcbb56f49b9f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4249,6 +4249,11 @@ static bool svm_xsaves_supported(void) return false; } +static bool svm_msr_exits_supported(void) +{ + return false; +} + static bool svm_has_wbinvd_exit(void) { return true; @@ -4540,6 +4545,7 @@ static struct kvm_x86_ops svm_x86_ops = { .invpcid_supported = svm_invpcid_supported, .mpx_supported = svm_mpx_supported, .xsaves_supported = svm_xsaves_supported, + .msr_exits_supported = svm_msr_exits_supported, .set_supported_cpuid = svm_set_supported_cpuid, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index acc38e27d221..27fec385d79d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8161,6 +8161,11 @@ static bool vmx_xsaves_supported(void) SECONDARY_EXEC_XSAVES; } +static bool vmx_msr_exits_supported(void) +{ + return false; +} + static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -10413,6 +10418,7 @@ 
static struct kvm_x86_ops vmx_x86_ops = { .handle_external_intr = vmx_handle_external_intr, .mpx_supported = vmx_mpx_supported, .xsaves_supported = vmx_xsaves_supported, + .msr_exits_supported = vmx_msr_exits_supported, .check_nested_events = vmx_check_nested_events, -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Bug 103321] New: NPT page attribute support causes extreme slowdown
Please try this: Still no difference I guess the trace_kvm_cr_write() call in that patch was supposed to trigger kvm_cr entries while tracing? I couldn't find any, though, the only entries containing cr within the output of trace-cmd report were kvm_exit ones that looked quite similar to the previous dump. If you still think it's worth it I'll send you the whole ~10MB compressed trace. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 0/5] KVM: x86: exit to user space on unhandled MSR accesses
Peter Hornyack peterhorny...@google.com writes: On Wed, Aug 19, 2015 at 2:43 PM, Bandan Das b...@redhat.com wrote: Peter Hornyack peterhorny...@google.com writes: There are numerous MSRs that kvm does not currently handle. On Intel platforms we have observed guest VMs accessing some of these MSRs (for example, MSR_PLATFORM_INFO) and behaving poorly (to the point of guest OS crashes) when they receive a GP fault because the MSR is not emulated. This patchset adds a new kvm exit path for unhandled MSR accesses that allows user space to emulate additional MSRs without having to implement them in kvm. ^^ So, I am trying to understand this motivation. A while back when a patch was posted to emulate MSR_PLATFORM_INFO, it was rejected. Why ? Because it seemed impossible to emulate it correctly (most concerns were related to migration iirc). Although I haven't reviewed all patches in this series yet, what I understand from the above message is-It's ok to emulate MSR_PLATFORM_INFO *incorrectly* as long as we are doing it in the user space. I understand the part where it makes sense to move stuff to userspace. But if kvm isn't emulating certain msrs yet, either we should add support, or they haven't been added because it's not possible to emulate them correctly. The logic that it's probably ok to let userspace do the (incorrect) emulation is something I don't understand. I wasn't aware of the past discussion of MSR_PLATFORM_INFO, I'll search for it - thanks for pointing it out. MSR_PLATFORM_INFO is just one example though. In addition to the benefits I already mentioned for user space MSR emulation (agility, reduced attack surface), this patchset offers a benefit even if user space declines to emulate the MSR by returning into kvm with KVM_EXIT_MSR_UNHANDLED: it makes it easier to monitor in the first place, via logging mechanisms in user space instead of in the kernel, for when guests are accessing MSRs that kvm doesn't implement. 
Agreed, that would be more useful then how it's being handled currently. This patchset has already revealed a few other MSRs (IA32_MPERF, IA32_APERF) that guests in our test environment are accessing but which kvm doesn't implement yet. I haven't yet investigated deeply what these MSRs are used for and how they might be emulated (or if they can't actually be emulated correctly), but if we do discover an unimplemented non-performance-sensitive MSR that we could emulate correctly, with this patchset we would quickly just implement it in user space. Ok, makes sense. But it seems like this could be an area of abuse as well. For example, this could lessen the incentive to add emulation for new msrs in kvm and rather just emulate them in userspace ? However, as you mentioned, it's probably ok as long as there's a clear demarcation of which msrs this is done for. This shouldn't be the default behavior. On a different note, if there's a related qemu change, can you please point me to it ? Thanks, Bandan It seems like the next in line is to let userspace emulate thier own version of unimplemented x86 instructions. The core of the patchset modifies the vmx handle_rdmsr and handle_wrmsr functions to exit to user space on MSR reads/writes that kvm can't handle itself. Then, on the return path into kvm we check for outstanding user space MSR completions and either complete the MSR access successfully or inject a GP fault as kvm would do by default. This new exit path must be enabled for the vm via the KVM_CAP_UNHANDLED_MSR_EXITS capability. In the future we plan to extend this functionality to allow user space to register the MSRs that it would like to handle itself, even if kvm already provides an implementation. In the long-term we will move the I seriously hope we don't do this! Bandan implementation of all non-performance-sensitive MSRs to user space, reducing the potential attack surface of kvm and allowing us to respond to bugs more quickly. 
This patchset has been tested with our non-qemu user space hypervisor on vmx platforms; svm support is not implemented. Peter Hornyack (5): KVM: x86: refactor vmx rdmsr/wrmsr completion into new functions KVM: add KVM_EXIT_MSR exit reason and capability. KVM: x86: add msr_exits_supported to kvm_x86_ops KVM: x86: enable unhandled MSR exits for vmx KVM: x86: add trace events for unhandled MSR exits Documentation/virtual/kvm/api.txt | 48 +++ arch/x86/include/asm/kvm_host.h | 2 + arch/x86/kvm/svm.c| 6 ++ arch/x86/kvm/trace.h | 28 + arch/x86/kvm/vmx.c| 126 ++ arch/x86/kvm/x86.c| 13 include/trace/events/kvm.h| 2 +- include/uapi/linux/kvm.h | 14 + 8 files changed, 227 insertions(+), 12 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to
Re: [PATCH 2/3] KVM: dynamise halt_poll_ns adjustment
Hi David, On 8/25/15 1:00 AM, David Matlack wrote: On Mon, Aug 24, 2015 at 5:53 AM, Wanpeng Li wanpeng...@hotmail.com wrote: There are two new kernel parameters for changing the halt_poll_ns: halt_poll_ns_grow and halt_poll_ns_shrink. halt_poll_ns_grow affects halt_poll_ns when an interrupt arrives and halt_poll_ns_shrink does it when idle VCPU is detected. halt_poll_ns_shrink/ | halt_poll_ns_grow| interrupt arrives| idle VCPU is detected -+--+--- 1 | = halt_poll_ns | = 0 halt_poll_ns | *= halt_poll_ns_grow | /= halt_poll_ns_shrink otherwise| += halt_poll_ns_grow | -= halt_poll_ns_shrink A third new parameter, halt_poll_ns_max, controls the maximal halt_poll_ns; it is internally rounded down to a closest multiple of halt_poll_ns_grow. I like the idea of growing and shrinking halt_poll_ns, but I'm not sure we grow and shrink in the right places here. For example, if vcpu-halt_poll_ns gets down to 0, I don't see how it can then grow back up. This has already done in __grow_halt_poll_ns(). This might work better: if (poll successfully for interrupt): stay the same else if (length of kvm_vcpu_block is longer than halt_poll_ns_max): shrink else if (length of kvm_vcpu_block is less than halt_poll_ns_max): grow where halt_poll_ns_max is something reasonable, like 2 millisecond. You get diminishing returns from halt polling as the length of the halt gets longer (halt polling only reduces halt latency by 10-15 us). So there's little benefit to polling longer than a few milliseconds. Great idea, David! Thanks for your review and I will implement it in next version. 
:-) Regards, Wanpeng Li Signed-off-by: Wanpeng Li wanpeng...@hotmail.com --- virt/kvm/kvm_main.c | 81 - 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a122b52..bcfbd35 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -66,9 +66,28 @@ MODULE_AUTHOR(Qumranet); MODULE_LICENSE(GPL); -static unsigned int halt_poll_ns; +#define KVM_HALT_POLL_NS 50 +#define KVM_HALT_POLL_NS_GROW 2 +#define KVM_HALT_POLL_NS_SHRINK 0 +#define KVM_HALT_POLL_NS_MAX \ + INT_MAX / KVM_HALT_POLL_NS_GROW + +static unsigned int halt_poll_ns = KVM_HALT_POLL_NS; module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); +/* Default doubles per-vcpu halt_poll_ns. */ +static int halt_poll_ns_grow = KVM_HALT_POLL_NS_GROW; +module_param(halt_poll_ns_grow, int, S_IRUGO); + +/* Default resets per-vcpu halt_poll_ns . */ +int halt_poll_ns_shrink = KVM_HALT_POLL_NS_SHRINK; +module_param(halt_poll_ns_shrink, int, S_IRUGO); + +/* Default is to compute the maximum so we can never overflow. 
*/ +unsigned int halt_poll_ns_actual_max = KVM_HALT_POLL_NS_MAX; +unsigned int halt_poll_ns_max = KVM_HALT_POLL_NS_MAX; +module_param(halt_poll_ns_max, int, S_IRUGO); + /* * Ordering of locks: * @@ -1907,6 +1926,62 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); +static unsigned int __grow_halt_poll_ns(unsigned int val) +{ + if (halt_poll_ns_grow 1) + return halt_poll_ns; + + val = min(val, halt_poll_ns_actual_max); + + if (val == 0) + return halt_poll_ns; + + if (halt_poll_ns_grow halt_poll_ns) + val *= halt_poll_ns_grow; + else + val += halt_poll_ns_grow; + + return val; +} + +static unsigned int __shrink_halt_poll_ns(int val, int modifier, int minimum) +{ + if (modifier 1) + return 0; + + if (modifier halt_poll_ns) + val /= modifier; + else + val -= modifier; + + return val; +} + +static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) +{ + vcpu-halt_poll_ns = __grow_halt_poll_ns(vcpu-halt_poll_ns); +} + +static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) +{ + vcpu-halt_poll_ns = __shrink_halt_poll_ns(vcpu-halt_poll_ns, + halt_poll_ns_shrink, halt_poll_ns); +} + +/* + * halt_poll_ns_actual_max is computed to be one grow_halt_poll_ns() below + * halt_poll_ns_max. (See __grow_halt_poll_ns for the reason.) + * This prevents overflows, because ple_halt_poll_ns is int. + * halt_poll_ns_max effectively rounded down to a multiple of halt_poll_ns_grow in + * this process. + */ +static void update_halt_poll_ns_actual_max(void) +{ + halt_poll_ns_actual_max = + __shrink_halt_poll_ns(max(halt_poll_ns_max, halt_poll_ns), + halt_poll_ns_grow, INT_MIN); +} + static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) { if (kvm_arch_vcpu_runnable(vcpu)) { @@ -1941,6 +2016,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) */ if (kvm_vcpu_check_block(vcpu)
Re: [PATCH 1/3] KVM: make halt_poll_ns per-VCPU
On 8/25/15 12:59 AM, David Matlack wrote: On Mon, Aug 24, 2015 at 5:53 AM, Wanpeng Li wanpeng...@hotmail.com wrote: Change halt_poll_ns into per-VCPU variable, seeded from module parameter, to allow greater flexibility. You should also change kvm_vcpu_block to read halt_poll_ns from the vcpu instead of the module parameter. Indeed, thanks for your review. :-) Regards, Wanpeng Li Signed-off-by: Wanpeng Li wanpeng...@hotmail.com --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 81089cf..1bef9e2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -242,6 +242,7 @@ struct kvm_vcpu { int sigset_active; sigset_t sigset; struct kvm_vcpu_stat stat; + unsigned int halt_poll_ns; #ifdef CONFIG_HAS_IOMEM int mmio_needed; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d8db2f8f..a122b52 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -217,6 +217,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu-kvm = kvm; vcpu-vcpu_id = id; vcpu-pid = NULL; + vcpu-halt_poll_ns = halt_poll_ns; init_waitqueue_head(vcpu-wq); kvm_async_pf_vcpu_init(vcpu); -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] kvm: don't register wildcard MMIO EVENTFD on two buses
On 08/24/2015 10:05 PM, Cornelia Huck wrote: On Mon, 24 Aug 2015 11:29:29 +0800 Jason Wang jasow...@redhat.com wrote: On 08/21/2015 05:29 PM, Cornelia Huck wrote: On Fri, 21 Aug 2015 16:03:52 +0800 Jason Wang jasow...@redhat.com wrote: @@ -850,9 +845,15 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) Unfortunately snipped by diff, but the check here is on !len !PIO, which only does the desired thing as VIRTIO_CCW always uses len == 8. Should the check be for !len MMIO instead? I think the answer depends on whether len == 0 is valid for ccw. If not we can fail the assign earlier. Since even without this patch, if userspace tries to register a dev with len equals to zero, it will also be registered to KVM_FAST_MMIO_BUS. If yes, we need check as you suggested here. I don't think len != 8 makes much sense for the way ioeventfd is defined for ccw (we handle hypercalls with a payload specifying the device), but we currently don't actively fence it. But regardless, I'd prefer to decide directly upon whether userspace actually tried to register for the mmio bus. Ok. ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS, p-addr, 0, p-dev); if (ret 0) - goto register_fail; + goto unlock_fail; + } else { + ret = kvm_io_bus_register_dev(kvm, bus_idx, p-addr, p-length, +p-dev); + if (ret 0) + goto unlock_fail; } Hm... maybe the following would be more obvious: my_bus = (p-length == 0) (bus_idx == KVM_MMIO_BUS) ? KVM_FAST_MMIO_BUS : bus_idx; ret = kvm_io_bus_register_dev(kvm, my_bus, p-addr, p-length, pdev-dev); + kvm-buses[bus_idx]-ioeventfd_count++; list_add_tail(p-list, kvm-ioeventfds); (...) @@ -900,10 +899,11 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (!p-wildcard p-datamatch != args-datamatch) continue; - kvm_io_bus_unregister_dev(kvm, bus_idx, p-dev); if (!p-length) { kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS, p-dev); + } else { + kvm_io_bus_unregister_dev(kvm, bus_idx, p-dev); } Similar comments here... 
do you want to check for bus_idx == KVM_MMIO_BUS as well? Good catch. I think keep the original code as is will be also ok to solve this. (with changing the bus_idx to KVM_FAST_MMIO_BUS during registering if it was an wildcard mmio). Do you need to handle the ioeventfd_count changes on the fast mmio bus as well? Yes. So actually, it needs some changes: checking the return value of kvm_io_bus_unregister_dev() and decide which bus does the device belongs to. kvm-buses[bus_idx]-ioeventfd_count--; ioeventfd_release(p); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html