[PATCH 2/3] KVM: dynamise halt_poll_ns adjustment

2015-08-24 Thread Wanpeng Li
There are two new kernel parameters for changing the halt_poll_ns:
halt_poll_ns_grow and halt_poll_ns_shrink. halt_poll_ns_grow adjusts
halt_poll_ns when an interrupt arrives, and halt_poll_ns_shrink does it
when an idle VCPU is detected.

  halt_poll_ns_shrink/ |
  halt_poll_ns_grow    | interrupt arrives    | idle VCPU is detected
  ---------------------+----------------------+-----------------------
  < 1                  |  = halt_poll_ns      |  = 0
  < halt_poll_ns       | *= halt_poll_ns_grow | /= halt_poll_ns_shrink
  otherwise            | += halt_poll_ns_grow | -= halt_poll_ns_shrink

A third new parameter, halt_poll_ns_max, controls the maximal halt_poll_ns;
it is internally rounded down to the closest multiple of halt_poll_ns_grow
(e.g., with halt_poll_ns_grow=2, a halt_poll_ns_max of 1000001 caps polling
at 1000000 ns).

Signed-off-by: Wanpeng Li wanpeng...@hotmail.com
---
 virt/kvm/kvm_main.c | 81 -
 1 file changed, 80 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a122b52..bcfbd35 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -66,9 +66,28 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
-static unsigned int halt_poll_ns;
+#define KVM_HALT_POLL_NS  500000
+#define KVM_HALT_POLL_NS_GROW   2
+#define KVM_HALT_POLL_NS_SHRINK 0
+#define KVM_HALT_POLL_NS_MAX \
+   INT_MAX / KVM_HALT_POLL_NS_GROW
+
+static unsigned int halt_poll_ns = KVM_HALT_POLL_NS;
 module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
 
+/* Default doubles per-vcpu halt_poll_ns. */
+static int halt_poll_ns_grow = KVM_HALT_POLL_NS_GROW;
+module_param(halt_poll_ns_grow, int, S_IRUGO);
+
+/* Default resets per-vcpu halt_poll_ns. */
+int halt_poll_ns_shrink = KVM_HALT_POLL_NS_SHRINK;
+module_param(halt_poll_ns_shrink, int, S_IRUGO);
+
+/* Default is to compute the maximum so we can never overflow. */
+unsigned int halt_poll_ns_actual_max = KVM_HALT_POLL_NS_MAX;
+unsigned int halt_poll_ns_max = KVM_HALT_POLL_NS_MAX;
+module_param(halt_poll_ns_max, int, S_IRUGO);
+
 /*
  * Ordering of locks:
  *
@@ -1907,6 +1926,62 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, 
gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
 
+static unsigned int __grow_halt_poll_ns(unsigned int val)
+{
+	if (halt_poll_ns_grow < 1)
+		return halt_poll_ns;
+
+	val = min(val, halt_poll_ns_actual_max);
+
+	if (val == 0)
+		return halt_poll_ns;
+
+	if (halt_poll_ns_grow < halt_poll_ns)
+		val *= halt_poll_ns_grow;
+	else
+		val += halt_poll_ns_grow;
+
+	return val;
+}
+
+static unsigned int __shrink_halt_poll_ns(int val, int modifier, int minimum)
+{
+	if (modifier < 1)
+		return 0;
+
+	if (modifier < halt_poll_ns)
+		val /= modifier;
+	else
+		val -= modifier;
+
+	return val;
+}
+
+static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+	vcpu->halt_poll_ns = __grow_halt_poll_ns(vcpu->halt_poll_ns);
+}
+
+static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+	vcpu->halt_poll_ns = __shrink_halt_poll_ns(vcpu->halt_poll_ns,
+			halt_poll_ns_shrink, halt_poll_ns);
+}
+
+/*
+ * halt_poll_ns_actual_max is computed to be one grow_halt_poll_ns() below
+ * halt_poll_ns_max. (See __grow_halt_poll_ns for the reason.)
+ * This prevents overflows, because the halt_poll_ns values are ints.
+ * halt_poll_ns_max is effectively rounded down to a multiple of
+ * halt_poll_ns_grow in this process.
+ */
+static void update_halt_poll_ns_actual_max(void)
+{
+   halt_poll_ns_actual_max =
+   __shrink_halt_poll_ns(max(halt_poll_ns_max, halt_poll_ns),
+   halt_poll_ns_grow, INT_MIN);
+}
+
 static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
 {
if (kvm_arch_vcpu_runnable(vcpu)) {
@@ -1941,6 +2016,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 */
	if (kvm_vcpu_check_block(vcpu) < 0) {
		++vcpu->stat.halt_successful_poll;
+   grow_halt_poll_ns(vcpu);
goto out;
}
cur = ktime_get();
@@ -1954,6 +2030,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
break;
 
waited = true;
+   if (halt_poll_ns)
+   shrink_halt_poll_ns(vcpu);
schedule();
}
 
@@ -2950,6 +3028,7 @@ static void hardware_enable_nolock(void *junk)
cpumask_set_cpu(cpu, cpus_hardware_enabled);
 
r = kvm_arch_hardware_enable();
+   update_halt_poll_ns_actual_max();
 
if (r) {
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
-- 
1.9.1



[PATCH 0/3] KVM: Dynamic halt_poll_ns

2015-08-24 Thread Wanpeng Li
There is a downside to halt_poll_ns: polling still happens for an idle
VCPU, which can waste CPU. This patchset adds the ability to adjust
halt_poll_ns dynamically, growing halt_poll_ns when an interrupt arrives
and shrinking it when an idle VCPU is detected.

There are two new kernel parameters for changing the halt_poll_ns:
halt_poll_ns_grow and halt_poll_ns_shrink. halt_poll_ns_grow adjusts
halt_poll_ns when an interrupt arrives, and halt_poll_ns_shrink does it
when an idle VCPU is detected.

  halt_poll_ns_shrink/ |
  halt_poll_ns_grow    | interrupt arrives    | idle VCPU is detected
  ---------------------+----------------------+-----------------------
  < 1                  |  = halt_poll_ns      |  = 0
  < halt_poll_ns       | *= halt_poll_ns_grow | /= halt_poll_ns_shrink
  otherwise            | += halt_poll_ns_grow | -= halt_poll_ns_shrink

A third new parameter, halt_poll_ns_max, controls the maximal halt_poll_ns;
it is internally rounded down to the closest multiple of halt_poll_ns_grow.

Wanpeng Li (3):
  KVM: make halt_poll_ns per-VCPU
  KVM: dynamise halt_poll_ns adjustment 
  KVM: trace kvm_halt_poll_ns grow/shrink 

 include/linux/kvm_host.h   |  1 +
 include/trace/events/kvm.h | 30 
 virt/kvm/kvm_main.c| 90 +-
 3 files changed, 120 insertions(+), 1 deletion(-)

-- 
1.9.1



[PATCH 3/3] KVM: trace kvm_halt_poll_ns grow/shrink

2015-08-24 Thread Wanpeng Li
Tracepoint for dynamic halt_poll_ns, fired on every potential change.

Signed-off-by: Wanpeng Li wanpeng...@hotmail.com
---
 include/trace/events/kvm.h | 30 ++
 virt/kvm/kvm_main.c|  8 
 2 files changed, 38 insertions(+)

diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index a44062d..75ddf80 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -356,6 +356,36 @@ TRACE_EVENT(
  __entry->address)
 );
 
+TRACE_EVENT(kvm_halt_poll_ns,
+	TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
+	TP_ARGS(grow, vcpu_id, new, old),
+
+	TP_STRUCT__entry(
+		__field(bool, grow)
+		__field(unsigned int, vcpu_id)
+		__field(int, new)
+		__field(int, old)
+	),
+
+	TP_fast_assign(
+		__entry->grow    = grow;
+		__entry->vcpu_id = vcpu_id;
+		__entry->new     = new;
+		__entry->old     = old;
+	),
+
+	TP_printk("vcpu %u: halt_poll_ns %d (%s %d)",
+		__entry->vcpu_id,
+		__entry->new,
+		__entry->grow ? "grow" : "shrink",
+		__entry->old)
+);
+
+#define trace_kvm_halt_poll_ns_grow(vcpu_id, new, old) \
+	trace_kvm_halt_poll_ns(true, vcpu_id, new, old)
+#define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \
+	trace_kvm_halt_poll_ns(false, vcpu_id, new, old)
+
 #endif
 
 #endif /* _TRACE_KVM_MAIN_H */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bcfbd35..65e3631 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1959,13 +1959,21 @@ static unsigned int __shrink_halt_poll_ns(int val, int 
modifier, int minimum)
 
 static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
 {
+	int old = vcpu->halt_poll_ns;
+
 	vcpu->halt_poll_ns = __grow_halt_poll_ns(vcpu->halt_poll_ns);
+
+	trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, vcpu->halt_poll_ns, old);
 }
 
 static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
 {
+	int old = vcpu->halt_poll_ns;
+
 	vcpu->halt_poll_ns = __shrink_halt_poll_ns(vcpu->halt_poll_ns,
 			halt_poll_ns_shrink, halt_poll_ns);
+
+	trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, vcpu->halt_poll_ns, old);
 
 /*
-- 
1.9.1



[PATCH 1/3] KVM: make halt_poll_ns per-VCPU

2015-08-24 Thread Wanpeng Li
Change halt_poll_ns into per-VCPU variable, seeded from module parameter,
to allow greater flexibility.

Signed-off-by: Wanpeng Li wanpeng...@hotmail.com
---
 include/linux/kvm_host.h | 1 +
 virt/kvm/kvm_main.c  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 81089cf..1bef9e2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -242,6 +242,7 @@ struct kvm_vcpu {
int sigset_active;
sigset_t sigset;
struct kvm_vcpu_stat stat;
+   unsigned int halt_poll_ns;
 
 #ifdef CONFIG_HAS_IOMEM
int mmio_needed;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d8db2f8f..a122b52 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -217,6 +217,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, 
unsigned id)
	vcpu->kvm = kvm;
	vcpu->vcpu_id = id;
	vcpu->pid = NULL;
+	vcpu->halt_poll_ns = halt_poll_ns;
	init_waitqueue_head(&vcpu->wq);
kvm_async_pf_vcpu_init(vcpu);
 
-- 
1.9.1



Re: [PATCH 2/3] kvm: don't register wildcard MMIO EVENTFD on two buses

2015-08-24 Thread Cornelia Huck
On Mon, 24 Aug 2015 11:29:29 +0800
Jason Wang jasow...@redhat.com wrote:

 On 08/21/2015 05:29 PM, Cornelia Huck wrote:
  On Fri, 21 Aug 2015 16:03:52 +0800
  Jason Wang jasow...@redhat.com wrote:

  @@ -850,9 +845,15 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct 
  kvm_ioeventfd *args)
  Unfortunately snipped by diff, but the check here is on !len && !PIO,
  which only does the desired thing as VIRTIO_CCW always uses len == 8.
  Should the check be for !len && MMIO instead?
 
 I think the answer depends on whether len == 0 is valid for ccw. If not,
 we can fail the assign earlier, since even without this patch, if
 userspace tries to register a dev with len equal to zero, it will also
 be registered to KVM_FAST_MMIO_BUS. If yes, we need the check you
 suggested here.

I don't think len != 8 makes much sense for the way ioeventfd is
defined for ccw (we handle hypercalls with a payload specifying the
device), but we currently don't actively fence it.

But regardless, I'd prefer to decide directly based on whether userspace
actually tried to register for the mmio bus.

 
 
 	ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS,
 				      p->addr, 0, &p->dev);
 	if (ret < 0)
-		goto register_fail;
+		goto unlock_fail;
+	} else {
+		ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
+					      &p->dev);
+		if (ret < 0)
+			goto unlock_fail;
 	}
  Hm... maybe the following would be more obvious:
 
  my_bus = (p->length == 0) && (bus_idx == KVM_MMIO_BUS) ? KVM_FAST_MMIO_BUS
	   : bus_idx;
  ret = kvm_io_bus_register_dev(kvm, my_bus, p->addr, p->length, &p->dev);
 
   
  +
	kvm->buses[bus_idx]->ioeventfd_count++;
	list_add_tail(&p->list, &kvm->ioeventfds);
  (...)
 
  @@ -900,10 +899,11 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct 
  kvm_ioeventfd *args)
 		if (!p->wildcard && p->datamatch != args->datamatch)
 			continue;
 
-		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
 		if (!p->length) {
 			kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS,
 						  &p->dev);
+		} else {
+			kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
 		}
  Similar comments here... do you want to check for bus_idx ==
  KVM_MMIO_BUS as well?
 
 Good catch. I think keeping the original code as is will also be ok to
 solve this (with bus_idx changed to KVM_FAST_MMIO_BUS during
 registration if it was a wildcard mmio).

Do you need to handle the ioeventfd_count changes on the fast mmio bus
as well?

 
 
	kvm->buses[bus_idx]->ioeventfd_count--;
 ioeventfd_release(p);
 



Re: KVM slow LAMP guest

2015-08-24 Thread Hansa

On 24-8-2015 1:26, Wanpeng Li wrote:

On 8/24/15 3:18 AM, Hansa wrote:

On 16-7-2015 13:27, Paolo Bonzini wrote:

On 15/07/2015 22:02, C. Bröcker wrote:

What OS is this?  Is it RHEL/CentOS? If so, halt_poll_ns will be in 6.7
which will be out in a few days/weeks.

Paolo

OK. As said CentOS 6.6.
But where do I put this parameter?

You can add kvm.halt_poll_ns=500000 to the kernel command line.  If
you have the parameter, you have the
/sys/module/kvm/parameters/halt_poll_ns file.

Hi,

I upgraded to the CentOS 6.7 release, which came out last month, and as
promised the halt_poll_ns parameter was available.
Last week I tested availability every 5 minutes on my Wordpress VMs with
the halt_poll_ns kernel param set on DOM0. I'm pleased to report that it
solves the problem!


How many seconds does your Wordpress site take to load this time?

Regards,
Wanpeng Li

I've only tested availability using Webmin's Remote HTTP Service
monitoring tool. Currently it only times out when doing backups. I will do some tests
with curl and let you know the result in a day or so.


Re: [PATCH v2 14/15] KVM: arm64: implement MSI injection in ITS emulation

2015-08-24 Thread Andre Przywara
Hi,

On 03/08/15 18:06, Marc Zyngier wrote:
 On 03/08/15 16:37, Eric Auger wrote:
 Andre, Pavel,
 On 08/03/2015 11:16 AM, Pavel Fedin wrote:
  Hello!

 Again the case that leaves me uncomfortable is the one where the
 userspace does not provide the devid whereas it must (GICv3 ITS case).

  Hypothetical broken userland which does not exist for now?
 Yes, but the rule is to not be confident in *any* userspace, isn't it?

Well, that's only regarding safety, not regarding functionality, right?
So if we could break the kernel by not providing the flag and/or devid,
this needs to be fixed. But if it just doesn't work, that's OK.


 As of now I prefer keeping the flags at the uapi level and propagating
 them down to the kernel, as long as I don't have any answer for the unset
 devid discrimination question. Please excuse my stubbornness ;-)
 
 I think this flag should be kept, as it really indicates what is valid
 in the MSI structure. It also has other benefits such as making obvious
 what userspace expects, which can then be checked against the kernel's
 own expectations.

I agree on this. Usually this kind of redundancy leads to strange code,
but this does not seem to apply here, since we can at least still guard
the assignments to demonstrate that the devid field needs to go along
with the flag.

Cheers,
Andre.


linux-kvm.org wiki

2015-08-24 Thread Jonathon Reinhart
To whom it may concern:

I'm trying to make an edit to this page:

http://www.linux-kvm.org/index.php?title=Windows7Install

I have successfully created an account. When I Save the edit to the
page, it looks like everything worked okay. However, when I refresh
the page, my edit is gone, and no history of my edit is visible in the
page history or my contributions page.

Does anyone have any idea what's going on?  I've edited lots of
MediaWiki sites before, so I'm *pretty* sure I'm not doing anything
wrong.

Thanks,
Jonathon


[Bug 103141] Host-triggerable NULL pointer oops

2015-08-24 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=103141

--- Comment #2 from felix felix.vo...@posteo.de ---
Created attachment 185681
  --> https://bugzilla.kernel.org/attachment.cgi?id=185681&action=edit
Test program 2 (C99)

You mean "can" as in "I think it does" or "it did for me"?

And anyway, it seems to only fix the most proximate cause of the crash. My
biggest worry is that KVM_SET_USER_MEMORY_REGION ioctls with guest_phys_addr
around the 0xfff0 to 0x range seem not to register; starting the
VM looks as if the region wasn't placed there.

I attach test program 2. Running that on my system with 0x44000 as an argument
outputs "halted" (as expected), but 0x45000 and larger multiples of 0x1000 give
"internal error, subcode 1".
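
For context, the registration the report describes is a plain
KVM_SET_USER_MEMORY_REGION ioctl on the VM fd. A minimal sketch (values are
illustrative and not taken from the attached test program; vm_fd and mem
stand for the VM file descriptor and a page-aligned mmap'ed buffer):

	struct kvm_userspace_memory_region region = {
		.slot            = 1,
		.flags           = 0,
		.guest_phys_addr = 0xfffff000ULL,         /* top-of-4G page */
		.memory_size     = 0x1000,
		.userspace_addr  = (__u64)(uintptr_t)mem,
	};

	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0)
		perror("KVM_SET_USER_MEMORY_REGION");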



[Bug 103141] Host-triggerable NULL pointer oops

2015-08-24 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=103141

felix felix.vo...@posteo.de changed:

           What    |Removed |Added
 ------------------+--------+------
 Attachment #185681|0       |1
        is obsolete|        |

--- Comment #3 from felix felix.vo...@posteo.de ---
Created attachment 185691
  --> https://bugzilla.kernel.org/attachment.cgi?id=185691&action=edit
Test program 2 (C99) [non-oopsing version]



Re: [PATCH v2 01/15] KVM: arm/arm64: VGIC: don't track used LRs in the distributor

2015-08-24 Thread Andre Przywara
Hi Eric,

On 12/08/15 10:01, Eric Auger wrote:

 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
 index bc40137..394622c 100644
 --- a/virt/kvm/arm/vgic.c
 +++ b/virt/kvm/arm/vgic.c
 @@ -79,7 +79,6 @@
  #include "vgic.h"
  
  static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 -static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
  static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
  static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr 
 lr_desc);
  
 @@ -647,6 +646,17 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio 
 *mmio,
  return false;
  }
  
 +static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
 +   struct vgic_lr vlr)
 +{
 +	vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
 +}
 Why not rename this to vgic_set_elrsr? That would be consistent
 with the other virtual interface control register setters.

But that would involve renaming the vgic_ops members as well to be
consistent, right? As there is no change in the behaviour, a naming
change sounds unmotivated to me. And _set_ wouldn't be exact, as this
function deals with only one bit at a time and allows clearing it
as well.

 +
 +static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
 +{
 +	return vgic_ops->get_elrsr(vcpu);
 +}
 If I am not wrong, each time you manipulate the elrsr you handle the
 bitmap. Why not directly return an unsigned long * then (elrsr_ptr)?

Because the pointer needs to point somewhere, and that storage is
currently located on the caller's stack. Directly returning a pointer
would require the caller to provide some memory for the u64, which does
not save you much in terms of LOC:

-	u64 elrsr = vgic_get_elrsr(vcpu);
-	unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
+	u64 elrsr;
+	unsigned long *elrsr_ptr = vgic_get_elrsr_bm(vcpu, &elrsr);

Also we need u64_to_bitmask() in one case when converting the EISR
value, so we cannot get rid of that function.

 +
  /**
   * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor
   * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
 @@ -658,9 +668,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio 
 *mmio,
  void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
  {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 +	u64 elrsr = vgic_get_elrsr(vcpu);
 +	unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
 	int i;
 
 -	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
 +	for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) {
  struct vgic_lr lr = vgic_get_lr(vcpu, i);
  
  /*
 @@ -703,7 +715,7 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
   * Mark the LR as free for other use.
   */
 	BUG_ON(lr.state & LR_STATE_MASK);
 -vgic_retire_lr(i, lr.irq, vcpu);
 +vgic_sync_lr_elrsr(vcpu, i, lr);
  vgic_irq_clear_queued(vcpu, lr.irq);
  
  /* Finally update the VGIC state. */
 @@ -1011,17 +1023,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
 	vgic_ops->set_lr(vcpu, lr, vlr);
  }
  
 -static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
 -			       struct vgic_lr vlr)
 -{
 -	vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
 -}
 -
 -static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
 -{
 -	return vgic_ops->get_elrsr(vcpu);
 -}
 -
  static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
  {
 	return vgic_ops->get_eisr(vcpu);
 @@ -1062,18 +1063,6 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu)
 	vgic_ops->enable(vcpu);
  }
  
 -static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 -{
 -	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 -	struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
 -
 -	vlr.state = 0;
 -	vgic_set_lr(vcpu, lr_nr, vlr);
 -	clear_bit(lr_nr, vgic_cpu->lr_used);
 -	vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
 -	vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
 -}
 -
  /*
   * An interrupt may have been disabled after being made pending on the
   * CPU interface (the classic case is a timer running while we're
 @@ -1085,23 +1074,32 @@ static void vgic_retire_lr(int lr_nr, int irq, 
 struct kvm_vcpu *vcpu)
   */
  static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
  {
 -	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 +	u64 elrsr = vgic_get_elrsr(vcpu);
 +	unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
 If you agree with the above modification, I would simply rename elrsr_ptr to elrsr.
  int lr;
 +struct vgic_lr vlr;
 Why move this declaration here? I think it can remain in the block.

Possibly. I don't remember the reason for this move; I think it was due to
some other changes I later removed. I will revert it.

  
 -	for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
 -		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 +for_each_clear_bit(lr, elrsr_ptr, 

Re: [PATCH 1/3] KVM: make halt_poll_ns per-VCPU

2015-08-24 Thread David Matlack
On Mon, Aug 24, 2015 at 5:53 AM, Wanpeng Li wanpeng...@hotmail.com wrote:
 Change halt_poll_ns into per-VCPU variable, seeded from module parameter,
 to allow greater flexibility.

You should also change kvm_vcpu_block to read halt_poll_ns from
the vcpu instead of the module parameter.
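
Concretely, that would mean seeding the poll window in kvm_vcpu_block()
from the per-vcpu field. A sketch against the 4.2-era polling loop (only
the seed value changes):

	if (vcpu->halt_poll_ns) {
		ktime_t cur, stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);

		do {
			/* sets KVM_REQ_UNHALT and bails out if an interrupt arrives */
			if (kvm_vcpu_check_block(vcpu) < 0) {
				++vcpu->stat.halt_successful_poll;
				goto out;
			}
			cur = ktime_get();
		} while (single_task_running() && ktime_before(cur, stop));
	}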


 Signed-off-by: Wanpeng Li wanpeng...@hotmail.com
 ---
  include/linux/kvm_host.h | 1 +
  virt/kvm/kvm_main.c  | 1 +
  2 files changed, 2 insertions(+)

 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index 81089cf..1bef9e2 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -242,6 +242,7 @@ struct kvm_vcpu {
 int sigset_active;
 sigset_t sigset;
 struct kvm_vcpu_stat stat;
 +   unsigned int halt_poll_ns;

  #ifdef CONFIG_HAS_IOMEM
 int mmio_needed;
 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
 index d8db2f8f..a122b52 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -217,6 +217,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, 
 unsigned id)
 	vcpu->kvm = kvm;
 	vcpu->vcpu_id = id;
 	vcpu->pid = NULL;
 +	vcpu->halt_poll_ns = halt_poll_ns;
 	init_waitqueue_head(&vcpu->wq);
 kvm_async_pf_vcpu_init(vcpu);

 --
 1.9.1



Re: [PATCH 2/3] KVM: dynamise halt_poll_ns adjustment

2015-08-24 Thread David Matlack
On Mon, Aug 24, 2015 at 5:53 AM, Wanpeng Li wanpeng...@hotmail.com wrote:
 There are two new kernel parameters for changing the halt_poll_ns:
 halt_poll_ns_grow and halt_poll_ns_shrink. halt_poll_ns_grow affects
 halt_poll_ns when an interrupt arrives and halt_poll_ns_shrink
 does it when idle VCPU is detected.

    halt_poll_ns_shrink/ |
    halt_poll_ns_grow    | interrupt arrives    | idle VCPU is detected
    ---------------------+----------------------+-----------------------
    < 1                  |  = halt_poll_ns      |  = 0
    < halt_poll_ns       | *= halt_poll_ns_grow | /= halt_poll_ns_shrink
    otherwise            | += halt_poll_ns_grow | -= halt_poll_ns_shrink

 A third new parameter, halt_poll_ns_max, controls the maximal halt_poll_ns;
 it is internally rounded down to a closest multiple of halt_poll_ns_grow.

I like the idea of growing and shrinking halt_poll_ns, but I'm not sure
we grow and shrink in the right places here. For example, if vcpu->halt_poll_ns
gets down to 0, I don't see how it can then grow back up.

This might work better:
  if (poll successfully for interrupt): stay the same
  else if (length of kvm_vcpu_block is longer than halt_poll_ns_max): shrink
  else if (length of kvm_vcpu_block is less than halt_poll_ns_max): grow

where halt_poll_ns_max is something reasonable, like 2 millisecond.

You get diminishing returns from halt polling as the length of the
halt gets longer (halt polling only reduces halt latency by 10-15 us).
So there's little benefit to polling longer than a few milliseconds.
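
A minimal sketch of that policy, reusing this patchset's grow/shrink
helpers; polled and block_ns are hypothetical inputs (whether the poll
caught an interrupt, and how long the vcpu actually blocked):

	static void adjust_halt_poll_ns(struct kvm_vcpu *vcpu, bool polled,
					u64 block_ns)
	{
		if (polled)
			return;				/* successful poll: stay the same */

		if (block_ns > halt_poll_ns_max)
			shrink_halt_poll_ns(vcpu);	/* long halt: polling won't pay off */
		else
			grow_halt_poll_ns(vcpu);	/* short halt: poll a bit longer */
	}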


 Signed-off-by: Wanpeng Li wanpeng...@hotmail.com
 ---
  virt/kvm/kvm_main.c | 81 
 -
  1 file changed, 80 insertions(+), 1 deletion(-)

 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
 index a122b52..bcfbd35 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -66,9 +66,28 @@
  MODULE_AUTHOR("Qumranet");
  MODULE_LICENSE("GPL");

 -static unsigned int halt_poll_ns;
 +#define KVM_HALT_POLL_NS  500000
 +#define KVM_HALT_POLL_NS_GROW   2
 +#define KVM_HALT_POLL_NS_SHRINK 0
 +#define KVM_HALT_POLL_NS_MAX \
 +   INT_MAX / KVM_HALT_POLL_NS_GROW
 +
 +static unsigned int halt_poll_ns = KVM_HALT_POLL_NS;
  module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);

 +/* Default doubles per-vcpu halt_poll_ns. */
 +static int halt_poll_ns_grow = KVM_HALT_POLL_NS_GROW;
 +module_param(halt_poll_ns_grow, int, S_IRUGO);
 +
 +/* Default resets per-vcpu halt_poll_ns. */
 +int halt_poll_ns_shrink = KVM_HALT_POLL_NS_SHRINK;
 +module_param(halt_poll_ns_shrink, int, S_IRUGO);
 +
 +/* Default is to compute the maximum so we can never overflow. */
 +unsigned int halt_poll_ns_actual_max = KVM_HALT_POLL_NS_MAX;
 +unsigned int halt_poll_ns_max = KVM_HALT_POLL_NS_MAX;
 +module_param(halt_poll_ns_max, int, S_IRUGO);
 +
  /*
   * Ordering of locks:
   *
 @@ -1907,6 +1926,62 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, 
 gfn_t gfn)
  }
  EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);

 +static unsigned int __grow_halt_poll_ns(unsigned int val)
 +{
 +	if (halt_poll_ns_grow < 1)
 +		return halt_poll_ns;
 +
 +	val = min(val, halt_poll_ns_actual_max);
 +
 +	if (val == 0)
 +		return halt_poll_ns;
 +
 +	if (halt_poll_ns_grow < halt_poll_ns)
 +		val *= halt_poll_ns_grow;
 +	else
 +		val += halt_poll_ns_grow;
 +
 +	return val;
 +}
 +
 +static unsigned int __shrink_halt_poll_ns(int val, int modifier, int minimum)
 +{
 +	if (modifier < 1)
 +		return 0;
 +
 +	if (modifier < halt_poll_ns)
 +		val /= modifier;
 +	else
 +		val -= modifier;
 +
 +	return val;
 +}
 +
 +static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
 +{
 +	vcpu->halt_poll_ns = __grow_halt_poll_ns(vcpu->halt_poll_ns);
 +}
 +
 +static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
 +{
 +	vcpu->halt_poll_ns = __shrink_halt_poll_ns(vcpu->halt_poll_ns,
 +			halt_poll_ns_shrink, halt_poll_ns);
 +}
 +
 +/*
 + * halt_poll_ns_actual_max is computed to be one grow_halt_poll_ns() below
 + * halt_poll_ns_max. (See __grow_halt_poll_ns for the reason.)
 + * This prevents overflows, because the halt_poll_ns values are ints.
 + * halt_poll_ns_max is effectively rounded down to a multiple of
 + * halt_poll_ns_grow in this process.
 + */
 +static void update_halt_poll_ns_actual_max(void)
 +{
 +   halt_poll_ns_actual_max =
 +   __shrink_halt_poll_ns(max(halt_poll_ns_max, halt_poll_ns),
 +   halt_poll_ns_grow, INT_MIN);
 +}
 +
  static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
  {
 if (kvm_arch_vcpu_runnable(vcpu)) {
 @@ -1941,6 +2016,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
  */
 	if (kvm_vcpu_check_block(vcpu) < 0) {
 		++vcpu->stat.halt_successful_poll;
 +   

Re: [PATCH v2 05/15] KVM: arm/arm64: make GIC frame address initialization model specific

2015-08-24 Thread Andre Przywara
Hi,

On 12/08/15 14:02, Eric Auger wrote:
 On 07/10/2015 04:21 PM, Andre Przywara wrote:
 Currently we initialize all the possible GIC frame addresses in one
 function, without looking at the specific GIC model we instantiate
 for the guest.
 As this gets confusing when adding another VGIC model later, let's
 move these initializations into the respective model's init 
 nit: to be more precise, the init emulation function (not the
 vgic_v2/v3_init_model model's init function). pfouh?! ;-)
 functions.

OK, will try to find a wording that is not completely confusing.


 Signed-off-by: Andre Przywara andre.przyw...@arm.com
 ---
  virt/kvm/arm/vgic-v2-emul.c | 3 +++
  virt/kvm/arm/vgic-v3-emul.c | 3 +++
  virt/kvm/arm/vgic.c | 3 ---
  3 files changed, 6 insertions(+), 3 deletions(-)

 diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
 index 1390797..8faa28c 100644
 --- a/virt/kvm/arm/vgic-v2-emul.c
 +++ b/virt/kvm/arm/vgic-v2-emul.c
 @@ -567,6 +567,9 @@ void vgic_v2_init_emulation(struct kvm *kvm)
 	dist->vm_ops.init_model = vgic_v2_init_model;
 	dist->vm_ops.map_resources = vgic_v2_map_resources;
 
 +	dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
 +	dist->vgic_dist_base = VGIC_ADDR_UNDEF;
 Looks strange to see the common dist_base here. Why don't you leave it
 in the common part, kvm_vgic_create; all the more so since you left
 kvm->arch.vgic.vctrl_base = vgic->vctrl_base in kvm_vgic_create.

The idea behind this is that dist_base refers to similar, but not
identical distributors (v2 vs. v3), so I found it a good idea to
initialize it in here. Also vctrl_base is host facing and not set by
userland, so this doesn't really compare here.

Cheers,
Andre.

 +
 	kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
  }
  
 diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c
 index d2eeb20..1f42348 100644
 --- a/virt/kvm/arm/vgic-v3-emul.c
 +++ b/virt/kvm/arm/vgic-v3-emul.c
 @@ -885,6 +885,9 @@ void vgic_v3_init_emulation(struct kvm *kvm)
 	dist->vm_ops.destroy_model = vgic_v3_destroy_model;
 	dist->vm_ops.map_resources = vgic_v3_map_resources;
 
 +	dist->vgic_dist_base = VGIC_ADDR_UNDEF;
 +	dist->vgic_redist_base = VGIC_ADDR_UNDEF;
 +
 	kvm->arch.max_vcpus = KVM_MAX_VCPUS;
  }
  
 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
 index cc8f5ed..59f1801 100644
 --- a/virt/kvm/arm/vgic.c
 +++ b/virt/kvm/arm/vgic.c
 @@ -1830,9 +1830,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
 	kvm->arch.vgic.in_kernel = true;
 	kvm->arch.vgic.vgic_model = type;
 	kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
 -	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 -	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 -	kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
  
  out_unlock:
  for (; vcpu_lock_idx = 0; vcpu_lock_idx--) {

 


Re: [PATCH v2 07/15] KVM: arm64: handle ITS related GICv3 redistributor registers

2015-08-24 Thread Andre Przywara
Hi Eric,

On 13/08/15 13:17, Eric Auger wrote:
 On 07/10/2015 04:21 PM, Andre Przywara wrote:
 In the GICv3 redistributor there are the PENDBASER and PROPBASER
 registers which we did not emulate so far, as they only make sense
 when having an ITS. In preparation for that emulate those MMIO
 accesses by storing the 64-bit data written into it into a variable
 which we later read in the ITS emulation.

 Signed-off-by: Andre Przywara andre.przyw...@arm.com
 ---
  include/kvm/arm_vgic.h  |  8 
  virt/kvm/arm/vgic-v3-emul.c | 44 
 
  virt/kvm/arm/vgic.c | 35 +++
  virt/kvm/arm/vgic.h |  4 
  4 files changed, 91 insertions(+)

 diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
 index 3ee063b..8c6cb0e 100644
 --- a/include/kvm/arm_vgic.h
 +++ b/include/kvm/arm_vgic.h
 @@ -256,6 +256,14 @@ struct vgic_dist {
  struct vgic_vm_ops  vm_ops;
  struct vgic_io_device   dist_iodev;
  struct vgic_io_device   *redist_iodevs;
 +
 +/* Address of LPI configuration table shared by all redistributors */
 +u64 propbaser;
 +
 +/* Addresses of LPI pending tables per redistributor */
 +u64 *pendbaser;
 +
 +	bool			lpis_enabled;
  };
  
  struct vgic_v2_cpu_if {
 diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c
 index a8cf669..5269ad1 100644
 --- a/virt/kvm/arm/vgic-v3-emul.c
 +++ b/virt/kvm/arm/vgic-v3-emul.c
 @@ -651,6 +651,38 @@ static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu 
 *vcpu,
  return vgic_handle_cfg_reg(reg, mmio, offset);
  }
  
 +/* We don't trigger any actions here, just store the register value */
 +static bool handle_mmio_propbaser_redist(struct kvm_vcpu *vcpu,
 + struct kvm_exit_mmio *mmio,
 + phys_addr_t offset)
 +{
 +	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 +	int mode = ACCESS_READ_VALUE;
 +
 +	/* Storing a value with LPIs already enabled is undefined */
 +	mode |= dist->lpis_enabled ? ACCESS_WRITE_IGNORED : ACCESS_WRITE_VALUE;
 +	vgic_handle_base_register(vcpu, mmio, offset, &dist->propbaser, mode);
 +
 +return false;
 +}
 +
 +/* We don't trigger any actions here, just store the register value */
 +static bool handle_mmio_pendbaser_redist(struct kvm_vcpu *vcpu,
 + struct kvm_exit_mmio *mmio,
 + phys_addr_t offset)
 +{
 +	struct kvm_vcpu *rdvcpu = mmio->private;
 +	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 +	int mode = ACCESS_READ_VALUE;
 +
 +	/* Storing a value with LPIs already enabled is undefined */
 +	mode |= dist->lpis_enabled ? ACCESS_WRITE_IGNORED : ACCESS_WRITE_VALUE;
 +	vgic_handle_base_register(vcpu, mmio, offset,
 +				  &dist->pendbaser[rdvcpu->vcpu_id], mode);
 +
 +return false;
 +}
 +
  #define SGI_base(x) ((x) + SZ_64K)
  
  static const struct vgic_io_range vgic_redist_ranges[] = {
 @@ -679,6 +711,18 @@ static const struct vgic_io_range vgic_redist_ranges[] 
 = {
  .handle_mmio= handle_mmio_raz_wi,
  },
  {
 +.base   = GICR_PENDBASER,
 +.len= 0x08,
 +.bits_per_irq   = 0,
 +.handle_mmio= handle_mmio_pendbaser_redist,
 +},
 +{
 +.base   = GICR_PROPBASER,
 +.len= 0x08,
 +.bits_per_irq   = 0,
 +.handle_mmio= handle_mmio_propbaser_redist,
 +},
 +{
  .base   = GICR_IDREGS,
  .len= 0x30,
  .bits_per_irq   = 0,
 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
 index 15e447f..49ee92b 100644
 --- a/virt/kvm/arm/vgic.c
 +++ b/virt/kvm/arm/vgic.c
 @@ -446,6 +446,41 @@ void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 
 *reg,
  }
  }
  
 +/* handle a 64-bit register access */
 +void vgic_handle_base_register(struct kvm_vcpu *vcpu,
 +   struct kvm_exit_mmio *mmio,
 +   phys_addr_t offset, u64 *basereg,
 +   int mode)
 +{
 Why do we have vcpu in the proto? I don't see it used. Also, if it were,
 couldn't we fetch it from mmio->private?
 
 Why not rename this to something like vgic_reg64_access, as per the
 vgic_reg_access 32-bit flavor above? vgic_handle* is usually the name of
 a region handler returning bool.

Makes sense, I both renamed the function and removed the vcpu parameter.
I need to check whether we need the vcpu to do some endianness checks in
the future, though. Using mmio->private would be a hack, then.

Cheers,
Andre.

 
 +u32 reg;
 +u64 breg;
 +
 +	switch (offset & ~3) {
 +	case 0x00:
 +		breg = *basereg;
 +		reg = lower_32_bits(breg);
 +		vgic_reg_access(mmio, &reg, offset & 3, mode);
 +

Re: Build regressions/improvements in v4.2-rc8

2015-08-24 Thread Geert Uytterhoeven
On Mon, Aug 24, 2015 at 10:34 AM, Geert Uytterhoeven
ge...@linux-m68k.org wrote:
 JFYI, when comparing v4.2-rc8[1] to v4.2-rc7[3], the summaries are:
   - build errors: +4/-7

4 regressions:
  + /home/kisskb/slave/src/include/linux/kvm_host.h: error: array
subscript is above array bounds [-Werror=array-bounds]:  => 430:19
(arch/powerpc/kvm/book3s_64_mmu.c: In function 'kvmppc_mmu_book3s_64_tlbie':)

powerpc-randconfig (seen before in a v3.15-rc1 build?)

  + error: initramfs.c: undefined reference to `__stack_chk_guard':
=> .init.text+0x1cb0)
(init/built-in.o: In function `parse_header':)

x86_64-randconfig

  + error: pci.c: undefined reference to `pci_ioremap_io':  => .init.text+0x3c4)
(arch/arm/mach-versatile/built-in.o: In function `pci_versatile_setup')

arm-randconfig

 [1] http://kisskb.ellerman.id.au/kisskb/head/9289/ (253 out of 254 configs)
 [3] http://kisskb.ellerman.id.au/kisskb/head/9260/ (253 out of 254 configs)

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say programmer or something like that.
-- Linus Torvalds


Re: [PULL 00/12] ppc patch queue 2015-08-22

2015-08-24 Thread Paolo Bonzini


On 24/08/2015 06:49, Alexander Graf wrote:
   Hi Paolo,
  
   This is my current patch queue for ppc.  Please pull.
  
  Done, but this queue has not been in linux-next.  Please push to
  kvm-ppc-next on your github Linux tree as well; please keep an eye on
 
 Ah, sorry. I pushed to kvm-ppc-next in parallel to sending the request.

No problem, and Linus in the end did do an rc8 so I can wait till I'm
back for sending the PPC/ARM pull request.

Paolo


Re: [RFC PATCH 4/5] KVM: x86: enable unhandled MSR exits for vmx

2015-08-24 Thread Bandan Das
Peter Hornyack peterhorny...@google.com writes:

 Set the vm's unhandled_msr_exits flag when user space calls the
 KVM_ENABLE_CAP ioctl with KVM_CAP_UNHANDLED_MSR_EXITS. After kvm fails
 to handle a guest rdmsr or wrmsr, check this flag and exit to user space
 with KVM_EXIT_MSR rather than immediately injecting a GP fault.

 On reentry into kvm, use the complete_userspace_io callback path to call
 vmx_complete_userspace_msr. Complete the MSR access if user space was
 able to handle it successfully, or fail the MSR access and inject a GP
 fault if user space could not handle the access.

...
 +static int vmx_complete_userspace_msr(struct kvm_vcpu *vcpu)
 +{
 + struct msr_data msr;
 +
 +	if (vcpu->run->msr.index != vcpu->arch.regs[VCPU_REGS_RCX]) {
 +		pr_debug("msr.index 0x%x changed, does not match ecx 0x%lx\n",
 +			 vcpu->run->msr.index,
 +			 vcpu->arch.regs[VCPU_REGS_RCX]);
 +		goto err_out;
 +	}
 +	msr.index = vcpu->run->msr.index;
 +	msr.data = vcpu->run->msr.data;
 +	msr.host_initiated = false;
 +
 +	switch (vcpu->run->msr.direction) {
 +	case KVM_EXIT_MSR_RDMSR:
 +		if (vcpu->run->msr.handled == KVM_EXIT_MSR_HANDLED)
 +			complete_rdmsr(vcpu, &msr);
 +		else
 +			fail_rdmsr(vcpu, &msr);
 +		break;

Should we check for MSR_UNHANDLED? It could be a debugging relief if userspace is
filling it up with random crap! I think it should be ok if the trace function
catches that too.

 +	case KVM_EXIT_MSR_WRMSR:
 +		if (vcpu->run->msr.handled == KVM_EXIT_MSR_HANDLED)
 +			complete_wrmsr(vcpu, &msr);
 +		else
 +			fail_wrmsr(vcpu, &msr);
 +		break;
 +	default:
 +		pr_debug("bad msr.direction %u\n", vcpu->run->msr.direction);
 +		goto err_out;
 +	}
 +
 +	return 1;
 +err_out:
 +	vcpu->run->exit_reason = KVM_EXIT_MSR;
 +	vcpu->run->msr.direction = KVM_EXIT_MSR_COMPLETION_FAILED;
 +	return 0;
 +}
 +
 +/*
 + * Returns 1 if the rdmsr handling is complete; returns 0 if kvm should exit 
 to
 + * user space to handle this rdmsr.
 + */
  static int handle_rdmsr(struct kvm_vcpu *vcpu)
  {
 	u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
 @@ -5499,6 +5547,16 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
 	msr_info.index = ecx;
 	msr_info.host_initiated = false;
 	if (vmx_get_msr(vcpu, &msr_info)) {
 +		if (vcpu->kvm->arch.unhandled_msr_exits) {
 +			vcpu->run->exit_reason = KVM_EXIT_MSR;
 +			vcpu->run->msr.direction = KVM_EXIT_MSR_RDMSR;
 +			vcpu->run->msr.index = msr_info.index;
 +			vcpu->run->msr.data = 0;
 +			vcpu->run->msr.handled = 0;
 +			vcpu->arch.complete_userspace_io =
 +				vmx_complete_userspace_msr;
 +			return 0;
 +		}
 		fail_rdmsr(vcpu, &msr_info);
 		return 1;
   }
 @@ -5508,6 +5566,10 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
   return 1;
  }
  
 +/*
 + * Returns 1 if the wrmsr handling is complete; returns 0 if kvm should exit 
 to
 + * user space to handle this wrmsr.
 + */
  static int handle_wrmsr(struct kvm_vcpu *vcpu)
  {
   struct msr_data msr;
 @@ -5519,6 +5581,16 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
   msr.index = ecx;
   msr.host_initiated = false;
 	if (kvm_set_msr(vcpu, &msr) != 0) {
 +		if (vcpu->kvm->arch.unhandled_msr_exits) {

I think kvm should ultimately decide which msrs it can let userspace
handle even if userspace has explicitly enabled the ioctl.

Bandan

 +			vcpu->run->exit_reason = KVM_EXIT_MSR;
 +			vcpu->run->msr.direction = KVM_EXIT_MSR_WRMSR;
 +			vcpu->run->msr.index = msr.index;
 +			vcpu->run->msr.data = msr.data;
 +			vcpu->run->msr.handled = 0;
 +			vcpu->arch.complete_userspace_io =
 +				vmx_complete_userspace_msr;
 +			return 0;
 +		}
 		fail_wrmsr(vcpu, &msr);
   return 1;
   }
 @@ -8163,7 +8235,7 @@ static bool vmx_xsaves_supported(void)
  
  static bool vmx_msr_exits_supported(void)
  {
 - return false;
 + return true;
  }
  
  static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index 4bbc2a1676c9..5c22f4655741 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -2461,6 +2461,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
 ext)
   case KVM_CAP_ENABLE_CAP_VM:
   case KVM_CAP_DISABLE_QUIRKS:
   case KVM_CAP_SET_BOOT_CPU_ID:
 + case KVM_CAP_UNHANDLED_MSR_EXITS:
  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
   case KVM_CAP_ASSIGN_DEV_IRQ:
   case KVM_CAP_PCI_2_3:
 @@ -3568,6 

Re: [RFC PATCH 3/5] KVM: x86: add msr_exits_supported to kvm_x86_ops

2015-08-24 Thread Bandan Das
Peter Hornyack peterhorny...@google.com writes:

 msr_exits_supported will be checked when user space attempts to enable
 the KVM_CAP_UNHANDLED_MSR_EXITS capability for the vm. This is needed
 because MSR exit support will be implemented for vmx but not svm later
 in this patchset.

Is svm future work? :) Are there any such svm msrs?


 Signed-off-by: Peter Hornyack peterhorny...@google.com
 ---
  arch/x86/include/asm/kvm_host.h | 1 +
  arch/x86/kvm/svm.c  | 6 ++
  arch/x86/kvm/vmx.c  | 6 ++
  3 files changed, 13 insertions(+)

 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
 index c12e845f59e6..a6e145b1e271 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -854,6 +854,7 @@ struct kvm_x86_ops {
   void (*handle_external_intr)(struct kvm_vcpu *vcpu);
   bool (*mpx_supported)(void);
   bool (*xsaves_supported)(void);
 + bool (*msr_exits_supported)(void);
  
   int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
  
 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
 index 74d825716f4f..bcbb56f49b9f 100644
 --- a/arch/x86/kvm/svm.c
 +++ b/arch/x86/kvm/svm.c
 @@ -4249,6 +4249,11 @@ static bool svm_xsaves_supported(void)
   return false;
  }
  
 +static bool svm_msr_exits_supported(void)
 +{
 + return false;
 +}
 +
  static bool svm_has_wbinvd_exit(void)
  {
   return true;
 @@ -4540,6 +4545,7 @@ static struct kvm_x86_ops svm_x86_ops = {
   .invpcid_supported = svm_invpcid_supported,
   .mpx_supported = svm_mpx_supported,
   .xsaves_supported = svm_xsaves_supported,
 + .msr_exits_supported = svm_msr_exits_supported,
  
   .set_supported_cpuid = svm_set_supported_cpuid,
  
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index acc38e27d221..27fec385d79d 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -8161,6 +8161,11 @@ static bool vmx_xsaves_supported(void)
   SECONDARY_EXEC_XSAVES;
  }
  
 +static bool vmx_msr_exits_supported(void)
 +{
 + return false;
 +}
 +
  static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
  {
   u32 exit_intr_info;
 @@ -10413,6 +10418,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
   .handle_external_intr = vmx_handle_external_intr,
   .mpx_supported = vmx_mpx_supported,
   .xsaves_supported = vmx_xsaves_supported,
 + .msr_exits_supported = vmx_msr_exits_supported,
  
   .check_nested_events = vmx_check_nested_events,


Re: [Bug 103321] New: NPT page attribute support causes extreme slowdown

2015-08-24 Thread Sebastian Schütte

 Please try this:
Still no difference

I guess the trace_kvm_cr_write() call in that patch was supposed to
trigger kvm_cr entries while tracing? I couldn't find any, though; the
only entries containing "cr" within the output of trace-cmd report
were kvm_exit ones that looked quite similar to the previous dump.

If you still think it's worth it I'll send you the whole ~10MB
compressed trace.



Re: [RFC PATCH 0/5] KVM: x86: exit to user space on unhandled MSR accesses

2015-08-24 Thread Bandan Das
Peter Hornyack peterhorny...@google.com writes:

 On Wed, Aug 19, 2015 at 2:43 PM, Bandan Das b...@redhat.com wrote:
 Peter Hornyack peterhorny...@google.com writes:

 There are numerous MSRs that kvm does not currently handle. On Intel
 platforms we have observed guest VMs accessing some of these MSRs (for
 example, MSR_PLATFORM_INFO) and behaving poorly (to the point of guest OS
 crashes) when they receive a GP fault because the MSR is not emulated. This
 patchset adds a new kvm exit path for unhandled MSR accesses that allows
 user space to emulate additional MSRs without having to implement them in
 kvm.

 ^^ So, I am trying to understand this motivation. A while back, when
 a patch was posted to emulate MSR_PLATFORM_INFO, it was rejected.
 Why? Because it seemed impossible to emulate it correctly (most concerns
 were related to migration iirc). Although I haven't reviewed all patches
 in this series yet, what I understand from the above message is: it's ok
 to emulate MSR_PLATFORM_INFO *incorrectly* as long as we are doing it in
 user space.

 I understand the part where it makes sense to move stuff to userspace.
 But if kvm isn't emulating certain msrs yet, either we should add support,
 or they haven't been added because it's not possible to emulate them
 correctly. The logic that it's probably ok to let userspace do the
 (incorrect) emulation is something I don't understand.

 I wasn't aware of the past discussion of MSR_PLATFORM_INFO, I'll
 search for it - thanks for pointing it out.

 MSR_PLATFORM_INFO is just one example though. In addition to the
 benefits I already mentioned for user space MSR emulation (agility,
 reduced attack surface), this patchset offers a benefit even if user
 space declines to emulate the MSR by returning into kvm with
 KVM_EXIT_MSR_UNHANDLED: it makes it easier to monitor in the first
 place, via logging mechanisms in user space instead of in the kernel,
 for when guests are accessing MSRs that kvm doesn't implement.

Agreed, that would be more useful then how it's being handled currently.

 This patchset has already revealed a few other MSRs (IA32_MPERF,
 IA32_APERF) that guests in our test environment are accessing but
 which kvm doesn't implement yet. I haven't yet investigated deeply
 what these MSRs are used for and how they might be emulated (or if
 they can't actually be emulated correctly), but if we do discover an
 unimplemented non-performance-sensitive MSR that we could emulate
 correctly, with this patchset we would quickly just implement it in
 user space.

Ok, makes sense. But it seems like this could be an area of abuse as
well. For example, this could lessen the incentive to add emulation for
new msrs in kvm, with them just emulated in userspace instead?

However, as you mentioned, it's probably ok as long as there's a clear
demarcation of which msrs this is done for. This shouldn't be
the default behavior.

On a different note, if there's a related qemu change, can you please
point me to it ?

Thanks,
Bandan

 It seems like the next in line
 is to let userspace emulate their own version of unimplemented x86
 instructions.


 The core of the patchset modifies the vmx handle_rdmsr and handle_wrmsr
 functions to exit to user space on MSR reads/writes that kvm can't handle
 itself. Then, on the return path into kvm we check for outstanding user
 space MSR completions and either complete the MSR access successfully or
 inject a GP fault as kvm would do by default. This new exit path must be
 enabled for the vm via the KVM_CAP_UNHANDLED_MSR_EXITS capability.

 In the future we plan to extend this functionality to allow user space to
 register the MSRs that it would like to handle itself, even if kvm already
 provides an implementation. In the long-term we will move the

 I seriously hope we don't do this!

 Bandan
 implementation of all non-performance-sensitive MSRs to user space,
 reducing the potential attack surface of kvm and allowing us to respond to
 bugs more quickly.

 This patchset has been tested with our non-qemu user space hypervisor on
 vmx platforms; svm support is not implemented.

 Peter Hornyack (5):
   KVM: x86: refactor vmx rdmsr/wrmsr completion into new functions
   KVM: add KVM_EXIT_MSR exit reason and capability.
   KVM: x86: add msr_exits_supported to kvm_x86_ops
   KVM: x86: enable unhandled MSR exits for vmx
   KVM: x86: add trace events for unhandled MSR exits

  Documentation/virtual/kvm/api.txt |  48 +++
  arch/x86/include/asm/kvm_host.h   |   2 +
  arch/x86/kvm/svm.c|   6 ++
  arch/x86/kvm/trace.h  |  28 +
  arch/x86/kvm/vmx.c| 126 
 ++
  arch/x86/kvm/x86.c|  13 
  include/trace/events/kvm.h|   2 +-
  include/uapi/linux/kvm.h  |  14 +
  8 files changed, 227 insertions(+), 12 deletions(-)
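
For reference, a hypothetical user-space loop for the exit protocol this
cover letter describes, assuming the kvm_run fields the series introduces
(msr.direction, msr.index, msr.data, msr.handled); emulate_rdmsr() and
emulate_wrmsr() are stand-in names for the VMM's own MSR models:

	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
		if (run->exit_reason != KVM_EXIT_MSR)
			continue;	/* handling of other exits elided */

		if (run->msr.direction == KVM_EXIT_MSR_RDMSR)
			run->msr.handled = emulate_rdmsr(run->msr.index, &run->msr.data)
					   ? KVM_EXIT_MSR_HANDLED : KVM_EXIT_MSR_UNHANDLED;
		else
			run->msr.handled = emulate_wrmsr(run->msr.index, run->msr.data)
					   ? KVM_EXIT_MSR_HANDLED : KVM_EXIT_MSR_UNHANDLED;
		/* on reentry kvm completes the access or injects a #GP */
	}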

Re: [PATCH 2/3] KVM: dynamise halt_poll_ns adjustment

2015-08-24 Thread Wanpeng Li

Hi David,
On 8/25/15 1:00 AM, David Matlack wrote:

On Mon, Aug 24, 2015 at 5:53 AM, Wanpeng Li wanpeng...@hotmail.com wrote:

There are two new kernel parameters for changing the halt_poll_ns:
halt_poll_ns_grow and halt_poll_ns_shrink. halt_poll_ns_grow affects
halt_poll_ns when an interrupt arrives and halt_poll_ns_shrink
does it when idle VCPU is detected.

   halt_poll_ns_shrink/ |
   halt_poll_ns_grow    | interrupt arrives    | idle VCPU is detected
   ---------------------+----------------------+-----------------------
   < 1                  |  = halt_poll_ns      |  = 0
   < halt_poll_ns       | *= halt_poll_ns_grow | /= halt_poll_ns_shrink
   otherwise            | += halt_poll_ns_grow | -= halt_poll_ns_shrink

A third new parameter, halt_poll_ns_max, controls the maximal halt_poll_ns;
it is internally rounded down to a closest multiple of halt_poll_ns_grow.

I like the idea of growing and shrinking halt_poll_ns, but I'm not sure
we grow and shrink in the right places here. For example, if vcpu->halt_poll_ns
gets down to 0, I don't see how it can then grow back up.


This is already done in __grow_halt_poll_ns().


This might work better:
   if (poll successfully for interrupt): stay the same
   else if (length of kvm_vcpu_block is longer than halt_poll_ns_max): shrink
   else if (length of kvm_vcpu_block is less than halt_poll_ns_max): grow

where halt_poll_ns_max is something reasonable, like 2 millisecond.

You get diminishing returns from halt polling as the length of the
halt gets longer (halt polling only reduces halt latency by 10-15 us).
So there's little benefit to polling longer than a few milliseconds.


Great idea, David! Thanks for your review; I will implement it in the
next version. :-)


Regards,
Wanpeng Li




Signed-off-by: Wanpeng Li wanpeng...@hotmail.com
---
  virt/kvm/kvm_main.c | 81 -
  1 file changed, 80 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a122b52..bcfbd35 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -66,9 +66,28 @@
  MODULE_AUTHOR("Qumranet");
  MODULE_LICENSE("GPL");

-static unsigned int halt_poll_ns;
+#define KVM_HALT_POLL_NS  500000
+#define KVM_HALT_POLL_NS_GROW   2
+#define KVM_HALT_POLL_NS_SHRINK 0
+#define KVM_HALT_POLL_NS_MAX \
+   INT_MAX / KVM_HALT_POLL_NS_GROW
+
+static unsigned int halt_poll_ns = KVM_HALT_POLL_NS;
  module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);

+/* Default doubles per-vcpu halt_poll_ns. */
+static int halt_poll_ns_grow = KVM_HALT_POLL_NS_GROW;
+module_param(halt_poll_ns_grow, int, S_IRUGO);
+
+/* Default resets per-vcpu halt_poll_ns. */
+int halt_poll_ns_shrink = KVM_HALT_POLL_NS_SHRINK;
+module_param(halt_poll_ns_shrink, int, S_IRUGO);
+
+/* Default is to compute the maximum so we can never overflow. */
+unsigned int halt_poll_ns_actual_max = KVM_HALT_POLL_NS_MAX;
+unsigned int halt_poll_ns_max = KVM_HALT_POLL_NS_MAX;
+module_param(halt_poll_ns_max, int, S_IRUGO);
+
  /*
   * Ordering of locks:
   *
@@ -1907,6 +1926,62 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, 
gfn_t gfn)
  }
  EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);

+static unsigned int __grow_halt_poll_ns(unsigned int val)
+{
+	if (halt_poll_ns_grow < 1)
+		return halt_poll_ns;
+
+	val = min(val, halt_poll_ns_actual_max);
+
+	if (val == 0)
+		return halt_poll_ns;
+
+	if (halt_poll_ns_grow < halt_poll_ns)
+		val *= halt_poll_ns_grow;
+	else
+		val += halt_poll_ns_grow;
+
+	return val;
+}
+
+static unsigned int __shrink_halt_poll_ns(int val, int modifier, int minimum)
+{
+	if (modifier < 1)
+		return 0;
+
+	if (modifier < halt_poll_ns)
+		val /= modifier;
+	else
+		val -= modifier;
+
+	return val;
+}
+
+static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+	vcpu->halt_poll_ns = __grow_halt_poll_ns(vcpu->halt_poll_ns);
+}
+
+static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+	vcpu->halt_poll_ns = __shrink_halt_poll_ns(vcpu->halt_poll_ns,
+			halt_poll_ns_shrink, halt_poll_ns);
+}
+
+/*
+ * halt_poll_ns_actual_max is computed to be one grow_halt_poll_ns() below
+ * halt_poll_ns_max. (See __grow_halt_poll_ns for the reason.)
+ * This prevents overflows, because the halt_poll_ns values are ints.
+ * halt_poll_ns_max is effectively rounded down to a multiple of
+ * halt_poll_ns_grow in this process.
+ */
+static void update_halt_poll_ns_actual_max(void)
+{
+   halt_poll_ns_actual_max =
+   __shrink_halt_poll_ns(max(halt_poll_ns_max, halt_poll_ns),
+   halt_poll_ns_grow, INT_MIN);
+}
+
  static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
  {
 if (kvm_arch_vcpu_runnable(vcpu)) {
@@ -1941,6 +2016,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
  */
 if (kvm_vcpu_check_block(vcpu) 

Re: [PATCH 1/3] KVM: make halt_poll_ns per-VCPU

2015-08-24 Thread Wanpeng Li

On 8/25/15 12:59 AM, David Matlack wrote:

On Mon, Aug 24, 2015 at 5:53 AM, Wanpeng Li wanpeng...@hotmail.com wrote:

Change halt_poll_ns into per-VCPU variable, seeded from module parameter,
to allow greater flexibility.

You should also change kvm_vcpu_block to read halt_poll_ns from
the vcpu instead of the module parameter.


Indeed, thanks for your review. :-)

Regards,
Wanpeng Li




Signed-off-by: Wanpeng Li wanpeng...@hotmail.com
---
  include/linux/kvm_host.h | 1 +
  virt/kvm/kvm_main.c  | 1 +
  2 files changed, 2 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 81089cf..1bef9e2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -242,6 +242,7 @@ struct kvm_vcpu {
 int sigset_active;
 sigset_t sigset;
 struct kvm_vcpu_stat stat;
+   unsigned int halt_poll_ns;

  #ifdef CONFIG_HAS_IOMEM
 int mmio_needed;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d8db2f8f..a122b52 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -217,6 +217,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, 
unsigned id)
 	vcpu->kvm = kvm;
 	vcpu->vcpu_id = id;
 	vcpu->pid = NULL;
+	vcpu->halt_poll_ns = halt_poll_ns;
 	init_waitqueue_head(&vcpu->wq);
 kvm_async_pf_vcpu_init(vcpu);

--
1.9.1





Re: [PATCH 2/3] kvm: don't register wildcard MMIO EVENTFD on two buses

2015-08-24 Thread Jason Wang


On 08/24/2015 10:05 PM, Cornelia Huck wrote:
 On Mon, 24 Aug 2015 11:29:29 +0800
 Jason Wang jasow...@redhat.com wrote:

 On 08/21/2015 05:29 PM, Cornelia Huck wrote:
 On Fri, 21 Aug 2015 16:03:52 +0800
 Jason Wang jasow...@redhat.com wrote:
 @@ -850,9 +845,15 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct 
 kvm_ioeventfd *args)
  Unfortunately snipped by diff, but the check here is on !len && !PIO,
  which only does the desired thing as VIRTIO_CCW always uses len == 8.
  Should the check be for !len && MMIO instead?
  I think the answer depends on whether len == 0 is valid for ccw. If not,
  we can fail the assign earlier, since even without this patch, if
  userspace tries to register a dev with len equal to zero, it will also
  be registered to KVM_FAST_MMIO_BUS. If yes, we need the check you
  suggested here.
 I don't think len != 8 makes much sense for the way ioeventfd is
 defined for ccw (we handle hypercalls with a payload specifying the
 device), but we currently don't actively fence it.

 But regardless, I'd prefer to decide directly based on whether userspace
 actually tried to register for the mmio bus.

Ok.


	ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS,
				      p->addr, 0, &p->dev);
	if (ret < 0)
 -		goto register_fail;
 +		goto unlock_fail;
 +	} else {
 +		ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
 +					      &p->dev);
 +		if (ret < 0)
 +			goto unlock_fail;
	}
 Hm... maybe the following would be more obvious:

 my_bus = (p->length == 0) && (bus_idx == KVM_MMIO_BUS) ? KVM_FAST_MMIO_BUS
	  : bus_idx;
 ret = kvm_io_bus_register_dev(kvm, my_bus, p->addr, p->length, &p->dev);

  
 +
	kvm->buses[bus_idx]->ioeventfd_count++;
	list_add_tail(&p->list, &kvm->ioeventfds);
 (...)

 @@ -900,10 +899,11 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct 
 kvm_ioeventfd *args)
		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;
 
 -		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
		if (!p->length) {
			kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS,
						  &p->dev);
 +		} else {
 +			kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
		}
 Similar comments here... do you want to check for bus_idx ==
 KVM_MMIO_BUS as well?
 Good catch. I think keeping the original code as is will also be ok to
 solve this (with bus_idx changed to KVM_FAST_MMIO_BUS during
 registration if it was a wildcard mmio).
 Do you need to handle the ioeventfd_count changes on the fast mmio bus
 as well?

Yes. So actually, it needs some changes: checking the return value of
kvm_io_bus_unregister_dev() and deciding which bus the device belongs to.


	kvm->buses[bus_idx]->ioeventfd_count--;
ioeventfd_release(p);
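
Put together, a sketch of the deassign-side pairing discussed above:
derive the bus the device was actually registered on, and keep that same
bus's ioeventfd_count in sync (names follow the quoted kvm code; the
exact placement inside kvm_deassign_ioeventfd is assumed):

	if (p->length == 0 && bus_idx == KVM_MMIO_BUS)
		bus_idx = KVM_FAST_MMIO_BUS;

	kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
	kvm->buses[bus_idx]->ioeventfd_count--;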
