[PATCH v1] ARM/ARM64: support KVM_IOEVENTFD

2014-11-18 Thread Ming Lei
>From Documentation/virtual/kvm/api.txt, all ARCHs should support
ioeventfd.

Also ARM VM has supported PCI bus already, and ARM64 will do too,
ioeventfd is required for some popular devices, like virtio-blk
and virtio-scsi dataplane in QEMU.

Without this patch, virtio-blk-pci dataplane can't work in QEMU.

This patch has been tested on both ARM and ARM64.

Signed-off-by: Ming Lei 
---
v1:
- make eventfd.o built in ARM64
 arch/arm/kvm/Kconfig|1 +
 arch/arm/kvm/Makefile   |2 +-
 arch/arm/kvm/arm.c  |1 +
 arch/arm/kvm/mmio.c |   19 +++
 arch/arm64/kvm/Kconfig  |1 +
 arch/arm64/kvm/Makefile |2 +-
 6 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 466bd29..25bd83a 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -23,6 +23,7 @@ config KVM
select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_MMIO
select KVM_ARM_HOST
+   select HAVE_KVM_EVENTFD
depends on ARM_VIRT_EXT && ARM_LPAE
---help---
  Support hosting virtualized guest machines. You will also
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index f7057ed..859db09 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -15,7 +15,7 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 
 KVM := ../../../virt/kvm
-kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9e193c8..d90d989 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -172,6 +172,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IRQCHIP:
r = vgic_present;
break;
+   case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 4cb5a93..ee332a7 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -162,6 +162,21 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t 
fault_ipa,
return 0;
 }
 
+static int handle_io_bus_rw(struct kvm_vcpu *vcpu, gpa_t addr,
+   int len, void *val, bool write)
+{
+   int idx, ret;
+
+   idx = srcu_read_lock(&vcpu->kvm->srcu);
+   if (write)
+   ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, val);
+   else
+   ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, val);
+   srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+   return ret;
+}
+
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 phys_addr_t fault_ipa)
 {
@@ -200,6 +215,10 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run 
*run,
if (vgic_handle_mmio(vcpu, run, &mmio))
return 1;
 
+   if (!handle_io_bus_rw(vcpu, fault_ipa, mmio.len, &mmio.data,
+   mmio.is_write))
+   return 1;
+
kvm_prepare_mmio(run, &mmio);
return 0;
 }
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 8ba85e9..642f57c 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -26,6 +26,7 @@ config KVM
select KVM_ARM_HOST
select KVM_ARM_VGIC
select KVM_ARM_TIMER
+   select HAVE_KVM_EVENTFD
---help---
  Support hosting virtualized guest machines.
 
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 32a0961..2e6b827 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -11,7 +11,7 @@ ARM=../../../arch/arm/kvm
 
 obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
 
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o 
$(KVM)/eventfd.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] ARM/ARM64: support KVM_IOEVENTFD

2014-11-18 Thread Ming Lei
On Tue, Nov 18, 2014 at 11:24 PM, Ming Lei  wrote:
> From Documentation/virtual/kvm/api.txt, all ARCHs should support
> ioeventfd.
>
> Also ARM VM has supported PCI bus already, and ARM64 will do too,
> ioeventfd is required for some popular devices, like virtio-blk
> and virtio-scsi dataplane in QEMU.
>
> Without this patch, virtio-blk-pci dataplane can't work in QEMU.

Please ignore this one because eventfd.o is missed to build in arm64.

>
> Signed-off-by: Ming Lei 
> ---
>  arch/arm/kvm/Kconfig   |1 +
>  arch/arm/kvm/Makefile  |2 +-
>  arch/arm/kvm/arm.c |1 +
>  arch/arm/kvm/mmio.c|   19 +++
>  arch/arm64/kvm/Kconfig |1 +
>  5 files changed, 23 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
> index 466bd29..25bd83a 100644
> --- a/arch/arm/kvm/Kconfig
> +++ b/arch/arm/kvm/Kconfig
> @@ -23,6 +23,7 @@ config KVM
> select HAVE_KVM_CPU_RELAX_INTERCEPT
> select KVM_MMIO
> select KVM_ARM_HOST
> +   select HAVE_KVM_EVENTFD
> depends on ARM_VIRT_EXT && ARM_LPAE
> ---help---
>   Support hosting virtualized guest machines. You will also
> diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
> index f7057ed..859db09 100644
> --- a/arch/arm/kvm/Makefile
> +++ b/arch/arm/kvm/Makefile
> @@ -15,7 +15,7 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
>  AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
>
>  KVM := ../../../virt/kvm
> -kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
> +kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
>
>  obj-y += kvm-arm.o init.o interrupts.o
>  obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 9e193c8..d90d989 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -172,6 +172,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
> ext)
> case KVM_CAP_IRQCHIP:
> r = vgic_present;
> break;
> +   case KVM_CAP_IOEVENTFD:
> case KVM_CAP_DEVICE_CTRL:
> case KVM_CAP_USER_MEMORY:
> case KVM_CAP_SYNC_MMU:
> diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
> index 4cb5a93..ee332a7 100644
> --- a/arch/arm/kvm/mmio.c
> +++ b/arch/arm/kvm/mmio.c
> @@ -162,6 +162,21 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t 
> fault_ipa,
> return 0;
>  }
>
> +static int handle_io_bus_rw(struct kvm_vcpu *vcpu, gpa_t addr,
> +   int len, void *val, bool write)
> +{
> +   int idx, ret;
> +
> +   idx = srcu_read_lock(&vcpu->kvm->srcu);
> +   if (write)
> +   ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, 
> val);
> +   else
> +   ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, 
> val);
> +   srcu_read_unlock(&vcpu->kvm->srcu, idx);
> +
> +   return ret;
> +}
> +
>  int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
>  phys_addr_t fault_ipa)
>  {
> @@ -200,6 +215,10 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run 
> *run,
> if (vgic_handle_mmio(vcpu, run, &mmio))
> return 1;
>
> +   if (!handle_io_bus_rw(vcpu, fault_ipa, mmio.len, &mmio.data,
> +   mmio.is_write))
> +   return 1;
> +
> kvm_prepare_mmio(run, &mmio);
> return 0;
>  }
> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> index 8ba85e9..642f57c 100644
> --- a/arch/arm64/kvm/Kconfig
> +++ b/arch/arm64/kvm/Kconfig
> @@ -26,6 +26,7 @@ config KVM
> select KVM_ARM_HOST
> select KVM_ARM_VGIC
> select KVM_ARM_TIMER
> +   select HAVE_KVM_EVENTFD
> ---help---
>   Support hosting virtualized guest machines.
>
> --
> 1.7.9.5
>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost + multiqueue + RSS question.

2014-11-18 Thread Jason Wang
On 11/18/2014 07:05 PM, Michael S. Tsirkin wrote:
> On Tue, Nov 18, 2014 at 11:37:03AM +0800, Jason Wang wrote:
>> > On 11/17/2014 07:58 PM, Michael S. Tsirkin wrote:
>>> > > On Mon, Nov 17, 2014 at 01:22:07PM +0200, Gleb Natapov wrote:
> > >> > On Mon, Nov 17, 2014 at 12:38:16PM +0200, Michael S. Tsirkin wrote:
>>> > >>> > > On Mon, Nov 17, 2014 at 09:44:23AM +0200, Gleb Natapov wrote:
> >  > > > On Sun, Nov 16, 2014 at 08:56:04PM +0200, Michael S. 
> >  > > > Tsirkin wrote:
>>> > > > > > > On Sun, Nov 16, 2014 at 06:18:18PM +0200, Gleb 
>>> > > > > > > Natapov wrote:
> > >> > > > > > Hi Michael,
> > >> > > > > > 
> > >> > > > > >  I am playing with vhost multiqueue capability 
> > >> > > > > > and have a question about
> > >> > > > > > vhost multiqueue and RSS (receive side 
> > >> > > > > > steering). My setup has Mellanox
> > >> > > > > > ConnectX-3 NIC which supports multiqueue and 
> > >> > > > > > RSS. Network related
> > >> > > > > > parameters for qemu are:
> > >> > > > > > 
> > >> > > > > >-netdev 
> > >> > > > > > tap,id=hn0,script=qemu-ifup.sh,vhost=on,queues=4
> > >> > > > > >-device 
> > >> > > > > > virtio-net-pci,netdev=hn0,id=nic1,mq=on,vectors=10
> > >> > > > > > 
> > >> > > > > > In a guest I ran "ethtool -L eth0 combined 4" 
> > >> > > > > > to enable multiqueue.
> > >> > > > > > 
> > >> > > > > > I am running one tcp stream into the guest 
> > >> > > > > > using iperf. Since there is
> > >> > > > > > only one tcp stream I expect it to be handled 
> > >> > > > > > by one queue only but
> > >> > > > > > this seams to be not the case. ethtool -S on a 
> > >> > > > > > host shows that the
> > >> > > > > > stream is handled by one queue in the NIC, 
> > >> > > > > > just like I would expect,
> > >> > > > > > but in a guest all 4 virtio-input interrupt 
> > >> > > > > > are incremented. Am I
> > >> > > > > > missing any configuration?
>>> > > > > > > 
>>> > > > > > > I don't see anything obviously wrong with what you 
>>> > > > > > > describe.
>>> > > > > > > Maybe, somehow, same irqfd got bound to multiple 
>>> > > > > > > MSI vectors?
> >  > > > It does not look like this is what is happening judging 
> >  > > > by the way
> >  > > > interrupts are distributed between queues. They are not 
> >  > > > distributed
> >  > > > uniformly and often I see one queue gets most interrupt 
> >  > > > and others get
> >  > > > much less and then it changes.
>>> > >>> > > 
>>> > >>> > > Weird. It would happen if you transmitted from multiple CPUs.
>>> > >>> > > You did pin iperf to a single CPU within guest, did you not?
>>> > >>> > > 
> > >> > No, I didn't because I didn't expect it to matter for input 
> > >> > interrupts.
> > >> > When I run iperf on a host rx queue that receives all packets 
> > >> > depends
> > >> > only on a connection itself, not on a cpu iperf is running on (I 
> > >> > tested
> > >> > that).
>>> > > This really depends on the type of networking card you have
>>> > > on the host, and how it's configured.
>>> > >
>>> > > I think you will get something more closely resembling this
>>> > > behaviour if you enable RFS in host.
>>> > >
> > >> > When I pin iperf in a guest I do indeed see that all interrupts
> > >> > are arriving to the same irq vector. Is a number after virtio-input
> > >> > in /proc/interrupt any indication of a queue a packet arrived to 
> > >> > (on
> > >> > a host I can use ethtool -S to check what queue receives packets, 
> > >> > but
> > >> > unfortunately this does not work for virtio nic in a guest)?
>>> > > I think it is.
>>> > >
> > >> > Because if
> > >> > it is the way RSS works in virtio is not how it works on a host 
> > >> > and not
> > >> > what I would expect after reading about RSS. The queue a packets 
> > >> > arrives
> > >> > to should be calculated by hashing fields from a packet header 
> > >> > only.
>>> > > Yes, what virtio has is not RSS - it's an accelerated RFS really.
>> > 
>> > Strictly speaking, not aRFS. aRFS requires a programmable filter and
>> > needs driver to fill the filter on demand. For virtio-net, this is done
>> > automatically in host side (tun/tap). There's no guest involvement.
> Well guest affects the filter by sending tx packets.
>


Yes, it is.
--
To unsubscribe

Re: [question] kvm fully support vga adapter pass-through ?

2014-11-18 Thread Alex Williamson
On Wed, 2014-11-19 at 09:56 +0800, Zhang Haoyu wrote:
> Hi all,
> 
> Does the combination of qemu-2.0.1 and linux-3.10 fully support
> direct-assign vga adapters to vm?

No, at best I'd call it experimental

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[question] kvm fully support vga adapter pass-through ?

2014-11-18 Thread Zhang Haoyu
Hi all,

Does the combination of qemu-2.0.1 and linux-3.10 fully support direct-assign 
vga adapters to vm?

Thanks,
Zhang Haoyu

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2][PATCH] kvm: x86: vmx: remove MMIO_MAX_GEN

2014-11-18 Thread Tiejun Chen
Actually MMIO_MAX_GEN is same as MMIO_GEN_MASK.

Signed-off-by: Tiejun Chen 
---
v2:

* Instead, we'd like to keep MMIO_GEN_MASK.

 arch/x86/kvm/mmu.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ac1c4de..4ea0dcb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -214,13 +214,12 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 #define MMIO_GEN_LOW_SHIFT 10
 #define MMIO_GEN_LOW_MASK  ((1 << MMIO_GEN_LOW_SHIFT) - 2)
 #define MMIO_GEN_MASK  ((1 << MMIO_GEN_SHIFT) - 1)
-#define MMIO_MAX_GEN   ((1 << MMIO_GEN_SHIFT) - 1)
 
 static u64 generation_mmio_spte_mask(unsigned int gen)
 {
u64 mask;
 
-   WARN_ON(gen > MMIO_MAX_GEN);
+   WARN_ON(gen & ~MMIO_GEN_MASK);
 
mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
@@ -263,13 +262,13 @@ static bool is_mmio_spte(u64 spte)
 
 static gfn_t get_mmio_spte_gfn(u64 spte)
 {
-   u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+   u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
return (spte & ~mask) >> PAGE_SHIFT;
 }
 
 static unsigned get_mmio_spte_access(u64 spte)
 {
-   u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+   u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
return (spte & ~mask) & ~PAGE_MASK;
 }
 
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: Fix lost interrupt on irr_pending race

2014-11-18 Thread Radim Krčmář
2014-11-18 20:51+0100, Paolo Bonzini:
> On 16/11/2014 22:49, Nadav Amit wrote:
> > @@ -374,13 +378,15 @@ static inline void apic_clear_irr(int vec, struct 
> > kvm_lapic *apic)
> > +   apic->irr_pending = false;
> > +   apic_clear_vector(vec, apic->regs + APIC_IRR);
> > +   if (apic_search_irr(apic) != -1)
> > +   apic->irr_pending = true;
> > }
> >  }
> 
> This is even more tricky than it looks like. :)
> 
> No one can concurrently look at apic->irr_pending while it is false, in
> particular apic_sync_pv_eoi_to_guest cannot enable PV EOI by mistake
> just because it sees a false irr_pending.  So it's okay if it is first
> set to false and then to true.

Yeah, using 'atomic_t irr_count' instead seems less error-prone to me;
and it would usually end in temporary unpublished-patches tree, but you
can take a look at the idea:

---8<---
KVM: x86: convert irr_pending to atomic irr_count

We've already had a buggy race with it ... atomic_t is simple to grasp
and harder to misuse, so we can switch without losing much performance.
(Read is normal and clear_irr does not parse APIC_IRR in exchange.
 We inflate lapic_struct by 3 bytes.)

Only two races remain now, which is the minimum without a proper lock,
atomic_t also makes their existence obvious on every use.

/__apic_test_and.*()/ aren't atomic, so we have to introduce new ones.
---
 arch/x86/kvm/lapic.c | 48 
 arch/x86/kvm/lapic.h |  4 ++--
 2 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e0e5642..e3169c5 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -102,6 +102,16 @@ static inline void apic_clear_vector(int vec, void *bitmap)
clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }
 
+static inline int apic_test_and_set_vector(int vec, void *bitmap)
+{
+   return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline int apic_test_and_clear_vector(int vec, void *bitmap)
+{
+   return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
 static inline int __apic_test_and_set_vector(int vec, void *bitmap)
 {
return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -341,12 +351,11 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 
 static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
 {
-   apic_set_vector(vec, apic->regs + APIC_IRR);
-   /*
-* irr_pending must be true if any interrupt is pending; set it after
-* APIC_IRR to avoid race with apic_clear_irr
-*/
-   apic->irr_pending = true;
+   if (apic_test_and_set_vector(vec, apic->regs + APIC_IRR))
+   return;
+
+   if (likely(!kvm_apic_vid_enabled(vcpu->kvm)))
+   atomic_inc(&apic->irr_count);
 }
 
 static inline int apic_search_irr(struct kvm_lapic *apic)
@@ -359,10 +368,10 @@ static inline int apic_find_highest_irr(struct kvm_lapic 
*apic)
int result;
 
/*
-* Note that irr_pending is just a hint. It will be always
-* true with virtual interrupt delivery enabled.
+* Note that irr_count is just a hint. It will be always
+* nonzero with virtual interrupt delivery enabled.
 */
-   if (!apic->irr_pending)
+   if (!atomic_read(&apic->irr_count))
return -1;
 
kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
@@ -376,18 +385,16 @@ static inline void apic_clear_irr(int vec, struct 
kvm_lapic *apic)
 {
struct kvm_vcpu *vcpu;
 
+   if(!apic_test_and_clear_vector(vec, apic->regs + APIC_IRR))
+   return;
+
vcpu = apic->vcpu;
 
-   if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) {
+   if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
/* try to update RVI */
-   apic_clear_vector(vec, apic->regs + APIC_IRR);
kvm_make_request(KVM_REQ_EVENT, vcpu);
-   } else {
-   apic->irr_pending = false;
-   apic_clear_vector(vec, apic->regs + APIC_IRR);
-   if (apic_search_irr(apic) != -1)
-   apic->irr_pending = true;
-   }
+   else
+   atomic_dec(&apic->irr_count);
 }
 
 static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
@@ -1522,7 +1529,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
}
-   apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
+   atomic_set(&apic->irr_count, kvm_apic_vid_enabled(vcpu->kvm));
apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm);
apic->highest_isr_cache = -1;
update_divide_count(apic);
@@ -1732,7 +1739,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
hrtimer_cancel(&apic->lapic_timer.timer);
update_divide_count(apic);
start_apic_timer(apic);
-   apic

Re: [PATCH] KVM: x86: Fix lost interrupt on irr_pending race

2014-11-18 Thread Paolo Bonzini


On 16/11/2014 22:49, Nadav Amit wrote:
> @@ -374,13 +378,15 @@ static inline void apic_clear_irr(int vec, struct 
> kvm_lapic *apic)
>  
>   vcpu = apic->vcpu;
>  
> - apic_clear_vector(vec, apic->regs + APIC_IRR);
> - if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
> + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) {
>   /* try to update RVI */
> + apic_clear_vector(vec, apic->regs + APIC_IRR);
>   kvm_make_request(KVM_REQ_EVENT, vcpu);
> - else {
> - vec = apic_search_irr(apic);
> - apic->irr_pending = (vec != -1);
> + } else {
> + apic->irr_pending = false;
> + apic_clear_vector(vec, apic->regs + APIC_IRR);
> + if (apic_search_irr(apic) != -1)
> + apic->irr_pending = true;
>   }
>  }

This is even more tricky than it looks like. :)

No one can concurrently look at apic->irr_pending while it is false, in
particular apic_sync_pv_eoi_to_guest cannot enable PV EOI by mistake
just because it sees a false irr_pending.  So it's okay if it is first
set to false and then to true.

I'll apply the patch tomorrow.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] ARM/ARM64: support KVM_IOEVENTFD

2014-11-18 Thread Ming Lei
>From Documentation/virtual/kvm/api.txt, all ARCHs should support
ioeventfd.

Also ARM VM has supported PCI bus already, and ARM64 will do too,
ioeventfd is required for some popular devices, like virtio-blk
and virtio-scsi dataplane in QEMU.

Without this patch, virtio-blk-pci dataplane can't work in QEMU.

Signed-off-by: Ming Lei 
---
 arch/arm/kvm/Kconfig   |1 +
 arch/arm/kvm/Makefile  |2 +-
 arch/arm/kvm/arm.c |1 +
 arch/arm/kvm/mmio.c|   19 +++
 arch/arm64/kvm/Kconfig |1 +
 5 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 466bd29..25bd83a 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -23,6 +23,7 @@ config KVM
select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_MMIO
select KVM_ARM_HOST
+   select HAVE_KVM_EVENTFD
depends on ARM_VIRT_EXT && ARM_LPAE
---help---
  Support hosting virtualized guest machines. You will also
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index f7057ed..859db09 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -15,7 +15,7 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 
 KVM := ../../../virt/kvm
-kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9e193c8..d90d989 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -172,6 +172,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IRQCHIP:
r = vgic_present;
break;
+   case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 4cb5a93..ee332a7 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -162,6 +162,21 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t 
fault_ipa,
return 0;
 }
 
+static int handle_io_bus_rw(struct kvm_vcpu *vcpu, gpa_t addr,
+   int len, void *val, bool write)
+{
+   int idx, ret;
+
+   idx = srcu_read_lock(&vcpu->kvm->srcu);
+   if (write)
+   ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, val);
+   else
+   ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, val);
+   srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+   return ret;
+}
+
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 phys_addr_t fault_ipa)
 {
@@ -200,6 +215,10 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run 
*run,
if (vgic_handle_mmio(vcpu, run, &mmio))
return 1;
 
+   if (!handle_io_bus_rw(vcpu, fault_ipa, mmio.len, &mmio.data,
+   mmio.is_write))
+   return 1;
+
kvm_prepare_mmio(run, &mmio);
return 0;
 }
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 8ba85e9..642f57c 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -26,6 +26,7 @@ config KVM
select KVM_ARM_HOST
select KVM_ARM_VGIC
select KVM_ARM_TIMER
+   select HAVE_KVM_EVENTFD
---help---
  Support hosting virtualized guest machines.
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: Fix lost interrupt on irr_pending race

2014-11-18 Thread Radim Krčmář
2014-11-16 23:49+0200, Nadav Amit:
> apic_find_highest_irr assumes irr_pending is set if any vector in APIC_IRR is
> set.  If this assumption is broken and apicv is disabled, the injection of
> interrupts may be deferred until another interrupt is delivered to the guest.
> Ultimately, if no other interrupt should be injected to that vCPU, the pending
> interrupt may be lost.
> 
> commit 56cc2406d68c ("KVM: nVMX: fix "acknowledge interrupt on exit" when 
> APICv
> is in use") changed the behavior of apic_clear_irr so irr_pending is cleared
> after setting APIC_IRR vector. After this commit, if apic_set_irr and
> apic_clear_irr run simultaneously, a race may occur, resulting in APIC_IRR
> vector set, and irr_pending cleared. In the following example, assume a single
> vector is set in IRR prior to calling apic_clear_irr:
> 
> apic_set_irr  apic_clear_irr
>   --
> apic->irr_pending = true;
>   apic_clear_vector(...);
>   vec = apic_search_irr(apic);
>   // => vec == -1
> apic_set_vector(...);
>   apic->irr_pending = (vec != -1);
>   // => apic->irr_pending == false
> 
> Nonetheless, it appears the race might even occur prior to this commit:
> 
> apic_set_irr  apic_clear_irr
>   --
> apic->irr_pending = true;
>   apic->irr_pending = false;
>   apic_clear_vector(...);
>   if (apic_search_irr(apic) != -1)
>   apic->irr_pending = true;
>   // => apic->irr_pending == false
> apic_set_vector(...);
> 
> Fixing this issue by:
> 1. Restoring the previous behavior of apic_clear_irr: clear irr_pending, call
>apic_clear_vector, and then if APIC_IRR is non-zero, set irr_pending.
> 2. On apic_set_irr: first call apic_set_vector, then set irr_pending.
> 
> Signed-off-by: Nadav Amit 
> ---
>  arch/x86/kvm/lapic.c | 18 --
>  1 file changed, 12 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 6e8ce5a..e0e5642 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -341,8 +341,12 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
>  
>  static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
>  {
> - apic->irr_pending = true;
>   apic_set_vector(vec, apic->regs + APIC_IRR);
> + /*
> +  * irr_pending must be true if any interrupt is pending; set it after
> +  * APIC_IRR to avoid race with apic_clear_irr
> +  */
> + apic->irr_pending = true;

(A race that ends up with 'irr_pending = true' and zero IRR is
 harmless.)

>  }
>  
>  static inline int apic_search_irr(struct kvm_lapic *apic)
> @@ -374,13 +378,15 @@ static inline void apic_clear_irr(int vec, struct 
> kvm_lapic *apic)
>  
>   vcpu = apic->vcpu;
>  
> - apic_clear_vector(vec, apic->regs + APIC_IRR);
> - if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
> + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) {
>   /* try to update RVI */
> + apic_clear_vector(vec, apic->regs + APIC_IRR);
>   kvm_make_request(KVM_REQ_EVENT, vcpu);
> - else {
> - vec = apic_search_irr(apic);
> - apic->irr_pending = (vec != -1);
> + } else {
> + apic->irr_pending = false;
> + apic_clear_vector(vec, apic->regs + APIC_IRR);
> + if (apic_search_irr(apic) != -1)
> + apic->irr_pending = true;
>   }

Works because apic_clear_vector() is also a compiler barrier ...

Reviewed-by: Radim Krčmář 

(I hope the performance gain of irr_pending is worth its complexity.)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: can I make this work… (Foundation for accessibility project)

2014-11-18 Thread Eric S. Johansson


On 11/18/2014 8:53 AM, Hans de Goede wrote:

kvm's usb pass-through should be able to handle this without any issues
(other then some latency), it uses special buffering for isochronous
usb packets, which should take care of usb audio working.

I've never tested audio recording, but audio playback and video recording
(webcams) work, so I expect audio recording to be fine.



That's great to know. I will spin up a version of Windows 7 and give it 
a try given that I'm not looking at it, I can strip it down to the 
barest user interface elements and improve performance significantly.


FYI, Windows 8 actually works better for hand crips like myself because 
it's easier to fit the big blocky icons with hand tremors and has flaky 
fine motion control them is to try and hit the tiny little icons and 
menus. Yes, that's why I like unity as well. :-) I would love to have a 
capability to make the title bar drop downs bigger temporarily  so I 
could more easily pick elements out of them but otherwise have been 
visually out-of-the-way.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: can I make this work… (Foundation for accessibility project)

2014-11-18 Thread Eric S. Johansson


On 11/18/2014 8:50 AM, Paolo Bonzini wrote:


I'm adding two people who might know.

Do you have any idea what the "magic to pipe data back to the Linux
host" should look like?  Does a normal serial port (COM1 for Windows,
/dev/ttyS0 for Linux) work?



The fine magic comes in three forms. Keystroke injection, context 
feedback, and exporting UI elements such as microphone level, 
recognition correction,  partial recognition pop-ups into the linux 
environment.


All of these have in common the magic trick of using the isolation of 
the Windows environment to provide a single dictation target to 
NaturallySpeaking. All of the information necessary for the above 
capabilities would pass through this target. initially, this would be an 
ssh session with command redirecting standard into whatever 
accessibility inputs available.


 The host side of this gateway would be responsible for all of the 
proper input redirection. In theory, it would even be possible to direct 
speech recognition towards two targets depending on the grammar. For 
example in the programming by speech environment I'm working on, I would 
dictate directly into the editor sometimes and into a secondary window 
for focused speech UI action. At no time, would my hand touch the mouse. 
:-) It will happen because of the context set by the speech UI as a 
deliberate effect of certain commands.


--- longer ramble about speech and nuance issues. ---

Being a crip who's trying to write code with speech, it's not going to 
be fast. once I get the Basic keystroke injection working, it will be 
good enough to continuing developing my program by speech environment. 
But to discuss that, would go down the rathole of current models of 
speech user interfaces, why don't work, things you shouldn't do such as 
speaking the keyboard, intentional automation, contextual grammars and a 
host of other things of spent the past 15 years learning about and 
figuring out how to make a change. By the way, that knowledge and 
passion is why I I've started a consulting practice that focuses on 
improving user experience/user interfaces starting from the intent of 
the user and perspective of a disabled person with the result being an 
improved UI for everybody.


The hardest part is going to be everything except a keystroke injection. 
This is because they require special knowledge that nuance is loath to 
give up. I don't get it. Nuance totes and gets federal benefits for 
producing something that is "section 508 compliant" yet, the only way 
you could be considered an accessibility tool is if you do nothing but 
write in Microsoft Word.  I worked for a dragon reseller for a while 
with medical record systems and, nuance doesn't even make an attempt to 
try and speech enable the medical record environment. They have people 
using a couple of solutions that don't work well and effectively provide 
no UI automation[1] tied into speech commands.


A bunch of us techno Crips have built environments that greatly enhance 
the range of solutions NaturallySpeaking could be used for but, nuance 
won't talk to us, won't give us any documentation to keep things running 
on our own, won't sell us the documentation either and worst of all, 
they have written terms into the AUP designed to bar extensions like our 
environment unless you buy the most expensive version of 
NaturallySpeaking available.


And did I mention that they have many bugs that are a significant 
problem for every user, not to mention the scripts and the last time I 
checked, it will cost about $10 to report a bug (support call cost) and 
then there's no guarantee they'll ever fix. In version 13, I'm seeing 
bugs that have been around since version 7 or 8.


I will do what I can to implement the magic and when I get stumped, 
then, I'll figure out what I'm going to do technically and politically.


--- eric

[1] This is kind of a lie. They have the tools to what you navigate 
blindly through an application (i.e. hit 15 tabs, two down arrows, and a 
mouse click and it might end up in the right UI element to do something. 
unfortunately, they do not have anything to make it predictable, 
repeatable or survive revisions in the user interface. But this is one 
of those rat holes I said it wouldn't go down.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: can I make this work… (Foundation for accessibility project)

2014-11-18 Thread Hans de Goede
Hi,

On 11/18/2014 02:50 PM, Paolo Bonzini wrote:
> 
> 
> On 18/11/2014 07:48, Eric S. Johansson wrote:
>> I'm trying to figure out ways of making it possible to drive Linux from
>> Windows speech recognition (NaturallySpeaking).  The goal is a system
>> where Windows runs in a virtual machine (Linux host), audio is passed
>> through from a USB headset to the Windows environment. And the output of
>> the recognition engine is piped through some magic back to the Linux host.
>>
>> the hardest part of all of this without question is getting clean
>> uninterrupted audio from the USB device all the way through to the
>> Windows virtual machine. virtual box, VMware fail mostly in delivering 
>> reliable  audio to the virtual machine.
>>
>> I expect KVM to not  work right  with regards to getting clean
>> audio/real-time USB but I'm asking in case I'm wrong. if it doesn't work
>> or can't work yet, what would it take to make it possible for clean
>> audio to be passed through to a guest?
> 
> I'm adding two people who might know.

kvm's usb pass-through should be able to handle this without any issues
(other then some latency), it uses special buffering for isochronous
usb packets, which should take care of usb audio working.

I've never tested audio recording, but audio playback and video recording
(webcams) work, so I expect audio recording to be fine.

Regards,

Hans
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: can I make this work… (Foundation for accessibility project)

2014-11-18 Thread Paolo Bonzini


On 18/11/2014 07:48, Eric S. Johansson wrote:
> I'm trying to figure out ways of making it possible to drive Linux from
> Windows speech recognition (NaturallySpeaking).  The goal is a system
> where Windows runs in a virtual machine (Linux host), audio is passed
> through from a USB headset to the Windows environment. And the output of
> the recognition engine is piped through some magic back to the Linux host.
> 
> the hardest part of all of this without question is getting clean
> uninterrupted audio from the USB device all the way through to the
> Windows virtual machine. virtual box, VMware fail mostly in delivering 
> reliable  audio to the virtual machine.
> 
> I expect KVM to not  work right  with regards to getting clean
> audio/real-time USB but I'm asking in case I'm wrong. if it doesn't work
> or can't work yet, what would it take to make it possible for clean
> audio to be passed through to a guest?

I'm adding two people who might know.

Do you have any idea what the "magic to pipe data back to the Linux
host" should look like?  Does a normal serial port (COM1 for Windows,
/dev/ttyS0 for Linux) work?

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: vhost + multiqueue + RSS question.

2014-11-18 Thread Michael S. Tsirkin
On Tue, Nov 18, 2014 at 11:37:03AM +0800, Jason Wang wrote:
> On 11/17/2014 07:58 PM, Michael S. Tsirkin wrote:
> > On Mon, Nov 17, 2014 at 01:22:07PM +0200, Gleb Natapov wrote:
> >> > On Mon, Nov 17, 2014 at 12:38:16PM +0200, Michael S. Tsirkin wrote:
> >>> > > On Mon, Nov 17, 2014 at 09:44:23AM +0200, Gleb Natapov wrote:
>  > > > On Sun, Nov 16, 2014 at 08:56:04PM +0200, Michael S. Tsirkin wrote:
> > > > > > On Sun, Nov 16, 2014 at 06:18:18PM +0200, Gleb Natapov wrote:
> >> > > > > > Hi Michael,
> >> > > > > > 
> >> > > > > >  I am playing with vhost multiqueue capability and have a 
> >> > > > > > question about
> >> > > > > > vhost multiqueue and RSS (receive side steering). My setup 
> >> > > > > > has Mellanox
> >> > > > > > ConnectX-3 NIC which supports multiqueue and RSS. Network 
> >> > > > > > related
> >> > > > > > parameters for qemu are:
> >> > > > > > 
> >> > > > > >-netdev tap,id=hn0,script=qemu-ifup.sh,vhost=on,queues=4
> >> > > > > >-device virtio-net-pci,netdev=hn0,id=nic1,mq=on,vectors=10
> >> > > > > > 
> >> > > > > > In a guest I ran "ethtool -L eth0 combined 4" to enable 
> >> > > > > > multiqueue.
> >> > > > > > 
> >> > > > > > I am running one tcp stream into the guest using iperf. 
> >> > > > > > Since there is
> >> > > > > > only one tcp stream I expect it to be handled by one queue 
> >> > > > > > only but
> >> > > > > > this seams to be not the case. ethtool -S on a host shows 
> >> > > > > > that the
> >> > > > > > stream is handled by one queue in the NIC, just like I would 
> >> > > > > > expect,
> >> > > > > > but in a guest all 4 virtio-input interrupt are incremented. 
> >> > > > > > Am I
> >> > > > > > missing any configuration?
> > > > > > 
> > > > > > I don't see anything obviously wrong with what you describe.
> > > > > > Maybe, somehow, same irqfd got bound to multiple MSI vectors?
>  > > > It does not look like this is what is happening judging by the way
>  > > > interrupts are distributed between queues. They are not distributed
>  > > > uniformly and often I see one queue gets most interrupt and others 
>  > > > get
>  > > > much less and then it changes.
> >>> > > 
> >>> > > Weird. It would happen if you transmitted from multiple CPUs.
> >>> > > You did pin iperf to a single CPU within guest, did you not?
> >>> > > 
> >> > No, I didn't because I didn't expect it to matter for input interrupts.
> >> > When I run iperf on a host rx queue that receives all packets depends
> >> > only on a connection itself, not on a cpu iperf is running on (I tested
> >> > that).
> > This really depends on the type of networking card you have
> > on the host, and how it's configured.
> >
> > I think you will get something more closely resembling this
> > behaviour if you enable RFS in host.
> >
> >> > When I pin iperf in a guest I do indeed see that all interrupts
> >> > are arriving to the same irq vector. Is a number after virtio-input
> >> > in /proc/interrupt any indication of a queue a packet arrived to (on
> >> > a host I can use ethtool -S to check what queue receives packets, but
> >> > unfortunately this does not work for virtio nic in a guest)?
> > I think it is.
> >
> >> > Because if
> >> > it is the way RSS works in virtio is not how it works on a host and not
> >> > what I would expect after reading about RSS. The queue a packets arrives
> >> > to should be calculated by hashing fields from a packet header only.
> > Yes, what virtio has is not RSS - it's an accelerated RFS really.
> 
> Strictly speaking, not aRFS. aRFS requires a programmable filter and
> needs driver to fill the filter on demand. For virtio-net, this is done
> automatically in host side (tun/tap). There's no guest involvement.

Well guest affects the filter by sending tx packets.

> >
> > The point is to try and take application locality into account.
> >
> 
> Yes, the locality was done through (consider a N vcpu guest with N queue):
> 
> - virtio-net driver will provide a default 1:1 mapping between vcpu and
> txq through XPS
> - virtio-net driver will suggest a default irq affinity hint also for a
> 1:1 mapping bettwen vcpu and txq/rxq
> 
> With all these, each vcpu get its private txq/rxq paris. And host side
> implementation (tun/tap) will make sure if the packets of a flow were
> received from queue N, if will also use queue N to transmit the packets
> of this flow to guest.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: x86: vmx: remove MMIO_GEN_MASK

2014-11-18 Thread Paolo Bonzini


On 18/11/2014 10:14, Tiejun Chen wrote:
> Actually MMIO_MAX_GEN is same as MMIO_GEN_MASK, and
> especially in all cases we already AND this so its
> also unnecessary to generate such a WARN_ON().
> 
> Signed-off-by: Tiejun Chen 

Good idea, but I actually prefer keeping MMIO_GEN_MASK since it is often
used together with an "&".

Then, the WARN_ON is better written as

-   WARN_ON(gen > MMIO_MAX_GEN);
+   WARN_ON(gen & ~MMIO_GEN_MASK);

Paolo

> ---
>  arch/x86/kvm/mmu.c | 5 +
>  1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index ac1c4de..7712345 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -213,15 +213,12 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
>  #define MMIO_GEN_SHIFT   20
>  #define MMIO_GEN_LOW_SHIFT   10
>  #define MMIO_GEN_LOW_MASK((1 << MMIO_GEN_LOW_SHIFT) - 2)
> -#define MMIO_GEN_MASK((1 << MMIO_GEN_SHIFT) - 1)
>  #define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1)
>  
>  static u64 generation_mmio_spte_mask(unsigned int gen)
>  {
>   u64 mask;
>  
> - WARN_ON(gen > MMIO_MAX_GEN);
> -
>   mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
>   mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
>   return mask;
> @@ -240,7 +237,7 @@ static unsigned int get_mmio_spte_generation(u64 spte)
>  
>  static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
>  {
> - return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
> + return kvm_memslots(kvm)->generation & MMIO_MAX_GEN;
>  }
>  
>  static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: x86: vmx: cleanup handle_ept_violation

2014-11-18 Thread Paolo Bonzini


On 18/11/2014 10:12, Tiejun Chen wrote:
> Instead, just use PFERR_{FETCH, PRESENT, WRITE}_MASK
> inside handle_ept_violation() for slightly better code.
> 
> Signed-off-by: Tiejun Chen 
> ---
>  arch/x86/kvm/vmx.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 284f5c2..22dc60d 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -5474,11 +5474,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
>   trace_kvm_page_fault(gpa, exit_qualification);
>  
>   /* It is a write fault? */
> - error_code = exit_qualification & (1U << 1);
> + error_code = exit_qualification & PFERR_WRITE_MASK;
>   /* It is a fetch fault? */
> - error_code |= (exit_qualification & (1U << 2)) << 2;
> + error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
>   /* ept page table is present? */
> - error_code |= (exit_qualification >> 3) & 0x1;
> + error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;

Ok.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [v2][PATCH] kvm: x86: mmio: fix setting the present bit of mmio spte

2014-11-18 Thread Paolo Bonzini


On 18/11/2014 10:23, Chen, Tiejun wrote:
>> On 32-bit PAE hosts, PTEs have bit 62 reserved, as in your patch:
> 
> Do you mean just one reserved bit is fine enough in this case?

Yes.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [v2][PATCH] kvm: x86: mmio: fix setting the present bit of mmio spte

2014-11-18 Thread Chen, Tiejun

On 2014/11/17 19:40, Paolo Bonzini wrote:



On 17/11/2014 12:31, Tiejun Chen wrote:

In non-ept 64-bit of PAE case maxphyaddr may be 52bit as well,


There is no such thing as 64-bit PAE.



Definitely.


On 32-bit PAE hosts, PTEs have bit 62 reserved, as in your patch:



Do you mean just one reserved bit is fine enough in this case?

Thanks
Tiejun
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: x86: vmx: remove MMIO_GEN_MASK

2014-11-18 Thread Tiejun Chen
Actually MMIO_MAX_GEN is same as MMIO_GEN_MASK, and
especially in all cases we already AND this so its
also unnecessary to generate such a WARN_ON().

Signed-off-by: Tiejun Chen 
---
 arch/x86/kvm/mmu.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ac1c4de..7712345 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -213,15 +213,12 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 #define MMIO_GEN_SHIFT 20
 #define MMIO_GEN_LOW_SHIFT 10
 #define MMIO_GEN_LOW_MASK  ((1 << MMIO_GEN_LOW_SHIFT) - 2)
-#define MMIO_GEN_MASK  ((1 << MMIO_GEN_SHIFT) - 1)
 #define MMIO_MAX_GEN   ((1 << MMIO_GEN_SHIFT) - 1)
 
 static u64 generation_mmio_spte_mask(unsigned int gen)
 {
u64 mask;
 
-   WARN_ON(gen > MMIO_MAX_GEN);
-
mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
return mask;
@@ -240,7 +237,7 @@ static unsigned int get_mmio_spte_generation(u64 spte)
 
 static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
 {
-   return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
+   return kvm_memslots(kvm)->generation & MMIO_MAX_GEN;
 }
 
 static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: x86: vmx: cleanup handle_ept_violation

2014-11-18 Thread Tiejun Chen
Instead, just use PFERR_{FETCH, PRESENT, WRITE}_MASK
inside handle_ept_violation() for slightly better code.

Signed-off-by: Tiejun Chen 
---
 arch/x86/kvm/vmx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 284f5c2..22dc60d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5474,11 +5474,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
trace_kvm_page_fault(gpa, exit_qualification);
 
/* It is a write fault? */
-   error_code = exit_qualification & (1U << 1);
+   error_code = exit_qualification & PFERR_WRITE_MASK;
/* It is a fetch fault? */
-   error_code |= (exit_qualification & (1U << 2)) << 2;
+   error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
/* ept page table is present? */
-   error_code |= (exit_qualification >> 3) & 0x1;
+   error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
 
vcpu->arch.exit_qualification = exit_qualification;
 
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html