Re: [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-15 Thread Gleb Natapov
On Tue, Jan 08, 2013 at 01:40:05PM -0500, Christoffer Dall wrote:
 When the guest accesses I/O memory this will create data abort
 exceptions and they are handled by decoding the HSR information
 (physical address, read/write, length, register) and forwarding reads
 and writes to QEMU which performs the device emulation.
 
 Certain classes of load/store operations do not support the syndrome
 information provided in the HSR and we therefore must be able to fetch
 the offending instruction from guest memory and decode it manually.
 
 We only support instruction decoding for valid reasonable MMIO operations
 where trapping them do not provide sufficient information in the HSR (no
 16-bit Thumb instructions provide register writeback that we care about).
 
 The following instruction types are NOT supported for MMIO operations
 despite the HSR not containing decode info:
  - any Load/Store multiple
  - any load/store exclusive
  - any load/store dual
  - anything with the PC as the dest register
 
 This requires changing the general flow somewhat since new calls to run
 the VCPU must check if there's a pending MMIO load and perform the write
 after userspace has made the data available.
 
 Rusty Russell fixed a horrible race pointed out by Ben Herrenschmidt:
 (1) Guest complicated mmio instruction traps.
 (2) The hardware doesn't tell us enough, so we need to read the actual
 instruction which was being exectuted.
 (3) KVM maps the instruction virtual address to a physical address.
 (4) The guest (SMP) swaps out that page, and fills it with something else.
 (5) We read the physical address, but now that's the wrong thing.
How can this happen?! The guest cannot reuse physical page before it
flushes it from all vcpus tlb cache. For that it needs to send
synchronous IPI to all vcpus and IPI will not be processed by a vcpu
while it does emulation.

 
 Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
 Signed-off-by: Rusty Russell rusty.russ...@linaro.org
 Signed-off-by: Marc Zyngier marc.zyng...@arm.com
 Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
 ---
  arch/arm/include/asm/kvm_arm.h |3 
  arch/arm/include/asm/kvm_asm.h |2 
  arch/arm/include/asm/kvm_decode.h  |   47 
  arch/arm/include/asm/kvm_emulate.h |8 +
  arch/arm/include/asm/kvm_host.h|7 +
  arch/arm/include/asm/kvm_mmio.h|   51 
  arch/arm/kvm/Makefile  |2 
  arch/arm/kvm/arm.c |   14 +
  arch/arm/kvm/decode.c  |  462 
 
  arch/arm/kvm/emulate.c |  169 +
  arch/arm/kvm/interrupts.S  |   38 +++
  arch/arm/kvm/mmio.c|  154 
  arch/arm/kvm/mmu.c |7 -
  arch/arm/kvm/trace.h   |   21 ++
  14 files changed, 981 insertions(+), 4 deletions(-)
  create mode 100644 arch/arm/include/asm/kvm_decode.h
  create mode 100644 arch/arm/include/asm/kvm_mmio.h
  create mode 100644 arch/arm/kvm/decode.c
  create mode 100644 arch/arm/kvm/mmio.c
 
 diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
 index 3ff6f22..151c4ce 100644
 --- a/arch/arm/include/asm/kvm_arm.h
 +++ b/arch/arm/include/asm/kvm_arm.h
 @@ -173,8 +173,11 @@
  #define HSR_ISS  (HSR_IL - 1)
  #define HSR_ISV_SHIFT(24)
  #define HSR_ISV  (1U  HSR_ISV_SHIFT)
 +#define HSR_SRT_SHIFT(16)
 +#define HSR_SRT_MASK (0xf  HSR_SRT_SHIFT)
  #define HSR_FSC  (0x3f)
  #define HSR_FSC_TYPE (0x3c)
 +#define HSR_SSE  (1  21)
  #define HSR_WNR  (1  6)
  #define HSR_CV_SHIFT (24)
  #define HSR_CV   (1U  HSR_CV_SHIFT)
 diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
 index 5e06e81..58d787b 100644
 --- a/arch/arm/include/asm/kvm_asm.h
 +++ b/arch/arm/include/asm/kvm_asm.h
 @@ -77,6 +77,8 @@ extern void __kvm_flush_vm_context(void);
  extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
  
  extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 +
 +extern u64 __kvm_va_to_pa(struct kvm_vcpu *vcpu, u32 va, bool priv);
  #endif
  
  #endif /* __ARM_KVM_ASM_H__ */
 diff --git a/arch/arm/include/asm/kvm_decode.h 
 b/arch/arm/include/asm/kvm_decode.h
 new file mode 100644
 index 000..3c37cb9
 --- /dev/null
 +++ b/arch/arm/include/asm/kvm_decode.h
 @@ -0,0 +1,47 @@
 +/*
 + * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 + * Author: Christoffer Dall c.d...@virtualopensystems.com
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License, version 2, as
 + * published by the Free Software Foundation.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should 

Re: [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-15 Thread Marc Zyngier
On 15/01/13 13:18, Gleb Natapov wrote:
 On Tue, Jan 08, 2013 at 01:40:05PM -0500, Christoffer Dall wrote:
 When the guest accesses I/O memory this will create data abort
 exceptions and they are handled by decoding the HSR information
 (physical address, read/write, length, register) and forwarding reads
 and writes to QEMU which performs the device emulation.

 Certain classes of load/store operations do not support the syndrome
 information provided in the HSR and we therefore must be able to fetch
 the offending instruction from guest memory and decode it manually.

 We only support instruction decoding for valid reasonable MMIO operations
 where trapping them do not provide sufficient information in the HSR (no
 16-bit Thumb instructions provide register writeback that we care about).

 The following instruction types are NOT supported for MMIO operations
 despite the HSR not containing decode info:
  - any Load/Store multiple
  - any load/store exclusive
  - any load/store dual
  - anything with the PC as the dest register

 This requires changing the general flow somewhat since new calls to run
 the VCPU must check if there's a pending MMIO load and perform the write
 after userspace has made the data available.

 Rusty Russell fixed a horrible race pointed out by Ben Herrenschmidt:
 (1) Guest complicated mmio instruction traps.
 (2) The hardware doesn't tell us enough, so we need to read the actual
 instruction which was being exectuted.
 (3) KVM maps the instruction virtual address to a physical address.
 (4) The guest (SMP) swaps out that page, and fills it with something else.
 (5) We read the physical address, but now that's the wrong thing.
 How can this happen?! The guest cannot reuse physical page before it
 flushes it from all vcpus tlb cache. For that it needs to send
 synchronous IPI to all vcpus and IPI will not be processed by a vcpu
 while it does emulation.

I don't know how this works on x86, but a KVM/ARM guest can definitely
handle an IPI.

Furthermore, TLB invalidation doesn't require an IPI on ARMv7 (unless
we're doing some set/way operation which is handled separately).

M.
-- 
Jazz is not dead. It just smells funny...

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-15 Thread Gleb Natapov
On Tue, Jan 15, 2013 at 01:29:40PM +, Marc Zyngier wrote:
 On 15/01/13 13:18, Gleb Natapov wrote:
  On Tue, Jan 08, 2013 at 01:40:05PM -0500, Christoffer Dall wrote:
  When the guest accesses I/O memory this will create data abort
  exceptions and they are handled by decoding the HSR information
  (physical address, read/write, length, register) and forwarding reads
  and writes to QEMU which performs the device emulation.
 
  Certain classes of load/store operations do not support the syndrome
  information provided in the HSR and we therefore must be able to fetch
  the offending instruction from guest memory and decode it manually.
 
  We only support instruction decoding for valid reasonable MMIO operations
  where trapping them do not provide sufficient information in the HSR (no
  16-bit Thumb instructions provide register writeback that we care about).
 
  The following instruction types are NOT supported for MMIO operations
  despite the HSR not containing decode info:
   - any Load/Store multiple
   - any load/store exclusive
   - any load/store dual
   - anything with the PC as the dest register
 
  This requires changing the general flow somewhat since new calls to run
  the VCPU must check if there's a pending MMIO load and perform the write
  after userspace has made the data available.
 
  Rusty Russell fixed a horrible race pointed out by Ben Herrenschmidt:
  (1) Guest complicated mmio instruction traps.
  (2) The hardware doesn't tell us enough, so we need to read the actual
  instruction which was being exectuted.
  (3) KVM maps the instruction virtual address to a physical address.
  (4) The guest (SMP) swaps out that page, and fills it with something else.
  (5) We read the physical address, but now that's the wrong thing.
  How can this happen?! The guest cannot reuse physical page before it
  flushes it from all vcpus tlb cache. For that it needs to send
  synchronous IPI to all vcpus and IPI will not be processed by a vcpu
  while it does emulation.
 
 I don't know how this works on x86, but a KVM/ARM guest can definitely
 handle an IPI.
 
How can a vcpu handle an IPI while it is not in a guest mode?

 Furthermore, TLB invalidation doesn't require an IPI on ARMv7 (unless
 we're doing some set/way operation which is handled separately).
 
What prevents a page to be swapped out while code is fetched from it?
 
--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-15 Thread Marc Zyngier
On 15/01/13 13:34, Gleb Natapov wrote:
 On Tue, Jan 15, 2013 at 01:29:40PM +, Marc Zyngier wrote:
 On 15/01/13 13:18, Gleb Natapov wrote:
 On Tue, Jan 08, 2013 at 01:40:05PM -0500, Christoffer Dall wrote:
 When the guest accesses I/O memory this will create data abort
 exceptions and they are handled by decoding the HSR information
 (physical address, read/write, length, register) and forwarding reads
 and writes to QEMU which performs the device emulation.

 Certain classes of load/store operations do not support the syndrome
 information provided in the HSR and we therefore must be able to fetch
 the offending instruction from guest memory and decode it manually.

 We only support instruction decoding for valid reasonable MMIO operations
 where trapping them do not provide sufficient information in the HSR (no
 16-bit Thumb instructions provide register writeback that we care about).

 The following instruction types are NOT supported for MMIO operations
 despite the HSR not containing decode info:
  - any Load/Store multiple
  - any load/store exclusive
  - any load/store dual
  - anything with the PC as the dest register

 This requires changing the general flow somewhat since new calls to run
 the VCPU must check if there's a pending MMIO load and perform the write
 after userspace has made the data available.

 Rusty Russell fixed a horrible race pointed out by Ben Herrenschmidt:
 (1) Guest complicated mmio instruction traps.
 (2) The hardware doesn't tell us enough, so we need to read the actual
 instruction which was being exectuted.
 (3) KVM maps the instruction virtual address to a physical address.
 (4) The guest (SMP) swaps out that page, and fills it with something else.
 (5) We read the physical address, but now that's the wrong thing.
 How can this happen?! The guest cannot reuse physical page before it
 flushes it from all vcpus tlb cache. For that it needs to send
 synchronous IPI to all vcpus and IPI will not be processed by a vcpu
 while it does emulation.

 I don't know how this works on x86, but a KVM/ARM guest can definitely
 handle an IPI.

 How can a vcpu handle an IPI while it is not in a guest mode?

I think there is some misunderstanding. A guest IPI is of course handled
while running the guest. You completely lost me here.

 Furthermore, TLB invalidation doesn't require an IPI on ARMv7 (unless
 we're doing some set/way operation which is handled separately).

 What prevents a page to be swapped out while code is fetched from it?

Why would you prevent it? TLB invalidation is broadcast by the HW. If
you swap a page out, you flag the entry as invalid and invalidate the
corresponding TLB. If you hit it, you swap the page back in.

M.
-- 
Jazz is not dead. It just smells funny...

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-15 Thread Gleb Natapov
On Tue, Jan 15, 2013 at 01:46:04PM +, Marc Zyngier wrote:
 On 15/01/13 13:34, Gleb Natapov wrote:
  On Tue, Jan 15, 2013 at 01:29:40PM +, Marc Zyngier wrote:
  On 15/01/13 13:18, Gleb Natapov wrote:
  On Tue, Jan 08, 2013 at 01:40:05PM -0500, Christoffer Dall wrote:
  When the guest accesses I/O memory this will create data abort
  exceptions and they are handled by decoding the HSR information
  (physical address, read/write, length, register) and forwarding reads
  and writes to QEMU which performs the device emulation.
 
  Certain classes of load/store operations do not support the syndrome
  information provided in the HSR and we therefore must be able to fetch
  the offending instruction from guest memory and decode it manually.
 
  We only support instruction decoding for valid reasonable MMIO operations
  where trapping them do not provide sufficient information in the HSR (no
  16-bit Thumb instructions provide register writeback that we care about).
 
  The following instruction types are NOT supported for MMIO operations
  despite the HSR not containing decode info:
   - any Load/Store multiple
   - any load/store exclusive
   - any load/store dual
   - anything with the PC as the dest register
 
  This requires changing the general flow somewhat since new calls to run
  the VCPU must check if there's a pending MMIO load and perform the write
  after userspace has made the data available.
 
  Rusty Russell fixed a horrible race pointed out by Ben Herrenschmidt:
  (1) Guest complicated mmio instruction traps.
  (2) The hardware doesn't tell us enough, so we need to read the actual
  instruction which was being exectuted.
  (3) KVM maps the instruction virtual address to a physical address.
  (4) The guest (SMP) swaps out that page, and fills it with something 
  else.
  (5) We read the physical address, but now that's the wrong thing.
  How can this happen?! The guest cannot reuse physical page before it
  flushes it from all vcpus tlb cache. For that it needs to send
  synchronous IPI to all vcpus and IPI will not be processed by a vcpu
  while it does emulation.
 
  I don't know how this works on x86, but a KVM/ARM guest can definitely
  handle an IPI.
 
  How can a vcpu handle an IPI while it is not in a guest mode?
 
 I think there is some misunderstanding. A guest IPI is of course handled
 while running the guest. You completely lost me here.
You need IPI from one guest vcpu to another to invalidate its TLB on
x86. That prevents the race from happening there.

 
  Furthermore, TLB invalidation doesn't require an IPI on ARMv7 (unless
  we're doing some set/way operation which is handled separately).
 
  What prevents a page to be swapped out while code is fetched from it?
 
 Why would you prevent it? TLB invalidation is broadcast by the HW. If
 you swap a page out, you flag the entry as invalid and invalidate the
 corresponding TLB. If you hit it, you swap the page back in.
 
There is no IPI (or anything that requires response from cpu whose TLB
is invalidated) involved in invalidating remote TLB?


--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-15 Thread Christoffer Dall
On Tue, Jan 15, 2013 at 9:27 AM, Gleb Natapov g...@redhat.com wrote:
 On Tue, Jan 15, 2013 at 01:46:04PM +, Marc Zyngier wrote:
 On 15/01/13 13:34, Gleb Natapov wrote:
  On Tue, Jan 15, 2013 at 01:29:40PM +, Marc Zyngier wrote:
  On 15/01/13 13:18, Gleb Natapov wrote:
  On Tue, Jan 08, 2013 at 01:40:05PM -0500, Christoffer Dall wrote:
  When the guest accesses I/O memory this will create data abort
  exceptions and they are handled by decoding the HSR information
  (physical address, read/write, length, register) and forwarding reads
  and writes to QEMU which performs the device emulation.
 
  Certain classes of load/store operations do not support the syndrome
  information provided in the HSR and we therefore must be able to fetch
  the offending instruction from guest memory and decode it manually.
 
  We only support instruction decoding for valid reasonable MMIO 
  operations
  where trapping them do not provide sufficient information in the HSR (no
  16-bit Thumb instructions provide register writeback that we care 
  about).
 
  The following instruction types are NOT supported for MMIO operations
  despite the HSR not containing decode info:
   - any Load/Store multiple
   - any load/store exclusive
   - any load/store dual
   - anything with the PC as the dest register
 
  This requires changing the general flow somewhat since new calls to run
  the VCPU must check if there's a pending MMIO load and perform the write
  after userspace has made the data available.
 
  Rusty Russell fixed a horrible race pointed out by Ben Herrenschmidt:
  (1) Guest complicated mmio instruction traps.
  (2) The hardware doesn't tell us enough, so we need to read the actual
  instruction which was being exectuted.
  (3) KVM maps the instruction virtual address to a physical address.
  (4) The guest (SMP) swaps out that page, and fills it with something 
  else.
  (5) We read the physical address, but now that's the wrong thing.
  How can this happen?! The guest cannot reuse physical page before it
  flushes it from all vcpus tlb cache. For that it needs to send
  synchronous IPI to all vcpus and IPI will not be processed by a vcpu
  while it does emulation.
 
  I don't know how this works on x86, but a KVM/ARM guest can definitely
  handle an IPI.
 
  How can a vcpu handle an IPI while it is not in a guest mode?

 I think there is some misunderstanding. A guest IPI is of course handled
 while running the guest. You completely lost me here.
 You need IPI from one guest vcpu to another to invalidate its TLB on
 x86. That prevents the race from happening there.


  Furthermore, TLB invalidation doesn't require an IPI on ARMv7 (unless
  we're doing some set/way operation which is handled separately).
 
  What prevents a page to be swapped out while code is fetched from it?

 Why would you prevent it? TLB invalidation is broadcast by the HW. If
 you swap a page out, you flag the entry as invalid and invalidate the
 corresponding TLB. If you hit it, you swap the page back in.

 There is no IPI (or anything that requires response from cpu whose TLB
 is invalidated) involved in invalidating remote TLB?


no there's not, the hardware broadcasts the TLB invalidate operation.

-Christoffer
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-15 Thread Marc Zyngier
On 15/01/13 14:27, Gleb Natapov wrote:
 On Tue, Jan 15, 2013 at 01:46:04PM +, Marc Zyngier wrote:
 On 15/01/13 13:34, Gleb Natapov wrote:
 On Tue, Jan 15, 2013 at 01:29:40PM +, Marc Zyngier wrote:
 On 15/01/13 13:18, Gleb Natapov wrote:
 On Tue, Jan 08, 2013 at 01:40:05PM -0500, Christoffer Dall wrote:
 When the guest accesses I/O memory this will create data abort
 exceptions and they are handled by decoding the HSR information
 (physical address, read/write, length, register) and forwarding reads
 and writes to QEMU which performs the device emulation.

 Certain classes of load/store operations do not support the syndrome
 information provided in the HSR and we therefore must be able to fetch
 the offending instruction from guest memory and decode it manually.

 We only support instruction decoding for valid reasonable MMIO operations
 where trapping them do not provide sufficient information in the HSR (no
 16-bit Thumb instructions provide register writeback that we care about).

 The following instruction types are NOT supported for MMIO operations
 despite the HSR not containing decode info:
  - any Load/Store multiple
  - any load/store exclusive
  - any load/store dual
  - anything with the PC as the dest register

 This requires changing the general flow somewhat since new calls to run
 the VCPU must check if there's a pending MMIO load and perform the write
 after userspace has made the data available.

 Rusty Russell fixed a horrible race pointed out by Ben Herrenschmidt:
 (1) Guest complicated mmio instruction traps.
 (2) The hardware doesn't tell us enough, so we need to read the actual
 instruction which was being exectuted.
 (3) KVM maps the instruction virtual address to a physical address.
 (4) The guest (SMP) swaps out that page, and fills it with something 
 else.
 (5) We read the physical address, but now that's the wrong thing.
 How can this happen?! The guest cannot reuse physical page before it
 flushes it from all vcpus tlb cache. For that it needs to send
 synchronous IPI to all vcpus and IPI will not be processed by a vcpu
 while it does emulation.

 I don't know how this works on x86, but a KVM/ARM guest can definitely
 handle an IPI.

 How can a vcpu handle an IPI while it is not in a guest mode?

 I think there is some misunderstanding. A guest IPI is of course handled
 while running the guest. You completely lost me here.
 You need IPI from one guest vcpu to another to invalidate its TLB on
 x86. That prevents the race from happening there.

We don't need this on ARM (starting with v7, v6 is an entirely different
story, and we do not support KVM on v6).

The TLB is propagated by the HW using the following (pseudocode) sequence:
tlb_invalidate VA
barrier

Leaving the barrier guaranties that all TLB invalidations have been
propagated.


 Furthermore, TLB invalidation doesn't require an IPI on ARMv7 (unless
 we're doing some set/way operation which is handled separately).

 What prevents a page to be swapped out while code is fetched from it?

 Why would you prevent it? TLB invalidation is broadcast by the HW. If
 you swap a page out, you flag the entry as invalid and invalidate the
 corresponding TLB. If you hit it, you swap the page back in.

 There is no IPI (or anything that requires response from cpu whose TLB
 is invalidated) involved in invalidating remote TLB?

No. The above sequence is all you have to do.

This is why the above race is a bit hairy. A vcpu will happily
invalidate TLBs, but as the faulting vcpu already performed the
translation, we're screwed.

Thankfully, this is a case that only matters when we have to emulate an
MMIO operation that is not automatically decoded by the HW. They are
rare (the Linux kernel doesn't use them). In this case, we stop the
world (IPI).

M.
-- 
Jazz is not dead. It just smells funny...

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-15 Thread Gleb Natapov
On Tue, Jan 15, 2013 at 02:48:27PM +, Marc Zyngier wrote:
 On 15/01/13 14:27, Gleb Natapov wrote:
  On Tue, Jan 15, 2013 at 01:46:04PM +, Marc Zyngier wrote:
  On 15/01/13 13:34, Gleb Natapov wrote:
  On Tue, Jan 15, 2013 at 01:29:40PM +, Marc Zyngier wrote:
  On 15/01/13 13:18, Gleb Natapov wrote:
  On Tue, Jan 08, 2013 at 01:40:05PM -0500, Christoffer Dall wrote:
  When the guest accesses I/O memory this will create data abort
  exceptions and they are handled by decoding the HSR information
  (physical address, read/write, length, register) and forwarding reads
  and writes to QEMU which performs the device emulation.
 
  Certain classes of load/store operations do not support the syndrome
  information provided in the HSR and we therefore must be able to fetch
  the offending instruction from guest memory and decode it manually.
 
  We only support instruction decoding for valid reasonable MMIO 
  operations
  where trapping them do not provide sufficient information in the HSR 
  (no
  16-bit Thumb instructions provide register writeback that we care 
  about).
 
  The following instruction types are NOT supported for MMIO operations
  despite the HSR not containing decode info:
   - any Load/Store multiple
   - any load/store exclusive
   - any load/store dual
   - anything with the PC as the dest register
 
  This requires changing the general flow somewhat since new calls to run
  the VCPU must check if there's a pending MMIO load and perform the 
  write
  after userspace has made the data available.
 
  Rusty Russell fixed a horrible race pointed out by Ben Herrenschmidt:
  (1) Guest complicated mmio instruction traps.
  (2) The hardware doesn't tell us enough, so we need to read the actual
  instruction which was being exectuted.
  (3) KVM maps the instruction virtual address to a physical address.
  (4) The guest (SMP) swaps out that page, and fills it with something 
  else.
  (5) We read the physical address, but now that's the wrong thing.
  How can this happen?! The guest cannot reuse physical page before it
  flushes it from all vcpus tlb cache. For that it needs to send
  synchronous IPI to all vcpus and IPI will not be processed by a vcpu
  while it does emulation.
 
  I don't know how this works on x86, but a KVM/ARM guest can definitely
  handle an IPI.
 
  How can a vcpu handle an IPI while it is not in a guest mode?
 
  I think there is some misunderstanding. A guest IPI is of course handled
  while running the guest. You completely lost me here.
  You need IPI from one guest vcpu to another to invalidate its TLB on
  x86. That prevents the race from happening there.
 
 We don't need this on ARM (starting with v7, v6 is an entirely different
 story, and we do not support KVM on v6).
 
 The TLB is propagated by the HW using the following (pseudocode) sequence:
   tlb_invalidate VA
   barrier
 
 Leaving the barrier guaranties that all TLB invalidations have been
 propagated.
 
That explains why __get_user_pages_fast() is missing on ARM :)

 
  Furthermore, TLB invalidation doesn't require an IPI on ARMv7 (unless
  we're doing some set/way operation which is handled separately).
 
  What prevents a page to be swapped out while code is fetched from it?
 
  Why would you prevent it? TLB invalidation is broadcast by the HW. If
  you swap a page out, you flag the entry as invalid and invalidate the
  corresponding TLB. If you hit it, you swap the page back in.
 
  There is no IPI (or anything that requires response from cpu whose TLB
  is invalidated) involved in invalidating remote TLB?
 
 No. The above sequence is all you have to do.
 
 This is why the above race is a bit hairy. A vcpu will happily
 invalidate TLBs, but as the faulting vcpu already performed the
 translation, we're screwed.
 
 Thankfully, this is a case that only matters when we have to emulate an
 MMIO operation that is not automatically decoded by the HW. They are
 rare (the Linux kernel doesn't use them). In this case, we stop the
 world (IPI).
 
Got it. Thanks.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-14 Thread Russell King - ARM Linux
On Tue, Jan 08, 2013 at 01:40:05PM -0500, Christoffer Dall wrote:
 diff --git a/arch/arm/kvm/decode.c b/arch/arm/kvm/decode.c
 new file mode 100644
 index 000..469cf14
 --- /dev/null
 +++ b/arch/arm/kvm/decode.c
 @@ -0,0 +1,462 @@
 +/*
 + * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 + * Author: Christoffer Dall c.d...@virtualopensystems.com
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License, version 2, as
 + * published by the Free Software Foundation.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License
 + * along with this program; if not, write to the Free Software
 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 + */
 +#include linux/kvm_host.h
 +#include asm/kvm_mmio.h
 +#include asm/kvm_emulate.h
 +#include asm/kvm_decode.h
 +#include trace/events/kvm.h
 +
 +#include trace.h
 +
 +struct arm_instr {
 + /* Instruction decoding */
 + u32 opc;
 + u32 opc_mask;
 +
 + /* Decoding for the register write back */
 + bool register_form;
 + u32 imm;
 + u8 Rm;
 + u8 type;
 + u8 shift_n;
 +
 + /* Common decoding */
 + u8 len;
 + bool sign_extend;
 + bool w;
 +
 + bool (*decode)(struct kvm_decode *decode, struct kvm_exit_mmio *mmio,
 +unsigned long instr, struct arm_instr *ai);
 +};
 +
 +enum SRType {
 + SRType_LSL,
 + SRType_LSR,
 + SRType_ASR,
 + SRType_ROR,
 + SRType_RRX
 +};
 +
 +/* Modelled after DecodeImmShift() in the ARM ARM */
 +static enum SRType decode_imm_shift(u8 type, u8 imm5, u8 *amount)
 +{
 + switch (type) {
 + case 0x0:
 + *amount = imm5;
 + return SRType_LSL;
 + case 0x1:
 + *amount = (imm5 == 0) ? 32 : imm5;
 + return SRType_LSR;
 + case 0x2:
 + *amount = (imm5 == 0) ? 32 : imm5;
 + return SRType_ASR;
 + case 0x3:
 + if (imm5 == 0) {
 + *amount = 1;
 + return SRType_RRX;
 + } else {
 + *amount = imm5;
 + return SRType_ROR;
 + }
 + }
 +
 + return SRType_LSL;
 +}
 +
 +/* Modelled after Shift() in the ARM ARM */
 +static u32 shift(u32 value, u8 N, enum SRType type, u8 amount, bool carry_in)
 +{
 + u32 mask = (1  N) - 1;
 + s32 svalue = (s32)value;
 +
 + BUG_ON(N  32);
 + BUG_ON(type == SRType_RRX  amount != 1);
 + BUG_ON(amount  N);
 +
 + if (amount == 0)
 + return value;
 +
 + switch (type) {
 + case SRType_LSL:
 + value = amount;
 + break;
 + case SRType_LSR:
 +  value = amount;
 + break;
 + case SRType_ASR:
 + if (value  (1  (N - 1)))
 + svalue |= ((-1UL)  N);
 + value = svalue  amount;
 + break;
 + case SRType_ROR:
 + value = (value  amount) | (value  (N - amount));
 + break;
 + case SRType_RRX: {
 + u32 C = (carry_in) ? 1 : 0;
 + value = (value  1) | (C  (N - 1));
 + break;
 + }
 + }
 +
 + return value  mask;
 +}
 +
 +static bool decode_arm_wb(struct kvm_decode *decode, struct kvm_exit_mmio 
 *mmio,
 +   unsigned long instr, const struct arm_instr *ai)
 +{
 + u8 Rt = (instr  12)  0xf;
 + u8 Rn = (instr  16)  0xf;
 + u8 W = (instr  21)  1;
 + u8 U = (instr  23)  1;
 + u8 P = (instr  24)  1;
 + u32 base_addr = *kvm_decode_reg(decode, Rn);
 + u32 offset_addr, offset;
 +
 + /*
 +  * Technically this is allowed in certain circumstances,
 +  * but we don't support it.
 +  */
 + if (Rt == 15 || Rn == 15)
 + return false;
 +
 + if (P  !W) {
 + kvm_err(Decoding operation with valid ISV?\n);
 + return false;
 + }
 +
 + decode-rt = Rt;
 +
 + if (ai-register_form) {
 + /* Register operation */
 + enum SRType s_type;
 + u8 shift_n = 0;
 + bool c_bit = *kvm_decode_cpsr(decode)  PSR_C_BIT;
 + u32 s_reg = *kvm_decode_reg(decode, ai-Rm);
 +
 + s_type = decode_imm_shift(ai-type, ai-shift_n, shift_n);
 + offset = shift(s_reg, 5, s_type, shift_n, c_bit);
 + } else {
 + /* Immediate operation */
 + offset = ai-imm;
 + }
 +
 + /* Handle Writeback */
 + if (U)
 + offset_addr = base_addr + offset;
 + else
 + offset_addr = base_addr - offset;
 + *kvm_decode_reg(decode, Rn) = offset_addr;
 

Re: [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-14 Thread Christoffer Dall
On Mon, Jan 14, 2013 at 11:43 AM, Russell King - ARM Linux
li...@arm.linux.org.uk wrote:
 On Tue, Jan 08, 2013 at 01:40:05PM -0500, Christoffer Dall wrote:
 diff --git a/arch/arm/kvm/decode.c b/arch/arm/kvm/decode.c
 new file mode 100644
 index 000..469cf14
 --- /dev/null
 +++ b/arch/arm/kvm/decode.c
 @@ -0,0 +1,462 @@
 +/*
 + * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 + * Author: Christoffer Dall c.d...@virtualopensystems.com
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License, version 2, as
 + * published by the Free Software Foundation.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License
 + * along with this program; if not, write to the Free Software
 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 + */
 +#include linux/kvm_host.h
 +#include asm/kvm_mmio.h
 +#include asm/kvm_emulate.h
 +#include asm/kvm_decode.h
 +#include trace/events/kvm.h
 +
 +#include trace.h
 +
 +struct arm_instr {
 + /* Instruction decoding */
 + u32 opc;
 + u32 opc_mask;
 +
 + /* Decoding for the register write back */
 + bool register_form;
 + u32 imm;
 + u8 Rm;
 + u8 type;
 + u8 shift_n;
 +
 + /* Common decoding */
 + u8 len;
 + bool sign_extend;
 + bool w;
 +
 + bool (*decode)(struct kvm_decode *decode, struct kvm_exit_mmio *mmio,
 +unsigned long instr, struct arm_instr *ai);
 +};
 +
 +enum SRType {
 + SRType_LSL,
 + SRType_LSR,
 + SRType_ASR,
 + SRType_ROR,
 + SRType_RRX
 +};
 +
 +/* Modelled after DecodeImmShift() in the ARM ARM */
 +static enum SRType decode_imm_shift(u8 type, u8 imm5, u8 *amount)
 +{
 + switch (type) {
 + case 0x0:
 + *amount = imm5;
 + return SRType_LSL;
 + case 0x1:
 + *amount = (imm5 == 0) ? 32 : imm5;
 + return SRType_LSR;
 + case 0x2:
 + *amount = (imm5 == 0) ? 32 : imm5;
 + return SRType_ASR;
 + case 0x3:
 + if (imm5 == 0) {
 + *amount = 1;
 + return SRType_RRX;
 + } else {
 + *amount = imm5;
 + return SRType_ROR;
 + }
 + }
 +
 + return SRType_LSL;
 +}
 +
 +/* Modelled after Shift() in the ARM ARM */
 +static u32 shift(u32 value, u8 N, enum SRType type, u8 amount, bool 
 carry_in)
 +{
 + u32 mask = (1  N) - 1;
 + s32 svalue = (s32)value;
 +
 + BUG_ON(N  32);
 + BUG_ON(type == SRType_RRX  amount != 1);
 + BUG_ON(amount  N);
 +
 + if (amount == 0)
 + return value;
 +
 + switch (type) {
 + case SRType_LSL:
 + value = amount;
 + break;
 + case SRType_LSR:
 +  value = amount;
 + break;
 + case SRType_ASR:
 + if (value  (1  (N - 1)))
 + svalue |= ((-1UL)  N);
 + value = svalue  amount;
 + break;
 + case SRType_ROR:
 + value = (value  amount) | (value  (N - amount));
 + break;
 + case SRType_RRX: {
 + u32 C = (carry_in) ? 1 : 0;
 + value = (value  1) | (C  (N - 1));
 + break;
 + }
 + }
 +
 + return value  mask;
 +}
 +
 +static bool decode_arm_wb(struct kvm_decode *decode, struct kvm_exit_mmio 
 *mmio,
 +   unsigned long instr, const struct arm_instr *ai)
 +{
 + u8 Rt = (instr  12)  0xf;
 + u8 Rn = (instr  16)  0xf;
 + u8 W = (instr  21)  1;
 + u8 U = (instr  23)  1;
 + u8 P = (instr  24)  1;
 + u32 base_addr = *kvm_decode_reg(decode, Rn);
 + u32 offset_addr, offset;
 +
 + /*
 +  * Technically this is allowed in certain circumstances,
 +  * but we don't support it.
 +  */
 + if (Rt == 15 || Rn == 15)
 + return false;
 +
 + if (P  !W) {
 + kvm_err(Decoding operation with valid ISV?\n);
 + return false;
 + }
 +
 + decode-rt = Rt;
 +
 + if (ai-register_form) {
 + /* Register operation */
 + enum SRType s_type;
 + u8 shift_n = 0;
 + bool c_bit = *kvm_decode_cpsr(decode)  PSR_C_BIT;
 + u32 s_reg = *kvm_decode_reg(decode, ai-Rm);
 +
 + s_type = decode_imm_shift(ai-type, ai-shift_n, shift_n);
 + offset = shift(s_reg, 5, s_type, shift_n, c_bit);
 + } else {
 + /* Immediate operation */
 + offset = ai-imm;
 + }
 +
 + /* Handle Writeback */
 + if (U)
 + offset_addr = base_addr + offset;
 + else
 +   

Re: [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-14 Thread Russell King - ARM Linux
On Mon, Jan 14, 2013 at 01:25:39PM -0500, Christoffer Dall wrote:
 However, unifying all instruction decoding within arch/arm is quite
 the heavy task, and requires agreeing on some canonical API that
 people can live with and it will likely take a long time.  I seem to
 recall there were also arguments against unifying kprobe code with
 other instruction decoding, as the kprobe code was also written to
 work highly optimized under certain assumptions, if I understood
 previous comments correctly.

Yes, I know Rusty had a go.

What I think may make sense is to unify this and the alignment code.
They're really after the same things, which are:

- Given an instruction, and register set, calculate the address of the
  access, size, number of accesses, and the source/destination registers.
- Update the register set as though the instruction had been executed
  by the CPU.

However, I've changed tack slightly from the above in the last 10 minutes
or so.  I'm thinking a little more that we might be able to take what we
already have in alignment.c and provide it with a set of accessors
according to size etc.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-14 Thread Will Deacon
On Mon, Jan 14, 2013 at 06:43:19PM +, Russell King - ARM Linux wrote:
 On Mon, Jan 14, 2013 at 01:25:39PM -0500, Christoffer Dall wrote:
  However, unifying all instruction decoding within arch/arm is quite
  the heavy task, and requires agreeing on some canonical API that
  people can live with and it will likely take a long time.  I seem to
  recall there were also arguments against unifying kprobe code with
  other instruction decoding, as the kprobe code was also written to
  work highly optimized under certain assumptions, if I understood
  previous comments correctly.
 
 Yes, I know Rusty had a go.
 
 What I think may make sense is to unify this and the alignment code.
 They're really after the same things, which are:
 
 - Given an instruction, and register set, calculate the address of the
   access, size, number of accesses, and the source/destination registers.
 - Update the register set as though the instruction had been executed
   by the CPU.
 
 However, I've changed tack slightly from the above in the last 10 minutes
 or so.  I'm thinking a little more that we might be able to take what we
 already have in alignment.c and provide it with a set of accessors
 according to size etc.

FWIW, KVM only needs this code for handling complex MMIO instructions, which
aren't even generated by recent guest kernels. I'm inclined to suggest removing
this emulation code from KVM entirely given that it's likely to bitrot as
it is executed less and less often.

This doesn't solve the problem of having multiple people doing the same
thing, but at least we don't have one extra set of decoding logic for
arch/arm/ (even though the code itself is pretty clean).

Will
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvmarm] [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-14 Thread Alexander Graf

On 01/14/2013 07:50 PM, Will Deacon wrote:

On Mon, Jan 14, 2013 at 06:43:19PM +, Russell King - ARM Linux wrote:

On Mon, Jan 14, 2013 at 01:25:39PM -0500, Christoffer Dall wrote:

However, unifying all instruction decoding within arch/arm is quite
the heavy task, and requires agreeing on some canonical API that
people can live with and it will likely take a long time.  I seem to
recall there were also arguments against unifying kprobe code with
other instruction decoding, as the kprobe code was also written to
work highly optimized under certain assumptions, if I understood
previous comments correctly.

Yes, I know Rusty had a go.

What I think may make sense is to unify this and the alignment code.
They're really after the same things, which are:

- Given an instruction, and register set, calculate the address of the
   access, size, number of accesses, and the source/destination registers.
- Update the register set as though the instruction had been executed
   by the CPU.

However, I've changed tack slightly from the above in the last 10 minutes
or so.  I'm thinking a little more that we might be able to take what we
already have in alignment.c and provide it with a set of accessors
according to size etc.

FWIW, KVM only needs this code for handling complex MMIO instructions, which
aren't even generated by recent guest kernels. I'm inclined to suggest removing
this emulation code from KVM entirely given that it's likely to bitrot as
it is executed less and less often.


That'd mean that you heavily limit what type of guests you're executing, 
which I don't think is a good idea.



Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvmarm] [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-14 Thread Christoffer Dall
On Mon, Jan 14, 2013 at 1:53 PM, Alexander Graf ag...@suse.de wrote:
 On 01/14/2013 07:50 PM, Will Deacon wrote:

 On Mon, Jan 14, 2013 at 06:43:19PM +, Russell King - ARM Linux wrote:

 On Mon, Jan 14, 2013 at 01:25:39PM -0500, Christoffer Dall wrote:

 However, unifying all instruction decoding within arch/arm is quite
 the heavy task, and requires agreeing on some canonical API that
 people can live with and it will likely take a long time.  I seem to
 recall there were also arguments against unifying kprobe code with
 other instruction decoding, as the kprobe code was also written to
 work highly optimized under certain assumptions, if I understood
 previous comments correctly.

 Yes, I know Rusty had a go.

 What I think may make sense is to unify this and the alignment code.
 They're really after the same things, which are:

 - Given an instruction, and register set, calculate the address of the
access, size, number of accesses, and the source/destination
 registers.
 - Update the register set as though the instruction had been executed
by the CPU.

 However, I've changed tack slightly from the above in the last 10 minutes
 or so.  I'm thinking a little more that we might be able to take what we
 already have in alignment.c and provide it with a set of accessors
 according to size etc.

 FWIW, KVM only needs this code for handling complex MMIO instructions,
 which
 aren't even generated by recent guest kernels. I'm inclined to suggest
 removing
 this emulation code from KVM entirely given that it's likely to bitrot as
 it is executed less and less often.


 That'd mean that you heavily limit what type of guests you're executing,
 which I don't think is a good idea.

It would limit legacy Linux kernels at least, but I think getting
KVM/ARM code in mainline is the highest priority, so if merging the
current code is unacceptable, I'm willing to drop the mmio emulation
for now and queue the task of unifying the code for later.

A bit of a shame (think about someone wanting to run some proprietary
custom OS in a VM), but this code has been out-of-tree for too long
already, and I'm afraid unifying the decoding pre-merge is going to
hold things up.

-Christoffer
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvmarm] [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-14 Thread Will Deacon
On Mon, Jan 14, 2013 at 06:53:14PM +, Alexander Graf wrote:
 On 01/14/2013 07:50 PM, Will Deacon wrote:
  FWIW, KVM only needs this code for handling complex MMIO instructions, which
  aren't even generated by recent guest kernels. I'm inclined to suggest 
  removing
  this emulation code from KVM entirely given that it's likely to bitrot as
  it is executed less and less often.
 
 That'd mean that you heavily limit what type of guests you're executing, 
 which I don't think is a good idea.

To be honest, I don't think we know whether that's true or not. How many
guests out there do writeback accesses to MMIO devices? Even on older
Linux guests, it was dependent on how GCC felt.

I see where you're coming from, I just don't think we can quantify it either
way outside of Linux.

Will
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvmarm] [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-14 Thread Christoffer Dall
On Mon, Jan 14, 2013 at 2:00 PM, Will Deacon will.dea...@arm.com wrote:
 On Mon, Jan 14, 2013 at 06:53:14PM +, Alexander Graf wrote:
 On 01/14/2013 07:50 PM, Will Deacon wrote:
  FWIW, KVM only needs this code for handling complex MMIO instructions, 
  which
  aren't even generated by recent guest kernels. I'm inclined to suggest 
  removing
  this emulation code from KVM entirely given that it's likely to bitrot as
  it is executed less and less often.

 That'd mean that you heavily limit what type of guests you're executing,
 which I don't think is a good idea.

 To be honest, I don't think we know whether that's true or not. How many
 guests out there do writeback accesses to MMIO devices? Even on older
 Linux guests, it was dependent on how GCC felt.

I don't think bitrot'ing is a valid argument: the code doesn't depend
on any other implementation state that's likely to change and break
this code (the instruction encoding is not exactly going to change).
And we should simply finish the selftest code to test this stuff
(which should be finished if the code is unified or not, and is on my
todo list).


 I see where you're coming from, I just don't think we can quantify it either
 way outside of Linux.

FWIW, I know of at least a couple of companies wanting to use KVM for
running non-Linux guests as well.

But, however a shame, I can more easily maintain this single patch
out-of-tree, so I'm willing to drop this logic for now if it gets
things moving.

-Christoffer
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvmarm] [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-14 Thread Will Deacon
On Mon, Jan 14, 2013 at 07:12:49PM +, Christoffer Dall wrote:
 On Mon, Jan 14, 2013 at 2:00 PM, Will Deacon will.dea...@arm.com wrote:
  On Mon, Jan 14, 2013 at 06:53:14PM +, Alexander Graf wrote:
  On 01/14/2013 07:50 PM, Will Deacon wrote:
   FWIW, KVM only needs this code for handling complex MMIO instructions, 
   which
   aren't even generated by recent guest kernels. I'm inclined to suggest 
   removing
   this emulation code from KVM entirely given that it's likely to bitrot as
   it is executed less and less often.
 
  That'd mean that you heavily limit what type of guests you're executing,
  which I don't think is a good idea.
 
  To be honest, I don't think we know whether that's true or not. How many
  guests out there do writeback accesses to MMIO devices? Even on older
  Linux guests, it was dependent on how GCC felt.
 
 I don't think bitrot'ing is a valid argument: the code doesn't depend
 on any other implementation state that's likely to change and break
 this code (the instruction encoding is not exactly going to change).
 And we should simply finish the selftest code to test this stuff
 (which should be finished if the code is unified or not, and is on my
 todo list).

Maybe `bitrot' is the wrong word. The scenario I envisage is the addition
of new instructions to the architecture which aren't handled by the current
code, then we end up with emulation code that works for some percentage of
the instruction set only. If the code is rarely used, it will likely go
untouched until it crashes somebody's VM.

  I see where you're coming from, I just don't think we can quantify it either
  way outside of Linux.
 
 FWIW, I know of at least a couple of companies wanting to use KVM for
 running non-Linux guests as well.

Oh, I don't doubt that. The point is, do we have any idea how they behave
under KVM? Do they generate complex MMIO accesses? Do they expect firmware
shims, possibly sitting above hyp? Do they require a signed boot sequence?
Do they run on Cortex-A15 (the only target CPU we have at the moment)?

 But, however a shame, I can more easily maintain this single patch
 out-of-tree, so I'm willing to drop this logic for now if it gets
 things moving.

I would hope that, if this code is actually required, you would consider
merging it with what we have rather than maintaining it out-of-tree.

Will
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvmarm] [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-14 Thread Christoffer Dall
On Mon, Jan 14, 2013 at 5:36 PM, Will Deacon will.dea...@arm.com wrote:
 On Mon, Jan 14, 2013 at 07:12:49PM +, Christoffer Dall wrote:
 On Mon, Jan 14, 2013 at 2:00 PM, Will Deacon will.dea...@arm.com wrote:
  On Mon, Jan 14, 2013 at 06:53:14PM +, Alexander Graf wrote:
  On 01/14/2013 07:50 PM, Will Deacon wrote:
   FWIW, KVM only needs this code for handling complex MMIO instructions, 
   which
   aren't even generated by recent guest kernels. I'm inclined to suggest 
   removing
   this emulation code from KVM entirely given that it's likely to bitrot 
   as
   it is executed less and less often.
 
  That'd mean that you heavily limit what type of guests you're executing,
  which I don't think is a good idea.
 
  To be honest, I don't think we know whether that's true or not. How many
  guests out there do writeback accesses to MMIO devices? Even on older
  Linux guests, it was dependent on how GCC felt.

 I don't think bitrot'ing is a valid argument: the code doesn't depend
 on any other implementation state that's likely to change and break
 this code (the instruction encoding is not exactly going to change).
 And we should simply finish the selftest code to test this stuff
 (which should be finished if the code is unified or not, and is on my
 todo list).

 Maybe `bitrot' is the wrong word. The scenario I envisage is the addition
 of new instructions to the architecture which aren't handled by the current
 code, then we end up with emulation code that works for some percentage of
 the instruction set only. If the code is rarely used, it will likely go
 untouched until it crashes somebody's VM.


How is that worse than KVM crashing all VMs that use any of these
instructions for IO?

At least the code we have now has been tested with a number of old
kernels, and we know that it works. As for correctness, it will be the
case for all implementations and this type of code absolutely requires
a test suite.


  I see where you're coming from, I just don't think we can quantify it 
  either
  way outside of Linux.
 
 FWIW, I know of at least a couple of companies wanting to use KVM for
 running non-Linux guests as well.

 Oh, I don't doubt that. The point is, do we have any idea how they behave
 under KVM? Do they generate complex MMIO accesses? Do they expect firmware
 shims, possibly sitting above hyp? Do they require a signed boot sequence?
 Do they run on Cortex-A15 (the only target CPU we have at the moment)?


No we don't know. But there's a fair chance that they do use complex
mmio instructions seeing as older kernels did, without anything
explicitly being involved.

 But, however a shame, I can more easily maintain this single patch
 out-of-tree, so I'm willing to drop this logic for now if it gets
 things moving.

 I would hope that, if this code is actually required, you would consider
 merging it with what we have rather than maintaining it out-of-tree.

Of course I would, and I would also make an effort to unify the code
if it were merged now, I just don't have the cycles to do the unify
work right now, since it is without doubt a lengthy process.

So from that point of view, I don't quite see how it's better to leave
the code out at this point, but that is not up to me.

-Christoffer
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvmarm] [PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-14 Thread Gleb Natapov
On Mon, Jan 14, 2013 at 10:36:38PM +, Will Deacon wrote:
 On Mon, Jan 14, 2013 at 07:12:49PM +, Christoffer Dall wrote:
  On Mon, Jan 14, 2013 at 2:00 PM, Will Deacon will.dea...@arm.com wrote:
   On Mon, Jan 14, 2013 at 06:53:14PM +, Alexander Graf wrote:
   On 01/14/2013 07:50 PM, Will Deacon wrote:
FWIW, KVM only needs this code for handling complex MMIO instructions, 
which
aren't even generated by recent guest kernels. I'm inclined to suggest 
removing
this emulation code from KVM entirely given that it's likely to bitrot 
as
it is executed less and less often.
  
   That'd mean that you heavily limit what type of guests you're executing,
   which I don't think is a good idea.
  
   To be honest, I don't think we know whether that's true or not. How many
   guests out there do writeback accesses to MMIO devices? Even on older
   Linux guests, it was dependent on how GCC felt.
  
  I don't think bitrot'ing is a valid argument: the code doesn't depend
  on any other implementation state that's likely to change and break
  this code (the instruction encoding is not exactly going to change).
  And we should simply finish the selftest code to test this stuff
  (which should be finished if the code is unified or not, and is on my
  todo list).
 
 Maybe `bitrot' is the wrong word. The scenario I envisage is the addition
 of new instructions to the architecture which aren't handled by the current
 code, then we end up with emulation code that works for some percentage of
 the instruction set only. If the code is rarely used, it will likely go
 untouched until it crashes somebody's VM.
 
This is precisely the situation with x86 too. X86 has to many instruction
that can potentially access MMIO memory, but luckily not all of them
are used for that. When guest appears that uses instruction x86 kvm does
not emulate yet we add emulation of required instruction. If this is the
only concern about the code it should stay IMO.


   I see where you're coming from, I just don't think we can quantify it 
   either
   way outside of Linux.
  
  FWIW, I know of at least a couple of companies wanting to use KVM for
  running non-Linux guests as well.
 
 Oh, I don't doubt that. The point is, do we have any idea how they behave
 under KVM? Do they generate complex MMIO accesses? Do they expect firmware
 shims, possibly sitting above hyp? Do they require a signed boot sequence?
 Do they run on Cortex-A15 (the only target CPU we have at the moment)?
 
  But, however a shame, I can more easily maintain this single patch
  out-of-tree, so I'm willing to drop this logic for now if it gets
  things moving.
 
 I would hope that, if this code is actually required, you would consider
 merging it with what we have rather than maintaining it out-of-tree.
 
 Will
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 13/14] KVM: ARM: Handle I/O aborts

2013-01-08 Thread Christoffer Dall
When the guest accesses I/O memory this will create data abort
exceptions and they are handled by decoding the HSR information
(physical address, read/write, length, register) and forwarding reads
and writes to QEMU which performs the device emulation.

Certain classes of load/store operations do not support the syndrome
information provided in the HSR and we therefore must be able to fetch
the offending instruction from guest memory and decode it manually.

We only support instruction decoding for valid reasonable MMIO operations
where trapping them do not provide sufficient information in the HSR (no
16-bit Thumb instructions provide register writeback that we care about).

The following instruction types are NOT supported for MMIO operations
despite the HSR not containing decode info:
 - any Load/Store multiple
 - any load/store exclusive
 - any load/store dual
 - anything with the PC as the dest register

This requires changing the general flow somewhat since new calls to run
the VCPU must check if there's a pending MMIO load and perform the write
after userspace has made the data available.

Rusty Russell fixed a horrible race pointed out by Ben Herrenschmidt:
(1) Guest complicated mmio instruction traps.
(2) The hardware doesn't tell us enough, so we need to read the actual
instruction which was being exectuted.
(3) KVM maps the instruction virtual address to a physical address.
(4) The guest (SMP) swaps out that page, and fills it with something else.
(5) We read the physical address, but now that's the wrong thing.

Reviewed-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Rusty Russell rusty.russ...@linaro.org
Signed-off-by: Marc Zyngier marc.zyng...@arm.com
Signed-off-by: Christoffer Dall c.d...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_arm.h |3 
 arch/arm/include/asm/kvm_asm.h |2 
 arch/arm/include/asm/kvm_decode.h  |   47 
 arch/arm/include/asm/kvm_emulate.h |8 +
 arch/arm/include/asm/kvm_host.h|7 +
 arch/arm/include/asm/kvm_mmio.h|   51 
 arch/arm/kvm/Makefile  |2 
 arch/arm/kvm/arm.c |   14 +
 arch/arm/kvm/decode.c  |  462 
 arch/arm/kvm/emulate.c |  169 +
 arch/arm/kvm/interrupts.S  |   38 +++
 arch/arm/kvm/mmio.c|  154 
 arch/arm/kvm/mmu.c |7 -
 arch/arm/kvm/trace.h   |   21 ++
 14 files changed, 981 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/include/asm/kvm_decode.h
 create mode 100644 arch/arm/include/asm/kvm_mmio.h
 create mode 100644 arch/arm/kvm/decode.c
 create mode 100644 arch/arm/kvm/mmio.c

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 3ff6f22..151c4ce 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -173,8 +173,11 @@
 #define HSR_ISS(HSR_IL - 1)
 #define HSR_ISV_SHIFT  (24)
 #define HSR_ISV(1U  HSR_ISV_SHIFT)
+#define HSR_SRT_SHIFT  (16)
+#define HSR_SRT_MASK   (0xf  HSR_SRT_SHIFT)
 #define HSR_FSC(0x3f)
 #define HSR_FSC_TYPE   (0x3c)
+#define HSR_SSE(1  21)
 #define HSR_WNR(1  6)
 #define HSR_CV_SHIFT   (24)
 #define HSR_CV (1U  HSR_CV_SHIFT)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 5e06e81..58d787b 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -77,6 +77,8 @@ extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+extern u64 __kvm_va_to_pa(struct kvm_vcpu *vcpu, u32 va, bool priv);
 #endif
 
 #endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_decode.h 
b/arch/arm/include/asm/kvm_decode.h
new file mode 100644
index 000..3c37cb9
--- /dev/null
+++ b/arch/arm/include/asm/kvm_decode.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall c.d...@virtualopensystems.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __ARM_KVM_DECODE_H__
+#define __ARM_KVM_DECODE_H__
+
+#include linux/types.h
+
+struct kvm_vcpu;
+struct kvm_exit_mmio;
+
+struct kvm_decode {
+   struct pt_regs *regs;
+   unsigned