[Bug 78331] New: Qemu crash in x86

2014-06-19 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=78331

Bug ID: 78331
   Summary: Qemu crash in x86
   Product: Virtualization
   Version: unspecified
Kernel Version: 2.6.32
  Hardware: All
OS: Linux
  Tree: Mainline
Status: NEW
  Severity: normal
  Priority: P1
 Component: kvm
  Assignee: virtualization_...@kernel-bugs.osdl.org
  Reporter: janakiram.sis...@gmail.com
Regression: No

Hi all,



I am running qemu on an x86 host and when the host reloaded we do see a core on
Qemu and the gdb dump shows as follows. 

Can anyone please let me know if anyone has come across such failures.

warning: Can't read pathname for load map: Input/output error.

Core was generated by `/usr/bin/qemu-system-x86_64 -name default-sdr--2 -S -M
pc_q35 -enable-kvm -m 61'.

Program terminated with signal 11, Segmentation fault.

#0  0x0049c976 in qemu_bh_delete (bh=0x31) at
/auto/thirdparty-sdk/release/235/thirdparty/opensource/qemu-kvm/contents/async.c:193

193 bh->scheduled = 0;

(gdb) bt

#0  0x0049c976 in qemu_bh_delete (bh=0x31)

at
/auto/thirdparty-sdk/release/235/thirdparty/opensource/qemu-kvm/contents/async.c:193

#1  0x004c50a3 in qemu_chr_generic_open_bh (opaque=0xdc1f670)

at
/auto/thirdparty-sdk/release/235/thirdparty/opensource/qemu-kvm/contents/qemu-char.c:130

#2  0x0049c7e4 in qemu_bh_poll ()

at
/auto/thirdparty-sdk/release/235/thirdparty/opensource/qemu-kvm/contents/async.c:150

#3  0x0042017d in main_loop_wait (nonblocking=0)

at
/auto/thirdparty-sdk/release/235/thirdparty/opensource/qemu-kvm/contents/vl.c:1384

#4  0x0043bae5 in kvm_main_loop ()

at
/auto/thirdparty-sdk/release/235/thirdparty/opensource/qemu-kvm/contents/qemu-kvm.c:1710

#5  0x00420237 in main_loop ()

at
/auto/thirdparty-sdk/release/235/thirdparty/opensource/qemu-kvm/contents/vl.c:1408

#6  0x00423bf4 in main (argc=67, argv=0x7fffc1ec48a8,
envp=0x7fffc1ec4ac8)

at
/auto/thirdparty-sdk/release/235/thirdparty/opensource/qemu-kvm/contents/vl.c:3159





Regards,

sistla.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/5] KVM: MMU: allow pinning spte translations (TDP-only)

2014-06-19 Thread Gleb Natapov
On Wed, Jun 18, 2014 at 08:12:05PM -0300, mtosa...@redhat.com wrote:
> Allow vcpus to pin spte translations by:
> 
> 1) Creating a per-vcpu list of pinned ranges.
What if memory slot containing pinned range is going away?

> 2) On mmu reload request:
>   - Fault ranges.
>   - Mark sptes with a pinned bit.
Should also be marked "dirty" as per SDM:
 The three DS save area sections should be allocated from a non-paged pool, and 
marked accessed and dirty

Some comments below.

>   - Mark shadow pages as pinned.
> 
> 3) Then modify the following actions:
>   - Page age => skip spte flush.
>   - MMU notifiers => force mmu reload request (which kicks cpu out of
>   guest mode).
>   - GET_DIRTY_LOG => force mmu reload request.
>   - SLAB shrinker => skip shadow page deletion.
> 
> TDP-only.
> 
> Signed-off-by: Marcelo Tosatti 
> 
> ---
>  arch/x86/include/asm/kvm_host.h |   14 ++
>  arch/x86/kvm/mmu.c  |  202 
> ++--
>  arch/x86/kvm/mmu.h  |5 
>  arch/x86/kvm/mmutrace.h |   23 
>  arch/x86/kvm/paging_tmpl.h  |2 
>  arch/x86/kvm/x86.c  |4 
>  6 files changed, 241 insertions(+), 9 deletions(-)
> 
> Index: kvm.pinned-sptes/arch/x86/include/asm/kvm_host.h
> ===
> --- kvm.pinned-sptes.orig/arch/x86/include/asm/kvm_host.h 2014-06-18 
> 17:28:17.549456614 -0300
> +++ kvm.pinned-sptes/arch/x86/include/asm/kvm_host.h  2014-06-18 
> 17:28:24.338435658 -0300
> @@ -221,6 +221,8 @@
>   /* hold the gfn of each spte inside spt */
>   gfn_t *gfns;
>   bool unsync;
> + bool pinned;
> +
>   int root_count;  /* Currently serving as active root */
>   unsigned int unsync_children;
>   unsigned long parent_ptes;  /* Reverse mapping for parent_pte */
> @@ -337,6 +339,14 @@
>   KVM_DEBUGREG_WONT_EXIT = 2,
>  };
>  
> +struct kvm_pinned_page_range {
> + gfn_t base_gfn;
> + unsigned long npages;
> + struct list_head link;
> +};
> +
> +#define KVM_MAX_PER_VCPU_PINNED_RANGE 10
> +
>  struct kvm_vcpu_arch {
>   /*
>* rip and regs accesses must go through
> @@ -392,6 +402,10 @@
>   struct kvm_mmu_memory_cache mmu_page_cache;
>   struct kvm_mmu_memory_cache mmu_page_header_cache;
>  
> + struct list_head pinned_mmu_pages;
> + struct mutex pinned_mmu_mutex;
> + unsigned int nr_pinned_ranges;
> +
>   struct fpu guest_fpu;
>   u64 xcr0;
>   u64 guest_supported_xcr0;
> Index: kvm.pinned-sptes/arch/x86/kvm/mmu.c
> ===
> --- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.c  2014-06-18 17:28:17.550456611 
> -0300
> +++ kvm.pinned-sptes/arch/x86/kvm/mmu.c   2014-06-18 17:28:24.339435654 
> -0300
> @@ -148,6 +148,9 @@
>  
>  #define SPTE_HOST_WRITEABLE  (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
>  #define SPTE_MMU_WRITEABLE   (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
> +#define SPTE_PINNED  (1ULL << (PT64_SECOND_AVAIL_BITS_SHIFT))
> +
> +#define SPTE_PINNED_BIT PT64_SECOND_AVAIL_BITS_SHIFT
>  
>  #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
>  
> @@ -327,6 +330,11 @@
>   return pte & PT_PRESENT_MASK && !is_mmio_spte(pte);
>  }
>  
> +static int is_pinned_spte(u64 spte)
> +{
> + return spte & SPTE_PINNED && is_shadow_present_pte(spte);
> +}
> +
>  static int is_large_pte(u64 pte)
>  {
>   return pte & PT_PAGE_SIZE_MASK;
> @@ -2818,7 +2826,7 @@
>   * - false: let the real page fault path to fix it.
>   */
>  static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
> - u32 error_code)
> + u32 error_code, bool pin)
>  {
>   struct kvm_shadow_walk_iterator iterator;
>   struct kvm_mmu_page *sp;
> @@ -2828,6 +2836,9 @@
>   if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
>   return false;
>  
> + if (pin)
> + return false;
> +
>   if (!page_fault_can_be_fast(error_code))
>   return false;
>  
> @@ -2895,9 +2906,55 @@
>  }
>  
>  static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
> -  gva_t gva, pfn_t *pfn, bool write, bool *writable);
> +  gva_t gva, pfn_t *pfn, bool write, bool *writable,
> +  bool pin);
>  static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
>  
> +
> +static int get_sptep_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 
> *sptes[4])
> +{
> + struct kvm_shadow_walk_iterator iterator;
> + int nr_sptes = 0;
> +
> + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
> + return nr_sptes;
> +
> + for_each_shadow_entry(vcpu, addr, iterator) {
> + sptes[iterator.level-1] = iterator.sptep;
> + nr_sptes++;
> + if (!is_shadow_present_pte(*iterator.sptep))
> + break;
> + 

Re: [patch 2/5] KVM: MMU: allow pinning spte translations (TDP-only)

2014-06-19 Thread Avi Kivity


On 06/19/2014 02:12 AM, mtosa...@redhat.com wrote:

Allow vcpus to pin spte translations by:

1) Creating a per-vcpu list of pinned ranges.
2) On mmu reload request:
- Fault ranges.
- Mark sptes with a pinned bit.
- Mark shadow pages as pinned.

3) Then modify the following actions:
- Page age => skip spte flush.
- MMU notifiers => force mmu reload request (which kicks cpu out of
guest mode).
- GET_DIRTY_LOG => force mmu reload request.
- SLAB shrinker => skip shadow page deletion.

TDP-only.

  
+int kvm_mmu_register_pinned_range(struct kvm_vcpu *vcpu,

+ gfn_t base_gfn, unsigned long npages)
+{
+   struct kvm_pinned_page_range *p;
+
+   mutex_lock(&vcpu->arch.pinned_mmu_mutex);
+   list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+   if (p->base_gfn == base_gfn && p->npages == npages) {
+   mutex_unlock(&vcpu->arch.pinned_mmu_mutex);
+   return -EEXIST;
+   }
+   }
+   mutex_unlock(&vcpu->arch.pinned_mmu_mutex);
+
+   if (vcpu->arch.nr_pinned_ranges >=
+   KVM_MAX_PER_VCPU_PINNED_RANGE)
+   return -ENOSPC;
+
+   p = kzalloc(sizeof(struct kvm_pinned_page_range), GFP_KERNEL);
+   if (!p)
+   return -ENOMEM;
+
+   vcpu->arch.nr_pinned_ranges++;
+
+   trace_kvm_mmu_register_pinned_range(vcpu->vcpu_id, base_gfn, npages);
+
+   INIT_LIST_HEAD(&p->link);
+   p->base_gfn = base_gfn;
+   p->npages = npages;
+   mutex_lock(&vcpu->arch.pinned_mmu_mutex);
+   list_add(&p->link, &vcpu->arch.pinned_mmu_pages);
+   mutex_unlock(&vcpu->arch.pinned_mmu_mutex);
+   kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+
+   return 0;
+}
+


What happens if ranges overlap (within a vcpu, cross-vcpu)? Or if a 
range overflows and wraps around 0?  Or if it does not refer to RAM?


Looks like you're limiting the number of ranges, but not the number of 
pages, so a guest can lock all of its memory.



+
+/*
+ * Pin KVM MMU page translations. This guarantees, for valid
+ * addresses registered by kvm_mmu_register_pinned_range (valid address
+ * meaning address which posses sufficient information for fault to
+ * be resolved), valid translations exist while in guest mode and
+ * therefore no VM-exits due to faults will occur.
+ *
+ * Failure to instantiate pages will abort guest entry.
+ *
+ * Page frames should be pinned with get_page in advance.
+ *
+ * Pinning is not guaranteed while executing as L2 guest.


Does this undermine security?


+ *
+ */
+
+static void kvm_mmu_pin_pages(struct kvm_vcpu *vcpu)
+{
+   struct kvm_pinned_page_range *p;
+
+   if (is_guest_mode(vcpu))
+   return;
+
+   if (!vcpu->arch.mmu.direct_map)
+   return;
+
+   ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+
+   mutex_lock(&vcpu->arch.pinned_mmu_mutex);


Is the mutex actually needed? It seems it's only taken in vcpu context, 
so the vcpu mutex should be sufficient.



+   list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+   gfn_t gfn_offset;
+
+   for (gfn_offset = 0; gfn_offset < p->npages; gfn_offset++) {
+   gfn_t gfn = p->base_gfn + gfn_offset;
+   int r;
+   bool pinned = false;
+
+   r = vcpu->arch.mmu.page_fault(vcpu, gfn << PAGE_SHIFT,
+PFERR_WRITE_MASK, false,
+true, &pinned);
+   /* MMU notifier sequence window: retry */
+   if (!r && !pinned)
+   kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+   if (r) {
+   kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+   break;
+   }
+
+   }
+   }
+   mutex_unlock(&vcpu->arch.pinned_mmu_mutex);
+}
+
  int kvm_mmu_load(struct kvm_vcpu *vcpu)
  {
int r;
@@ -3916,6 +4101,7 @@
goto out;
/* set_cr3() should ensure TLB has been flushed */
vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
+   kvm_mmu_pin_pages(vcpu);
  out:
return r;
  }



I don't see where  you unpin pages, so even if you limit the number of 
pinned pages, a guest can pin all of memory by iterating over all of 
memory and pinning it a chunk at a time.


You might try something similar to guest MTRR handling.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 -next 4/9] DMA, CMA: support arbitrary bitmap granularity

2014-06-19 Thread Joonsoo Kim
On Wed, Jun 18, 2014 at 01:48:15PM -0700, Andrew Morton wrote:
> On Mon, 16 Jun 2014 14:40:46 +0900 Joonsoo Kim  wrote:
> 
> > PPC KVM's CMA area management requires arbitrary bitmap granularity,
> > since they want to reserve very large memory and manage this region
> > with bitmap that one bit for several pages to reduce management overheads.
> > So support arbitrary bitmap granularity for following generalization.
> > 
> > ...
> >
> > --- a/drivers/base/dma-contiguous.c
> > +++ b/drivers/base/dma-contiguous.c
> > @@ -38,6 +38,7 @@ struct cma {
> > unsigned long   base_pfn;
> > unsigned long   count;
> > unsigned long   *bitmap;
> > +   unsigned int order_per_bit; /* Order of pages represented by one bit */
> > struct mutexlock;
> >  };
> >  
> > @@ -157,9 +158,37 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
> >  
> >  static DEFINE_MUTEX(cma_mutex);
> >  
> > +static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int 
> > align_order)
> > +{
> > +   return (1 << (align_order >> cma->order_per_bit)) - 1;
> > +}
> 
> Might want a "1UL << ..." here.

Okay!

> 
> > +static unsigned long cma_bitmap_maxno(struct cma *cma)
> > +{
> > +   return cma->count >> cma->order_per_bit;
> > +}
> > +
> > +static unsigned long cma_bitmap_pages_to_bits(struct cma *cma,
> > +   unsigned long pages)
> > +{
> > +   return ALIGN(pages, 1 << cma->order_per_bit) >> cma->order_per_bit;
> > +}
> 
> Ditto.  I'm not really sure what the compiler will do in these cases,
> but would prefer not to rely on it anyway!

Okay!

Thanks for fix!
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 4/5] KVM: MMU: reload request from GET_DIRTY_LOG path

2014-06-19 Thread Gleb Natapov
On Wed, Jun 18, 2014 at 08:12:07PM -0300, mtosa...@redhat.com wrote:
> Reload remote vcpus MMU from GET_DIRTY_LOG codepath, before
> deleting a pinned spte.
> 
> Signed-off-by: Marcelo Tosatti 
> 
> ---
>  arch/x86/kvm/mmu.c |3 +++
>  1 file changed, 3 insertions(+)
> 
> Index: kvm.pinned-sptes/arch/x86/kvm/mmu.c
> ===
> --- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.c  2014-06-13 16:50:50.040140594 
> -0300
> +++ kvm.pinned-sptes/arch/x86/kvm/mmu.c   2014-06-13 16:51:05.620104451 
> -0300
> @@ -1247,6 +1247,9 @@
>   spte &= ~SPTE_MMU_WRITEABLE;
>   spte = spte & ~PT_WRITABLE_MASK;
>  
> + if (is_pinned_spte(spte))
> + mmu_reload_pinned_vcpus(kvm);
> +
Why write protect it at all? mmu_reload_pinned_vcpus() will unprotect it
anyway on the next vmentry. Isn't it better to just always report all pinned
pages as dirty?

>   return mmu_spte_update(sptep, spte);
>  }
>  
> 
> 

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Why I advise against using ivshmem

2014-06-19 Thread David Marchand

On 06/18/2014 05:01 PM, Andreas Färber wrote:

late onto this thread: SUSE Security team has just recently
done a thorough review of QEMU ivshmem code because a customer has
requested this be supported in SLES12. Multiple security-related
patches were submitted by Stefan Hajnoczi and Sebastian Krahmer, and I
fear they are probably still not merged for lack of active
maintainer... In such cases, after review, I expect them to be picked
up by Peter as committer or via qemu-trivial.

So -1, against dropping it.


Are these patches on patchwork ?


Vincent, you will find an RFC for an ivshmem-test in the qemu-devel
list archives or possibly on my qtest branch. The blocking issue that
I haven't worked on yet is that we can't unconditionally run the qtest
because it depends on KVM enabled at configure time (as opposed to
runtime) to have the device available.
http://patchwork.ozlabs.org/patch/336367/

As others have stated before, the nahanni server seems unmaintained,
thus not getting packaged by SUSE either and making testing the
interrupt parts of ivshmem difficult - unless we sort out and fill
with actual test code my proposed qtest.


Thanks for the RFC patch.

About ivshmem server, yes I will look at it.
I will see what I can propose or if importing nahanni implementation 
as-is is the best solution.


Anyway, first, documentation.


--
David Marchand
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 03/20] arm64: GICv3 device tree binding documentation

2014-06-19 Thread Marc Zyngier
Add the necessary documentation to support GICv3.

Cc: Thomas Gleixner 
Cc: Mark Rutland 
Cc: Jason Cooper 
Acked-by: Catalin Marinas 
Acked-by: Rob Herring 
Acked-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 Documentation/devicetree/bindings/arm/gic-v3.txt | 79 
 1 file changed, 79 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/arm/gic-v3.txt

diff --git a/Documentation/devicetree/bindings/arm/gic-v3.txt 
b/Documentation/devicetree/bindings/arm/gic-v3.txt
new file mode 100644
index 000..33cd05e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/gic-v3.txt
@@ -0,0 +1,79 @@
+* ARM Generic Interrupt Controller, version 3
+
+AArch64 SMP cores are often associated with a GICv3, providing Private
+Peripheral Interrupts (PPI), Shared Peripheral Interrupts (SPI),
+Software Generated Interrupts (SGI), and Locality-specific Peripheral
+Interrupts (LPI).
+
+Main node required properties:
+
+- compatible : should at least contain  "arm,gic-v3".
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+  interrupt source. Must be a single cell with a value of at least 3.
+
+  The 1st cell is the interrupt type; 0 for SPI interrupts, 1 for PPI
+  interrupts. Other values are reserved for future use.
+
+  The 2nd cell contains the interrupt number for the interrupt type.
+  SPI interrupts are in the range [0-987]. PPI interrupts are in the
+  range [0-15].
+
+  The 3rd cell is the flags, encoded as follows:
+   bits[3:0] trigger type and level flags.
+   1 = edge triggered
+   4 = level triggered
+
+  Cells 4 and beyond are reserved for future use. When the 1st cell
+  has a value of 0 or 1, cells 4 and beyond act as padding, and may be
+  ignored. It is recommended that padding cells have a value of 0.
+
+- reg : Specifies base physical address(s) and size of the GIC
+  registers, in the following order:
+  - GIC Distributor interface (GICD)
+  - GIC Redistributors (GICR), one range per redistributor region
+  - GIC CPU interface (GICC)
+  - GIC Hypervisor interface (GICH)
+  - GIC Virtual CPU interface (GICV)
+
+  GICC, GICH and GICV are optional.
+
+- interrupts : Interrupt source of the VGIC maintenance interrupt.
+
+Optional
+
+- redistributor-stride : If using padding pages, specifies the stride
+  of consecutive redistributors. Must be a multiple of 64kB.
+
+- #redistributor-regions: The number of independent contiguous regions
+  occupied by the redistributors. Required if more than one such
+  region is present.
+
+Examples:
+
+   gic: interrupt-controller@2cf0 {
+   compatible = "arm,gic-v3";
+   #interrupt-cells = <3>;
+   interrupt-controller;
+   reg = <0x0 0x2f00 0 0x1>,   // GICD
+ <0x0 0x2f10 0 0x20>,  // GICR
+ <0x0 0x2c00 0 0x2000>,// GICC
+ <0x0 0x2c01 0 0x2000>,// GICH
+ <0x0 0x2c02 0 0x2000>;// GICV
+   interrupts = <1 9 4>;
+   };
+
+   gic: interrupt-controller@2c01 {
+   compatible = "arm,gic-v3";
+   #interrupt-cells = <3>;
+   interrupt-controller;
+   redistributor-stride = <0x0 0x4>;   // 256kB stride
+   #redistributor-regions = <2>;
+   reg = <0x0 0x2c01 0 0x1>,   // GICD
+ <0x0 0x2d00 0 0x80>,  // GICR 1: CPUs 0-31
+ <0x0 0x2e00 0 0x80>;  // GICR 2: CPUs 32-63
+ <0x0 0x2c04 0 0x2000>,// GICC
+ <0x0 0x2c06 0 0x2000>,// GICH
+ <0x0 0x2c08 0 0x2000>;// GICV
+   interrupts = <1 9 4>;
+   };
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 02/20] arm64: initial support for GICv3

2014-06-19 Thread Marc Zyngier
The Generic Interrupt Controller (version 3) offers services that are
similar to GICv2, with a number of additional features:
- Affinity routing based on the CPU MPIDR (ARE)
- System register for the CPU interfaces (SRE)
- Support for more than 8 CPUs
- Locality-specific Peripheral Interrupts (LPIs)
- Interrupt Translation Services (ITS)

This patch adds preliminary support for GICv3 with ARE and SRE,
non-secure mode only. It relies on higher exception levels to grant ARE
and SRE access.

Support for LPI and ITS will be added at a later time.

Cc: Thomas Gleixner 
Cc: Jason Cooper 
Reviewed-by: Zi Shen Lim 
Reviewed-by: Christoffer Dall 
Reviewed-by: Tirumalesh Chalamarla 
Reviewed-by: Yun Wu 
Reviewed-by: Zhen Lei 
Tested-by: Tirumalesh Chalamarla
Tested-by: Radha Mohan Chintakuntla 
Acked-by: Radha Mohan Chintakuntla 
Acked-by: Catalin Marinas 
Signed-off-by: Marc Zyngier 
---
 arch/arm64/Kconfig |   1 +
 arch/arm64/kernel/head.S   |  18 +
 arch/arm64/kernel/hyp-stub.S   |   1 +
 drivers/irqchip/Kconfig|   5 +
 drivers/irqchip/Makefile   |   1 +
 drivers/irqchip/irq-gic-v3.c   | 690 +
 include/linux/irqchip/arm-gic-v3.h | 193 +++
 7 files changed, 909 insertions(+)
 create mode 100644 drivers/irqchip/irq-gic-v3.c
 create mode 100644 include/linux/irqchip/arm-gic-v3.h

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7295419..be52492 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -9,6 +9,7 @@ config ARM64
select ARM_AMBA
select ARM_ARCH_TIMER
select ARM_GIC
+   select ARM_GIC_V3
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS
select COMMON_CLK
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index a96d3a6..871b4ee 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -22,6 +22,7 @@
 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -296,6 +297,23 @@ CPU_LE(bic x0, x0, #(3 << 24)  )   // 
Clear the EE and E0E bits for EL1
msr cnthctl_el2, x0
msr cntvoff_el2, xzr// Clear virtual offset
 
+#ifdef CONFIG_ARM_GIC_V3
+   /* GICv3 system register access */
+   mrs x0, id_aa64pfr0_el1
+   ubfxx0, x0, #24, #4
+   cmp x0, #1
+   b.ne3f
+
+   mrs x0, ICC_SRE_EL2
+   orr x0, x0, #1  // Set ICC_SRE_EL2.SRE==1
+   orr x0, x0, #(1 << 3)   // Set ICC_SRE_EL2.Enable==1
+   msr ICC_SRE_EL2, x0
+   isb // Make sure SRE is now 1
+   msr ICH_HCR_EL2, xzr// Reset ICC_HCR_EL2 to defaults
+
+3:
+#endif
+
/* Populate ID registers. */
mrs x0, midr_el1
mrs x1, mpidr_el1
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 0959611..a272f33 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -19,6 +19,7 @@
 
 #include 
 #include 
+#include 
 
 #include 
 #include 
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index bbb746e..7f0c2a3 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -10,6 +10,11 @@ config ARM_GIC
 config GIC_NON_BANKED
bool
 
+config ARM_GIC_V3
+   bool
+   select IRQ_DOMAIN
+   select MULTI_IRQ_HANDLER
+
 config ARM_NVIC
bool
select IRQ_DOMAIN
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 9b9505c..c57e642 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_ARCH_SUNXI)  += irq-sun4i.o
 obj-$(CONFIG_ARCH_SUNXI)   += irq-sunxi-nmi.o
 obj-$(CONFIG_ARCH_SPEAR3XX)+= spear-shirq.o
 obj-$(CONFIG_ARM_GIC)  += irq-gic.o irq-gic-common.o
+obj-$(CONFIG_ARM_GIC_V3)   += irq-gic-v3.o irq-gic-common.o
 obj-$(CONFIG_ARM_NVIC) += irq-nvic.o
 obj-$(CONFIG_ARM_VIC)  += irq-vic.o
 obj-$(CONFIG_IMGPDC_IRQ)   += irq-imgpdc.o
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
new file mode 100644
index 000..c3dd8ad
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -0,0 +1,690 @@
+/*
+ * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see 

[PATCH v5 07/20] KVM: ARM: vgic: abstract access to the ELRSR bitmap

2014-06-19 Thread Marc Zyngier
Move the GICH_ELRSR access to its own functions, and add them to
the vgic_ops structure.

Acked-by: Catalin Marinas 
Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 include/kvm/arm_vgic.h |  2 ++
 virt/kvm/arm/vgic.c| 38 +-
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 17bbe51..38864f5 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -84,6 +84,8 @@ struct vgic_lr {
 struct vgic_ops {
struct vgic_lr  (*get_lr)(const struct kvm_vcpu *, int);
void(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
+   void(*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
+   u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 11408fe..8b73cd6 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1015,9 +1015,24 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
 }
 
+static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr lr_desc)
+{
+   if (!(lr_desc.state & LR_STATE_MASK))
+   set_bit(lr, (unsigned long 
*)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
+}
+
+static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
+{
+   const u32 *elrsr = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
+   return *(u64 *)elrsr;
+}
+
 static const struct vgic_ops vgic_ops = {
.get_lr = vgic_v2_get_lr,
.set_lr = vgic_v2_set_lr,
+   .sync_lr_elrsr  = vgic_v2_sync_lr_elrsr,
+   .get_elrsr  = vgic_v2_get_elrsr,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1031,6 +1046,17 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
vgic_ops.set_lr(vcpu, lr, vlr);
 }
 
+static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+  struct vgic_lr vlr)
+{
+   vgic_ops.sync_lr_elrsr(vcpu, lr, vlr);
+}
+
+static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
+{
+   return vgic_ops.get_elrsr(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1260,7 +1286,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu 
*vcpu)
 * Despite being EOIed, the LR may not have
 * been marked as empty.
 */
-   set_bit(lr, (unsigned long 
*)vgic_cpu->vgic_v2.vgic_elrsr);
+   vgic_sync_lr_elrsr(vcpu, lr, vlr);
}
}
 
@@ -1278,14 +1304,17 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu 
*vcpu)
 {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+   u64 elrsr;
+   unsigned long *elrsr_ptr;
int lr, pending;
bool level_pending;
 
level_pending = vgic_process_maintenance(vcpu);
+   elrsr = vgic_get_elrsr(vcpu);
+   elrsr_ptr = (unsigned long *)&elrsr;
 
/* Clear mappings for empty LRs */
-   for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr,
-vgic_cpu->nr_lr) {
+   for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) {
struct vgic_lr vlr;
 
if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
@@ -1298,8 +1327,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
}
 
/* Check if we still have something up our sleeve... */
-   pending = find_first_zero_bit((unsigned long 
*)vgic_cpu->vgic_v2.vgic_elrsr,
- vgic_cpu->nr_lr);
+   pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr);
if (level_pending || pending < vgic_cpu->nr_lr)
set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
 }
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 15/20] KVM: ARM: vgic: revisit implementation of irqchip_in_kernel

2014-06-19 Thread Marc Zyngier
So far, irqchip_in_kernel() was implemented by testing the value of
vctrl_base, which worked fine with GICv2.

With GICv3, this field is useless, as we're using system registers
instead of a memory-mapped interface. To solve this, add a boolean
flag indicating if we're using a vgic or not.

Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 include/kvm/arm_vgic.h | 3 ++-
 virt/kvm/arm/vgic.c| 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index d8d52a9..f6b9fec 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -117,6 +117,7 @@ struct vgic_params {
 struct vgic_dist {
 #ifdef CONFIG_KVM_ARM_VGIC
spinlock_t  lock;
+   boolin_kernel;
boolready;
 
/* Virtual control interface mapping */
@@ -212,7 +213,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
  struct kvm_exit_mmio *mmio);
 
-#define irqchip_in_kernel(k)   (!!((k)->arch.vgic.vctrl_base))
+#define irqchip_in_kernel(k)   (!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)((k)->arch.vgic.ready)
 
 int vgic_v2_probe(struct device_node *vgic_node,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index e4b9cbb..1348e74 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1650,6 +1650,7 @@ int kvm_vgic_create(struct kvm *kvm)
}
 
spin_lock_init(&kvm->arch.vgic.lock);
+   kvm->arch.vgic.in_kernel = true;
kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 19/20] KVM: ARM: vgic: add the GICv3 backend

2014-06-19 Thread Marc Zyngier
Introduce the support code for emulating a GICv2 on top of GICv3
hardware.

Acked-by: Catalin Marinas 
Signed-off-by: Marc Zyngier 
---
 arch/arm64/include/asm/kvm_asm.h |   2 +
 arch/arm64/kvm/vgic-v3-switch.S  |  29 +
 include/kvm/arm_vgic.h   |  28 +
 virt/kvm/arm/vgic-v3.c   | 231 +++
 4 files changed, 290 insertions(+)
 create mode 100644 arch/arm64/kvm/vgic-v3-switch.S
 create mode 100644 virt/kvm/arm/vgic-v3.c

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 6252264..ed4987b 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -106,6 +106,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, 
phys_addr_t ipa);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
+extern u64 __vgic_v3_get_ich_vtr_el2(void);
+
 extern char __save_vgic_v2_state[];
 extern char __restore_vgic_v2_state[];
 
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
new file mode 100644
index 000..9fbf273
--- /dev/null
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see .
+ */
+
+#include 
+#include 
+
+   .text
+   .pushsection.hyp.text, "ax"
+
+ENTRY(__vgic_v3_get_ich_vtr_el2)
+   mrs x0, ICH_VTR_EL2
+   ret
+ENDPROC(__vgic_v3_get_ich_vtr_el2)
+
+   .popsection
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 65f1121..35b0c12 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -33,6 +33,7 @@
 #define VGIC_MAX_CPUS  KVM_MAX_VCPUS
 
 #define VGIC_V2_MAX_LRS(1 << 6)
+#define VGIC_V3_MAX_LRS16
 
 /* Sanity checks... */
 #if (VGIC_MAX_CPUS > 8)
@@ -72,6 +73,7 @@ struct kvm_vcpu;
 
 enum vgic_type {
VGIC_V2,/* Good ol' GICv2 */
+   VGIC_V3,/* New fancy GICv3 */
 };
 
 #define LR_STATE_PENDING   (1 << 0)
@@ -172,6 +174,19 @@ struct vgic_v2_cpu_if {
u32 vgic_lr[VGIC_V2_MAX_LRS];
 };
 
+struct vgic_v3_cpu_if {
+#ifdef CONFIG_ARM_GIC_V3
+   u32 vgic_hcr;
+   u32 vgic_vmcr;
+   u32 vgic_misr;  /* Saved only */
+   u32 vgic_eisr;  /* Saved only */
+   u32 vgic_elrsr; /* Saved only */
+   u32 vgic_ap0r[4];
+   u32 vgic_ap1r[4];
+   u64 vgic_lr[VGIC_V3_MAX_LRS];
+#endif
+};
+
 struct vgic_cpu {
 #ifdef CONFIG_KVM_ARM_VGIC
/* per IRQ to LR mapping */
@@ -190,6 +205,7 @@ struct vgic_cpu {
/* CPU vif control registers for world switch */
union {
struct vgic_v2_cpu_if   vgic_v2;
+   struct vgic_v3_cpu_if   vgic_v3;
};
 #endif
 };
@@ -224,6 +240,18 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
 int vgic_v2_probe(struct device_node *vgic_node,
  const struct vgic_ops **ops,
  const struct vgic_params **params);
+#ifdef CONFIG_ARM_GIC_V3
+int vgic_v3_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params);
+#else
+static inline int vgic_v3_probe(struct device_node *vgic_node,
+   const struct vgic_ops **ops,
+   const struct vgic_params **params)
+{
+   return -ENODEV;
+}
+#endif
 
 #else
 static inline int kvm_vgic_hyp_init(void)
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
new file mode 100644
index 000..f01d446
--- /dev/null
+++ b/virt/kvm/arm/vgic-v3.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (C) 2013 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see .
+ */
+

[PATCH v5 12/20] KVM: ARM: vgic: introduce vgic_enable

2014-06-19 Thread Marc Zyngier
Move the code dealing with enabling the VGIC on to vgic_ops.

Acked-by: Catalin Marinas 
Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 include/kvm/arm_vgic.h |  1 +
 virt/kvm/arm/vgic.c| 29 +
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f515800..2228973 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -99,6 +99,7 @@ struct vgic_ops {
void(*disable_underflow)(struct kvm_vcpu *vcpu);
void(*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void(*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+   void(*enable)(struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index b097a2c..383db29 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1081,6 +1081,19 @@ static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, 
struct vgic_vmcr *vmcrp)
vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
 }
 
+static void vgic_v2_enable(struct kvm_vcpu *vcpu)
+{
+   /*
+* By forcing VMCR to zero, the GIC will restore the binary
+* points to their reset values. Anything else resets to zero
+* anyway.
+*/
+   vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+
+   /* Get the show on the road... */
+   vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
+}
+
 static const struct vgic_ops vgic_ops = {
.get_lr = vgic_v2_get_lr,
.set_lr = vgic_v2_set_lr,
@@ -1092,6 +1105,7 @@ static const struct vgic_ops vgic_ops = {
.disable_underflow  = vgic_v2_disable_underflow,
.get_vmcr   = vgic_v2_get_vmcr,
.set_vmcr   = vgic_v2_set_vmcr,
+   .enable = vgic_v2_enable,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1146,6 +1160,11 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct 
vgic_vmcr *vmcr)
vgic_ops.set_vmcr(vcpu, vmcr);
 }
 
+static inline void vgic_enable(struct kvm_vcpu *vcpu)
+{
+   vgic_ops.enable(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1608,15 +1627,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
}
 
-   /*
-* By forcing VMCR to zero, the GIC will restore the binary
-* points to their reset values. Anything else resets to zero
-* anyway.
-*/
-   vgic_cpu->vgic_v2.vgic_vmcr = 0;
-
vgic_cpu->nr_lr = vgic_nr_lr;
-   vgic_cpu->vgic_v2.vgic_hcr = GICH_HCR_EN; /* Get the show on the 
road... */
+
+   vgic_enable(vcpu);
 
return 0;
 }
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 13/20] KVM: ARM: introduce vgic_params structure

2014-06-19 Thread Marc Zyngier
Move all the data specific to a given GIC implementation into its own
little structure.

Acked-by: Catalin Marinas 
Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 include/kvm/arm_vgic.h | 11 
 virt/kvm/arm/vgic.c| 70 --
 2 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 2228973..ce2e142 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -102,6 +102,17 @@ struct vgic_ops {
void(*enable)(struct kvm_vcpu *vcpu);
 };
 
+struct vgic_params {
+   /* Physical address of vgic virtual cpu interface */
+   phys_addr_t vcpu_base;
+   /* Number of list registers */
+   u32 nr_lr;
+   /* Interrupt number */
+   unsigned intmaint_irq;
+   /* Virtual control interface base address */
+   void __iomem*vctrl_base;
+};
+
 struct vgic_dist {
 #ifdef CONFIG_KVM_ARM_VGIC
spinlock_t  lock;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 383db29..09a2135 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -76,14 +76,6 @@
 #define IMPLEMENTER_ARM0x43b
 #define GICC_ARCH_VERSION_V2   0x2
 
-/* Physical address of vgic virtual cpu interface */
-static phys_addr_t vgic_vcpu_base;
-
-/* Virtual control interface base address */
-static void __iomem *vgic_vctrl_base;
-
-static struct device_node *vgic_node;
-
 #define ACCESS_READ_VALUE  (1 << 0)
 #define ACCESS_READ_RAZ(0 << 0)
 #define ACCESS_READ_MASK(x)((x) & (1 << 0))
@@ -103,8 +95,7 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, 
struct vgic_lr lr_desc);
 static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 
-static u32 vgic_nr_lr;
-static unsigned int vgic_maint_irq;
+static struct vgic_params vgic;
 
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
int cpuid, u32 offset)
@@ -1190,7 +1181,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu 
*vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
int lr;
 
-   for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+   for_each_set_bit(lr, vgic_cpu->lr_used, vgic.nr_lr) {
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
@@ -1234,8 +1225,8 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 
sgi_source_id, int irq)
 
/* Try to use another LR for this interrupt */
lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
-  vgic_cpu->nr_lr);
-   if (lr >= vgic_cpu->nr_lr)
+  vgic.nr_lr);
+   if (lr >= vgic.nr_lr)
return false;
 
kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
@@ -1361,7 +1352,6 @@ epilog:
 
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 {
-   struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
u32 status = vgic_get_interrupt_status(vcpu);
bool level_pending = false;
 
@@ -1376,7 +1366,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu 
*vcpu)
unsigned long *eisr_ptr = (unsigned long *)&eisr;
int lr;
 
-   for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) {
+   for_each_set_bit(lr, eisr_ptr, vgic.nr_lr) {
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
vgic_irq_clear_active(vcpu, vlr.irq);
@@ -1424,7 +1414,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
elrsr_ptr = (unsigned long *)&elrsr;
 
/* Clear mappings for empty LRs */
-   for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) {
+   for_each_set_bit(lr, elrsr_ptr, vgic.nr_lr) {
struct vgic_lr vlr;
 
if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
@@ -1437,8 +1427,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
}
 
/* Check if we still have something up our sleeve... */
-   pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr);
-   if (level_pending || pending < vgic_cpu->nr_lr)
+   pending = find_first_zero_bit(elrsr_ptr, vgic.nr_lr);
+   if (level_pending || pending < vgic.nr_lr)
set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
 }
 
@@ -1627,7 +1617,12 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
}
 
-   vgic_cpu->nr_lr = vgic_nr_lr;
+   /*
+* Store the number of LRs per vcpu, so we don't have to go
+* all the way to the distributor structure to find out. Only
+* assembly code should use this one.
+*/
+   vgic_cpu->nr_lr = vgic.nr_lr;
 
vgic_enable(vcpu);
 
@@ -1636,7 +1631,7 @@ int

[PATCH v5 11/20] KVM: ARM: vgic: abstract VMCR access

2014-06-19 Thread Marc Zyngier
Instead of directly messing with the GICH_VMCR bits for the CPU
interface save/restore code, add accessors that encode/decode the
entire set of registers exposed by VMCR.

Not the most efficient thing, but given that this code is only used
by the save/restore code, performance is far from being critical.

Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 include/kvm/arm_vgic.h |  9 +++
 virt/kvm/arm/vgic.c| 69 ++
 2 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index cdfa5d9..f515800 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -81,6 +81,13 @@ struct vgic_lr {
u8  state;
 };
 
+struct vgic_vmcr {
+   u32 ctlr;
+   u32 abpr;
+   u32 bpr;
+   u32 pmr;
+};
+
 struct vgic_ops {
struct vgic_lr  (*get_lr)(const struct kvm_vcpu *, int);
void(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
@@ -90,6 +97,8 @@ struct vgic_ops {
u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu);
void(*enable_underflow)(struct kvm_vcpu *vcpu);
void(*disable_underflow)(struct kvm_vcpu *vcpu);
+   void(*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+   void(*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 369016c..b097a2c 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -100,8 +100,10 @@ static void vgic_kick_vcpus(struct kvm *kvm);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
-static u32 vgic_nr_lr;
+static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 
+static u32 vgic_nr_lr;
 static unsigned int vgic_maint_irq;
 
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
@@ -1057,6 +1059,28 @@ static void vgic_v2_disable_underflow(struct kvm_vcpu 
*vcpu)
vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
 }
 
+static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+   u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
+
+   vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT;
+   vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> 
GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+   vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >> 
GICH_VMCR_BINPOINT_SHIFT;
+   vmcrp->pmr  = (vmcr & GICH_VMCR_PRIMASK_MASK) >> 
GICH_VMCR_PRIMASK_SHIFT;
+}
+
+static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+   u32 vmcr;
+
+   vmcr  = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
+   vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & 
GICH_VMCR_ALIAS_BINPOINT_MASK;
+   vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & 
GICH_VMCR_BINPOINT_MASK;
+   vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & 
GICH_VMCR_PRIMASK_MASK;
+
+   vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
+}
+
 static const struct vgic_ops vgic_ops = {
.get_lr = vgic_v2_get_lr,
.set_lr = vgic_v2_set_lr,
@@ -1066,6 +1090,8 @@ static const struct vgic_ops vgic_ops = {
.get_interrupt_status   = vgic_v2_get_interrupt_status,
.enable_underflow   = vgic_v2_enable_underflow,
.disable_underflow  = vgic_v2_disable_underflow,
+   .get_vmcr   = vgic_v2_get_vmcr,
+   .set_vmcr   = vgic_v2_set_vmcr,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1110,6 +1136,16 @@ static inline void vgic_disable_underflow(struct 
kvm_vcpu *vcpu)
vgic_ops.disable_underflow(vcpu);
 }
 
+static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+   vgic_ops.get_vmcr(vcpu, vmcr);
+}
+
+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+   vgic_ops.set_vmcr(vcpu, vmcr);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1863,39 +1899,40 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, 
u64 *addr, bool write)
 static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
 struct kvm_exit_mmio *mmio, phys_addr_t offset)
 {
-   struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-   u32 reg, mask = 0, shift = 0;
bool updated = false;
+   struct vgic_vmcr vmcr;
+   u32 *vmcr_field;
+   u32 reg;
+
+   vgic_get_vmcr(vcpu, &vmcr);
 
switch (offset & ~0x3) {
case GIC_CPU_CTRL:
-   mask = GICH_VMCR_CTRL_MASK;
-   shift = GICH_VM

[PATCH v5 17/20] arm64: KVM: split GICv2 world switch from hyp code

2014-06-19 Thread Marc Zyngier
Move the GICv2 world switch code into its own file, and add the
necessary indirection to the arm64 switch code.

Also introduce a new type field to the vgic_params structure.

Acked-by: Catalin Marinas 
Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 arch/arm/include/asm/kvm_host.h   |   5 ++
 arch/arm64/include/asm/kvm_asm.h  |   4 ++
 arch/arm64/include/asm/kvm_host.h |  21 ++
 arch/arm64/kernel/asm-offsets.c   |   3 +
 arch/arm64/kvm/Makefile   |   4 +-
 arch/arm64/kvm/hyp.S  | 104 +
 arch/arm64/kvm/vgic-v2-switch.S   | 133 ++
 include/kvm/arm_vgic.h|   7 +-
 virt/kvm/arm/vgic-v2.c|   1 +
 virt/kvm/arm/vgic.c   |   3 +
 10 files changed, 195 insertions(+), 90 deletions(-)
 create mode 100644 arch/arm64/kvm/vgic-v2-switch.S

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 193ceaf..d6d5227 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -225,6 +225,11 @@ static inline int kvm_arch_dev_ioctl_check_extension(long 
ext)
return 0;
 }
 
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+   BUG_ON(vgic->type != VGIC_V2);
+}
+
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index d0bfc4b..6252264 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -105,6 +105,10 @@ extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+extern char __save_vgic_v2_state[];
+extern char __restore_vgic_v2_state[];
+
 #endif
 
 #endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 92242ce..4c182d0 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -200,4 +200,25 @@ static inline void __cpu_init_hyp_mode(phys_addr_t 
boot_pgd_ptr,
 hyp_stack_ptr, vector_ptr);
 }
 
+struct vgic_sr_vectors {
+   void*save_vgic;
+   void*restore_vgic;
+};
+
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+   extern struct vgic_sr_vectors __vgic_sr_vectors;
+
+   switch(vgic->type)
+   {
+   case VGIC_V2:
+   __vgic_sr_vectors.save_vgic = __save_vgic_v2_state;
+   __vgic_sr_vectors.restore_vgic  = __restore_vgic_v2_state;
+   break;
+
+   default:
+   BUG();
+   }
+}
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 20fd488..dafc415 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -129,6 +129,9 @@ int main(void)
   DEFINE(KVM_TIMER_ENABLED,offsetof(struct kvm, arch.timer.enabled));
   DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
   DEFINE(VCPU_VGIC_CPU,offsetof(struct kvm_vcpu, 
arch.vgic_cpu));
+  DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic));
+  DEFINE(VGIC_RESTORE_FN,  offsetof(struct vgic_sr_vectors, restore_vgic));
+  DEFINE(VGIC_SR_VECTOR_SZ,sizeof(struct vgic_sr_vectors));
   DEFINE(VGIC_V2_CPU_HCR,  offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
   DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
   DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 7e92952..daf24dc 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -19,5 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o 
regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
 kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 9c5d0ac..56df9a3 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -16,7 +16,6 @@
  */
 
 #include 
-#include 
 
 #include 
 #include 
@@ -376,100 +375,23 @@
 .endm
 
 /*
- * Save the VGIC CPU state into memory
- * x0: Register pointing to VCPU struct
- * Do not corrupt x1!!!
+ * Call into the vgic backend for state saving
  */
 .macro save_vgic_state
-   /* Get VGIC VCTRL base into x2 */
-   ldr x2, [x0, #VCPU_KVM]
-   kern_hyp_va x2
-   ldr x2, [x2, #KVM_VGIC_VCTRL]
-   kern_hyp_va x2
-   cbz x2, 2f  // disabled
-
-   /* Compute the address of struct vgic_cpu */
-   add   

[PATCH v5 18/20] arm64: KVM: move HCR_EL2.{IMO,FMO} manipulation into the vgic switch code

2014-06-19 Thread Marc Zyngier
GICv3 requires the IMO and FMO bits to be tightly coupled with some
of the interrupt controller's register switch.

In order to have similar code paths, move the manipulation of these
bits to the GICv2 switch code.

Acked-by: Catalin Marinas 
Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 arch/arm64/include/asm/kvm_arm.h |  5 +++--
 arch/arm64/kvm/hyp.S | 17 -
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3d69030..cc83520 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -76,9 +76,10 @@
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
-HCR_AMO | HCR_IMO | HCR_FMO | \
-HCR_SWIO | HCR_TIDCP | HCR_RW)
+HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+#define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
+
 
 /* Hyp System Control Register (SCTLR_EL2) bits */
 #define SCTLR_EL2_EE   (1 << 25)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 56df9a3..5945f3b 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -335,11 +335,8 @@
 .endm
 
 .macro activate_traps
-   ldr x2, [x0, #VCPU_IRQ_LINES]
-   ldr x1, [x0, #VCPU_HCR_EL2]
-   orr x2, x2, x1
-   msr hcr_el2, x2
-
+   ldr x2, [x0, #VCPU_HCR_EL2]
+   msr hcr_el2, x2
ldr x2, =(CPTR_EL2_TTA)
msr cptr_el2, x2
 
@@ -382,12 +379,22 @@
ldr x24, [x24, VGIC_SAVE_FN]
kern_hyp_va x24
blr x24
+   mrs x24, hcr_el2
+   mov x25, #HCR_INT_OVERRIDE
+   neg x25, x25
+   and x24, x24, x25
+   msr hcr_el2, x24
 .endm
 
 /*
  * Call into the vgic backend for state restoring
  */
 .macro restore_vgic_state
+   mrs x24, hcr_el2
+   ldr x25, [x0, #VCPU_IRQ_LINES]
+   orr x24, x24, #HCR_INT_OVERRIDE
+   orr x24, x24, x25
+   msr hcr_el2, x24
adr x24, __vgic_sr_vectors
ldr x24, [x24, #VGIC_RESTORE_FN]
kern_hyp_va x24
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 16/20] arm64: KVM: remove __kvm_hyp_code_{start,end} from hyp.S

2014-06-19 Thread Marc Zyngier
We already have __hyp_text_{start,end} to express the boundaries
of the HYP text section, and __kvm_hyp_code_{start,end} are getting
in the way of a more modular world switch code.

Just turn __kvm_hyp_code_{start,end} into #defines mapping the
linker-emitted symbols.

Acked-by: Catalin Marinas 
Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 arch/arm64/include/asm/kvm_asm.h | 6 --
 arch/arm64/include/asm/virt.h| 4 
 arch/arm64/kvm/hyp.S | 6 --
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 9fcd54b..d0bfc4b 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -18,6 +18,8 @@
 #ifndef __ARM_KVM_ASM_H__
 #define __ARM_KVM_ASM_H__
 
+#include 
+
 /*
  * 0 is reserved as an invalid value.
  * Order *must* be kept in sync with the hyp switch code.
@@ -96,8 +98,8 @@ extern char __kvm_hyp_init_end[];
 
 extern char __kvm_hyp_vector[];
 
-extern char __kvm_hyp_code_start[];
-extern char __kvm_hyp_code_end[];
+#define__kvm_hyp_code_start__hyp_text_start
+#define__kvm_hyp_code_end  __hyp_text_end
 
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 215ad46..7a5df52 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -50,6 +50,10 @@ static inline bool is_hyp_mode_mismatched(void)
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
 }
 
+/* The section containing the hypervisor text */
+extern char __hyp_text_start[];
+extern char __hyp_text_end[];
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 877d82a1..9c5d0ac 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -36,9 +36,6 @@
.pushsection.hyp.text, "ax"
.align  PAGE_SHIFT
 
-__kvm_hyp_code_start:
-   .globl __kvm_hyp_code_start
-
 .macro save_common_regs
// x2: base address for cpu context
// x3: tmp register
@@ -880,7 +877,4 @@ ENTRY(__kvm_hyp_vector)
ventry  el1_error_invalid   // Error 32-bit EL1
 ENDPROC(__kvm_hyp_vector)
 
-__kvm_hyp_code_end:
-   .globl  __kvm_hyp_code_end
-
.popsection
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 14/20] KVM: ARM: vgic: split GICv2 backend from the main vgic code

2014-06-19 Thread Marc Zyngier
Brutally hack the innocent vgic code, and move the GICv2 specific code
to its own file, using vgic_ops and vgic_params as a way to pass
information between the two blocks.

Acked-by: Catalin Marinas 
Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 arch/arm/kvm/Makefile   |   1 +
 arch/arm64/kvm/Makefile |   2 +-
 include/kvm/arm_vgic.h  |  11 ++-
 virt/kvm/arm/vgic-v2.c  | 235 +
 virt/kvm/arm/vgic.c | 251 +---
 5 files changed, 291 insertions(+), 209 deletions(-)
 create mode 100644 virt/kvm/arm/vgic-v2.c

diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 789bca9..f7057ed 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -21,4 +21,5 @@ obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 72a9fd5..7e92952 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -19,5 +19,5 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o 
regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o $(KVM)/arm/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index ce2e142..d8d52a9 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -32,7 +32,8 @@
 #define VGIC_NR_PRIVATE_IRQS   (VGIC_NR_SGIS + VGIC_NR_PPIS)
 #define VGIC_NR_SHARED_IRQS(VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
 #define VGIC_MAX_CPUS  KVM_MAX_VCPUS
-#define VGIC_MAX_LRS   (1 << 6)
+
+#define VGIC_V2_MAX_LRS(1 << 6)
 
 /* Sanity checks... */
 #if (VGIC_MAX_CPUS > 8)
@@ -162,7 +163,7 @@ struct vgic_v2_cpu_if {
u32 vgic_eisr[2];   /* Saved only */
u32 vgic_elrsr[2];  /* Saved only */
u32 vgic_apr;
-   u32 vgic_lr[VGIC_MAX_LRS];
+   u32 vgic_lr[VGIC_V2_MAX_LRS];
 };
 
 struct vgic_cpu {
@@ -175,7 +176,7 @@ struct vgic_cpu {
DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS);
 
/* Bitmap of used/free list registers */
-   DECLARE_BITMAP( lr_used, VGIC_MAX_LRS);
+   DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS);
 
/* Number of list registers on this CPU */
int nr_lr;
@@ -214,6 +215,10 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
 #define irqchip_in_kernel(k)   (!!((k)->arch.vgic.vctrl_base))
 #define vgic_initialized(k)((k)->arch.vgic.ready)
 
+int vgic_v2_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params);
+
 #else
 static inline int kvm_vgic_hyp_init(void)
 {
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
new file mode 100644
index 000..bf2ea86
--- /dev/null
+++ b/virt/kvm/arm/vgic-v2.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see .
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+
+static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+   struct vgic_lr lr_desc;
+   u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr];
+
+   lr_desc.irq = val & GICH_LR_VIRTUALID;
+   if (lr_desc.irq <= 15)
+   lr_desc.source  = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+   else
+   lr_desc.source = 0;
+   lr_desc.state   = 0;
+
+   if (val & GICH_LR_PENDING_BIT)
+   lr_desc.state |= LR_STATE_PENDING;
+   if (val & GICH_LR_ACTIVE_BIT)
+   lr_desc.state |= LR_STATE_ACTIVE;
+   if (val & GICH_LR_EOI)
+   lr_desc.state |= LR_EOI_INT;
+
+   return lr_desc;
+}
+
+/*
+ * This also does some maintenance of ELRSR.
+ */
+static void vgic_v2_set_lr(struct kv

[PATCH v5 20/20] arm64: KVM: vgic: add GICv3 world switch

2014-06-19 Thread Marc Zyngier
Introduce the GICv3 world switch code and helper functions, enabling
GICv2 emulation on GICv3 hardware.

Acked-by: Catalin Marinas 
Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 arch/arm64/include/asm/kvm_asm.h  |   2 +
 arch/arm64/include/asm/kvm_host.h |   7 ++
 arch/arm64/kernel/asm-offsets.c   |   8 ++
 arch/arm64/kvm/Makefile   |   2 +
 arch/arm64/kvm/vgic-v3-switch.S   | 237 ++
 virt/kvm/arm/vgic.c   |   1 +
 6 files changed, 257 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index ed4987b..a28c35b 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -110,6 +110,8 @@ extern u64 __vgic_v3_get_ich_vtr_el2(void);
 
 extern char __save_vgic_v2_state[];
 extern char __restore_vgic_v2_state[];
+extern char __save_vgic_v3_state[];
+extern char __restore_vgic_v3_state[];
 
 #endif
 
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 4c182d0..4ae9213 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -216,6 +216,13 @@ static inline void vgic_arch_setup(const struct 
vgic_params *vgic)
__vgic_sr_vectors.restore_vgic  = __restore_vgic_v2_state;
break;
 
+#ifdef CONFIG_ARM_GIC_V3
+   case VGIC_V3:
+   __vgic_sr_vectors.save_vgic = __save_vgic_v3_state;
+   __vgic_sr_vectors.restore_vgic  = __restore_vgic_v3_state;
+   break;
+#endif
+
default:
BUG();
}
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index dafc415..e74654c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -139,6 +139,14 @@ int main(void)
   DEFINE(VGIC_V2_CPU_ELRSR,offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
   DEFINE(VGIC_V2_CPU_APR,  offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
   DEFINE(VGIC_V2_CPU_LR,   offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
+  DEFINE(VGIC_V3_CPU_HCR,  offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
+  DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
+  DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
+  DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr));
+  DEFINE(VGIC_V3_CPU_ELRSR,offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr));
+  DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r));
+  DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r));
+  DEFINE(VGIC_V3_CPU_LR,   offsetof(struct vgic_cpu, vgic_v3.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,   offsetof(struct vgic_cpu, nr_lr));
   DEFINE(KVM_VTTBR,offsetof(struct kvm, arch.vttbr));
   DEFINE(KVM_VGIC_VCTRL,   offsetof(struct kvm, arch.vgic.vctrl_base));
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index daf24dc..32a0961 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -22,4 +22,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o 
sys_regs_generic_v8.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
 kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
index 9fbf273..4ede9d8 100644
--- a/arch/arm64/kvm/vgic-v3-switch.S
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -18,9 +18,246 @@
 #include 
 #include 
 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
.text
.pushsection.hyp.text, "ax"
 
+/*
+ * We store LRs in reverse order to let the CPU deal with streaming
+ * access. Use this macro to make it look saner...
+ */
+#define LR_OFFSET(n)   (VGIC_V3_CPU_LR + (15 - n) * 8)
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+.macro save_vgic_v3_state
+   // Compute the address of struct vgic_cpu
+   add x3, x0, #VCPU_VGIC_CPU
+
+   // Make sure stores to the GIC via the memory mapped interface
+   // are now visible to the system register interface
+   dsb st
+
+   // Save all interesting registers
+   mrs x4, ICH_HCR_EL2
+   mrs x5, ICH_VMCR_EL2
+   mrs x6, ICH_MISR_EL2
+   mrs x7, ICH_EISR_EL2
+   mrs x8, ICH_ELSR_EL2
+
+   str w4, [x3, #VGIC_V3_CPU_HCR]
+   str w5, [x3, #VGIC_V3_CPU_VMCR]
+   str w6, [x3, #VGIC_V3_CPU_MISR]
+   str w7, [x3, #VGIC_V3_CPU_EISR]
+   str w8, [x3, #VGIC_V3_CPU_ELRSR]
+
+   msr ICH_HCR_EL2, xzr
+
+   mrs x21, ICH_VTR_EL2
+   mvn w22, w21
+   ubfiz   w23, w22, 2, 4  // w23 = (15 - ListRegs) * 4
+
+   adr x24, 1

Re: [RFC PATCH 1/1] Move two pinned pages to non-movable node in kvm.

2014-06-19 Thread Gleb Natapov
CCing Marcelo,

On Wed, Jun 18, 2014 at 02:50:44PM +0800, Tang Chen wrote:
> Hi Gleb,
> 
> Thanks for the quick reply. Please see below.
> 
> On 06/18/2014 02:12 PM, Gleb Natapov wrote:
> >On Wed, Jun 18, 2014 at 01:50:00PM +0800, Tang Chen wrote:
> >>[Questions]
> >>And by the way, would you guys please answer the following questions for me 
> >>?
> >>
> >>1. What's the ept identity pagetable for ?  Only one page is enough ?
> >>
> >>2. Is the ept identity pagetable only used in realmode ?
> >>Can we free it once the guest is up (vcpu in protect mode)?
> >>
> >>3. Now, ept identity pagetable is allocated in qemu userspace.
> >>Can we allocate it in kernel space ?
> >What would be the benefit?
> 
> I think the benefit is we can hot-remove the host memory a kvm guest
> is using.
> 
> For now, only memory in ZONE_MOVABLE can be migrated/hot-removed. And the
> kernel
> will never use ZONE_MOVABLE memory. So if we can allocate these two pages in
> kernel space, we can pin them without any trouble. When doing memory
> hot-remove,
> the kernel will not try to migrate these two pages.
But we can do that by other means, no? The patch you've sent for instance.

> 
> >
> >>
> >>4. If I want to migrate these two pages, what do you think is the best way ?
> >>
> >I answered most of those here: 
> >http://www.mail-archive.com/kvm@vger.kernel.org/msg103718.html
> 
> I'm sorry, I must have missed this email.
> 
> Seeing your advice, we can unpin these two pages and repin them in the next
> EPT violation.
> So about this problem, which solution would you prefer, allocate these two
> pages in kernel
> space, or migrate them before memory hot-remove ?
> 
> I think the first solution is simpler. But I'm not quite sure if there is
> any other pages
> pinned in memory. If we have the same problem with other kvm pages, I think
> it is better to
> solve it in the second way.
> 
> What do you think ?
Remove pinning is preferable. In fact looks like for identity pagetable
it should be trivial, just don't pin. APIC access page is a little bit
more complicated since its physical address needs to be tracked to be
updated in VMCS.

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 06/20] KVM: ARM: vgic: introduce vgic_ops and LR manipulation primitives

2014-06-19 Thread Marc Zyngier
In order to split the various register manipulation from the main vgic
code, introduce a vgic_ops structure, and start by abstracting the
LR manipulation code with a couple of accessors.

Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 include/kvm/arm_vgic.h |  18 ++
 virt/kvm/arm/vgic.c| 162 +
 2 files changed, 128 insertions(+), 52 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f738e5a..17bbe51 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -68,6 +68,24 @@ struct vgic_bytemap {
u32 shared[VGIC_NR_SHARED_IRQS  / 4];
 };
 
+struct kvm_vcpu;
+
+#define LR_STATE_PENDING   (1 << 0)
+#define LR_STATE_ACTIVE(1 << 1)
+#define LR_STATE_MASK  (3 << 0)
+#define LR_EOI_INT (1 << 2)
+
+struct vgic_lr {
+   u16 irq;
+   u8  source;
+   u8  state;
+};
+
+struct vgic_ops {
+   struct vgic_lr  (*get_lr)(const struct kvm_vcpu *, int);
+   void(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
+};
+
 struct vgic_dist {
 #ifdef CONFIG_KVM_ARM_VGIC
spinlock_t  lock;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 0ba1ab0..11408fe 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -94,9 +94,12 @@ static struct device_node *vgic_node;
 #define ACCESS_WRITE_MASK(x)   ((x) & (3 << 1))
 
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
+static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
 static void vgic_kick_vcpus(struct kvm *kvm);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
+static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
+static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
 static u32 vgic_nr_lr;
 
 static unsigned int vgic_maint_irq;
@@ -593,18 +596,6 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
return false;
 }
 
-#define LR_CPUID(lr)   \
-   (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
-#define LR_IRQID(lr)   \
-   ((lr) & GICH_LR_VIRTUALID)
-
-static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu)
-{
-   clear_bit(lr_nr, vgic_cpu->lr_used);
-   vgic_cpu->vgic_v2.vgic_lr[lr_nr] &= ~GICH_LR_STATE;
-   vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
-}
-
 /**
  * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
  * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
@@ -622,13 +613,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
int vcpu_id = vcpu->vcpu_id;
-   int i, irq, source_cpu;
-   u32 *lr;
+   int i;
 
for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
-   lr = &vgic_cpu->vgic_v2.vgic_lr[i];
-   irq = LR_IRQID(*lr);
-   source_cpu = LR_CPUID(*lr);
+   struct vgic_lr lr = vgic_get_lr(vcpu, i);
 
/*
 * There are three options for the state bits:
@@ -640,7 +628,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 * If the LR holds only an active interrupt (not pending) then
 * just leave it alone.
 */
-   if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT)
+   if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE)
continue;
 
/*
@@ -649,18 +637,19 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 * is fine, then we are only setting a few bits that were
 * already set.
 */
-   vgic_dist_irq_set(vcpu, irq);
-   if (irq < VGIC_NR_SGIS)
-   dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu;
-   *lr &= ~GICH_LR_PENDING_BIT;
+   vgic_dist_irq_set(vcpu, lr.irq);
+   if (lr.irq < VGIC_NR_SGIS)
+   dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << 
lr.source;
+   lr.state &= ~LR_STATE_PENDING;
+   vgic_set_lr(vcpu, i, lr);
 
/*
 * If there's no state left on the LR (it could still be
 * active), then the LR does not hold any useful info and can
 * be marked as free for other use.
 */
-   if (!(*lr & GICH_LR_STATE))
-   vgic_retire_lr(i, irq, vgic_cpu);
+   if (!(lr.state & LR_STATE_MASK))
+   vgic_retire_lr(i, lr.irq, vcpu);
 
/* Finally update the VGIC state. */
vgic_update_state(vcpu->kvm);
@@ -989,8 +978,69 @@ static void vgic_update_state(struct kvm *kvm)
}
 }
 
-#define MK_LR_PEND(src, irq)   \
-   (GICH_LR_PENDING_BIT | (

[PATCH v2 9/9] arm64: KVM: vgic: deal with GIC sub-page alignment

2014-06-19 Thread Marc Zyngier
The GIC CPU interface is always 4k aligned. If the host is using
64k pages, it is critical to place the guest's GICC interface at the
same relative alignment as the host's GICV. Failure to do so results
in an impossibility for the guest to deal with interrupts.

Add a KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET attribute for the VGIC, allowing
userspace to retrieve the GICV offset in a page. It becomes then trivial
to adjust the GICC base address for the guest.

Signed-off-by: Marc Zyngier 
---
 arch/arm/include/uapi/asm/kvm.h   | 1 +
 arch/arm64/include/uapi/asm/kvm.h | 1 +
 virt/kvm/arm/vgic.c   | 7 +++
 3 files changed, 9 insertions(+)

diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 8b51c1a..056b782 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -174,6 +174,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
+#define KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET 4
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index b5cd6ed..5513de4 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -160,6 +160,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
+#define KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET 4
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index b0cd417..68ac9c6 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -2228,6 +2228,12 @@ static int vgic_get_attr(struct kvm_device *dev, struct 
kvm_device_attr *attr)
r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
break;
}
+   case KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET: {
+   u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+   u32 val = vgic->vcpu_base & ~PAGE_MASK;
+   r = put_user(val, uaddr);
+   break;
+   }
 
}
 
@@ -2265,6 +2271,7 @@ static int vgic_has_attr(struct kvm_device *dev, struct 
kvm_device_attr *attr)
offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
return vgic_has_attr_regs(vgic_cpu_ranges, offset);
case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+   case KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET:
return 0;
}
return -ENXIO;
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 08/20] KVM: ARM: vgic: abstract EISR bitmap access

2014-06-19 Thread Marc Zyngier
Move the GICH_EISR access to its own function.

Acked-by: Catalin Marinas 
Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 include/kvm/arm_vgic.h |  1 +
 virt/kvm/arm/vgic.c| 17 +++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 38864f5..ccb9b59 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -86,6 +86,7 @@ struct vgic_ops {
void(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
void(*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
+   u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 8b73cd6..0d9701c 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1028,11 +1028,18 @@ static u64 vgic_v2_get_elrsr(const struct kvm_vcpu 
*vcpu)
return *(u64 *)elrsr;
 }
 
+static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
+{
+   const u32 *eisr = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
+   return *(u64 *)eisr;
+}
+
 static const struct vgic_ops vgic_ops = {
.get_lr = vgic_v2_get_lr,
.set_lr = vgic_v2_set_lr,
.sync_lr_elrsr  = vgic_v2_sync_lr_elrsr,
.get_elrsr  = vgic_v2_get_elrsr,
+   .get_eisr   = vgic_v2_get_eisr,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1057,6 +1064,11 @@ static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
return vgic_ops.get_elrsr(vcpu);
 }
 
+static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
+{
+   return vgic_ops.get_eisr(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1263,10 +1275,11 @@ static bool vgic_process_maintenance(struct kvm_vcpu 
*vcpu)
 * Some level interrupts have been EOIed. Clear their
 * active bit.
 */
+   u64 eisr = vgic_get_eisr(vcpu);
+   unsigned long *eisr_ptr = (unsigned long *)&eisr;
int lr;
 
-   for_each_set_bit(lr, (unsigned long 
*)vgic_cpu->vgic_v2.vgic_eisr,
-vgic_cpu->nr_lr) {
+   for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) {
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
vgic_irq_clear_active(vcpu, vlr.irq);
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 04/20] arm64: boot protocol documentation update for GICv3

2014-06-19 Thread Marc Zyngier
Linux has some requirements that must be satisfied in order to boot
on a system built with a GICv3.

Acked-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 Documentation/arm64/booting.txt | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
index 37fc4f6..e28ccec 100644
--- a/Documentation/arm64/booting.txt
+++ b/Documentation/arm64/booting.txt
@@ -141,6 +141,12 @@ Before jumping into the kernel, the following conditions 
must be met:
   the kernel image will be entered must be initialised by software at a
   higher exception level to prevent execution in an UNKNOWN state.
 
+  For systems with a GICv3 interrupt controller, it is expected that:
+  - If EL3 is present, it must program ICC_SRE_EL3.Enable (bit 3) to
+0b1 and ICC_SRE_EL3.SRE (bit 0) to 0b1.
+  - If the kernel is entered at EL1, EL2 must set ICC_SRE_EL2.Enable
+(bit 3) to 0b1 and ICC_SRE_EL2.SRE (bit 0) to 0b1.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 0/9] arm/arm64: KVM: dynamic VGIC sizing

2014-06-19 Thread Marc Zyngier
So far, the VGIC data structures have been statically sized, meaning
that we always have to support more interrupts than we actually want,
and more CPU interfaces than we should. This is a waste of resource,
and is the kind of things that should be tuneable.

This series addresses that issue by changing the data structures to be
dynamically allocated, and adds a new configuration attribute to
allocate the number of interrupts. When the attribute is not used, we
fallback to the old behaviour of allocating a fixed number of
interrupts.

The last patch of the series is a bit out of context, but tends to fit
well here code-wise. It solves an interesting issue having to do with
the placement of the GICV interface in Stage-2 when using 64k pages
(if the HW is not 64k aligned, we need to tell userspace about the
"sub-page offset" so it can correctly place the guest's GICC region).

This series is also the base for Andre Przywara's GICv3 distributor
emulation code (which can support far more than 8 vcpus and 1020
interrupts).

This has been tested on both ARM (TC2) and arm64 (model).

Marc Zyngier (9):
  KVM: ARM: vgic: plug irq injection race
  arm/arm64: KVM: vgic: switch to dynamic allocation
  arm/arm64: KVM: vgic: Parametrize VGIC_NR_SHARED_IRQS
  arm/arm64: KVM: vgic: kill VGIC_MAX_CPUS
  arm/arm64: KVM: vgic: handle out-of-range MMIO accesses
  arm/arm64: KVM: vgic: kill VGIC_NR_IRQS
  arm/arm64: KVM: vgic: delay vgic allocation until init time
  arm/arm64: KVM: vgic: make number of irqs a configurable attribute
  arm64: KVM: vgic: deal with GIC sub-page alignment

 arch/arm/include/uapi/asm/kvm.h   |   2 +
 arch/arm/kvm/arm.c|  10 +-
 arch/arm64/include/uapi/asm/kvm.h |   2 +
 include/kvm/arm_vgic.h|  54 +++---
 virt/kvm/arm/vgic.c   | 357 --
 5 files changed, 339 insertions(+), 86 deletions(-)

-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 8/9] arm/arm64: KVM: vgic: make number of irqs a configurable attribute

2014-06-19 Thread Marc Zyngier
In order to make the number of interrupts configurable, use the new
fancy device management API to add KVM_DEV_ARM_VGIC_GRP_NR_IRQS as
a VGIC configurable attribute.

Userspace can now specify the exact size of the GIC (by increments
of 32 interrupts).

Signed-off-by: Marc Zyngier 
---
 arch/arm/include/uapi/asm/kvm.h   |  1 +
 arch/arm64/include/uapi/asm/kvm.h |  1 +
 virt/kvm/arm/vgic.c   | 29 +
 3 files changed, 31 insertions(+)

diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index e6ebdd3..8b51c1a 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -173,6 +173,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index e633ff8..b5cd6ed 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -159,6 +159,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 6e13ff9..b0cd417 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -2165,6 +2165,28 @@ static int vgic_set_attr(struct kvm_device *dev, struct 
kvm_device_attr *attr)
 
return vgic_attr_regs_access(dev, attr, ®, true);
}
+   case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+   u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+   u32 val;
+   int ret = 0;
+
+   if (get_user(val, uaddr))
+   return -EFAULT;
+
+   if (val > 1024 || (val & 31))
+   return -EINVAL;
+
+   mutex_lock(&dev->kvm->lock);
+
+   if (vgic_initialized(dev->kvm))
+   ret = -EBUSY;
+   else
+   dev->kvm->arch.vgic.nr_irqs = val;
+
+   mutex_unlock(&dev->kvm->lock);
+
+   return ret;
+   }
 
}
 
@@ -2201,6 +2223,11 @@ static int vgic_get_attr(struct kvm_device *dev, struct 
kvm_device_attr *attr)
r = put_user(reg, uaddr);
break;
}
+   case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+   u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+   r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
+   break;
+   }
 
}
 
@@ -2237,6 +2264,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct 
kvm_device_attr *attr)
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
return vgic_has_attr_regs(vgic_cpu_ranges, offset);
+   case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+   return 0;
}
return -ENXIO;
 }
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 7/9] arm/arm64: KVM: vgic: delay vgic allocation until init time

2014-06-19 Thread Marc Zyngier
It is now quite easy to delay the allocation of the vgic tables
until we actually require it to be up and running (when the first
vcpu is starting to kick around).

This allow us to allocate memory for the exact number of CPUs we
have. As nobody configures the number of interrupts just yet,
use a fallback to VGIC_NR_IRQS_LEGACY.

Signed-off-by: Marc Zyngier 
---
 arch/arm/kvm/arm.c |  9 +---
 include/kvm/arm_vgic.h |  3 +--
 virt/kvm/arm/vgic.c| 56 ++
 3 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 66c14ef..9b3957d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -308,16 +308,9 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
-   int ret;
-
/* Force users to call KVM_ARM_VCPU_INIT */
vcpu->arch.target = -1;
 
-   /* Set up VGIC */
-   ret = kvm_vgic_vcpu_init(vcpu);
-   if (ret)
-   return ret;
-
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
 
@@ -811,7 +804,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
if (vgic_present)
-   return kvm_vgic_create(kvm, KVM_MAX_VCPUS, 
VGIC_NR_IRQS_LEGACY);
+   return kvm_vgic_create(kvm);
else
return -ENXIO;
}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index cf17b68..f5788cf 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -231,9 +231,8 @@ struct kvm_exit_mmio;
 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_init(struct kvm *kvm);
-int kvm_vgic_create(struct kvm *kvm, int nr_cpus, int nr_irqs);
+int kvm_vgic_create(struct kvm *kvm);
 void kvm_vgic_destroy(struct kvm *kvm);
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 17ceb8a..6e13ff9 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1589,7 +1589,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
  * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
  * this vcpu and enable the VGIC for this VCPU
  */
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+static int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -1599,9 +1599,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
if (ret)
return ret;
 
-   if (vcpu->vcpu_id >= dist->nr_cpus)
-   return -EBUSY;
-
for (i = 0; i < dist->nr_irqs; i++) {
if (i < VGIC_NR_PPIS)
vgic_bitmap_set_irq_val(&dist->irq_enabled,
@@ -1723,9 +1720,6 @@ static int vgic_init_maps(struct vgic_dist *dist, int 
nr_cpus, int nr_irqs)
 {
int ret, i;
 
-   dist->nr_cpus = nr_cpus;
-   dist->nr_irqs = nr_irqs;
-
ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
ret |= vgic_init_bitmap(&dist->irq_state, nr_cpus, nr_irqs);
ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs);
@@ -1775,7 +1769,9 @@ void kvm_vgic_destroy(struct kvm *kvm)
  */
 int kvm_vgic_init(struct kvm *kvm)
 {
-   int ret = 0, i;
+   struct vgic_dist *dist = &kvm->arch.vgic;
+   struct kvm_vcpu *vcpu;
+   int ret = 0, i, v;
 
if (!irqchip_in_kernel(kvm))
return 0;
@@ -1785,30 +1781,58 @@ int kvm_vgic_init(struct kvm *kvm)
if (vgic_initialized(kvm))
goto out;
 
-   if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
-   IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
+   dist->nr_cpus = atomic_read(&kvm->online_vcpus);
+
+   /*
+* If nobody configured the number of interrupts, use the
+* legacy one.
+*/
+   if (!dist->nr_irqs)
+   dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
+
+   if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
+   IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
kvm_err("Need to set vgic cpu and dist addresses first\n");
ret = -ENXIO;
goto out;
}
 
-   ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
+   ret = vgic_init_maps(dist, dist->nr_cpus, dist->nr_irqs);
+   if (ret) {
+   kvm_err("Unable to allocate maps\n");
+   goto out;
+   }
+
+   ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
if (ret) {
kvm_err("Unable to remap VGIC CPU to VCPU\n");
   

[PATCH v2 4/9] arm/arm64: KVM: vgic: kill VGIC_MAX_CPUS

2014-06-19 Thread Marc Zyngier
We now have the information about the number of CPU interfaces in
the distributor itself. Let's get rid of VGIC_MAX_CPUS, and just
rely on KVM_MAX_VCPUS where we don't have the choice. Yet.

Signed-off-by: Marc Zyngier 
---
 arch/arm/kvm/arm.c | 2 +-
 include/kvm/arm_vgic.h | 3 +--
 virt/kvm/arm/vgic.c| 6 +++---
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9548bc9..14ba035 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -811,7 +811,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
if (vgic_present)
-   return kvm_vgic_create(kvm, VGIC_MAX_CPUS, 
VGIC_NR_IRQS);
+   return kvm_vgic_create(kvm, KVM_MAX_VCPUS, 
VGIC_NR_IRQS);
else
return -ENXIO;
}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index b5072b7..5853a67 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -29,13 +29,12 @@
 #define VGIC_NR_SGIS   16
 #define VGIC_NR_PPIS   16
 #define VGIC_NR_PRIVATE_IRQS   (VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_MAX_CPUS  KVM_MAX_VCPUS
 
 #define VGIC_V2_MAX_LRS(1 << 6)
 #define VGIC_V3_MAX_LRS16
 
 /* Sanity checks... */
-#if (VGIC_MAX_CPUS > 8)
+#if (KVM_MAX_VCPUS > 8)
 #error Invalid number of CPU interfaces
 #endif
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 4b1f0a6..07b6450 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1184,7 +1184,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 
sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
 
-   for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
+   for_each_set_bit(c, &sources, dist->nr_cpus) {
if (vgic_queue_irq(vcpu, c, irq))
clear_bit(c, &sources);
}
@@ -1564,7 +1564,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
if (ret)
return ret;
 
-   if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
+   if (vcpu->vcpu_id >= dist->nr_cpus)
return -EBUSY;
 
for (i = 0; i < VGIC_NR_IRQS; i++) {
@@ -2193,7 +2193,7 @@ static void vgic_destroy(struct kvm_device *dev)
 
 static int vgic_create(struct kvm_device *dev, u32 type)
 {
-   return kvm_vgic_create(dev->kvm, VGIC_MAX_CPUS, VGIC_NR_IRQS);
+   return kvm_vgic_create(dev->kvm, KVM_MAX_VCPUS, VGIC_NR_IRQS);
 }
 
 struct kvm_device_ops kvm_arm_vgic_v2_ops = {
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 10/20] KVM: ARM: vgic: move underflow handling to vgic_ops

2014-06-19 Thread Marc Zyngier
Move the code dealing with LR underflow handling to its own functions,
and make them accessible through vgic_ops.

Acked-by: Catalin Marinas 
Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 include/kvm/arm_vgic.h |  2 ++
 virt/kvm/arm/vgic.c| 28 +---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 4857508..cdfa5d9 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -88,6 +88,8 @@ struct vgic_ops {
u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu);
+   void(*enable_underflow)(struct kvm_vcpu *vcpu);
+   void(*disable_underflow)(struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 9491c22..369016c 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1047,6 +1047,16 @@ static u32 vgic_v2_get_interrupt_status(const struct 
kvm_vcpu *vcpu)
return ret;
 }
 
+static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu)
+{
+   vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE;
+}
+
+static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
+{
+   vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+}
+
 static const struct vgic_ops vgic_ops = {
.get_lr = vgic_v2_get_lr,
.set_lr = vgic_v2_set_lr,
@@ -1054,6 +1064,8 @@ static const struct vgic_ops vgic_ops = {
.get_elrsr  = vgic_v2_get_elrsr,
.get_eisr   = vgic_v2_get_eisr,
.get_interrupt_status   = vgic_v2_get_interrupt_status,
+   .enable_underflow   = vgic_v2_enable_underflow,
+   .disable_underflow  = vgic_v2_disable_underflow,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1088,6 +1100,16 @@ static inline u32 vgic_get_interrupt_status(struct 
kvm_vcpu *vcpu)
return vgic_ops.get_interrupt_status(vcpu);
 }
 
+static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
+{
+   vgic_ops.enable_underflow(vcpu);
+}
+
+static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
+{
+   vgic_ops.disable_underflow(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1269,9 +1291,9 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu 
*vcpu)
 
 epilog:
if (overflow) {
-   vgic_cpu->vgic_v2.vgic_hcr |= GICH_HCR_UIE;
+   vgic_enable_underflow(vcpu);
} else {
-   vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+   vgic_disable_underflow(vcpu);
/*
 * We're about to run this VCPU, and we've consumed
 * everything the distributor had in store for
@@ -1324,7 +1346,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu 
*vcpu)
}
 
if (status & INT_STATUS_UNDERFLOW)
-   vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+   vgic_disable_underflow(vcpu);
 
return level_pending;
 }
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 01/20] ARM: GIC: move some bits of GICv2 to a library-type file

2014-06-19 Thread Marc Zyngier
A few GICv2 low-level function are actually very useful to GICv3,
and it makes some sense to share them across the two drivers.
They end-up in their own file, with an additional parameter used
to ensure an optional synchronization (unused on GICv2).

Cc: Thomas Gleixner 
Cc: Jason Cooper 
Acked-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 drivers/irqchip/Makefile |   2 +-
 drivers/irqchip/irq-gic-common.c | 115 +++
 drivers/irqchip/irq-gic-common.h |  29 ++
 drivers/irqchip/irq-gic.c|  59 ++--
 4 files changed, 149 insertions(+), 56 deletions(-)
 create mode 100644 drivers/irqchip/irq-gic-common.c
 create mode 100644 drivers/irqchip/irq-gic-common.h

diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 62a13e5..9b9505c 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -15,7 +15,7 @@ obj-$(CONFIG_ORION_IRQCHIP)   += irq-orion.o
 obj-$(CONFIG_ARCH_SUNXI)   += irq-sun4i.o
 obj-$(CONFIG_ARCH_SUNXI)   += irq-sunxi-nmi.o
 obj-$(CONFIG_ARCH_SPEAR3XX)+= spear-shirq.o
-obj-$(CONFIG_ARM_GIC)  += irq-gic.o
+obj-$(CONFIG_ARM_GIC)  += irq-gic.o irq-gic-common.o
 obj-$(CONFIG_ARM_NVIC) += irq-nvic.o
 obj-$(CONFIG_ARM_VIC)  += irq-vic.o
 obj-$(CONFIG_IMGPDC_IRQ)   += irq-imgpdc.o
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
new file mode 100644
index 000..60ac704
--- /dev/null
+++ b/drivers/irqchip/irq-gic-common.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see .
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "irq-gic-common.h"
+
+void gic_configure_irq(unsigned int irq, unsigned int type,
+  void __iomem *base, void (*sync_access)(void))
+{
+   u32 enablemask = 1 << (irq % 32);
+   u32 enableoff = (irq / 32) * 4;
+   u32 confmask = 0x2 << ((irq % 16) * 2);
+   u32 confoff = (irq / 16) * 4;
+   bool enabled = false;
+   u32 val;
+
+   /*
+* Read current configuration register, and insert the config
+* for "irq", depending on "type".
+*/
+   val = readl_relaxed(base + GIC_DIST_CONFIG + confoff);
+   if (type == IRQ_TYPE_LEVEL_HIGH)
+   val &= ~confmask;
+   else if (type == IRQ_TYPE_EDGE_RISING)
+   val |= confmask;
+
+   /*
+* As recommended by the spec, disable the interrupt before changing
+* the configuration
+*/
+   if (readl_relaxed(base + GIC_DIST_ENABLE_SET + enableoff) & enablemask) 
{
+   writel_relaxed(enablemask, base + GIC_DIST_ENABLE_CLEAR + 
enableoff);
+   if (sync_access)
+   sync_access();
+   enabled = true;
+   }
+
+   /*
+* Write back the new configuration, and possibly re-enable
+* the interrupt.
+*/
+   writel_relaxed(val, base + GIC_DIST_CONFIG + confoff);
+
+   if (enabled)
+   writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + 
enableoff);
+
+   if (sync_access)
+   sync_access();
+}
+
+void __init gic_dist_config(void __iomem *base, int gic_irqs,
+   void (*sync_access)(void))
+{
+   unsigned int i;
+
+   /*
+* Set all global interrupts to be level triggered, active low.
+*/
+   for (i = 32; i < gic_irqs; i += 16)
+   writel_relaxed(0, base + GIC_DIST_CONFIG + i / 4);
+
+   /*
+* Set priority on all global interrupts.
+*/
+   for (i = 32; i < gic_irqs; i += 4)
+   writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i);
+
+   /*
+* Disable all interrupts.  Leave the PPI and SGIs alone
+* as they are enabled by redistributor registers.
+*/
+   for (i = 32; i < gic_irqs; i += 32)
+   writel_relaxed(0x, base + GIC_DIST_ENABLE_CLEAR + i / 
8);
+
+   if (sync_access)
+   sync_access();
+}
+
+void gic_cpu_config(void __iomem *base, void (*sync_access)(void))
+{
+   int i;
+
+   /*
+* Deal with the banked PPI and SGI interrupts - disable all
+* PPI interrupts, ensure all SGI interrupts are enabled.
+*/
+   writel_relaxed(0x

[PATCH v5 05/20] KVM: arm/arm64: vgic: move GICv2 registers to their own structure

2014-06-19 Thread Marc Zyngier
In order to make way for the GICv3 registers, move the v2-specific
registers to their own structure.

Acked-by: Catalin Marinas 
Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 arch/arm/kernel/asm-offsets.c   | 14 +--
 arch/arm/kvm/interrupts_head.S  | 26 +--
 arch/arm64/kernel/asm-offsets.c | 14 +--
 arch/arm64/kvm/hyp.S| 26 +--
 include/kvm/arm_vgic.h  | 20 +--
 virt/kvm/arm/vgic.c | 56 -
 6 files changed, 81 insertions(+), 75 deletions(-)

diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 85598b5..713e807 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -182,13 +182,13 @@ int main(void)
   DEFINE(VCPU_HYP_PC,  offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
 #ifdef CONFIG_KVM_ARM_VGIC
   DEFINE(VCPU_VGIC_CPU,offsetof(struct kvm_vcpu, 
arch.vgic_cpu));
-  DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr));
-  DEFINE(VGIC_CPU_VMCR,offsetof(struct vgic_cpu, vgic_vmcr));
-  DEFINE(VGIC_CPU_MISR,offsetof(struct vgic_cpu, vgic_misr));
-  DEFINE(VGIC_CPU_EISR,offsetof(struct vgic_cpu, vgic_eisr));
-  DEFINE(VGIC_CPU_ELRSR,   offsetof(struct vgic_cpu, vgic_elrsr));
-  DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr));
-  DEFINE(VGIC_CPU_LR,  offsetof(struct vgic_cpu, vgic_lr));
+  DEFINE(VGIC_V2_CPU_HCR,  offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+  DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+  DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+  DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+  DEFINE(VGIC_V2_CPU_ELRSR,offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+  DEFINE(VGIC_V2_CPU_APR,  offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+  DEFINE(VGIC_V2_CPU_LR,   offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,   offsetof(struct vgic_cpu, nr_lr));
 #ifdef CONFIG_KVM_ARM_TIMER
   DEFINE(VCPU_TIMER_CNTV_CTL,  offsetof(struct kvm_vcpu, 
arch.timer_cpu.cntv_ctl));
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 76af9302..e4eaf30 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -421,14 +421,14 @@ vcpu  .reqr0  @ vcpu pointer always 
in r0
ldr r9, [r2, #GICH_ELRSR1]
ldr r10, [r2, #GICH_APR]
 
-   str r3, [r11, #VGIC_CPU_HCR]
-   str r4, [r11, #VGIC_CPU_VMCR]
-   str r5, [r11, #VGIC_CPU_MISR]
-   str r6, [r11, #VGIC_CPU_EISR]
-   str r7, [r11, #(VGIC_CPU_EISR + 4)]
-   str r8, [r11, #VGIC_CPU_ELRSR]
-   str r9, [r11, #(VGIC_CPU_ELRSR + 4)]
-   str r10, [r11, #VGIC_CPU_APR]
+   str r3, [r11, #VGIC_V2_CPU_HCR]
+   str r4, [r11, #VGIC_V2_CPU_VMCR]
+   str r5, [r11, #VGIC_V2_CPU_MISR]
+   str r6, [r11, #VGIC_V2_CPU_EISR]
+   str r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
+   str r8, [r11, #VGIC_V2_CPU_ELRSR]
+   str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+   str r10, [r11, #VGIC_V2_CPU_APR]
 
/* Clear GICH_HCR */
mov r5, #0
@@ -436,7 +436,7 @@ vcpu.reqr0  @ vcpu pointer always 
in r0
 
/* Save list registers */
add r2, r2, #GICH_LR0
-   add r3, r11, #VGIC_CPU_LR
+   add r3, r11, #VGIC_V2_CPU_LR
ldr r4, [r11, #VGIC_CPU_NR_LR]
 1: ldr r6, [r2], #4
str r6, [r3], #4
@@ -463,9 +463,9 @@ vcpu.reqr0  @ vcpu pointer always 
in r0
add r11, vcpu, #VCPU_VGIC_CPU
 
/* We only restore a minimal set of registers */
-   ldr r3, [r11, #VGIC_CPU_HCR]
-   ldr r4, [r11, #VGIC_CPU_VMCR]
-   ldr r8, [r11, #VGIC_CPU_APR]
+   ldr r3, [r11, #VGIC_V2_CPU_HCR]
+   ldr r4, [r11, #VGIC_V2_CPU_VMCR]
+   ldr r8, [r11, #VGIC_V2_CPU_APR]
 
str r3, [r2, #GICH_HCR]
str r4, [r2, #GICH_VMCR]
@@ -473,7 +473,7 @@ vcpu.reqr0  @ vcpu pointer always 
in r0
 
/* Restore list registers */
add r2, r2, #GICH_LR0
-   add r3, r11, #VGIC_CPU_LR
+   add r3, r11, #VGIC_V2_CPU_LR
ldr r4, [r11, #VGIC_CPU_NR_LR]
 1: ldr r6, [r3], #4
str r6, [r2], #4
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 646f888..20fd488 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -129,13 +129,13 @@ int main(void)
   DEFINE(KVM_TIMER_ENABLED,offsetof(struct kvm, arch.timer.enabled));
   DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
   DEFINE(VCPU_VGIC_CPU,offsetof(struct kvm_vcpu, 
arch.vg

[PATCH v2 2/9] arm/arm64: KVM: vgic: switch to dynamic allocation

2014-06-19 Thread Marc Zyngier
So far, all the VGIC data structures are statically defined by the
*maximum* number of vcpus and interrupts it supports. It means that
we always have to oversize it to cater for the worst case.

Start by changing the data structures to be dynamically sizeable,
and allocate them at runtime.

The sizes are still very static though.

Signed-off-by: Marc Zyngier 
---
 arch/arm/kvm/arm.c |   5 +-
 include/kvm/arm_vgic.h |  44 ++-
 virt/kvm/arm/vgic.c| 203 ++---
 3 files changed, 206 insertions(+), 46 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 3c82b37..9548bc9 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -182,6 +182,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm->vcpus[i] = NULL;
}
}
+
+   kvm_vgic_destroy(kvm);
 }
 
 int kvm_dev_ioctl_check_extension(long ext)
@@ -290,6 +292,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
+   kvm_vgic_vcpu_destroy(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
@@ -808,7 +811,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
if (vgic_present)
-   return kvm_vgic_create(kvm);
+   return kvm_vgic_create(kvm, VGIC_MAX_CPUS, 
VGIC_NR_IRQS);
else
return -ENXIO;
}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 35b0c12..c57ffd0 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -54,19 +54,24 @@
  * - a bunch of shared interrupts (SPI)
  */
 struct vgic_bitmap {
-   union {
-   u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
-   DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
-   } percpu[VGIC_MAX_CPUS];
-   union {
-   u32 reg[VGIC_NR_SHARED_IRQS / 32];
-   DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
-   } shared;
+   /*
+* - One UL per VCPU for private interrupts (assumes UL is at
+* least 32 bits)
+* - As many UL as necessary for shared interrupts.
+*/
+   int nr_cpus;
+   unsigned long *private;
+   unsigned long *shared;
 };
 
 struct vgic_bytemap {
-   u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
-   u32 shared[VGIC_NR_SHARED_IRQS  / 4];
+   /*
+* - 8 u32 per VCPU for private interrupts
+* - As many u32 as necessary for shared interrupts.
+*/
+   int nr_cpus;
+   u32 *private;
+   u32 *shared;
 };
 
 struct kvm_vcpu;
@@ -127,6 +132,9 @@ struct vgic_dist {
boolin_kernel;
boolready;
 
+   int nr_cpus;
+   int nr_irqs;
+
/* Virtual control interface mapping */
void __iomem*vctrl_base;
 
@@ -152,12 +160,12 @@ struct vgic_dist {
/* Level/edge triggered */
struct vgic_bitmap  irq_cfg;
 
-   /* Source CPU per SGI and target CPU */
-   u8  irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS];
+   /* Source CPU per SGI and target CPU : 16 bytes per CPU */
+   u8  *irq_sgi_sources;
 
/* Target CPU for each IRQ */
-   u8  irq_spi_cpu[VGIC_NR_SHARED_IRQS];
-   struct vgic_bitmap  irq_spi_target[VGIC_MAX_CPUS];
+   u8  *irq_spi_cpu;
+   struct vgic_bitmap  *irq_spi_target;
 
/* Bitmap indicating which CPU has something pending */
unsigned long   irq_pending_on_cpu;
@@ -190,11 +198,11 @@ struct vgic_v3_cpu_if {
 struct vgic_cpu {
 #ifdef CONFIG_KVM_ARM_VGIC
/* per IRQ to LR mapping */
-   u8  vgic_irq_lr_map[VGIC_NR_IRQS];
+   u8  *vgic_irq_lr_map;
 
/* Pending interrupts on this VCPU */
DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
-   DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS);
+   unsigned long   *pending_shared;
 
/* Bitmap of used/free list registers */
DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS);
@@ -224,8 +232,10 @@ struct kvm_exit_mmio;
 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_init(struct kvm *kvm);
-int kvm_vgic_create(struct kvm *kvm);
+int kvm_vgic_create(struct kvm *kvm, int nr_cpus, int nr_irqs);
+void kvm_vgic_destroy(struct kvm *kvm);
 int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index c6da748..220b215 100644
--- a/virt/kv

[PATCH v2 1/9] KVM: ARM: vgic: plug irq injection race

2014-06-19 Thread Marc Zyngier
As it stands, nothing prevents userspace from injecting an interrupt
before the guest's GIC is actually initialized.

This goes unnoticed so far (as everything is pretty much statically
allocated), but ends up exploding in a spectacular way once we switch
to a more dynamic allocation (the GIC data structure isn't there yet).

The fix is to test for the "ready" flag in the VGIC distributor before
trying to inject the interrupt. Note that in order to avoid breaking
userspace, we have to ignore what is essentially an error.

Signed-off-by: Marc Zyngier 
Acked-by: Christoffer Dall 
---
 virt/kvm/arm/vgic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 795ab48..c6da748 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1446,7 +1446,8 @@ out:
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
bool level)
 {
-   if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
+   if (likely(vgic_initialized(kvm)) &&
+   vgic_update_irq_state(kvm, cpuid, irq_num, level))
vgic_kick_vcpus(kvm);
 
return 0;
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 09/20] KVM: ARM: vgic: abstract MISR decoding

2014-06-19 Thread Marc Zyngier
Instead of directly dealing with the GICH_MISR bits, move the code to
its own function and use a couple of public flags to represent the
actual state.

Acked-by: Catalin Marinas 
Reviewed-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 include/kvm/arm_vgic.h |  4 
 virt/kvm/arm/vgic.c| 26 +++---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index ccb9b59..4857508 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -87,6 +87,7 @@ struct vgic_ops {
void(*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
+   u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
@@ -165,6 +166,9 @@ struct vgic_cpu {
 
 #define LR_EMPTY   0xff
 
+#define INT_STATUS_EOI (1 << 0)
+#define INT_STATUS_UNDERFLOW   (1 << 1)
+
 struct kvm;
 struct kvm_vcpu;
 struct kvm_run;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 0d9701c..9491c22 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1034,12 +1034,26 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
return *(u64 *)eisr;
 }
 
+static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
+{
+   u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
+   u32 ret = 0;
+
+   if (misr & GICH_MISR_EOI)
+   ret |= INT_STATUS_EOI;
+   if (misr & GICH_MISR_U)
+   ret |= INT_STATUS_UNDERFLOW;
+
+   return ret;
+}
+
 static const struct vgic_ops vgic_ops = {
.get_lr = vgic_v2_get_lr,
.set_lr = vgic_v2_set_lr,
.sync_lr_elrsr  = vgic_v2_sync_lr_elrsr,
.get_elrsr  = vgic_v2_get_elrsr,
.get_eisr   = vgic_v2_get_eisr,
+   .get_interrupt_status   = vgic_v2_get_interrupt_status,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1069,6 +1083,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
return vgic_ops.get_eisr(vcpu);
 }
 
+static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
+{
+   return vgic_ops.get_interrupt_status(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1266,11 +1285,12 @@ epilog:
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+   u32 status = vgic_get_interrupt_status(vcpu);
bool level_pending = false;
 
-   kvm_debug("MISR = %08x\n", vgic_cpu->vgic_v2.vgic_misr);
+   kvm_debug("STATUS = %08x\n", status);
 
-   if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_EOI) {
+   if (status & INT_STATUS_EOI) {
/*
 * Some level interrupts have been EOIed. Clear their
 * active bit.
@@ -1303,7 +1323,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu 
*vcpu)
}
}
 
-   if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_U)
+   if (status & INT_STATUS_UNDERFLOW)
vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
 
return level_pending;
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 6/9] arm/arm64: KVM: vgic: kill VGIC_NR_IRQS

2014-06-19 Thread Marc Zyngier
Nuke VGIC_NR_IRQS entirely, now that the distributor instance
contains the number of IRQ allocated to this GIC.

Also add VGIC_NR_IRQS_LEGACY to preserve the current API.

Signed-off-by: Marc Zyngier 
---
 arch/arm/kvm/arm.c |  2 +-
 include/kvm/arm_vgic.h |  6 +++---
 virt/kvm/arm/vgic.c| 13 +++--
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 14ba035..66c14ef 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -811,7 +811,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
if (vgic_present)
-   return kvm_vgic_create(kvm, KVM_MAX_VCPUS, 
VGIC_NR_IRQS);
+   return kvm_vgic_create(kvm, KVM_MAX_VCPUS, 
VGIC_NR_IRQS_LEGACY);
else
return -ENXIO;
}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 3014145..cf17b68 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -25,7 +25,7 @@
 #include 
 #include 
 
-#define VGIC_NR_IRQS   256
+#define VGIC_NR_IRQS_LEGACY256
 #define VGIC_NR_SGIS   16
 #define VGIC_NR_PPIS   16
 #define VGIC_NR_PRIVATE_IRQS   (VGIC_NR_SGIS + VGIC_NR_PPIS)
@@ -39,11 +39,11 @@
 #error Invalid number of CPU interfaces
 #endif
 
-#if (VGIC_NR_IRQS & 31)
+#if (VGIC_NR_IRQS_LEGACY & 31)
 #error "VGIC_NR_IRQS must be a multiple of 32"
 #endif
 
-#if (VGIC_NR_IRQS > VGIC_MAX_IRQS)
+#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS)
 #error "VGIC_NR_IRQS must be <= 1024"
 #endif
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 13ead5d..17ceb8a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -373,7 +373,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
 
case 4: /* GICD_TYPER */
reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
-   reg |= (VGIC_NR_IRQS >> 5) - 1;
+   reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
vgic_reg_access(mmio, ®, word_offset,
ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
break;
@@ -1164,13 +1164,14 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu 
*vcpu)
 static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+   struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
struct vgic_lr vlr;
int lr;
 
/* Sanitize the input... */
BUG_ON(sgi_source_id & ~7);
BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
-   BUG_ON(irq >= VGIC_NR_IRQS);
+   BUG_ON(irq >= dist->nr_irqs);
 
kvm_debug("Queue IRQ%d\n", irq);
 
@@ -1387,7 +1388,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 
vlr = vgic_get_lr(vcpu, lr);
 
-   BUG_ON(vlr.irq >= VGIC_NR_IRQS);
+   BUG_ON(vlr.irq >= dist->nr_irqs);
vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
}
 
@@ -1601,7 +1602,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
if (vcpu->vcpu_id >= dist->nr_cpus)
return -EBUSY;
 
-   for (i = 0; i < VGIC_NR_IRQS; i++) {
+   for (i = 0; i < dist->nr_irqs; i++) {
if (i < VGIC_NR_PPIS)
vgic_bitmap_set_irq_val(&dist->irq_enabled,
vcpu->vcpu_id, i, 1);
@@ -1798,7 +1799,7 @@ int kvm_vgic_init(struct kvm *kvm)
goto out;
}
 
-   for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
+   for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
vgic_set_target_reg(kvm, 0, i);
 
kvm->arch.vgic.ready = true;
@@ -2227,7 +2228,7 @@ static void vgic_destroy(struct kvm_device *dev)
 
 static int vgic_create(struct kvm_device *dev, u32 type)
 {
-   return kvm_vgic_create(dev->kvm, KVM_MAX_VCPUS, VGIC_NR_IRQS);
+   return kvm_vgic_create(dev->kvm, KVM_MAX_VCPUS, VGIC_NR_IRQS_LEGACY);
 }
 
 struct kvm_device_ops kvm_arm_vgic_v2_ops = {
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 3/9] arm/arm64: KVM: vgic: Parametrize VGIC_NR_SHARED_IRQS

2014-06-19 Thread Marc Zyngier
Having a dynamic number of supported interrupts means that we
cannot rely on VGIC_NR_SHARED_IRQS being fixed anymore.

Instead, make it take the distributor structure as a parameter,
so it can return the right value.

Signed-off-by: Marc Zyngier 
---
 include/kvm/arm_vgic.h |  1 -
 virt/kvm/arm/vgic.c| 16 +++-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index c57ffd0..b5072b7 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -29,7 +29,6 @@
 #define VGIC_NR_SGIS   16
 #define VGIC_NR_PPIS   16
 #define VGIC_NR_PRIVATE_IRQS   (VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_NR_SHARED_IRQS(VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
 #define VGIC_MAX_CPUS  KVM_MAX_VCPUS
 
 #define VGIC_V2_MAX_LRS(1 << 6)
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 220b215..4b1f0a6 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -973,11 +973,17 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 
reg)
}
 }
 
+static int vgic_nr_shared_irqs(struct vgic_dist *dist)
+{
+   return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
+}
+
 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 {
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
unsigned long pending_private, pending_shared;
+   int shared = vgic_nr_shared_irqs(dist);
int vcpu_id;
 
vcpu_id = vcpu->vcpu_id;
@@ -990,15 +996,15 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 
pending = vgic_bitmap_get_shared_map(&dist->irq_state);
enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
-   bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
+   bitmap_and(pend_shared, pending, enabled, shared);
bitmap_and(pend_shared, pend_shared,
   vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
-  VGIC_NR_SHARED_IRQS);
+  shared);
 
pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
-   pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
+   pending_shared = find_first_bit(pend_shared, shared);
return (pending_private < VGIC_NR_PRIVATE_IRQS ||
-   pending_shared < VGIC_NR_SHARED_IRQS);
+   pending_shared < vgic_nr_shared_irqs(dist));
 }
 
 /*
@@ -1255,7 +1261,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu 
*vcpu)
}
 
/* SPIs */
-   for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) {
+   for_each_set_bit(i, vgic_cpu->pending_shared, 
vgic_nr_shared_irqs(dist)) {
if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
overflow = 1;
}
-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 00/20] arm64: GICv3 support

2014-06-19 Thread Marc Zyngier
GICv3 is the base for a new generation of interrupt controllers
designed to overcome some of the most glaring limitations of GICv2.

Some of the features are:
- Support for more than 8 CPUs (a lot more...)
- System registers for CPU interface access (instead of the memory
  mapped GICC, GICV, GICH)
- Message based interrupts

This patch series currently support:
- Affinity Routing
- System Registers
- Non-Secure Group-1 interrupts only
- KVM support (GICv3 host, GICv2 guest)

What is *not yet* supported in this series (WIP):
- LPI/ITS/MSI (pending, depending on PCI)
- KVM GICv3 guest support (pending patches from Andre)
- Any form of power management (pending patches from Sudeep)
- 32bit systems (pending patches from Jean-Philippe)

To be built, this code requires a fairly recent compiler/binutils
combo. Linaro 13.06 seems to do the trick. This has been tested on the
ARM FVP and Foundation models, with non-regressions run on a VExpress
TC-2 and another Cortex-A57 based platform.

I finally received a few comments from actual implementors and a
number of Tested-by tags, which makes me think we may be on track for
3.17.

Individuals without access to documentation and/or hardware can still
review the code (it shares a lot of concepts with GICv2) and test it
on the freely available Foundation model (see
http://releases.linaro.org/latest/openembedded/aarch64/ for details on
how to use the Foundation model).

The code is also available at the following location:
git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git 
kvm-arm64/gicv3

* From v4 [4]
  - Some cleanup and better error handling
  - World-switch optimisation (thanks to Will)
  - GICH->ICH namespace fixup (thanks to Jean-Philippe)
  - Moved __vgic_v3_get_ich_vtr_el2 to patch 19
  - Rebased on top of 3.16-rc1

* From v3 [3]
  - Fixed a lot of issues found by Christoffer (too many to report
here, see the email thread)
  - New .sync_lr_elrsr backend
  - New probing method
  - New irqchip_in_kernel implementation
  - Checked full bisectability of the series (hopefully got it right
this time...)
  - rebased on top of 3.15-rc5

* From v2 [2]
  - removed sharing of the xlate method with GICv2 (TI crossbar is now
getting in the way...)
  - Switched to a tree domain to accommodate for the LPI space
  - Fixed more bisectability

* From the initial revision [1]
  - Some code sharing with GICv2
  - Barrier cleanup/clarification
  - Revised boot protocol update
  - Consistent use of the MPIDR access macros
  - Fixed a number of embarrassing bugs
  - Fixed DT examples
  - Fixed bisectability of the series

[1]: 
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-February/229959.html
[2]: 
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-March/241972.html
[3]: 
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/248008.html
[4]: http://lists.infradead.org/pipermail/linux-arm-kernel/2014-May/256835.html

Marc Zyngier (20):
  ARM: GIC: move some bits of GICv2 to a library-type file
  arm64: initial support for GICv3
  arm64: GICv3 device tree binding documentation
  arm64: boot protocol documentation update for GICv3
  KVM: arm/arm64: vgic: move GICv2 registers to their own structure
  KVM: ARM: vgic: introduce vgic_ops and LR manipulation primitives
  KVM: ARM: vgic: abstract access to the ELRSR bitmap
  KVM: ARM: vgic: abstract EISR bitmap access
  KVM: ARM: vgic: abstract MISR decoding
  KVM: ARM: vgic: move underflow handling to vgic_ops
  KVM: ARM: vgic: abstract VMCR access
  KVM: ARM: vgic: introduce vgic_enable
  KVM: ARM: introduce vgic_params structure
  KVM: ARM: vgic: split GICv2 backend from the main vgic code
  KVM: ARM: vgic: revisit implementation of irqchip_in_kernel
  arm64: KVM: remove __kvm_hyp_code_{start,end} from hyp.S
  arm64: KVM: split GICv2 world switch from hyp code
  arm64: KVM: move HCR_EL2.{IMO,FMO} manipulation into the vgic switch
code
  KVM: ARM: vgic: add the GICv3 backend
  arm64: KVM: vgic: add GICv3 world switch

 Documentation/arm64/booting.txt  |   6 +
 Documentation/devicetree/bindings/arm/gic-v3.txt |  79 +++
 arch/arm/include/asm/kvm_host.h  |   5 +
 arch/arm/kernel/asm-offsets.c|  14 +-
 arch/arm/kvm/Makefile|   1 +
 arch/arm/kvm/interrupts_head.S   |  26 +-
 arch/arm64/Kconfig   |   1 +
 arch/arm64/include/asm/kvm_arm.h |   5 +-
 arch/arm64/include/asm/kvm_asm.h |  14 +-
 arch/arm64/include/asm/kvm_host.h|  28 +
 arch/arm64/include/asm/virt.h|   4 +
 arch/arm64/kernel/asm-offsets.c  |  25 +-
 arch/arm64/kernel/head.S |  18 +
 arch/arm64/kernel/hyp-stub.S |   1 +
 arch/arm64/kvm/Makefile  |   4 +
 arch/arm64/kvm/hyp.S | 127 +
 arch/arm64/kvm/vgic-v2-switch.S

[PATCH v2 5/9] arm/arm64: KVM: vgic: handle out-of-range MMIO accesses

2014-06-19 Thread Marc Zyngier
Now that we can (almost) dynamically size the number of interrupts,
we're facing an interesting issue:

We have to evaluate at runtime whether or not an access hits a valid
register, based on the sizing of this particular instance of the
distributor. Furthermore, the GIC spec says that accessing a reserved
register is RAZ/WI.

For this, add a new field to our range structure, indicating the number
of bits a single interrupts uses. That allows us to find out whether or
not the access is in range.

Signed-off-by: Marc Zyngier 
---
 include/kvm/arm_vgic.h |  3 ++-
 virt/kvm/arm/vgic.c| 56 --
 2 files changed, 47 insertions(+), 12 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 5853a67..3014145 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -32,6 +32,7 @@
 
 #define VGIC_V2_MAX_LRS(1 << 6)
 #define VGIC_V3_MAX_LRS16
+#define VGIC_MAX_IRQS  1024
 
 /* Sanity checks... */
 #if (KVM_MAX_VCPUS > 8)
@@ -42,7 +43,7 @@
 #error "VGIC_NR_IRQS must be a multiple of 32"
 #endif
 
-#if (VGIC_NR_IRQS > 1024)
+#if (VGIC_NR_IRQS > VGIC_MAX_IRQS)
 #error "VGIC_NR_IRQS must be <= 1024"
 #endif
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 07b6450..13ead5d 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -782,6 +782,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
 struct mmio_range {
phys_addr_t base;
unsigned long len;
+   int bits_per_irq;
bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
phys_addr_t offset);
 };
@@ -790,56 +791,67 @@ static const struct mmio_range vgic_dist_ranges[] = {
{
.base   = GIC_DIST_CTRL,
.len= 12,
+   .bits_per_irq   = 0,
.handle_mmio= handle_mmio_misc,
},
{
.base   = GIC_DIST_IGROUP,
-   .len= VGIC_NR_IRQS / 8,
+   .len= VGIC_MAX_IRQS / 8,
+   .bits_per_irq   = 1,
.handle_mmio= handle_mmio_raz_wi,
},
{
.base   = GIC_DIST_ENABLE_SET,
-   .len= VGIC_NR_IRQS / 8,
+   .len= VGIC_MAX_IRQS / 8,
+   .bits_per_irq   = 1,
.handle_mmio= handle_mmio_set_enable_reg,
},
{
.base   = GIC_DIST_ENABLE_CLEAR,
-   .len= VGIC_NR_IRQS / 8,
+   .len= VGIC_MAX_IRQS / 8,
+   .bits_per_irq   = 1,
.handle_mmio= handle_mmio_clear_enable_reg,
},
{
.base   = GIC_DIST_PENDING_SET,
-   .len= VGIC_NR_IRQS / 8,
+   .len= VGIC_MAX_IRQS / 8,
+   .bits_per_irq   = 1,
.handle_mmio= handle_mmio_set_pending_reg,
},
{
.base   = GIC_DIST_PENDING_CLEAR,
-   .len= VGIC_NR_IRQS / 8,
+   .len= VGIC_MAX_IRQS / 8,
+   .bits_per_irq   = 1,
.handle_mmio= handle_mmio_clear_pending_reg,
},
{
.base   = GIC_DIST_ACTIVE_SET,
-   .len= VGIC_NR_IRQS / 8,
+   .len= VGIC_MAX_IRQS / 8,
+   .bits_per_irq   = 1,
.handle_mmio= handle_mmio_raz_wi,
},
{
.base   = GIC_DIST_ACTIVE_CLEAR,
-   .len= VGIC_NR_IRQS / 8,
+   .len= VGIC_MAX_IRQS / 8,
+   .bits_per_irq   = 1,
.handle_mmio= handle_mmio_raz_wi,
},
{
.base   = GIC_DIST_PRI,
-   .len= VGIC_NR_IRQS,
+   .len= VGIC_MAX_IRQS,
+   .bits_per_irq   = 8,
.handle_mmio= handle_mmio_priority_reg,
},
{
.base   = GIC_DIST_TARGET,
-   .len= VGIC_NR_IRQS,
+   .len= VGIC_MAX_IRQS,
+   .bits_per_irq   = 8,
.handle_mmio= handle_mmio_target_reg,
},
{
.base   = GIC_DIST_CONFIG,
-   .len= VGIC_NR_IRQS / 4,
+   .len= VGIC_MAX_IRQS / 4,
+   .bits_per_irq   = 2,
.handle_mmio= handle_mmio_cfg_reg,
},
{
@@ -877,6 +889,22 @@ struct mmio_range *find_matching_range(const struct 
mmio_range *ranges,
return NULL;
 }
 
+static bool vgic_validate_access(const struct vgic_dist *dist,
+const struct mmio_range *range,
+unsigned

Re: [qom-cpu PATCH 3/3] target-i386: Don't enable nested VMX by default

2014-06-19 Thread Paolo Bonzini

Il 18/06/2014 21:55, Eduardo Habkost ha scritto:

TCG doesn't support VMX, and nested VMX is not enabled by default on the
KVM kernel module.

So, there's no reason to have VMX enabled by default on the core2duo and
coreduo CPU models, today. Even the newer Intel CPU model definitions
don't have it enabled.

In this case, we need machine-type compat code, as people may be running
the older machine-types on hosts that had VMX nesting enabled.

Signed-off-by: Eduardo Habkost 
---
 hw/i386/pc_piix.c | 2 ++
 hw/i386/pc_q35.c  | 2 ++
 target-i386/cpu.c | 8 
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index a48e263..61882d5 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -267,6 +267,8 @@ static void pc_init_pci(MachineState *machine)
 static void pc_compat_2_0(MachineState *machine)
 {
 smbios_legacy_mode = true;
+x86_cpu_compat_set_features("coreduo", FEAT_1_ECX, CPUID_EXT_VMX, 0);
+x86_cpu_compat_set_features("core2duo", FEAT_1_ECX, CPUID_EXT_VMX, 0);
 }

 static void pc_compat_1_7(MachineState *machine)
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index b3c02c1..3949267 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -245,6 +245,8 @@ static void pc_q35_init(MachineState *machine)
 static void pc_compat_2_0(MachineState *machine)
 {
 smbios_legacy_mode = true;
+x86_cpu_compat_set_features("coreduo", FEAT_1_ECX, CPUID_EXT_VMX, 0);
+x86_cpu_compat_set_features("core2duo", FEAT_1_ECX, CPUID_EXT_VMX, 0);
 }

 static void pc_compat_1_7(MachineState *machine)
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 2f32d29..6bd44e1 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -719,10 +719,10 @@ static X86CPUDefinition builtin_x86_defs[] = {
 CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA |
 CPUID_PSE36 | CPUID_VME | CPUID_ACPI | CPUID_SS,
 /* Missing: CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_EST,
- * CPUID_EXT_TM2, CPUID_EXT_XTPR, CPUID_EXT_PDCM */
+ * CPUID_EXT_TM2, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_VMX */
 .features[FEAT_1_ECX] =
 CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 |
-CPUID_EXT_VMX | CPUID_EXT_CX16,
+CPUID_EXT_CX16,
 .features[FEAT_8000_0001_EDX] =
 CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
 .features[FEAT_8000_0001_ECX] =
@@ -803,9 +803,9 @@ static X86CPUDefinition builtin_x86_defs[] = {
 CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA | CPUID_ACPI |
 CPUID_SS,
 /* Missing: CPUID_EXT_EST, CPUID_EXT_TM2 , CPUID_EXT_XTPR,
- * CPUID_EXT_PDCM */
+ * CPUID_EXT_PDCM, CPUID_EXT_VMX */
 .features[FEAT_1_ECX] =
-CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | CPUID_EXT_VMX,
+CPUID_EXT_SSE3 | CPUID_EXT_MONITOR,
 .features[FEAT_8000_0001_EDX] =
 CPUID_EXT2_NX,
 .xlevel = 0x8008,



Could you please do the same for SVM, perhaps with the exception of 
qemu64?  Nested SVM is enabled by default upstream, but right now it is 
probably less stable than nested VMX.


Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Nested paging in nested SVM setup

2014-06-19 Thread Paolo Bonzini

Il 18/06/2014 18:59, Valentine Sinitsyn ha scritto:


I also noticed that setting PAT MSR from the nested hypervisor leaves
high word unassigned, i.e. the code like this:

  mov $0x70106, %rax
  mov %rax, %rdx
  mov $0x0277, %rcx
  wrmsr
  rdmsr

yields %rax = 0, %rdx = 0x70106.


This should be the trivial fix:

diff --git a/arch/x86/include/asm/kvm_host.h 
b/arch/x86/include/asm/kvm_host.h

index 0b140dc65bee..8a1cdc0f8fe7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -461,7 +461,7 @@ struct kvm_vcpu_arch {
bool nmi_injected;/* Trying to inject an NMI this entry */

struct mtrr_state_type mtrr_state;
-   u32 pat;
+   u64 pat;

unsigned switch_db_regs;
unsigned long db[KVM_NR_DB_REGS];

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 78331] Qemu crash in x86

2014-06-19 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=78331

Paolo Bonzini  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 CC||bonz...@gnu.org
 Resolution|--- |INVALID

--- Comment #1 from Paolo Bonzini  ---
Not a kernel bug, and the bug was also reported (and closed) on the QEMU bug
tracker.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 9/9] KVM: vmx: vmx instructions handling does not consider cs.l

2014-06-19 Thread Paolo Bonzini

Il 18/06/2014 19:51, Nadav Amit ha scritto:

If you fix it, please fix both VMREAD and VMWRITE. If not, I would resubmit.


Yes, I'm fixing it myself.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/14] arm/arm64: KVM: make the value of ICC_SRE_EL1 a per-VM variable

2014-06-19 Thread Andre Przywara
ICC_SRE_EL1 is a system register allowing msr/mrs accesses to the
GIC CPU interface for EL1 (guests). Currently we force it to 0, but
for proper GICv3 support we have to allow guests to use it (depending
on their selected virtual GIC model).
So add ICC_SRE_EL1 to the list of saved/restored registers on a
world switch, but actually disallow a guest to change it by only
restoring a fixed, once-initialized value.
This value depends on the GIC model userland has chosen for a guest.

Signed-off-by: Andre Przywara 
---
 arch/arm64/kernel/asm-offsets.c |1 +
 arch/arm64/kvm/vgic-v3-switch.S |   14 +-
 include/kvm/arm_vgic.h  |1 +
 virt/kvm/arm/vgic-v3.c  |9 +++--
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index e74654c..0f24b21 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -139,6 +139,7 @@ int main(void)
   DEFINE(VGIC_V2_CPU_ELRSR,offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
   DEFINE(VGIC_V2_CPU_APR,  offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
   DEFINE(VGIC_V2_CPU_LR,   offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
+  DEFINE(VGIC_V3_CPU_SRE,  offsetof(struct vgic_cpu, vgic_v3.vgic_sre));
   DEFINE(VGIC_V3_CPU_HCR,  offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
   DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
   DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
index 4ede9d8..c0cfd16 100644
--- a/arch/arm64/kvm/vgic-v3-switch.S
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -148,17 +148,18 @@
  * x0: Register pointing to VCPU struct
  */
 .macro restore_vgic_v3_state
-   // Disable SRE_EL1 access. Necessary, otherwise
-   // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens...
-   msr ICC_SRE_EL1, xzr
-   isb
-
// Compute the address of struct vgic_cpu
add x3, x0, #VCPU_VGIC_CPU
 
// Restore all interesting registers
ldr w4, [x3, #VGIC_V3_CPU_HCR]
ldr w5, [x3, #VGIC_V3_CPU_VMCR]
+   ldr w25, [x3, #VGIC_V3_CPU_SRE]
+
+   msr ICC_SRE_EL1, x25
+
+   // make sure SRE is valid before writing the other registers
+   isb
 
msr ICH_HCR_EL2, x4
msr ICH_VMCR_EL2, x5
@@ -243,9 +244,12 @@
dsb sy
 
// Prevent the guest from touching the GIC system registers
+   // if SRE isn't enabled for GICv3 emulation
+   cbnzx25, 1f
mrs x5, ICC_SRE_EL2
and x5, x5, #~ICC_SRE_EL2_ENABLE
msr ICC_SRE_EL2, x5
+1:
 .endm
 
 ENTRY(__save_vgic_v3_state)
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7e7c99e..8aa8482 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -199,6 +199,7 @@ struct vgic_v3_cpu_if {
 #ifdef CONFIG_ARM_GIC_V3
u32 vgic_hcr;
u32 vgic_vmcr;
+   u32 vgic_sre;   /* Restored only, change ignored */
u32 vgic_misr;  /* Saved only */
u32 vgic_eisr;  /* Saved only */
u32 vgic_elrsr; /* Saved only */
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 40d6817..7d9c85e 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -145,15 +145,20 @@ static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, 
struct vgic_vmcr *vmcrp)
 
 static void vgic_v3_enable(struct kvm_vcpu *vcpu)
 {
+   struct vgic_v3_cpu_if *vgic_v3;
+
+   vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
/*
 * By forcing VMCR to zero, the GIC will restore the binary
 * points to their reset values. Anything else resets to zero
 * anyway.
 */
-   vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0;
+   vgic_v3->vgic_vmcr = 0;
+
+   vgic_v3->vgic_sre   = 0;
 
/* Get the show on the road... */
-   vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN;
+   vgic_v3->vgic_hcr = ICH_HCR_EN;
 }
 
 static const struct vgic_ops vgic_v3_ops = {
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/14] arm/arm64: KVM: add SGI system register trapping

2014-06-19 Thread Andre Przywara
While the injection of a (virtual) inter-processor interrupt (SGI)
on a GICv2 works by writing to a MMIO register, GICv3 uses system
registers to trigger them.
Trap the appropriate registers both on ARM and ARM64 machines and
call the SGI handler function in the vGICv3 emulation code.

Signed-off-by: Andre Przywara 
---
 arch/arm/kvm/coproc.c |   19 +++
 arch/arm64/kvm/sys_regs.c |   26 ++
 2 files changed, 45 insertions(+)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index c58a351..4adadb7 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -205,6 +205,22 @@ done:
return true;
 }
 
+static bool access_gic_sgi(struct kvm_vcpu *vcpu,
+  const struct coproc_params *p,
+  const struct coproc_reg *r)
+{
+   u64 val;
+
+   if (!p->is_write)
+   return read_from_write_only(vcpu, p);
+
+   val = *vcpu_reg(vcpu, p->Rt1);
+   val |= (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
+   vgic_v3_dispatch_sgi(vcpu, val);
+
+   return true;
+}
+
 /*
  * Generic accessor for VM registers. Only called as long as HCR_TVM
  * is set.
@@ -376,6 +392,9 @@ static const struct coproc_reg cp15_regs[] = {
{ CRn(10), CRm( 3), Op1( 0), Op2( 1), is32,
access_vm_reg, reset_unknown, c10_AMAIR1},
 
+   /* ICC_SGI1R */
+   { CRm64(12), Op1( 0), is64, access_gic_sgi},
+
/* VBAR: swapped by interrupt.S. */
{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
NULL, reset_val, c12_VBAR, 0x },
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index fa2273a..012f21a 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -164,6 +164,27 @@ static bool access_sctlr(struct kvm_vcpu *vcpu,
 }
 
 /*
+ * Trapping on the GICv3 SGI system register.
+ * Forward the request to the VGIC emulation.
+ * The cp15_64 code makes sure this automatically works
+ * for both AArch64 and AArch32 accesses.
+ */
+static bool access_gic_sgi(struct kvm_vcpu *vcpu,
+  const struct sys_reg_params *p,
+  const struct sys_reg_desc *r)
+{
+   u64 val;
+
+   if (!p->is_write)
+   return read_from_write_only(vcpu, p);
+
+   val = *vcpu_reg(vcpu, p->Rt);
+   vgic_v3_dispatch_sgi(vcpu, val);
+
+   return true;
+}
+
+/*
  * We could trap ID_DFR0 and tell the guest we don't support performance
  * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
  * NAKed, so it will read the PMCR anyway.
@@ -282,6 +303,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* VBAR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b), Op2(0b000),
  NULL, reset_val, VBAR_EL1, 0 },
+   /* ICC_SGI1R_EL1 */
+   { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1011), Op2(0b101),
+ access_gic_sgi },
/* CONTEXTIDR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b), Op2(0b001),
  access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
@@ -374,6 +398,8 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR },
{ Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR },
 
+   { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
+
/*
 * DC{C,I,CI}SW operations:
 */
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/14] arm/arm64: KVM: add virtual GICv3 distributor emulation

2014-06-19 Thread Andre Przywara
With everything separated and prepared, we implement a model of a
GICv3 distributor and redistributors by using the existing framework
to provide handler functions for each register group.
Currently we limit the emulation to a model enforcing a single
security state, with SRE==1 (forcing system register access) and
ARE==1 (allowing more than 8 VCPUs).
We share some of the functions provided for GICv2 emulation, but take
the different ways of addressing (v)CPUs into account.
Save and restore is currently not implemented.

Similar to the split-off GICv2 specific code, the new emulation code
goes into a new file (vgic-v3-emul.c).

Signed-off-by: Andre Przywara 
---
 arch/arm64/kvm/Makefile|1 +
 include/kvm/arm_vgic.h |   11 +-
 include/linux/irqchip/arm-gic-v3.h |   26 ++
 include/linux/kvm_host.h   |1 +
 include/uapi/linux/kvm.h   |1 +
 virt/kvm/arm/vgic-v3-emul.c|  895 
 virt/kvm/arm/vgic.c|   11 +-
 virt/kvm/arm/vgic.h|3 +
 virt/kvm/kvm_main.c|3 +
 9 files changed, 949 insertions(+), 3 deletions(-)
 create mode 100644 virt/kvm/arm/vgic-v3-emul.c

diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index f241db6..2fba3a5 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -21,6 +21,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o 
sys_regs_generic_v8.o
 
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 8aa8482..3b164ee 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -151,7 +151,11 @@ struct vgic_dist {
 
/* Distributor and vcpu interface mapping in the guest */
phys_addr_t vgic_dist_base;
-   phys_addr_t vgic_cpu_base;
+   /* GICv2 and GICv3 use different mapped register blocks */
+   union {
+   phys_addr_t vgic_cpu_base;
+   phys_addr_t vgic_redist_base;
+   };
 
/* Distributor enabled */
u32 enabled;
@@ -176,6 +180,10 @@ struct vgic_dist {
 
/* Target CPU for each IRQ */
u8  *irq_spi_cpu;
+
+   /* Target MPIDR for each IRQ (needed for GICv3 IROUTERn) only */
+   u32 *irq_spi_mpidr;
+
struct vgic_bitmap  *irq_spi_target;
 
/* Bitmap indicating which CPU has something pending */
@@ -253,6 +261,7 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
bool level);
+void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
  struct kvm_exit_mmio *mmio);
diff --git a/include/linux/irqchip/arm-gic-v3.h 
b/include/linux/irqchip/arm-gic-v3.h
index 9eac712..9f17e57 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -31,6 +31,7 @@
 #define GICD_SETSPI_SR 0x0050
 #define GICD_CLRSPI_SR 0x0058
 #define GICD_SEIR  0x0068
+#define GICD_IGROUPR   0x0080
 #define GICD_ISENABLER 0x0100
 #define GICD_ICENABLER 0x0180
 #define GICD_ISPENDR   0x0200
@@ -39,19 +40,38 @@
 #define GICD_ICACTIVER 0x0380
 #define GICD_IPRIORITYR0x0400
 #define GICD_ICFGR 0x0C00
+#define GICD_IGRPMODR  0x0D00
+#define GICD_NSACR 0x0E00
 #define GICD_IROUTER   0x6000
+#define GICD_IDREGS0xFFD0
 #define GICD_PIDR2 0xFFE8
 
+/*
+ * Non-ARE distributor registers, needed to provide the RES0
+ * semantics for KVM's emulated GICv3
+ */
+#define GICD_ITARGETSR 0x0800
+#define GICD_SGIR  0x0F00
+#define GICD_CPENDSGIR 0x0F10
+#define GICD_SPENDSGIR 0x0F20
+
+
 #define GICD_CTLR_RWP  (1U << 31)
+#define GICD_CTLR_DS   (1U << 6)
 #define GICD_CTLR_ARE_NS   (1U << 4)
 #define GICD_CTLR_ENABLE_G1A   (1U << 1)
 #define GICD_CTLR_ENABLE_G1(1U << 0)
 
+#define GICD_TYPER_LPIS(1U << 17)
+#define GICD_TYPER_MBIS(1U << 16)
+
 #define GICD_IROUTER_SPI_MODE_ONE  (0U << 31)
 #define GICD_IROUTER_SPI_MODE_ANY  (1U << 31)
 
 #define GIC_PIDR2_

[PATCH 05/14] arm/arm64: KVM: introduce per-VM ops

2014-06-19 Thread Andre Przywara
Currently we only have one virtual GIC model supported, so all guests
use the same emulation code. With the addition of another model we
end up with different guests using potentially different vGIC models,
so we have to split up some functions to be per VM.
Introduce a vgic_vm_ops struct to hold function pointers for those
functions that are different and provide the necessary code to
initialize them.
This includes functions that depend on the emulated GIC model only
and functions that depend on the combination of host and guest GIC.

Signed-off-by: Andre Przywara 
---
 include/kvm/arm_vgic.h |   18 +++-
 virt/kvm/arm/vgic-v2.c |   17 +++-
 virt/kvm/arm/vgic-v3.c |   16 ++-
 virt/kvm/arm/vgic.c|  109 +---
 4 files changed, 121 insertions(+), 39 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 4feac9a..7e7c99e 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -99,8 +99,6 @@ struct vgic_vmcr {
 };
 
 struct vgic_ops {
-   struct vgic_lr  (*get_lr)(const struct kvm_vcpu *, int);
-   void(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
void(*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
@@ -123,6 +121,17 @@ struct vgic_params {
unsigned intmaint_irq;
/* Virtual control interface base address */
void __iomem*vctrl_base;
+   bool (*init_emul)(struct kvm *kvm, int type);
+};
+
+struct vgic_vm_ops {
+   struct vgic_lr  (*get_lr)(const struct kvm_vcpu *, int);
+   void(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
+   bool(*handle_mmio)(struct kvm_vcpu *, struct kvm_run *,
+  struct kvm_exit_mmio *);
+   bool(*queue_sgi)(struct kvm_vcpu *vcpu, int irq);
+   void(*unqueue_sgi)(struct kvm_vcpu *vcpu, int irq, int source);
+   int (*vgic_init)(struct kvm *kvm, const struct vgic_params *params);
 };
 
 struct vgic_dist {
@@ -131,6 +140,9 @@ struct vgic_dist {
boolin_kernel;
boolready;
 
+   /* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */
+   u32 vgic_model;
+
int nr_cpus;
int nr_irqs;
 
@@ -168,6 +180,8 @@ struct vgic_dist {
 
/* Bitmap indicating which CPU has something pending */
unsigned long   irq_pending_on_cpu;
+
+   struct vgic_vm_ops  vm_ops;
 #endif
 };
 
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index a55a9a4..f2c214a 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -148,8 +148,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
 }
 
 static const struct vgic_ops vgic_v2_ops = {
-   .get_lr = vgic_v2_get_lr,
-   .set_lr = vgic_v2_set_lr,
.sync_lr_elrsr  = vgic_v2_sync_lr_elrsr,
.get_elrsr  = vgic_v2_get_elrsr,
.get_eisr   = vgic_v2_get_eisr,
@@ -163,6 +161,20 @@ static const struct vgic_ops vgic_v2_ops = {
 
 static struct vgic_params vgic_v2_params;
 
+static bool vgic_v2_init_emul(struct kvm *kvm, int type)
+{
+   struct vgic_vm_ops *vm_ops = &kvm->arch.vgic.vm_ops;
+
+   switch (type) {
+   case KVM_DEV_TYPE_ARM_VGIC_V2:
+   vm_ops->get_lr = vgic_v2_get_lr;
+   vm_ops->set_lr = vgic_v2_set_lr;
+   return true;
+   }
+
+   return false;
+}
+
 /**
  * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
  * @node:  pointer to the DT node
@@ -201,6 +213,7 @@ int vgic_v2_probe(struct device_node *vgic_node,
ret = -ENOMEM;
goto out;
}
+   vgic->init_emul = vgic_v2_init_emul;
 
vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR);
vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index f01d446..f42961c 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -157,8 +157,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
 }
 
 static const struct vgic_ops vgic_v3_ops = {
-   .get_lr = vgic_v3_get_lr,
-   .set_lr = vgic_v3_set_lr,
.sync_lr_elrsr  = vgic_v3_sync_lr_elrsr,
.get_elrsr  = vgic_v3_get_elrsr,
.get_eisr   = vgic_v3_get_eisr,
@@ -170,6 +168,19 @@ static const struct vgic_ops vgic_v3_ops = {
.enable = vgic_v3_enable,
 };
 
+static bool vgic_v3_init_emul_compat(struct kvm *kvm, int type)
+{
+   struct vgic_vm_ops *vm_ops = &kvm->arch.vgic.vm_ops;
+
+   switch (type) {
+   case KVM_DEV_TYPE_ARM_VGIC_V2:
+   vm_ops->get_lr = vgic_v3_get_lr;
+   vm_ops->set_lr 

[PATCH 09/14] arm/arm64: KVM: split GICv2 specific emulation code from vgic.c

2014-06-19 Thread Andre Przywara
vgic.c is currently a mixture of generic vGIC emulation code and
functions specific to emulating a GICv2. To ease the addition of
GICv3, split off strictly v2 specific parts into a new file
vgic-v2-emul.c.
A new header file vgic.h is introduced to allow separation and later
sharing of functions.

Signed-off-by: Andre Przywara 
---
 arch/arm/kvm/Makefile   |1 +
 arch/arm64/kvm/Makefile |1 +
 virt/kvm/arm/vgic-v2-emul.c |  795 
 virt/kvm/arm/vgic.c |  856 +++
 virt/kvm/arm/vgic.h |  113 ++
 5 files changed, 956 insertions(+), 810 deletions(-)
 create mode 100644 virt/kvm/arm/vgic-v2-emul.c
 create mode 100644 virt/kvm/arm/vgic.h

diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index f7057ed..443b8be 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -22,4 +22,5 @@ obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 32a0961..f241db6 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -20,6 +20,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
new file mode 100644
index 000..ba5f873
--- /dev/null
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -0,0 +1,795 @@
+/*
+ * Contains GICv2 specific emulation code, was in vgic.c before.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see .
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+
+#include "vgic.h"
+
+#define GICC_ARCH_VERSION_V20x2
+
+static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
+{
+   return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
+}
+
+static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
+struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+   u32 reg;
+   u32 word_offset = offset & 3;
+
+   switch (offset & ~3) {
+   case 0: /* GICD_CTLR */
+   reg = vcpu->kvm->arch.vgic.enabled;
+   vgic_reg_access(mmio, ®, word_offset,
+   ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+   if (mmio->is_write) {
+   vcpu->kvm->arch.vgic.enabled = reg & 1;
+   vgic_update_state(vcpu->kvm);
+   return true;
+   }
+   break;
+
+   case 4: /* GICD_TYPER */
+   reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
+   reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
+   vgic_reg_access(mmio, ®, word_offset,
+   ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+   break;
+
+   case 8: /* GICD_IIDR */
+   reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+   vgic_reg_access(mmio, ®, word_offset,
+   ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+   break;
+   }
+
+   return false;
+}
+
+static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
+  struct kvm_exit_mmio *mmio,
+  phys_addr_t offset)
+{
+   return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
+ vcpu->vcpu_id, ACCESS_WRITE_SETBIT);
+}
+
+static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
+struct kvm_exit_mmio *mmio,
+phys_addr_t 

[PATCH 03/14] arm/arm64: KVM: refactor vgic_handle_mmio() function

2014-06-19 Thread Andre Przywara
Currently we only need to deal with one MMIO region for the GIC
emulation, but we soon need to extend this. Refactor the existing
code to allow easier addition of different ranges without code
duplication.

Signed-off-by: Andre Przywara 
---
 virt/kvm/arm/vgic.c |   72 ---
 1 file changed, 51 insertions(+), 21 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 8f1daf2..4c6b212 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -905,37 +905,28 @@ static bool vgic_validate_access(const struct vgic_dist 
*dist,
return true;
 }
 
-/**
- * vgic_handle_mmio - handle an in-kernel MMIO access
+/*
+ * vgic_handle_mmio_range - handle an in-kernel MMIO access
  * @vcpu:  pointer to the vcpu performing the access
  * @run:   pointer to the kvm_run structure
  * @mmio:  pointer to the data describing the access
+ * @ranges:pointer to the register defining structure
+ * @mmio_base: base address for this mapping
  *
- * returns true if the MMIO access has been performed in kernel space,
- * and false if it needs to be emulated in user space.
+ * returns true if the MMIO access could be performed
  */
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
- struct kvm_exit_mmio *mmio)
+static bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
+   struct kvm_exit_mmio *mmio,
+   const struct mmio_range *ranges,
+   unsigned long mmio_base)
 {
const struct mmio_range *range;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-   unsigned long base = dist->vgic_dist_base;
bool updated_state;
unsigned long offset;
 
-   if (!irqchip_in_kernel(vcpu->kvm) ||
-   mmio->phys_addr < base ||
-   (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE))
-   return false;
-
-   /* We don't support ldrd / strd or ldm / stm to the emulated vgic */
-   if (mmio->len > 4) {
-   kvm_inject_dabt(vcpu, mmio->phys_addr);
-   return true;
-   }
-
-   offset = mmio->phys_addr - base;
-   range = find_matching_range(vgic_dist_ranges, mmio, offset);
+   offset = mmio->phys_addr - mmio_base;
+   range = find_matching_range(ranges, mmio, offset);
if (unlikely(!range || !range->handle_mmio)) {
pr_warn("Unhandled access %d %08llx %d\n",
mmio->is_write, mmio->phys_addr, mmio->len);
@@ -943,7 +934,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run 
*run,
}
 
spin_lock(&vcpu->kvm->arch.vgic.lock);
-   offset = mmio->phys_addr - range->base - base;
+   offset -= range->base;
if (vgic_validate_access(dist, range, offset)) {
updated_state = range->handle_mmio(vcpu, mmio, offset);
} else {
@@ -961,6 +952,45 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
return true;
 }
 
+#define IS_IN_RANGE(addr, alen, base, len) \
+   (((addr) >= (base)) && (((addr) + (alen)) < ((base) + (len
+
+static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+   struct kvm_exit_mmio *mmio)
+{
+   unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base;
+
+   if (!IS_IN_RANGE(mmio->phys_addr, mmio->len, base,
+KVM_VGIC_V2_DIST_SIZE))
+   return false;
+
+   /* GICv2 does not support accesses wider than 32 bits */
+   if (mmio->len > 4) {
+   kvm_inject_dabt(vcpu, mmio->phys_addr);
+   return true;
+   }
+
+   return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base);
+}
+
+/**
+ * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation
+ * @vcpu:  pointer to the vcpu performing the access
+ * @run:   pointer to the kvm_run structure
+ * @mmio:  pointer to the data describing the access
+ *
+ * returns true if the MMIO access has been performed in kernel space,
+ * and false if it needs to be emulated in user space.
+ */
+bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_exit_mmio *mmio)
+{
+   if (!irqchip_in_kernel(vcpu->kvm))
+   return false;
+
+   return vgic_v2_handle_mmio(vcpu, run, mmio);
+}
+
 static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
 {
return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/14] arm/arm64: KVM: pass down user space provided GIC type into vGIC code

2014-06-19 Thread Andre Przywara
With the introduction of a second emulated GIC model we need to let
userspace specify the GIC model to use for each VM. Pass the
userspace provided value down into the vGIC code to differentiate
later.

Signed-off-by: Andre Przywara 
---
 arch/arm/kvm/arm.c |2 +-
 include/kvm/arm_vgic.h |4 ++--
 virt/kvm/arm/vgic.c|4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9ffe962..fa37fa1 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -804,7 +804,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
if (vgic_present)
-   return kvm_vgic_create(kvm);
+   return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
else
return -ENXIO;
}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f5788cf..4feac9a 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -231,7 +231,7 @@ struct kvm_exit_mmio;
 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_init(struct kvm *kvm);
-int kvm_vgic_create(struct kvm *kvm);
+int kvm_vgic_create(struct kvm *kvm, u32 type);
 void kvm_vgic_destroy(struct kvm *kvm);
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
@@ -282,7 +282,7 @@ static inline int kvm_vgic_init(struct kvm *kvm)
return 0;
 }
 
-static inline int kvm_vgic_create(struct kvm *kvm)
+static inline int kvm_vgic_create(struct kvm *kvm, u32 type)
 {
return 0;
 }
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 68ac9c6..8f1daf2 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1832,7 +1832,7 @@ out:
return ret;
 }
 
-int kvm_vgic_create(struct kvm *kvm)
+int kvm_vgic_create(struct kvm *kvm, u32 type)
 {
int i, vcpu_lock_idx = -1, ret = 0;
struct kvm_vcpu *vcpu;
@@ -2284,7 +2284,7 @@ static void vgic_destroy(struct kvm_device *dev)
 
 static int vgic_create(struct kvm_device *dev, u32 type)
 {
-   return kvm_vgic_create(dev->kvm);
+   return kvm_vgic_create(dev->kvm, type);
 }
 
 struct kvm_device_ops kvm_arm_vgic_v2_ops = {
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/14] arm/arm64: KVM: make the maximum number of vCPUs a per-VM value

2014-06-19 Thread Andre Przywara
Currently the maximum number of vCPUs supported is a global value
limited by the used GIC model. GICv3 will lift this limit, but we
still need to observe it for guests using GICv2.
So the maximum number of vCPUs is a per-VM value, depending on the
GIC model the guest uses.
Store and check the value in struct kvm_arch, but keep it down to
8 for now.

Signed-off-by: Andre Przywara 
---
 arch/arm/include/asm/kvm_host.h   |1 +
 arch/arm/kvm/arm.c|6 ++
 arch/arm64/include/asm/kvm_host.h |3 +++
 virt/kvm/arm/vgic-v2.c|5 +
 virt/kvm/arm/vgic-v3.c|6 ++
 5 files changed, 21 insertions(+)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 8d30f05..49c07ea 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -67,6 +67,7 @@ struct kvm_arch {
 
/* Interrupt controller */
struct vgic_distvgic;
+   int max_vcpus;
 };
 
 #define KVM_NR_MEM_OBJS 40
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index fa37fa1..a291e63 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -142,6 +142,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
/* Mark the initial VMID generation invalid */
kvm->arch.vmid_gen = 0;
+   kvm->arch.max_vcpus = CONFIG_KVM_ARM_MAX_VCPUS;
 
return ret;
 out_free_stage2_pgd:
@@ -260,6 +261,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 
unsigned int id)
int err;
struct kvm_vcpu *vcpu;
 
+   if (id >= kvm->arch.max_vcpus) {
+   err = -EINVAL;
+   goto out;
+   }
+
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu) {
err = -ENOMEM;
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 4c84250..eef63b1 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -58,6 +58,9 @@ struct kvm_arch {
/* VTTBR value associated with above pgd and vmid */
u64vttbr;
 
+   /* The maximum number of vCPUs depends on the used GIC model */
+   int max_vcpus;
+
/* Interrupt controller */
struct vgic_distvgic;
 
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index f2c214a..4091078 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -164,11 +164,16 @@ static struct vgic_params vgic_v2_params;
 static bool vgic_v2_init_emul(struct kvm *kvm, int type)
 {
struct vgic_vm_ops *vm_ops = &kvm->arch.vgic.vm_ops;
+   int nr_vcpus;
 
switch (type) {
case KVM_DEV_TYPE_ARM_VGIC_V2:
+   nr_vcpus = atomic_read(&kvm->online_vcpus);
+   if (nr_vcpus > 8)
+   return false;
vm_ops->get_lr = vgic_v2_get_lr;
vm_ops->set_lr = vgic_v2_set_lr;
+   kvm->arch.max_vcpus = 8;
return true;
}
 
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index f42961c..40d6817 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -171,11 +171,17 @@ static const struct vgic_ops vgic_v3_ops = {
 static bool vgic_v3_init_emul_compat(struct kvm *kvm, int type)
 {
struct vgic_vm_ops *vm_ops = &kvm->arch.vgic.vm_ops;
+   int nr_vcpus;
 
switch (type) {
case KVM_DEV_TYPE_ARM_VGIC_V2:
+   nr_vcpus = atomic_read(&kvm->online_vcpus);
+   if (nr_vcpus > 8)
+   return false;
+
vm_ops->get_lr = vgic_v3_get_lr;
vm_ops->set_lr = vgic_v3_set_lr;
+   kvm->arch.max_vcpus = 8;
return true;
}
return false;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/14] arm/arm64: KVM: refactor MMIO accessors

2014-06-19 Thread Andre Przywara
The MMIO accessors for GICD_I[CS]ENABLER, GICD_I[CS]PENDR and
GICD_ICFGR behave very similiar in GICv3, although the way the
affected vCPU is determined differs.
Factor out a generic, backend-facing implementation and use small
wrappers in the current GICv2 emulation to ease code sharing later.

Signed-off-by: Andre Przywara 
---
 virt/kvm/arm/vgic.c |   93 ---
 1 file changed, 52 insertions(+), 41 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 2de58b3..2a59dff 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -398,35 +398,54 @@ static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
return false;
 }
 
-static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
-  struct kvm_exit_mmio *mmio,
-  phys_addr_t offset)
+static bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
+  phys_addr_t offset, int vcpu_id, int access)
 {
-   u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
-  vcpu->vcpu_id, offset);
-   vgic_reg_access(mmio, reg, offset,
-   ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+   u32 *reg;
+   int mode = ACCESS_READ_VALUE | access;
+   struct kvm_vcpu *target_vcpu = kvm_get_vcpu(kvm, vcpu_id);
+
+   reg = vgic_bitmap_get_reg(&kvm->arch.vgic.irq_enabled, vcpu_id, offset);
+   vgic_reg_access(mmio, reg, offset, mode);
if (mmio->is_write) {
-   vgic_update_state(vcpu->kvm);
+   if (access & ACCESS_WRITE_CLEARBIT) {
+   if (offset < 4) /* Force SGI enabled */
+   *reg |= 0xffff;
+   vgic_retire_disabled_irqs(target_vcpu);
+   }
+   vgic_update_state(kvm);
return true;
}
 
return false;
 }
 
+static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
+  struct kvm_exit_mmio *mmio,
+  phys_addr_t offset)
+{
+   return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
+ vcpu->vcpu_id, ACCESS_WRITE_SETBIT);
+}
+
 static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
 struct kvm_exit_mmio *mmio,
 phys_addr_t offset)
 {
-   u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
-  vcpu->vcpu_id, offset);
-   vgic_reg_access(mmio, reg, offset,
-   ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+   return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
+ vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT);
+}
+
+static bool vgic_handle_pending_reg(struct kvm *kvm, struct kvm_exit_mmio 
*mmio,
+   phys_addr_t offset, int vcpu_id, int access)
+{
+   u32 *reg;
+   int mode = ACCESS_READ_VALUE | access;
+
+   reg = vgic_bitmap_get_reg(&kvm->arch.vgic.irq_state, vcpu_id, offset);
+   vgic_reg_access(mmio, reg, offset, mode);
if (mmio->is_write) {
-   if (offset < 4) /* Force SGI enabled */
-   *reg |= 0xffff;
-   vgic_retire_disabled_irqs(vcpu);
-   vgic_update_state(vcpu->kvm);
+   vgic_update_state(kvm);
return true;
}
 
@@ -437,31 +456,16 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu 
*vcpu,
struct kvm_exit_mmio *mmio,
phys_addr_t offset)
 {
-   u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
-  vcpu->vcpu_id, offset);
-   vgic_reg_access(mmio, reg, offset,
-   ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
-   if (mmio->is_write) {
-   vgic_update_state(vcpu->kvm);
-   return true;
-   }
-
-   return false;
+   return vgic_handle_pending_reg(vcpu->kvm, mmio, offset,
+  vcpu->vcpu_id, ACCESS_WRITE_SETBIT);
 }
 
 static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
  struct kvm_exit_mmio *mmio,
  phys_addr_t offset)
 {
-   u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
-  vcpu->vcpu_id, offset);
-   vgic_reg_access(mmio, reg, offset,
-   ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
-   if (mmio->is_write) {
-   vgic_update_state(vcpu->kvm);
-   return true;
-   }
-
+   return vgic_handle_pending_reg(vcpu->kvm, mmio, offset,
+  

[PATCH 14/14] arm/arm64: KVM: allow userland to request a virtual GICv3

2014-06-19 Thread Andre Przywara
With everything in place we allow userland to request the kernel
using a virtual GICv3 in the guest, which finally lifts the 8 vCPU
limit for a guest.
Also we provide the necessary support for guests setting the memory
addresses for the virtual distributor and redistributors.
This requires some userland code to make use of that feature and
explicitly ask for a virtual GICv3.

Signed-off-by: Andre Przywara 
---
 arch/arm64/include/uapi/asm/kvm.h |7 ++
 include/kvm/arm_vgic.h|4 ++--
 virt/kvm/arm/vgic-v3-emul.c   |3 +++
 virt/kvm/arm/vgic.c   |   46 ++---
 4 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index 5513de4..9a62081 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -77,6 +77,13 @@ struct kvm_regs {
 #define KVM_VGIC_V2_DIST_SIZE  0x1000
 #define KVM_VGIC_V2_CPU_SIZE   0x2000
 
+/* Supported VGICv3 address types  */
+#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST   3
+
+#define KVM_VGIC_V3_DIST_SIZE  SZ_64K
+#define KVM_VGIC_V3_REDIST_SIZE(2 * SZ_64K)
+
 #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
 #define KVM_ARM_VCPU_PSCI_0_2  2 /* CPU uses PSCI v0.2 */
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 3b164ee..82e00a5 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -35,8 +35,8 @@
 #define VGIC_MAX_IRQS  1024
 
 /* Sanity checks... */
-#if (KVM_MAX_VCPUS > 8)
-#error Invalid number of CPU interfaces
+#if (KVM_MAX_VCPUS > 255)
+#error Too many KVM VCPUs, the VGIC only supports up to 255 VCPUs for now
 #endif
 
 #if (VGIC_NR_IRQS_LEGACY & 31)
diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c
index 68821fd..88c2cca 100644
--- a/virt/kvm/arm/vgic-v3-emul.c
+++ b/virt/kvm/arm/vgic-v3-emul.c
@@ -873,6 +873,9 @@ static int vgic_v3_has_attr(struct kvm_device *dev,
case KVM_VGIC_V2_ADDR_TYPE_DIST:
case KVM_VGIC_V2_ADDR_TYPE_CPU:
return -ENXIO;
+   case KVM_VGIC_V3_ADDR_TYPE_DIST:
+   case KVM_VGIC_V3_ADDR_TYPE_REDIST:
+   return 0;
}
break;
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 8a584e0..e3c7189 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1526,7 +1526,7 @@ static int vgic_ioaddr_assign(struct kvm *kvm, 
phys_addr_t *ioaddr,
 /**
  * kvm_vgic_addr - set or get vgic VM base addresses
  * @kvm:   pointer to the vm struct
- * @type:  the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX
+ * @type:  the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX
  * @addr:  pointer to address value
  * @write: if true set the address in the VM address space, if false read the
  *  address
@@ -1540,29 +1540,49 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, 
u64 *addr, bool write)
 {
int r = 0;
struct vgic_dist *vgic = &kvm->arch.vgic;
+   int type_needed;
+   phys_addr_t *addr_ptr, block_size;
 
mutex_lock(&kvm->lock);
switch (type) {
case KVM_VGIC_V2_ADDR_TYPE_DIST:
-   if (write) {
-   r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
-  *addr, KVM_VGIC_V2_DIST_SIZE);
-   } else {
-   *addr = vgic->vgic_dist_base;
-   }
+   type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
+   addr_ptr = &vgic->vgic_dist_base;
+   block_size = KVM_VGIC_V2_DIST_SIZE;
break;
case KVM_VGIC_V2_ADDR_TYPE_CPU:
-   if (write) {
-   r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
-  *addr, KVM_VGIC_V2_CPU_SIZE);
-   } else {
-   *addr = vgic->vgic_cpu_base;
-   }
+   type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
+   addr_ptr = &vgic->vgic_cpu_base;
+   block_size = KVM_VGIC_V2_CPU_SIZE;
break;
+#ifdef CONFIG_ARM_GIC_V3
+   case KVM_VGIC_V3_ADDR_TYPE_DIST:
+   type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
+   addr_ptr = &vgic->vgic_dist_base;
+   block_size = KVM_VGIC_V3_DIST_SIZE;
+   break;
+   case KVM_VGIC_V3_ADDR_TYPE_REDIST:
+   type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
+   addr_ptr = &vgic->vgic_redist_base;
+   block_size = KVM_VGIC_V3_REDIST_SIZE;
+   break;
+#endif
default:
r = -ENODEV;
+   goto out;
+   }
+
+   if (vgic->vgic_model != type_n

[PATCH 00/14] KVM GICv3 emulation

2014-06-19 Thread Andre Przywara
GICv3 is the ARM generic interrupt controller designed to overcome
some limits of the prevalent GICv2. Most notably it lifts the 8-CPU
limit. Though with recent patches from Marc there is support for
hosts to use a GICv3, the CPU limitation still applies to KVM guests,
since the current code emulates a GICv2 only.
Also, GICv2 backward compatibility being optional in GICv3, a number
of systems won't be able to run GICv2 guests.

This patch series provides code to emulate a GICv3 distributor and
redistributor for any KVM guest. It requires a GICv3 in the host to
work. With those patches one can run guests efficiently on any GICv3
host. It has the following features:
- Affinity routing (support for up to 255 VCPUs, more possible)
- System registers (as opposed to MMIO access)
- No ITS
- No priority support (as the GICv2 emulation)
- No save / restore support so far (will be added soon)

The first 10 patches actually refactor the current VGIC code to make
room for a different VGIC model to be dropped in with Patch 11/14.
The remaining patches connect the new model to the kernel backend and
the userland facing code.

The series goes on top of both Marc's GICv3 host support series as
well as his vgic-dyn patches.
The necessary patches for kvmtool to enable the guest's GICv3 will be
posted here as well.
There was some testing on the fast model with some I/O and interrupt
affinity shuffling in a Linux guest with a varying number of VCPUs.

Please review and test.
I would be grateful for people to test for GICv2 regressions also
(so on a GICv2 host with current kvmtool/qemu), as there is quite
some refactoring on that front.

Much of the code was inspired by Marc, so send all praises to him
(while I take the blame).

Cheers,
Andre.

Andre Przywara (14):
  arm/arm64: KVM: rework MPIDR assignment and add accessors
  arm/arm64: KVM: pass down user space provided GIC type into vGIC code
  arm/arm64: KVM: refactor vgic_handle_mmio() function
  arm/arm64: KVM: wrap 64 bit MMIO accesses with two 32 bit ones
  arm/arm64: KVM: introduce per-VM ops
  arm/arm64: KVM: make the maximum number of vCPUs a per-VM value
  arm/arm64: KVM: make the value of ICC_SRE_EL1 a per-VM variable
  arm/arm64: KVM: refactor MMIO accessors
  arm/arm64: KVM: split GICv2 specific emulation code from vgic.c
  arm/arm64: KVM: add opaque private pointer to MMIO accessors
  arm/arm64: KVM: add virtual GICv3 distributor emulation
  arm/arm64: KVM: add SGI system register trapping
  arm/arm64: KVM: enable kernel side of GICv3 emulation
  arm/arm64: KVM: allow userland to request a virtual GICv3

 arch/arm/include/asm/kvm_emulate.h   |2 +-
 arch/arm/include/asm/kvm_host.h  |3 +
 arch/arm/kvm/Makefile|1 +
 arch/arm/kvm/arm.c   |   23 +-
 arch/arm/kvm/coproc.c|   19 +
 arch/arm/kvm/psci.c  |   15 +-
 arch/arm64/include/asm/kvm_emulate.h |3 +-
 arch/arm64/include/asm/kvm_host.h|5 +
 arch/arm64/include/uapi/asm/kvm.h|7 +
 arch/arm64/kernel/asm-offsets.c  |1 +
 arch/arm64/kvm/Makefile  |2 +
 arch/arm64/kvm/sys_regs.c|   37 +-
 arch/arm64/kvm/vgic-v3-switch.S  |   14 +-
 include/kvm/arm_vgic.h   |   38 +-
 include/linux/irqchip/arm-gic-v3.h   |   26 +
 include/linux/kvm_host.h |1 +
 include/uapi/linux/kvm.h |1 +
 virt/kvm/arm/vgic-v2-emul.c  |  802 +++
 virt/kvm/arm/vgic-v2.c   |   22 +-
 virt/kvm/arm/vgic-v3-emul.c  |  898 ++
 virt/kvm/arm/vgic-v3.c   |  157 +-
 virt/kvm/arm/vgic.c  | 1017 +++---
 virt/kvm/arm/vgic.h  |  117 
 virt/kvm/kvm_main.c  |3 +
 24 files changed, 2346 insertions(+), 868 deletions(-)
 create mode 100644 virt/kvm/arm/vgic-v2-emul.c
 create mode 100644 virt/kvm/arm/vgic-v3-emul.c
 create mode 100644 virt/kvm/arm/vgic.h

-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/14] arm/arm64: KVM: add opaque private pointer to MMIO accessors

2014-06-19 Thread Andre Przywara
For a GICv2 there is always only one (v)CPU involved: the one that
does the access. On a GICv3 the access to a CPU redistributor is
memory-mapped, but not banked, so the (v)CPU affected is determined by
looking at the MMIO address region being accessed.
To allow passing the affected CPU into the accessors, extend them to
take an opaque private pointer parameter.
For the current GICv2 emulation we ignore it and simply pass NULL
on the call.

Signed-off-by: Andre Przywara 
---
 virt/kvm/arm/vgic-v2-emul.c |   41 -
 virt/kvm/arm/vgic.c |   16 +---
 virt/kvm/arm/vgic.h |7 ---
 3 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
index ba5f873..30d5c4c 100644
--- a/virt/kvm/arm/vgic-v2-emul.c
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -41,7 +41,8 @@ static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int 
vcpu_id, int sgi)
 }
 
 static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
-struct kvm_exit_mmio *mmio, phys_addr_t offset)
+struct kvm_exit_mmio *mmio, phys_addr_t offset,
+void *private)
 {
u32 reg;
u32 word_offset = offset & 3;
@@ -77,7 +78,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
 
 static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
   struct kvm_exit_mmio *mmio,
-  phys_addr_t offset)
+  phys_addr_t offset, void *private)
 {
return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
  vcpu->vcpu_id, ACCESS_WRITE_SETBIT);
@@ -85,7 +86,7 @@ static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
 
 static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
 struct kvm_exit_mmio *mmio,
-phys_addr_t offset)
+phys_addr_t offset, void *private)
 {
return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
  vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT);
@@ -93,7 +94,7 @@ static bool handle_mmio_clear_enable_reg(struct kvm_vcpu 
*vcpu,
 
 static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
struct kvm_exit_mmio *mmio,
-   phys_addr_t offset)
+   phys_addr_t offset, void *private)
 {
return vgic_handle_pending_reg(vcpu->kvm, mmio, offset,
   vcpu->vcpu_id, ACCESS_WRITE_SETBIT);
@@ -101,7 +102,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu 
*vcpu,
 
 static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
  struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
+ phys_addr_t offset, void *private)
 {
return vgic_handle_pending_reg(vcpu->kvm, mmio, offset,
   vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT);
@@ -109,7 +110,7 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu 
*vcpu,
 
 static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
 struct kvm_exit_mmio *mmio,
-phys_addr_t offset)
+phys_addr_t offset, void *private)
 {
u32 *reg;
 
@@ -169,7 +170,7 @@ static void vgic_set_target_reg(struct kvm *kvm, u32 val, 
int irq)
 
 static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
   struct kvm_exit_mmio *mmio,
-  phys_addr_t offset)
+  phys_addr_t offset, void *private)
 {
u32 reg;
 
@@ -197,7 +198,8 @@ static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
 }
 
 static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
-   struct kvm_exit_mmio *mmio, phys_addr_t offset)
+   struct kvm_exit_mmio *mmio, phys_addr_t offset,
+   void *private)
 {
u32 *reg;
 
@@ -208,7 +210,8 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
 }
 
 static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
-   struct kvm_exit_mmio *mmio, phys_addr_t offset)
+   struct kvm_exit_mmio *mmio, phys_addr_t offset,
+   void *private)
 {
u32 reg;
   vgic_reg_access(mmio, &reg, offset,
@@ -281,7 +284,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu 
*vcpu,
 
 static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
struct kvm_exit_mmio *mmio,
-   phys_

[PATCH 13/14] arm/arm64: KVM: enable kernel side of GICv3 emulation

2014-06-19 Thread Andre Przywara
With all the necessary GICv3 emulation code in place, we can now
connect the code to the GICv3 backend in the kernel.
The LR register handling is different depending on the emulated GIC
model, so provide different implementations for each.
Also allow non-v2-compatible GICv3 implementations (which don't
provide MMIO regions for the virtual CPU interface in the DT), but
restrict those hosts to use GICv3 guests only.

Signed-off-by: Andre Przywara 
---
 virt/kvm/arm/vgic-v3.c |  138 ++--
 virt/kvm/arm/vgic.c|2 +
 2 files changed, 112 insertions(+), 28 deletions(-)

diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 7d9c85e..d26d12f 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -34,6 +34,7 @@
 #define GICH_LR_VIRTUALID  (0x3ffUL << 0)
 #define GICH_LR_PHYSID_CPUID_SHIFT (10)
 #define GICH_LR_PHYSID_CPUID   (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
+#define ICH_LR_VIRTUALID_MASK  (BIT_ULL(32) - 1)
 
 /*
  * LRs are stored in reverse order in memory. make sure we index them
@@ -43,7 +44,35 @@
 
 static u32 ich_vtr_el2;
 
-static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
+static u64 sync_lr_val(u8 state)
+{
+   u64 lr_val = 0;
+
+   if (state & LR_STATE_PENDING)
+   lr_val |= ICH_LR_PENDING_BIT;
+   if (state & LR_STATE_ACTIVE)
+   lr_val |= ICH_LR_ACTIVE_BIT;
+   if (state & LR_EOI_INT)
+   lr_val |= ICH_LR_EOI;
+
+   return lr_val;
+}
+
+static u8 sync_lr_state(u64 lr_val)
+{
+   u8 state = 0;
+
+   if (lr_val & ICH_LR_PENDING_BIT)
+   state |= LR_STATE_PENDING;
+   if (lr_val & ICH_LR_ACTIVE_BIT)
+   state |= LR_STATE_ACTIVE;
+   if (lr_val & ICH_LR_EOI)
+   state |= LR_EOI_INT;
+
+   return state;
+}
+
+static struct vgic_lr vgic_v2_on_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
 {
struct vgic_lr lr_desc;
u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];
@@ -53,30 +82,53 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu 
*vcpu, int lr)
lr_desc.source  = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
else
lr_desc.source = 0;
-   lr_desc.state   = 0;
+   lr_desc.state   = sync_lr_state(val);
 
-   if (val & ICH_LR_PENDING_BIT)
-   lr_desc.state |= LR_STATE_PENDING;
-   if (val & ICH_LR_ACTIVE_BIT)
-   lr_desc.state |= LR_STATE_ACTIVE;
-   if (val & ICH_LR_EOI)
-   lr_desc.state |= LR_EOI_INT;
+   return lr_desc;
+}
+
+static struct vgic_lr vgic_v3_on_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+   struct vgic_lr lr_desc;
+   u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];
+
+   lr_desc.irq = val & ICH_LR_VIRTUALID_MASK;
+   lr_desc.source  = 0;
+   lr_desc.state   = sync_lr_state(val);
 
return lr_desc;
 }
 
-static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
-  struct vgic_lr lr_desc)
+static void vgic_v3_on_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
+struct vgic_lr lr_desc)
 {
-   u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) |
- lr_desc.irq);
+   u64 lr_val;
 
-   if (lr_desc.state & LR_STATE_PENDING)
-   lr_val |= ICH_LR_PENDING_BIT;
-   if (lr_desc.state & LR_STATE_ACTIVE)
-   lr_val |= ICH_LR_ACTIVE_BIT;
-   if (lr_desc.state & LR_EOI_INT)
-   lr_val |= ICH_LR_EOI;
+   lr_val = lr_desc.irq;
+
+   /*
+* currently all guest IRQs are Group1, as Group0 would result
+* in a FIQ in the guest, which it wouldn't expect.
+* Eventually we want to make this configurable, so we may revisit
+* this in the future.
+*/
+   lr_val |= ICH_LR_GROUP;
+
+   lr_val |= sync_lr_val(lr_desc.state);
+
+   vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
+}
+
+static void vgic_v2_on_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
+struct vgic_lr lr_desc)
+{
+   u64 lr_val;
+
+   lr_val = lr_desc.irq;
+
+   lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
+
+   lr_val |= sync_lr_val(lr_desc.state);
 
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
 }
@@ -145,9 +197,8 @@ static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct 
vgic_vmcr *vmcrp)
 
 static void vgic_v3_enable(struct kvm_vcpu *vcpu)
 {
-   struct vgic_v3_cpu_if *vgic_v3;
+   struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
 
-   vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
/*
 * By forcing VMCR to zero, the GIC will restore the binary
 * points to their reset values. Anything else resets to zero
@@ -155,7 +206,14 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
 */
vgic_v3-

[PATCH 04/14] arm/arm64: KVM: wrap 64 bit MMIO accesses with two 32 bit ones

2014-06-19 Thread Andre Przywara
Some GICv3 registers can and will be accessed as 64 bit registers.
Currently the register handling code can only deal with 32 bit
accesses, so we do two consecutive calls to cover this.

Signed-off-by: Andre Przywara 
---
 virt/kvm/arm/vgic.c |   48 +---
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 4c6b212..b3cf4c7 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -906,6 +906,48 @@ static bool vgic_validate_access(const struct vgic_dist 
*dist,
 }
 
 /*
+ * Call the respective handler function for the given range.
+ * We split up any 64 bit accesses into two consecutive 32 bit
+ * handler calls and merge the result afterwards.
+ */
+static bool call_range_handler(struct kvm_vcpu *vcpu,
+  struct kvm_exit_mmio *mmio,
+  unsigned long offset,
+  const struct mmio_range *range)
+{
+   u32 *data32 = (void *)mmio->data;
+   struct kvm_exit_mmio mmio32;
+   bool ret;
+
+   if (likely(mmio->len <= 4))
+   return range->handle_mmio(vcpu, mmio, offset);
+
+   /*
+* We assume that any access greater than 4 bytes is actually
+* 8 bytes long, caused by a 64-bit access
+*/
+
+   mmio32.len = 4;
+   mmio32.is_write = mmio->is_write;
+
+   mmio32.phys_addr = mmio->phys_addr + 4;
+   if (mmio->is_write)
+   *(u32 *)mmio32.data = data32[1];
+   ret = range->handle_mmio(vcpu, &mmio32, offset + 4);
+   if (!mmio->is_write)
+   data32[1] = *(u32 *)mmio32.data;
+
+   mmio32.phys_addr = mmio->phys_addr;
+   if (mmio->is_write)
+   *(u32 *)mmio32.data = data32[0];
+   ret |= range->handle_mmio(vcpu, &mmio32, offset);
+   if (!mmio->is_write)
+   data32[0] = *(u32 *)mmio32.data;
+
+   return ret;
+}
+
+/*
  * vgic_handle_mmio_range - handle an in-kernel MMIO access
  * @vcpu:  pointer to the vcpu performing the access
  * @run:   pointer to the kvm_run structure
@@ -936,10 +978,10 @@ static bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, 
struct kvm_run *run,
spin_lock(&vcpu->kvm->arch.vgic.lock);
offset -= range->base;
if (vgic_validate_access(dist, range, offset)) {
-   updated_state = range->handle_mmio(vcpu, mmio, offset);
+   updated_state = call_range_handler(vcpu, mmio, offset, range);
} else {
-   vgic_reg_access(mmio, NULL, offset,
-   ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+   if (!mmio->is_write)
+   memset(mmio->data, 0, mmio->len);
updated_state = false;
}
spin_unlock(&vcpu->kvm->arch.vgic.lock);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/14] arm/arm64: KVM: rework MPIDR assignment and add accessors

2014-06-19 Thread Andre Przywara
The virtual MPIDR registers (containing topology information) for the
guest are currently mapped linearily to the vcpu_id. Improve this
mapping for arm64 by using three levels to not artificially limit the
number of vCPUs. Also add an accessor to later allow easier access to
a vCPU with a given MPIDR.
Use this new accessor in the PSCI emulation.

Signed-off-by: Andre Przywara 
---
 arch/arm/include/asm/kvm_emulate.h   |2 +-
 arch/arm/include/asm/kvm_host.h  |2 ++
 arch/arm/kvm/arm.c   |   15 +++
 arch/arm/kvm/psci.c  |   15 ---
 arch/arm64/include/asm/kvm_emulate.h |3 ++-
 arch/arm64/include/asm/kvm_host.h|2 ++
 arch/arm64/kvm/sys_regs.c|   11 +--
 7 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h 
b/arch/arm/include/asm/kvm_emulate.h
index 0fa90c9..6b528e4 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -159,7 +159,7 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu 
*vcpu)
 
 static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
 {
-   return vcpu->arch.cp15[c0_MPIDR];
+   return vcpu->arch.cp15[c0_MPIDR] & MPIDR_HWID_BITMASK;
 }
 
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index d6d5227..8d30f05 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -236,4 +236,6 @@ int kvm_perf_teardown(void);
 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
 int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 
+struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9b3957d..9ffe962 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -1027,6 +1027,21 @@ static void check_kvm_target_cpu(void *ret)
*(int *)ret = kvm_target_cpu();
 }
 
+struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
+{
+   unsigned long c_mpidr;
+   struct kvm_vcpu *vcpu;
+   int i;
+
+   mpidr &= MPIDR_HWID_BITMASK;
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   c_mpidr = kvm_vcpu_get_mpidr(vcpu);
+   if (c_mpidr == mpidr)
+   return vcpu;
+   }
+   return NULL;
+}
+
 /**
  * Initialize Hyp-mode and memory mappings on all CPUs.
  */
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 09cf377..49f0992 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * This is an implementation of the Power State Coordination Interface
@@ -65,25 +66,17 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 {
struct kvm *kvm = source_vcpu->kvm;
-   struct kvm_vcpu *vcpu = NULL, *tmp;
+   struct kvm_vcpu *vcpu = NULL;
wait_queue_head_t *wq;
unsigned long cpu_id;
unsigned long context_id;
-   unsigned long mpidr;
phys_addr_t target_pc;
-   int i;
 
-   cpu_id = *vcpu_reg(source_vcpu, 1);
+   cpu_id = *vcpu_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
if (vcpu_mode_is_32bit(source_vcpu))
cpu_id &= ~((u32) 0);
 
-   kvm_for_each_vcpu(i, tmp, kvm) {
-   mpidr = kvm_vcpu_get_mpidr(tmp);
-   if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & 
MPIDR_HWID_BITMASK)) {
-   vcpu = tmp;
-   break;
-   }
-   }
+   vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id);
 
/*
 * Make sure the caller requested a valid CPU and that the CPU is
diff --git a/arch/arm64/include/asm/kvm_emulate.h 
b/arch/arm64/include/asm/kvm_emulate.h
index dd8ecfc..685ea1b 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 
 unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
 unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
@@ -179,7 +180,7 @@ static inline u8 kvm_vcpu_trap_get_fault(const struct 
kvm_vcpu *vcpu)
 
 static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
 {
-   return vcpu_sys_reg(vcpu, MPIDR_EL1);
+   return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
 }
 
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 4ae9213..4c84250 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -228,4 +228,6 @@ static inline void vgic_arch_setup(const struct vgic_params 
*vgic)
}
 }
 
+struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a

Re: [PATCH 3/3] KVM: x86: correct mwait and monitor emulation

2014-06-19 Thread Michael S. Tsirkin
On Wed, Jun 18, 2014 at 02:46:01PM -0400, Gabriel L. Somlo wrote:
> On Wed, Jun 18, 2014 at 10:59:14AM -0700, Eric Northup wrote:
> > On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit  wrote:
> > > mwait and monitor are currently handled as nop. Considering this 
> > > behavior, they
> > > should still be handled correctly, i.e., check execution conditions and 
> > > generate
> > > exceptions when required. mwait and monitor may also be executed in 
> > > real-mode
> > > and are not handled in that case.  This patch performs the emulation of
> > > monitor-mwait according to Intel SDM (other than checking whether 
> > > interrupt can
> > > be used as a break event).
> > >
> > > Signed-off-by: Nadav Amit 
> 
> How about this instead (details in the commit log below) ? Please let
> me know what you think, and if you'd prefer me to send it out as a
> separate patch rather than a reply to this thread.
> 
> Thanks,
> --Gabriel

If there's an easy workaround, I'm inclined to agree.
We can always go back to Gabriel's patch (and then we'll need
Nadav's one too) but if we release a kernel with this
support it becomes an ABI and we can't go back.

So let's be careful here, and revert the hack for 3.16.


Acked-by: Michael S. Tsirkin 



> 
> >From 0375a0aceb54cdbc26a6c0e5b43c46324f830ec3 Mon Sep 17 00:00:00 2001
> From: "Gabriel L. Somlo" 
> Date: Wed, 18 Jun 2014 14:39:15 -0400
> Subject: [PATCH] kvm: x86: revert "emulate monitor and mwait instructions as 
> nop"
> 
> This reverts commit 87c00572ba05aa8c9db118da75c608f47eb10b9e.
> 
> OS X <= 10.7.* are the only known guests which realistically required
> this functionality. As it turns out, OS X can be told to forego using
> monitor/mwait by passing it "idlehalt=0" as a kernel argument, so we're
> better off removing this hack from KVM altogether.
> 
> Signed-off-by: Gabriel L. Somlo 
> ---
>  arch/x86/kvm/cpuid.c |  2 --
>  arch/x86/kvm/svm.c   |  8 +++-
>  arch/x86/kvm/vmx.c   | 10 --
>  3 files changed, 7 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 38a0afe..17b42fa 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -283,8 +283,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
> *entry, u32 function,
>   0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
>   /* cpuid 1.ecx */
>   const u32 kvm_supported_word4_x86_features =
> - /* NOTE: MONITOR (and MWAIT) are emulated as NOP,
> -  * but *not* advertised to guests via CPUID ! */
>   F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
>   0 /* DS-CPL, VMX, SMX, EST */ |
>   0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index ec8366c..0e8ef20 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3274,7 +3274,7 @@ static int pause_interception(struct vcpu_svm *svm)
>   return 1;
>  }
>  
> -static int nop_interception(struct vcpu_svm *svm)
> +static int invalid_op_interception(struct vcpu_svm *svm)
>  {
>   skip_emulated_instruction(&(svm->vcpu));
>   return 1;
> @@ -3282,14 +3282,12 @@ static int nop_interception(struct vcpu_svm *svm)
>  
>  static int monitor_interception(struct vcpu_svm *svm)
>  {
> - printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
> - return nop_interception(svm);
> + return invalid_op_interception(svm);
>  }
>  
>  static int mwait_interception(struct vcpu_svm *svm)
>  {
> - printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
> - return nop_interception(svm);
> + return invalid_op_interception(svm);
>  }
>  
>  static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 801332e..577c7df 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -5672,22 +5672,20 @@ static int handle_pause(struct kvm_vcpu *vcpu)
>   return 1;
>  }
>  
> -static int handle_nop(struct kvm_vcpu *vcpu)
> +static int handle_invalid_op(struct kvm_vcpu *vcpu)
>  {
> - skip_emulated_instruction(vcpu);
> + kvm_queue_exception(vcpu, UD_VECTOR);
>   return 1;
>  }
>  
>  static int handle_mwait(struct kvm_vcpu *vcpu)
>  {
> - printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
> - return handle_nop(vcpu);
> + return handle_invalid_op(vcpu);
>  }
>  
>  static int handle_monitor(struct kvm_vcpu *vcpu)
>  {
> - printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
> - return handle_nop(vcpu);
> + return handle_invalid_op(vcpu);
>  }
>  
>  /*
> -- 
> 1.9.3
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: fix hyperv tsc page clear wrong address

2014-06-19 Thread newtongao(??????)
>From 6010e24da995d4161cb879e98fda989e3cdb41c4 Mon Sep 17 00:00:00 2001
From: Xiaoming Gao 
Date: Thu, 19 Jun 2014 17:46:05 +0800
Subject: [PATCH] kvm: fix hyperv tsc page clear wrong address

The TSC page address needs its low HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT bits
cleared before kvm_write_guest writes the zeroed tsc_ref structure to the
guest; otherwise the guest will get a wrong clocksource.

Windows guests are strongly impacted: QueryPerformanceCounter will always
return 0, and everything in Windows that relies on the clock source will be
in serious chaos.

Signed-off-by: Xiaoming Gao 
---
 arch/x86/kvm/x86.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f32a025..ed716cf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1898,6 +1898,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 
msr, u64 data)
if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
break;
gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+   data &= ~((1 << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT) - 1);
if (kvm_write_guest(kvm, data,
&tsc_ref, sizeof(tsc_ref)))
return 1;
-- 
1.7.1



>From 6010e24da995d4161cb879e98fda989e3cdb41c4 Mon Sep 17 00:00:00 2001
From: Xiaoming Gao 
Date: Thu, 19 Jun 2014 17:46:05 +0800
Subject: [PATCH] kvm: fix hyperv tsc page clear wrong address

tsc page addr need clear the low HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT bits
before kvm_write_guest write the zero tsc_ref structure to guest, or it will 
lead
guest to get wrong clocksource.

windows guest will get strong impact, QueryPerformanceCounter will always 
return 0
,all things in windows rely on clock source, will in serious chaos.

Signed-off-by: Xiaoming Gao 
---
 arch/x86/kvm/x86.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f32a025..ed716cf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1898,6 +1898,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 
msr, u64 data)
if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
break;
gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+   data &= ~((1 << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT) - 1);
if (kvm_write_guest(kvm, data,
&tsc_ref, sizeof(tsc_ref)))
return 1;
-- 
1.7.1



[PATCH kvm-unit-tests] vmx: always do vmxoff, even if test init function failed

2014-06-19 Thread Paolo Bonzini
Otherwise, the next test breaks with "vmxon failed".

Signed-off-by: Paolo Bonzini 
---
 x86/vmx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/x86/vmx.c b/x86/vmx.c
index 5bb5969..2d601cc 100644
--- a/x86/vmx.c
+++ b/x86/vmx.c
@@ -868,7 +868,7 @@ static int test_run(struct vmx_test *test)
/* Directly call test->init is ok here, init_vmcs has done
   vmcs init, vmclear and vmptrld*/
if (test->init && test->init(test->vmcs) != VMX_TEST_START)
-   return 0;
+   goto out;
test->exits = 0;
current = test;
regs = test->guest_regs;
@@ -876,6 +876,7 @@ static int test_run(struct vmx_test *test)
launched = 0;
printf("\nTest suite: %s\n", test->name);
vmx_run();
+out:
if (vmx_off()) {
printf("%s : vmxoff failed.\n", __func__);
return 1;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: fix hyperv tsc page clear wrong address

2014-06-19 Thread newtongao(??????)
This version is more concise:

>From 6ddeb18347f9dda0fe37702e0d067a4129a89b54 Mon Sep 17 00:00:00 2001
From: Xiaoming Gao 
Date: Thu, 19 Jun 2014 19:14:57 +0800
Subject: [PATCH] kvm: fix hyperv tsc page clear wrong address

tsc page addr need clear the low HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT bits
before kvm_write_guest write the zero tsc_ref structure to guest, or it will 
lead
guest to get wrong clocksource.

windows guest will get strong impact, QueryPerformanceCounter will always 
return 0
,all things in windows rely on clock source, will in serious chaos.

Signed-off-by: Xiaoming Gao 
---
 arch/x86/kvm/x86.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f32a025..f644933 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1898,7 +1898,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 
msr, u64 data)
if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
break;
gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
-   if (kvm_write_guest(kvm, data,
+   if (kvm_write_guest(kvm, gfn << 
HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
&tsc_ref, sizeof(tsc_ref)))
return 1;
mark_page_dirty(kvm, gfn);
-- 
1.7.1

>From 6ddeb18347f9dda0fe37702e0d067a4129a89b54 Mon Sep 17 00:00:00 2001
From: Xiaoming Gao 
Date: Thu, 19 Jun 2014 19:14:57 +0800
Subject: [PATCH] kvm: fix hyperv tsc page clear wrong address

tsc page addr need clear the low HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT bits
before kvm_write_guest write the zero tsc_ref structure to guest, or it will 
lead
guest to get wrong clocksource.

windows guest will get strong impact, QueryPerformanceCounter will always 
return 0
,all things in windows rely on clock source, will in serious chaos.

Signed-off-by: Xiaoming Gao 
---
 arch/x86/kvm/x86.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f32a025..f644933 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1898,7 +1898,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 
msr, u64 data)
if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
break;
gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
-   if (kvm_write_guest(kvm, data,
+   if (kvm_write_guest(kvm, gfn << 
HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
&tsc_ref, sizeof(tsc_ref)))
return 1;
mark_page_dirty(kvm, gfn);
-- 
1.7.1



Re: [PATCH] kvm: fix hyperv tsc page clear wrong address

2014-06-19 Thread Paolo Bonzini

Il 19/06/2014 13:17, "newtongao(??)" ha scritto:

This version is more concise:

From 6ddeb18347f9dda0fe37702e0d067a4129a89b54 Mon Sep 17 00:00:00 2001
From: Xiaoming Gao 
Date: Thu, 19 Jun 2014 19:14:57 +0800
Subject: [PATCH] kvm: fix hyperv tsc page clear wrong address

tsc page addr need clear the low HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT bits
before kvm_write_guest write the zero tsc_ref structure to guest, or it will 
lead
guest to get wrong clocksource.

windows guest will get strong impact, QueryPerformanceCounter will always 
return 0
,all things in windows rely on clock source, will in serious chaos.


Right, this can happen if Windows passes a non-zero page to KVM.

Thanks,

Paolo


Signed-off-by: Xiaoming Gao 
---
 arch/x86/kvm/x86.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f32a025..f644933 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1898,7 +1898,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 
msr, u64 data)
if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
break;
gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
-   if (kvm_write_guest(kvm, data,
+   if (kvm_write_guest(kvm, gfn << 
HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
&tsc_ref, sizeof(tsc_ref)))
return 1;
mark_page_dirty(kvm, gfn);



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] KVM: x86: correct mwait and monitor emulation

2014-06-19 Thread Gleb Natapov
On Thu, Jun 19, 2014 at 01:53:36PM +0300, Nadav Amit wrote:
> 
> On Jun 19, 2014, at 1:18 PM, Michael S. Tsirkin  wrote:
> 
> > On Wed, Jun 18, 2014 at 02:46:01PM -0400, Gabriel L. Somlo wrote:
> >> On Wed, Jun 18, 2014 at 10:59:14AM -0700, Eric Northup wrote:
> >>> On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit  
> >>> wrote:
>  mwait and monitor are currently handled as nop. Considering this 
>  behavior, they
>  should still be handled correctly, i.e., check execution conditions and 
>  generate
>  exceptions when required. mwait and monitor may also be executed in 
>  real-mode
>  and are not handled in that case.  This patch performs the emulation of
>  monitor-mwait according to Intel SDM (other than checking whether 
>  interrupt can
>  be used as a break event).
>  
>  Signed-off-by: Nadav Amit 
> >> 
> >> How about this instead (details in the commit log below) ? Please let
> >> me know what you think, and if you'd prefer me to send it out as a
> >> separate patch rather than a reply to this thread.
> >> 
> >> Thanks,
> >> --Gabriel
> > 
> > If there's an easy workaround, I'm inclined to agree.
> > We can always go back to Gabriel's patch (and then we'll need
> > Nadav's one too) but if we release a kernel with this
> > support it becomes an ABI and we can't go back.
> > 
> > So let's be careful here, and revert the hack for 3.16.
> > 
> > 
> > Acked-by: Michael S. Tsirkin 
> > 
> Personally, I got a custom guest which requires mwait for executing correctly.
Can you elaborate on this guest a little bit. With nop implementation
for mwait the guest will hog a host cpu. Do you consider this to be
"executing correctly?"

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] KVM: x86: correct mwait and monitor emulation

2014-06-19 Thread Paolo Bonzini

Il 18/06/2014 19:59, Eric Northup ha scritto:

On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit  wrote:

mwait and monitor are currently handled as nop. Considering this behavior, they
should still be handled correctly, i.e., check execution conditions and generate
exceptions when required. mwait and monitor may also be executed in real-mode
and are not handled in that case.  This patch performs the emulation of
monitor-mwait according to Intel SDM (other than checking whether interrupt can
be used as a break event).

Signed-off-by: Nadav Amit 
---
 arch/x86/kvm/emulate.c | 41 +++--
 arch/x86/kvm/svm.c | 22 ++
 arch/x86/kvm/vmx.c | 27 +++
 3 files changed, 52 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ef7a5a0..424b58d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3344,6 +3344,43 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
 }

+static int em_monitor(struct x86_emulate_ctxt *ctxt)
+{
+   int rc;
+   struct segmented_address addr;
+   u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
+   u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
+   u8 byte;


I'd request:

u32 ebx, ecx, edx, eax = 1;
ctxt->opt->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
if (!(ecx & FFL(MWAIT)))
return emulate_ud(ctxt);

and also in em_mwait.


Ignoring the fact that this should never be true 
(KVM_GET_SUPPORTED_CPUID never reports the MWAIT bit), why should 
MONITOR and MWAIT be special?  We do not do this kind of check for SSE 
or AVX instructions.


An alternative is to record the address that was being waited on, and 
invoke PLE (kvm_vcpu_on_spin) if the current address matches the last 
one.  A VMEXIT + emulation takes a couple thousand cycles, which is the 
same order of magnitude as the PLE window.


Even if there is a workaround, I don't think reverting the patch is 
necessary.  The patch was there for a fringe case anyway (recent 
versions of Mac OS X get CPUID right), so I don't think the availability 
of a work around changes the assessment of how ugly/useful MONITOR/MWAIT is.


Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH kvm-unit-tests v2 0/3] More nvmx unit tests changes

2014-06-19 Thread Paolo Bonzini

Il 09/06/2014 23:04, Bandan Das ha scritto:

A couple more updates to test_vmxon, test_vmptrld
and test_vmclear based on kvm commits -

3573e22cfecaac83f82ef4f6847d90e466fc8e10
KVM: nVMX: additional checks on vmxon region

96ec146330d18a938b4773be8d6dd1f93399507c
KVM: nVMX: fail on invalid vmclear/vmptrld pointer

v2:
 - 1/3(New): Move the writing of flags register inside asm blocks

Bandan Das (3):
  VMX: move write_rflags inside asm blocks for vmx instructions
  VMX: Add more checks to test_vmxon
  VMX: Updated test_vmclear and test_vmptrld

 lib/x86/processor.h |   7 ---
 x86/vmx.c   | 121 +---
 x86/vmx.h   |  10 -
 3 files changed, 103 insertions(+), 35 deletions(-)



Applied, thanks.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] KVM: x86: correct mwait and monitor emulation

2014-06-19 Thread Nadav Amit

On 6/19/14, 2:23 PM, Gleb Natapov wrote:

On Thu, Jun 19, 2014 at 01:53:36PM +0300, Nadav Amit wrote:


On Jun 19, 2014, at 1:18 PM, Michael S. Tsirkin  wrote:


On Wed, Jun 18, 2014 at 02:46:01PM -0400, Gabriel L. Somlo wrote:

On Wed, Jun 18, 2014 at 10:59:14AM -0700, Eric Northup wrote:

On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit  wrote:

mwait and monitor are currently handled as nop. Considering this behavior, they
should still be handled correctly, i.e., check execution conditions and generate
exceptions when required. mwait and monitor may also be executed in real-mode
and are not handled in that case.  This patch performs the emulation of
monitor-mwait according to Intel SDM (other than checking whether interrupt can
be used as a break event).

Signed-off-by: Nadav Amit 


How about this instead (details in the commit log below) ? Please let
me know what you think, and if you'd prefer me to send it out as a
separate patch rather than a reply to this thread.

Thanks,
--Gabriel


If there's an easy workaround, I'm inclined to agree.
We can always go back to Gabriel's patch (and then we'll need
Nadav's one too) but if we release a kernel with this
support it becomes an ABI and we can't go back.

So let's be careful here, and revert the hack for 3.16.


Acked-by: Michael S. Tsirkin 


Personally, I got a custom guest which requires mwait for executing correctly.

Can you elaborate on this guest a little bit. With nop implementation
for mwait the guest will hog a host cpu. Do you consider this to be
"executing correctly?"

--


mwait is not as "clean" as it may appear. It encounters false wake-ups 
due to a variety of reasons, and any code need to recheck the wake-up 
condition afterwards. Actually, some CPUs had bugs that caused excessive 
wake-ups that degraded performance considerably (Nehalem, if I am not 
mistaken).
Therefore, handling mwait as nop is logically correct (although it may 
degrade performance).


For the reference, if you look at the SDM 8.10.4, you'll see:
"Multiple events other than a write to the triggering address range can 
cause a processor that executed MWAIT to wake up. These include events 
that would lead to voluntary or involuntary context switches, such as..."


Note the words "include" in the sentence "These include events". 
Software has no way of controlling whether it gets false wake-ups and 
cannot rely on the wake-up as indication to anything.


Nadav


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] KVM: x86: correct mwait and monitor emulation

2014-06-19 Thread Michael S. Tsirkin
On Thu, Jun 19, 2014 at 02:52:20PM +0300, Nadav Amit wrote:
> On 6/19/14, 2:23 PM, Gleb Natapov wrote:
> >On Thu, Jun 19, 2014 at 01:53:36PM +0300, Nadav Amit wrote:
> >>
> >>On Jun 19, 2014, at 1:18 PM, Michael S. Tsirkin  wrote:
> >>
> >>>On Wed, Jun 18, 2014 at 02:46:01PM -0400, Gabriel L. Somlo wrote:
> On Wed, Jun 18, 2014 at 10:59:14AM -0700, Eric Northup wrote:
> >On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit  
> >wrote:
> >>mwait and monitor are currently handled as nop. Considering this 
> >>behavior, they
> >>should still be handled correctly, i.e., check execution conditions and 
> >>generate
> >>exceptions when required. mwait and monitor may also be executed in 
> >>real-mode
> >>and are not handled in that case.  This patch performs the emulation of
> >>monitor-mwait according to Intel SDM (other than checking whether 
> >>interrupt can
> >>be used as a break event).
> >>
> >>Signed-off-by: Nadav Amit 
> 
> How about this instead (details in the commit log below) ? Please let
> me know what you think, and if you'd prefer me to send it out as a
> separate patch rather than a reply to this thread.
> 
> Thanks,
> --Gabriel
> >>>
> >>>If there's an easy workaround, I'm inclined to agree.
> >>>We can always go back to Gabriel's patch (and then we'll need
> >>>Nadav's one too) but if we release a kernel with this
> >>>support it becomes an ABI and we can't go back.
> >>>
> >>>So let's be careful here, and revert the hack for 3.16.
> >>>
> >>>
> >>>Acked-by: Michael S. Tsirkin 
> >>>
> >>Personally, I got a custom guest which requires mwait for executing 
> >>correctly.
> >Can you elaborate on this guest a little bit. With nop implementation
> >for mwait the guest will hog a host cpu. Do you consider this to be
> >"executing correctly?"
> >
> >--
> 
> mwait is not as "clean" as it may appear. It encounters false wake-ups due
> to a variety of reasons, and any code need to recheck the wake-up condition
> afterwards. Actually, some CPUs had bugs that caused excessive wake-ups that
> degraded performance considerably (Nehalem, if I am not mistaken).
> Therefore, handling mwait as nop is logically correct (although it may
> degrade performance).
> 
> For the reference, if you look at the SDM 8.10.4, you'll see:
> "Multiple events other than a write to the triggering address range can
> cause a processor that executed MWAIT to wake up. These include events that
> would lead to voluntary or involuntary context switches, such as..."
> 
> Note the words "include" in the sentence "These include events". Software
> has no way of controlling whether it gets false wake-ups and cannot rely on
> the wake-up as indication to anything.
> 
> Nadav

It's a quality of implementation question.
It is correct in the same sense that
a NIC dropping each second packet is correct.

If we ship this hack we have to maintain it forever,
so there needs to be a compelling reason beyond
just "because we can".


-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] KVM: x86: correct mwait and monitor emulation

2014-06-19 Thread Gleb Natapov
On Thu, Jun 19, 2014 at 02:52:20PM +0300, Nadav Amit wrote:
> On 6/19/14, 2:23 PM, Gleb Natapov wrote:
> >On Thu, Jun 19, 2014 at 01:53:36PM +0300, Nadav Amit wrote:
> >>
> >>On Jun 19, 2014, at 1:18 PM, Michael S. Tsirkin  wrote:
> >>
> >>>On Wed, Jun 18, 2014 at 02:46:01PM -0400, Gabriel L. Somlo wrote:
> On Wed, Jun 18, 2014 at 10:59:14AM -0700, Eric Northup wrote:
> >On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit  
> >wrote:
> >>mwait and monitor are currently handled as nop. Considering this 
> >>behavior, they
> >>should still be handled correctly, i.e., check execution conditions and 
> >>generate
> >>exceptions when required. mwait and monitor may also be executed in 
> >>real-mode
> >>and are not handled in that case.  This patch performs the emulation of
> >>monitor-mwait according to Intel SDM (other than checking whether 
> >>interrupt can
> >>be used as a break event).
> >>
> >>Signed-off-by: Nadav Amit 
> 
> How about this instead (details in the commit log below) ? Please let
> me know what you think, and if you'd prefer me to send it out as a
> separate patch rather than a reply to this thread.
> 
> Thanks,
> --Gabriel
> >>>
> >>>If there's an easy workaround, I'm inclined to agree.
> >>>We can always go back to Gabriel's patch (and then we'll need
> >>>Nadav's one too) but if we release a kernel with this
> >>>support it becomes an ABI and we can't go back.
> >>>
> >>>So let's be careful here, and revert the hack for 3.16.
> >>>
> >>>
> >>>Acked-by: Michael S. Tsirkin 
> >>>
> >>Personally, I got a custom guest which requires mwait for executing 
> >>correctly.
> >Can you elaborate on this guest a little bit. With nop implementation
> >for mwait the guest will hog a host cpu. Do you consider this to be
> >"executing correctly?"
> >
> >--
> 
> mwait is not as "clean" as it may appear. It encounters false wake-ups due
> to a variety of reasons, and any code need to recheck the wake-up condition
> afterwards. Actually, some CPUs had bugs that caused excessive wake-ups that
> degraded performance considerably (Nehalem, if I am not mistaken).
> Therefore, handling mwait as nop is logically correct (although it may
> degrade performance).
> 
> For the reference, if you look at the SDM 8.10.4, you'll see:
> "Multiple events other than a write to the triggering address range can
> cause a processor that executed MWAIT to wake up. These include events that
> would lead to voluntary or involuntary context switches, such as..."
> 
> Note the words "include" in the sentence "These include events". Software
> has no way of controlling whether it gets false wake-ups and cannot rely on
> the wake-up as indication to anything.
> 
That's all well and good and I didn't say that nop is not a valid
mwait implementation, it is, though there is a big difference between
"encounters false wake-ups" and never sleeps.  What I asked is do you
consider your guest hogging host cpu to be "executing correctly?". What
this guest is doing that such behaviour is tolerated and shouldn't it
be better to just poll for a condition you are waiting for instead of
executing expensive vmexits. This will also hog 100% host cpu, but will
be actually faster.

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] KVM: x86: correct mwait and monitor emulation

2014-06-19 Thread Nadav Amit

On 6/19/14, 3:07 PM, Gleb Natapov wrote:

On Thu, Jun 19, 2014 at 02:52:20PM +0300, Nadav Amit wrote:

On 6/19/14, 2:23 PM, Gleb Natapov wrote:

On Thu, Jun 19, 2014 at 01:53:36PM +0300, Nadav Amit wrote:


On Jun 19, 2014, at 1:18 PM, Michael S. Tsirkin  wrote:


On Wed, Jun 18, 2014 at 02:46:01PM -0400, Gabriel L. Somlo wrote:

On Wed, Jun 18, 2014 at 10:59:14AM -0700, Eric Northup wrote:

On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit  wrote:

mwait and monitor are currently handled as nop. Considering this behavior, they
should still be handled correctly, i.e., check execution conditions and generate
exceptions when required. mwait and monitor may also be executed in real-mode
and are not handled in that case.  This patch performs the emulation of
monitor-mwait according to Intel SDM (other than checking whether interrupt can
be used as a break event).

Signed-off-by: Nadav Amit 


How about this instead (details in the commit log below) ? Please let
me know what you think, and if you'd prefer me to send it out as a
separate patch rather than a reply to this thread.

Thanks,
--Gabriel


If there's an easy workaround, I'm inclined to agree.
We can always go back to Gabriel's patch (and then we'll need
Nadav's one too) but if we release a kernel with this
support it becomes an ABI and we can't go back.

So let's be careful here, and revert the hack for 3.16.


Acked-by: Michael S. Tsirkin 


Personally, I got a custom guest which requires mwait for executing correctly.

Can you elaborate on this guest a little bit. With nop implementation
for mwait the guest will hog a host cpu. Do you consider this to be
"executing correctly?"

--


mwait is not as "clean" as it may appear. It encounters false wake-ups due
to a variety of reasons, and any code need to recheck the wake-up condition
afterwards. Actually, some CPUs had bugs that caused excessive wake-ups that
degraded performance considerably (Nehalem, if I am not mistaken).
Therefore, handling mwait as nop is logically correct (although it may
degrade performance).

For the reference, if you look at the SDM 8.10.4, you'll see:
"Multiple events other than a write to the triggering address range can
cause a processor that executed MWAIT to wake up. These include events that
would lead to voluntary or involuntary context switches, such as..."

Note the words "include" in the sentence "These include events". Software
has no way of controlling whether it gets false wake-ups and cannot rely on
the wake-up as indication to anything.


That's all well and good and I didn't say that nop is not a valid
mwait implementation, it is, though there is a big difference between
"encounters false wake-ups" and never sleeps.  What I asked is do you
consider your guest hogging host cpu to be "executing correctly?". What
this guest is doing that such behaviour is tolerated and shouldn't it
be better to just poll for a condition you are waiting for instead of
executing expensive vmexits. This will also hog 100% host cpu, but will
be actually faster.

You are correct, but unfortunately I have no control over the guest 
workload. In this specific workload I do not care about performance but 
only about correctness.


Nadav

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] KVM: x86: correct mwait and monitor emulation

2014-06-19 Thread Gleb Natapov
On Thu, Jun 19, 2014 at 03:10:21PM +0300, Nadav Amit wrote:
> On 6/19/14, 3:07 PM, Gleb Natapov wrote:
> >On Thu, Jun 19, 2014 at 02:52:20PM +0300, Nadav Amit wrote:
> >>On 6/19/14, 2:23 PM, Gleb Natapov wrote:
> >>>On Thu, Jun 19, 2014 at 01:53:36PM +0300, Nadav Amit wrote:
> 
> On Jun 19, 2014, at 1:18 PM, Michael S. Tsirkin  wrote:
> 
> >On Wed, Jun 18, 2014 at 02:46:01PM -0400, Gabriel L. Somlo wrote:
> >>On Wed, Jun 18, 2014 at 10:59:14AM -0700, Eric Northup wrote:
> >>>On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit  
> >>>wrote:
> mwait and monitor are currently handled as nop. Considering this 
> behavior, they
> should still be handled correctly, i.e., check execution conditions 
> and generate
> exceptions when required. mwait and monitor may also be executed in 
> real-mode
> and are not handled in that case.  This patch performs the emulation 
> of
> monitor-mwait according to Intel SDM (other than checking whether 
> interrupt can
> be used as a break event).
> 
> Signed-off-by: Nadav Amit 
> >>
> >>How about this instead (details in the commit log below) ? Please let
> >>me know what you think, and if you'd prefer me to send it out as a
> >>separate patch rather than a reply to this thread.
> >>
> >>Thanks,
> >>--Gabriel
> >
> >If there's an easy workaround, I'm inclined to agree.
> >We can always go back to Gabriel's patch (and then we'll need
> >Nadav's one too) but if we release a kernel with this
> >support it becomes an ABI and we can't go back.
> >
> >So let's be careful here, and revert the hack for 3.16.
> >
> >
> >Acked-by: Michael S. Tsirkin 
> >
> Personally, I got a custom guest which requires mwait for executing 
> correctly.
> >>>Can you elaborate on this guest a little bit. With nop implementation
> >>>for mwait the guest will hog a host cpu. Do you consider this to be
> >>>"executing correctly?"
> >>>
> >>>--
> >>
> >>mwait is not as "clean" as it may appear. It encounters false wake-ups due
> >>to a variety of reasons, and any code need to recheck the wake-up condition
> >>afterwards. Actually, some CPUs had bugs that caused excessive wake-ups that
> >>degraded performance considerably (Nehalem, if I am not mistaken).
> >>Therefore, handling mwait as nop is logically correct (although it may
> >>degrade performance).
> >>
> >>For the reference, if you look at the SDM 8.10.4, you'll see:
> >>"Multiple events other than a write to the triggering address range can
> >>cause a processor that executed MWAIT to wake up. These include events that
> >>would lead to voluntary or involuntary context switches, such as..."
> >>
> >>Note the words "include" in the sentence "These include events". Software
> >>has no way of controlling whether it gets false wake-ups and cannot rely on
> >>the wake-up as indication to anything.
> >>
> >That's all well and good and I didn't say that nop is not a valid
> >mwait implementation, it is, though there is a big difference between
> >"encounters false wake-ups" and never sleeps.  What I asked is do you
> >consider your guest hogging host cpu to be "executing correctly?". What
> >this guest is doing that such behaviour is tolerated and shouldn't it
> >be better to just poll for a condition you are waiting for instead of
> >executing expensive vmexits. This will also hog 100% host cpu, but will
> >be actually faster.
> >
> You are correct, but unfortunately I have no control over the guest
> workload. In this specific workload I do not care about performance but only
> about correctness.
> 
Fair enough. But can you at least hint what is this mysterious guest?

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] KVM: x86: correct mwait and monitor emulation

2014-06-19 Thread Michael S. Tsirkin
On Thu, Jun 19, 2014 at 03:10:21PM +0300, Nadav Amit wrote:
> On 6/19/14, 3:07 PM, Gleb Natapov wrote:
> >On Thu, Jun 19, 2014 at 02:52:20PM +0300, Nadav Amit wrote:
> >>On 6/19/14, 2:23 PM, Gleb Natapov wrote:
> >>>On Thu, Jun 19, 2014 at 01:53:36PM +0300, Nadav Amit wrote:
> 
> On Jun 19, 2014, at 1:18 PM, Michael S. Tsirkin  wrote:
> 
> >On Wed, Jun 18, 2014 at 02:46:01PM -0400, Gabriel L. Somlo wrote:
> >>On Wed, Jun 18, 2014 at 10:59:14AM -0700, Eric Northup wrote:
> >>>On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit  
> >>>wrote:
> mwait and monitor are currently handled as nop. Considering this 
> behavior, they
> should still be handled correctly, i.e., check execution conditions 
> and generate
> exceptions when required. mwait and monitor may also be executed in 
> real-mode
> and are not handled in that case.  This patch performs the emulation 
> of
> monitor-mwait according to Intel SDM (other than checking whether 
> interrupt can
> be used as a break event).
> 
> Signed-off-by: Nadav Amit 
> >>
> >>How about this instead (details in the commit log below) ? Please let
> >>me know what you think, and if you'd prefer me to send it out as a
> >>separate patch rather than a reply to this thread.
> >>
> >>Thanks,
> >>--Gabriel
> >
> >If there's an easy workaround, I'm inclined to agree.
> >We can always go back to Gabriel's patch (and then we'll need
> >Nadav's one too) but if we release a kernel with this
> >support it becomes an ABI and we can't go back.
> >
> >So let's be careful here, and revert the hack for 3.16.
> >
> >
> >Acked-by: Michael S. Tsirkin 
> >
> Personally, I got a custom guest which requires mwait for executing 
> correctly.
> >>>Can you elaborate on this guest a little bit. With nop implementation
> >>>for mwait the guest will hog a host cpu. Do you consider this to be
> >>>"executing correctly?"
> >>>
> >>>--
> >>
> >>mwait is not as "clean" as it may appear. It encounters false wake-ups due
> >>to a variety of reasons, and any code need to recheck the wake-up condition
> >>afterwards. Actually, some CPUs had bugs that caused excessive wake-ups that
> >>degraded performance considerably (Nehalem, if I am not mistaken).
> >>Therefore, handling mwait as nop is logically correct (although it may
> >>degrade performance).
> >>
> >>For the reference, if you look at the SDM 8.10.4, you'll see:
> >>"Multiple events other than a write to the triggering address range can
> >>cause a processor that executed MWAIT to wake up. These include events that
> >>would lead to voluntary or involuntary context switches, such as..."
> >>
> >>Note the words "include" in the sentence "These include events". Software
> >>has no way of controlling whether it gets false wake-ups and cannot rely on
> >>the wake-up as indication to anything.
> >>
> >That's all well and good and I didn't say that nop is not a valid
> >mwait implementation, it is, though there is a big difference between
> >"encounters false wake-ups" and never sleeps.  What I asked is do you
> >consider your guest hogging host cpu to be "executing correctly?". What
> >this guest is doing that such behaviour is tolerated and shouldn't it
> >be better to just poll for a condition you are waiting for instead of
> >executing expensive vmexits. This will also hog 100% host cpu, but will
> >be actually faster.
> >
> You are correct, but unfortunately I have no control over the guest
> workload. In this specific workload I do not care about performance but only
> about correctness.
> 
> Nadav

No one prevents you from patching your kernel to run this workload.  But
is this of use to anyone else? If yes why?

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] KVM: x86: correct mwait and monitor emulation

2014-06-19 Thread Nadav Amit

On 6/19/14, 3:17 PM, Michael S. Tsirkin wrote:

On Thu, Jun 19, 2014 at 03:10:21PM +0300, Nadav Amit wrote:

On 6/19/14, 3:07 PM, Gleb Natapov wrote:

On Thu, Jun 19, 2014 at 02:52:20PM +0300, Nadav Amit wrote:

On 6/19/14, 2:23 PM, Gleb Natapov wrote:

On Thu, Jun 19, 2014 at 01:53:36PM +0300, Nadav Amit wrote:


On Jun 19, 2014, at 1:18 PM, Michael S. Tsirkin  wrote:


On Wed, Jun 18, 2014 at 02:46:01PM -0400, Gabriel L. Somlo wrote:

On Wed, Jun 18, 2014 at 10:59:14AM -0700, Eric Northup wrote:

On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit  wrote:

mwait and monitor are currently handled as nop. Considering this behavior, they
should still be handled correctly, i.e., check execution conditions and generate
exceptions when required. mwait and monitor may also be executed in real-mode
and are not handled in that case.  This patch performs the emulation of
monitor-mwait according to Intel SDM (other than checking whether interrupt can
be used as a break event).

Signed-off-by: Nadav Amit 


How about this instead (details in the commit log below) ? Please let
me know what you think, and if you'd prefer me to send it out as a
separate patch rather than a reply to this thread.

Thanks,
--Gabriel


If there's an easy workaround, I'm inclined to agree.
We can always go back to Gabriel's patch (and then we'll need
Nadav's one too) but if we release a kernel with this
support it becomes an ABI and we can't go back.

So let's be careful here, and revert the hack for 3.16.


Acked-by: Michael S. Tsirkin 


Personally, I got a custom guest which requires mwait for executing correctly.

Can you elaborate on this guest a little bit. With nop implementation
for mwait the guest will hog a host cpu. Do you consider this to be
"executing correctly?"

--


mwait is not as "clean" as it may appear. It encounters false wake-ups due
to a variety of reasons, and any code needs to recheck the wake-up condition
afterwards. Actually, some CPUs had bugs that caused excessive wake-ups that
degraded performance considerably (Nehalem, if I am not mistaken).
Therefore, handling mwait as nop is logically correct (although it may
degrade performance).

For the reference, if you look at the SDM 8.10.4, you'll see:
"Multiple events other than a write to the triggering address range can
cause a processor that executed MWAIT to wake up. These include events that
would lead to voluntary or involuntary context switches, such as..."

Note the words "include" in the sentence "These include events". Software
has no way of controlling whether it gets false wake-ups and cannot rely on
the wake-up as indication to anything.


That's all well and good and I didn't say that nop is not a valid
mwait implementation, it is, though there is a big difference between
"encounters false wake-ups" and never sleeps.  What I asked is do you
consider your guest hogging host cpu to be "executing correctly?". What
this guest is doing that such behaviour is tolerated and shouldn't it
be better to just poll for a condition you are waiting for instead of
executing expensive vmexits. This will also hog 100% host cpu, but will
be actually faster.


You are correct, but unfortunately I have no control over the guest
workload. In this specific workload I do not care about performance but only
about correctness.

Nadav


No one prevents you from patching your kernel to run this workload.  But
is this of use to anyone else? If yes why?

I do not say it should be the default behavior, and I can try to push to 
qemu some setting to turn it on by demand.


Anyhow, I believe there are cases you may want mwait support - either an 
OS X guest which was not modified to run without mwait, or for debugging 
the monitor-mwait flow of a guest OS.


I am not going to argue too much. Since I was under the impression there 
are needs for mwait, other than mine, I thought it would make all of our 
lives easier to have a better implementation.


Nadav
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] kvmtool: add support for supplying GICv3 redistributor addresses

2014-06-19 Thread Andre Przywara
The code currently is assuming fixed sized memory regions for the
distributor and CPU interface. GICv3 needs a dynamic allocation of
its redistributor region, since its size depends on the number of
vCPUs.
Also add the necessary code to create a GICv3 IRQ chip instance.

Signed-off-by: Andre Przywara 
---
 tools/kvm/arm/aarch64/include/kvm/kvm-arch.h |6 ++
 tools/kvm/arm/gic.c  |   29 --
 tools/kvm/arm/include/arm-common/kvm-arch.h  |   11 ++
 tools/kvm/arm/kvm-cpu.c  |4 +++-
 4 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/tools/kvm/arm/aarch64/include/kvm/kvm-arch.h 
b/tools/kvm/arm/aarch64/include/kvm/kvm-arch.h
index 4925736..42d0fd6 100644
--- a/tools/kvm/arm/aarch64/include/kvm/kvm-arch.h
+++ b/tools/kvm/arm/aarch64/include/kvm/kvm-arch.h
@@ -3,6 +3,12 @@
 
 #define ARM_GIC_DIST_SIZE  0x1
 #define ARM_GIC_CPUI_SIZE  0x2
+/*
+ * On a GICv3 there must be one redistributor per vCPU.
+ * The value here is the size for one, we multiply this at runtime with
+ * the number of requested vCPUs to get the actual size.
+ */
+#define ARM_GIC_REDIST_SIZE0x2
 
 #define ARM_KERN_OFFSET(kvm)   ((kvm)->cfg.arch.aarch32_guest  ?   \
0x8000  :   \
diff --git a/tools/kvm/arm/gic.c b/tools/kvm/arm/gic.c
index 770c6e7..5141868 100644
--- a/tools/kvm/arm/gic.c
+++ b/tools/kvm/arm/gic.c
@@ -11,6 +11,7 @@
 static int irq_ids;
 static int gic_fd = -1;
 static u64 cpu_if_addr = ARM_GIC_CPUI_BASE;
+static int nr_redists = 0;
 
 int gic__alloc_irqnum(void)
 {
@@ -27,6 +28,7 @@ static int gic__create_device(struct kvm *kvm, u32 type)
int err;
u32 offset = 0;
u64 dist_addr = ARM_GIC_DIST_BASE;
+   u64 redist_addr;
struct kvm_create_device gic_device = {
.type   = type,
};
@@ -41,9 +43,13 @@ static int gic__create_device(struct kvm *kvm, u32 type)
};
struct kvm_device_attr dist_attr = {
.group  = KVM_DEV_ARM_VGIC_GRP_ADDR,
-   .attr   = KVM_VGIC_V2_ADDR_TYPE_DIST,
.addr   = (u64)(unsigned long)&dist_addr,
};
+   struct kvm_device_attr redist_attr = {
+   .group  = KVM_DEV_ARM_VGIC_GRP_ADDR,
+   .attr   = KVM_VGIC_V3_ADDR_TYPE_REDIST,
+   .addr   = (u64)(unsigned long)&redist_addr,
+   };
 
err = ioctl(kvm->vm_fd, KVM_CREATE_DEVICE, &gic_device);
if (err)
@@ -64,13 +70,23 @@ static int gic__create_device(struct kvm *kvm, u32 type)
err = ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &cpu_if_attr);
if (err)
return err;
+   dist_attr.attr = KVM_VGIC_V2_ADDR_TYPE_DIST;
pr_info("creating GICv2 KVM device");
break;
+   case KVM_DEV_TYPE_ARM_VGIC_V3:
+   dist_attr.attr = KVM_VGIC_V3_ADDR_TYPE_DIST;
+   redist_addr = dist_addr - nr_redists * ARM_GIC_REDIST_SIZE;
+   break;
default:
return -ENODEV;
}
 
err = ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &dist_attr);
+   if (err)
+   return err;
+
+   if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
+   err = ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &redist_attr);
 
return err;
 }
@@ -166,17 +182,26 @@ void gic__generate_fdt_nodes(void *fdt, u32 phandle, u32 
type)
u64 reg_prop[] = {
cpu_to_fdt64(ARM_GIC_DIST_BASE),
cpu_to_fdt64(ARM_GIC_DIST_SIZE),
-   cpu_to_fdt64(cpu_if_addr), cpu_to_fdt64(ARM_GIC_CPUI_SIZE),
+   0, 0,   /* to be filled */
};
 
switch (type) {
case KVM_DEV_TYPE_ARM_VGIC_V2:
compatible = "arm,cortex-a15-gic";
+   reg_prop[2] = cpu_if_addr;
+   reg_prop[3] = ARM_GIC_CPUI_SIZE;
pr_info("creating FDT for a GICv2");
break;
+   case KVM_DEV_TYPE_ARM_VGIC_V3:
+   compatible = "arm,gic-v3";
+   reg_prop[2] = ARM_GIC_DIST_BASE - nr_redists * 
ARM_GIC_REDIST_SIZE;
+   reg_prop[3] = ARM_GIC_REDIST_SIZE * nr_redists;
+   break;
default:
return;
}
+   reg_prop[2] = cpu_to_fdt64(reg_prop[2]);
+   reg_prop[3] = cpu_to_fdt64(reg_prop[3]);
 
_FDT(fdt_begin_node(fdt, "intc"));
_FDT(fdt_property_string(fdt, "compatible", compatible));
diff --git a/tools/kvm/arm/include/arm-common/kvm-arch.h 
b/tools/kvm/arm/include/arm-common/kvm-arch.h
index 72b204f..c7bfd9a 100644
--- a/tools/kvm/arm/include/arm-common/kvm-arch.h
+++ b/tools/kvm/arm/include/arm-common/kvm-arch.h
@@ -15,10 +15,8 @@
 
 #define ARM_GIC_DIST_BASE  (ARM_AXI_AREA - ARM_GIC_DIST_SIZE)
 #define ARM_GIC_CPUI_BASE  (ARM_GIC_DIST_BASE - ARM_GIC_CPUI_SIZE)
-#define ARM_GIC_SIZE   (

[PATCH 2/4] kvmtool: prepare for instantiating different IRQ chip devices

2014-06-19 Thread Andre Przywara
Extend the vGIC handling code to deal with different IRQ chip devices
instead of hard-coding the GICv2 in.

Signed-off-by: Andre Przywara 
---
 tools/kvm/arm/aarch64/arm-cpu.c|2 +-
 tools/kvm/arm/gic.c|   59 
 tools/kvm/arm/include/arm-common/gic.h |4 +--
 tools/kvm/arm/kvm.c|2 +-
 tools/kvm/virtio/mmio.c|2 +-
 5 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/tools/kvm/arm/aarch64/arm-cpu.c b/tools/kvm/arm/aarch64/arm-cpu.c
index ce5ea2f..35771e0 100644
--- a/tools/kvm/arm/aarch64/arm-cpu.c
+++ b/tools/kvm/arm/aarch64/arm-cpu.c
@@ -12,7 +12,7 @@
 static void generate_fdt_nodes(void *fdt, struct kvm *kvm, u32 gic_phandle)
 {
int timer_interrupts[4] = {13, 14, 11, 10};
-   gic__generate_fdt_nodes(fdt, gic_phandle);
+   gic__generate_fdt_nodes(fdt, gic_phandle, KVM_DEV_TYPE_ARM_VGIC_V2);
timer__generate_fdt_nodes(fdt, kvm, timer_interrupts);
 }
 
diff --git a/tools/kvm/arm/gic.c b/tools/kvm/arm/gic.c
index c92deaa..770c6e7 100644
--- a/tools/kvm/arm/gic.c
+++ b/tools/kvm/arm/gic.c
@@ -22,13 +22,13 @@ int gic__alloc_irqnum(void)
return irq;
 }
 
-static int gic__create_device(struct kvm *kvm)
+static int gic__create_device(struct kvm *kvm, u32 type)
 {
int err;
u32 offset = 0;
u64 dist_addr = ARM_GIC_DIST_BASE;
struct kvm_create_device gic_device = {
-   .type   = KVM_DEV_TYPE_ARM_VGIC_V2,
+   .type   = type,
};
struct kvm_device_attr offset_attr = {
.group  = KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET,
@@ -51,19 +51,28 @@ static int gic__create_device(struct kvm *kvm)
 
gic_fd = gic_device.fd;
 
-   if (!ioctl(gic_fd, KVM_HAS_DEVICE_ATTR, &offset_attr)) {
-   err = ioctl(gic_fd, KVM_GET_DEVICE_ATTR, &offset_attr);
+   switch (type) {
+   case KVM_DEV_TYPE_ARM_VGIC_V2:
+   if (!ioctl(gic_fd, KVM_HAS_DEVICE_ATTR, &offset_attr)) {
+   err = ioctl(gic_fd, KVM_GET_DEVICE_ATTR, &offset_attr);
+   if (err)
+   return err;
+   }
+
+   cpu_if_addr += offset;
+
+   err = ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &cpu_if_attr);
if (err)
return err;
+   pr_info("creating GICv2 KVM device");
+   break;
+   default:
+   return -ENODEV;
}
 
-   cpu_if_addr += offset;
+   err = ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &dist_attr);
 
-   err = ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &cpu_if_attr);
-   if (err)
-   return err;
-
-   return ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &dist_attr);
+   return err;
 }
 
 static int gic__create_irqchip(struct kvm *kvm)
@@ -94,7 +103,7 @@ static int gic__create_irqchip(struct kvm *kvm)
return err;
 }
 
-int gic__init_irqchip(struct kvm *kvm)
+static int gicv2__init_irqchip(struct kvm *kvm)
 {
int err;
int psz;
@@ -113,13 +122,22 @@ int gic__init_irqchip(struct kvm *kvm)
}
 
/* Try the new way first, and fallback on legacy method otherwise */
-   err = gic__create_device(kvm);
+   err = gic__create_device(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
if (err)
err = gic__create_irqchip(kvm);
 
return err;
 }
 
+int gic__init_irqchip(struct kvm *kvm, u32 type)
+{
+   switch (type) {
+   case KVM_DEV_TYPE_ARM_VGIC_V2:
+   return gicv2__init_irqchip(kvm);
+   }
+   return -ENODEV;
+}
+
 static int gic__init_max_irq(struct kvm *kvm)
 {
u32 nr_irqs = ALIGN(irq_ids, 32) + GIC_SPI_IRQ_BASE;
@@ -142,15 +160,26 @@ static int gic__init_max_irq(struct kvm *kvm)
 }
 late_init(gic__init_max_irq)
 
-void gic__generate_fdt_nodes(void *fdt, u32 phandle)
+void gic__generate_fdt_nodes(void *fdt, u32 phandle, u32 type)
 {
+   const char *compatible;
u64 reg_prop[] = {
-   cpu_to_fdt64(ARM_GIC_DIST_BASE), 
cpu_to_fdt64(ARM_GIC_DIST_SIZE),
+   cpu_to_fdt64(ARM_GIC_DIST_BASE),
+   cpu_to_fdt64(ARM_GIC_DIST_SIZE),
cpu_to_fdt64(cpu_if_addr), cpu_to_fdt64(ARM_GIC_CPUI_SIZE),
};
 
+   switch (type) {
+   case KVM_DEV_TYPE_ARM_VGIC_V2:
+   compatible = "arm,cortex-a15-gic";
+   pr_info("creating FDT for a GICv2");
+   break;
+   default:
+   return;
+   }
+
_FDT(fdt_begin_node(fdt, "intc"));
-   _FDT(fdt_property_string(fdt, "compatible", "arm,cortex-a15-gic"));
+   _FDT(fdt_property_string(fdt, "compatible", compatible));
_FDT(fdt_property_cell(fdt, "#interrupt-cells", GIC_FDT_IRQ_NUM_CELLS));
_FDT(fdt_property(fdt, "interrupt-controller", NULL, 0));
_FDT(fdt_property(fdt, "reg", reg_prop, sizeof(reg_prop)));
diff --git a/tools/kvm/arm/include/arm-common/gic.h 
b/

[PATCH 4/4] kvmtool: add command line parameter to instantiate a vGICv3

2014-06-19 Thread Andre Przywara
Add the command line parameter "--gicv3" to request GICv3 emulation
in the kernel. Connect that to the already existing GICv3 code.

Signed-off-by: Andre Przywara 
---
 tools/kvm/arm/aarch64/arm-cpu.c|5 -
 .../kvm/arm/aarch64/include/kvm/kvm-config-arch.h  |4 +++-
 tools/kvm/arm/gic.c|   17 +
 tools/kvm/arm/include/arm-common/kvm-config-arch.h |1 +
 tools/kvm/arm/kvm.c|4 +++-
 5 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/tools/kvm/arm/aarch64/arm-cpu.c b/tools/kvm/arm/aarch64/arm-cpu.c
index 35771e0..e3514ea 100644
--- a/tools/kvm/arm/aarch64/arm-cpu.c
+++ b/tools/kvm/arm/aarch64/arm-cpu.c
@@ -12,7 +12,10 @@
 static void generate_fdt_nodes(void *fdt, struct kvm *kvm, u32 gic_phandle)
 {
int timer_interrupts[4] = {13, 14, 11, 10};
-   gic__generate_fdt_nodes(fdt, gic_phandle, KVM_DEV_TYPE_ARM_VGIC_V2);
+   gic__generate_fdt_nodes(fdt, gic_phandle,
+   kvm->cfg.arch.gicv3 ?
+   KVM_DEV_TYPE_ARM_VGIC_V3 :
+   KVM_DEV_TYPE_ARM_VGIC_V2);
timer__generate_fdt_nodes(fdt, kvm, timer_interrupts);
 }
 
diff --git a/tools/kvm/arm/aarch64/include/kvm/kvm-config-arch.h 
b/tools/kvm/arm/aarch64/include/kvm/kvm-config-arch.h
index 89860ae..780a6d1 100644
--- a/tools/kvm/arm/aarch64/include/kvm/kvm-config-arch.h
+++ b/tools/kvm/arm/aarch64/include/kvm/kvm-config-arch.h
@@ -3,7 +3,9 @@
 
 #define ARM_OPT_ARCH_RUN(cfg)  \
OPT_BOOLEAN('\0', "aarch32", &(cfg)->aarch32_guest, \
-   "Run AArch32 guest"),
+   "Run AArch32 guest"),   \
+   OPT_BOOLEAN('\0', "gicv3", &(cfg)->gicv3,   \
+   "use a GICv3 interrupt controller in the guest"),
 
 #include "arm-common/kvm-config-arch.h"
 
diff --git a/tools/kvm/arm/gic.c b/tools/kvm/arm/gic.c
index 5141868..15dc99d 100644
--- a/tools/kvm/arm/gic.c
+++ b/tools/kvm/arm/gic.c
@@ -76,6 +76,7 @@ static int gic__create_device(struct kvm *kvm, u32 type)
case KVM_DEV_TYPE_ARM_VGIC_V3:
dist_attr.attr = KVM_VGIC_V3_ADDR_TYPE_DIST;
redist_addr = dist_addr - nr_redists * ARM_GIC_REDIST_SIZE;
+   pr_info("creating GICv3 KVM device");
break;
default:
return -ENODEV;
@@ -119,6 +120,17 @@ static int gic__create_irqchip(struct kvm *kvm)
return err;
 }
 
+static int gicv3__init_irqchip(struct kvm *kvm)
+{
+   if (kvm->nrcpus > 255) {
+   pr_warning("%d CPUS greater than maximum of %d -- truncating\n",
+   kvm->nrcpus, 255);
+   kvm->nrcpus = 255;
+   }
+
+   return gic__create_device(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
+}
+
 static int gicv2__init_irqchip(struct kvm *kvm)
 {
int err;
@@ -150,6 +162,9 @@ int gic__init_irqchip(struct kvm *kvm, u32 type)
switch (type) {
case KVM_DEV_TYPE_ARM_VGIC_V2:
return gicv2__init_irqchip(kvm);
+   case KVM_DEV_TYPE_ARM_VGIC_V3:
+   nr_redists = kvm->cfg.nrcpus;
+   return gicv3__init_irqchip(kvm);
}
return -ENODEV;
 }
@@ -196,6 +211,8 @@ void gic__generate_fdt_nodes(void *fdt, u32 phandle, u32 
type)
compatible = "arm,gic-v3";
reg_prop[2] = ARM_GIC_DIST_BASE - nr_redists * 
ARM_GIC_REDIST_SIZE;
reg_prop[3] = ARM_GIC_REDIST_SIZE * nr_redists;
+   pr_info("creating FDT for a GICv3 with %d redistributors",
+   nr_redists);
break;
default:
return;
diff --git a/tools/kvm/arm/include/arm-common/kvm-config-arch.h 
b/tools/kvm/arm/include/arm-common/kvm-config-arch.h
index f3baf39..088b012 100644
--- a/tools/kvm/arm/include/arm-common/kvm-config-arch.h
+++ b/tools/kvm/arm/include/arm-common/kvm-config-arch.h
@@ -7,6 +7,7 @@ struct kvm_config_arch {
const char *dump_dtb_filename;
unsigned int force_cntfrq;
bool aarch32_guest;
+   bool gicv3;
 };
 
 #define OPT_ARCH_RUN(pfx, cfg) 
\
diff --git a/tools/kvm/arm/kvm.c b/tools/kvm/arm/kvm.c
index faf4702..3858170 100644
--- a/tools/kvm/arm/kvm.c
+++ b/tools/kvm/arm/kvm.c
@@ -80,6 +80,8 @@ void kvm__arch_init(struct kvm *kvm, const char 
*hugetlbfs_path, u64 ram_size)
MADV_MERGEABLE);
 
/* Initialise the virtual GIC. */
-   if (gic__init_irqchip(kvm, KVM_DEV_TYPE_ARM_VGIC_V2))
+   if (gic__init_irqchip(kvm, kvm->cfg.arch.gicv3 ?
+  KVM_DEV_TYPE_ARM_VGIC_V3 :
+  KVM_DEV_TYPE_ARM_VGIC_V2))
die("Failed to initialise virtual GIC");
 }
-- 
1.7.9.5

--
To unsubscribe from thi

[PATCH 0/4] kvmtool: Add GICv3 emulation support

2014-06-19 Thread Andre Przywara
This is a first version of the kvmtool changes needed for GICv3
emulation. Allows testing of the GICv3 emulation code and allows
creating guests with more than 8 VCPUs (given that in the host
CONFIG_KVM_ARM_MAX_VCPUS and in the guest CONFIG_NR_CPUS have been
raised accordingly).
Use --gicv3 to create a GICv3 IRQ chip in the guest.

This goes on top of Marc's kvmtool-vgic-dyn branch on kernel.org:
https://git.kernel.org/cgit/linux/kernel/git/maz/arm-platforms.git/log/?h=kvm-arm64/kvmtool-vgic-dyn

This is not meant for merging right now, as it eventually needs to
be rebased on a more recent kvmtool version first.

Cheers,
Andre

Andre Przywara (4):
  kvmtool: public header definitions from GICv3 emulation patch series
  kvmtool: prepare for instantiating different IRQ chip devices
  kvmtool: add support for supplying GICv3 redistributor addresses
  kvmtool: add command line parameter to instantiate a vGICv3

 arch/arm64/include/uapi/asm/kvm.h  |6 ++
 include/uapi/linux/kvm.h   |1 +
 tools/kvm/arm/aarch64/arm-cpu.c|5 +-
 tools/kvm/arm/aarch64/include/kvm/kvm-arch.h   |6 ++
 .../kvm/arm/aarch64/include/kvm/kvm-config-arch.h  |4 +-
 tools/kvm/arm/gic.c|  101 +---
 tools/kvm/arm/include/arm-common/gic.h |4 +-
 tools/kvm/arm/include/arm-common/kvm-arch.h|   11 ++-
 tools/kvm/arm/include/arm-common/kvm-config-arch.h |1 +
 tools/kvm/arm/kvm-cpu.c|4 +-
 tools/kvm/arm/kvm.c|4 +-
 tools/kvm/virtio/mmio.c|2 +-
 12 files changed, 123 insertions(+), 26 deletions(-)

-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] kvmtool: public header definitions from GICv3 emulation patch series

2014-06-19 Thread Andre Przywara
This pulls the necessary defines for the GICv3 constants from the
Linux tree into kvmtool for now. Should be obsolete as soon as
the vGICv3 patches are upstream and kvmtool is rebased on top of
it.

Signed-off-by: Andre Przywara 
---
 arch/arm64/include/uapi/asm/kvm.h |6 ++
 include/uapi/linux/kvm.h  |1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index 0dc731d..2c26821 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -75,6 +75,12 @@ struct kvm_regs {
 #define KVM_VGIC_V2_DIST_SIZE  0x1000
 #define KVM_VGIC_V2_CPU_SIZE   0x2000
 
+#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST   3
+
+#define KVM_VGIC_V3_DIST_SIZE  0x1
+#define KVM_VGIC_V3_REDIST_SIZE0x2
+
 #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 932d7f2..3af4b60 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -855,6 +855,7 @@ struct kvm_device_attr {
 #define   KVM_DEV_VFIO_GROUP_ADD   1
 #define   KVM_DEV_VFIO_GROUP_DEL   2
 #define KVM_DEV_TYPE_ARM_VGIC_V2   5
+#define KVM_DEV_TYPE_ARM_VGIC_V3   7
 
 /*
  * ioctls for VM fds
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Revert "kvm: x86: emulate monitor and mwait instructions as nop"

2014-06-19 Thread Gabriel L. Somlo
This reverts commit 87c00572ba05aa8c9db118da75c608f47eb10b9e.

OS X <= 10.7.* are the only known guests which realistically required
this functionality. As it turns out, OS X can be told to forego using
monitor/mwait by passing it "idlehalt=0" as a kernel argument, so we're
better off removing this hack from KVM altogether, at least for now.

Signed-off-by: Gabriel L. Somlo 
Acked-by: Michael S. Tsirkin 
---

OK, here's the formal proposal to revert my original monitor/mwait hack...
I wish I knew about the "idlehalt=0" before I submitted it, but such is life.
Depending on how those of you higher up the food chain feel, I can tolerate
leaving the hack in if it's never on by default (i.e. no CPUID advertisement),
but would feel better if we could simply undo this until we have a better
solution, something less controversial...

Thanks, and sorry for all the noise,
  Gabriel

 arch/x86/kvm/cpuid.c |  2 --
 arch/x86/kvm/svm.c   | 28 
 arch/x86/kvm/vmx.c   | 20 
 3 files changed, 12 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 38a0afe..17b42fa 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -283,8 +283,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
/* cpuid 1.ecx */
const u32 kvm_supported_word4_x86_features =
-   /* NOTE: MONITOR (and MWAIT) are emulated as NOP,
-* but *not* advertised to guests via CPUID ! */
F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
0 /* DS-CPL, VMX, SMX, EST */ |
0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ec8366c..6b88b6a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2763,6 +2763,12 @@ static int xsetbv_interception(struct vcpu_svm *svm)
return 1;
 }
 
+static int invalid_op_interception(struct vcpu_svm *svm)
+{
+   kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+   return 1;
+}
+
 static int task_switch_interception(struct vcpu_svm *svm)
 {
u16 tss_selector;
@@ -3274,24 +3280,6 @@ static int pause_interception(struct vcpu_svm *svm)
return 1;
 }
 
-static int nop_interception(struct vcpu_svm *svm)
-{
-   skip_emulated_instruction(&(svm->vcpu));
-   return 1;
-}
-
-static int monitor_interception(struct vcpu_svm *svm)
-{
-   printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
-   return nop_interception(svm);
-}
-
-static int mwait_interception(struct vcpu_svm *svm)
-{
-   printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
-   return nop_interception(svm);
-}
-
 static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR0] = cr_interception,
[SVM_EXIT_READ_CR3] = cr_interception,
@@ -3349,8 +3337,8 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm 
*svm) = {
[SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT]   = skinit_interception,
[SVM_EXIT_WBINVD]   = emulate_on_interception,
-   [SVM_EXIT_MONITOR]  = monitor_interception,
-   [SVM_EXIT_MWAIT]= mwait_interception,
+   [SVM_EXIT_MONITOR]  = invalid_op_interception,
+   [SVM_EXIT_MWAIT]= invalid_op_interception,
[SVM_EXIT_XSETBV]   = xsetbv_interception,
[SVM_EXIT_NPF]  = pf_interception,
 };
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 801332e..163958f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5672,24 +5672,12 @@ static int handle_pause(struct kvm_vcpu *vcpu)
return 1;
 }
 
-static int handle_nop(struct kvm_vcpu *vcpu)
+static int handle_invalid_op(struct kvm_vcpu *vcpu)
 {
-   skip_emulated_instruction(vcpu);
+   kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
 }
 
-static int handle_mwait(struct kvm_vcpu *vcpu)
-{
-   printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
-   return handle_nop(vcpu);
-}
-
-static int handle_monitor(struct kvm_vcpu *vcpu)
-{
-   printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
-   return handle_nop(vcpu);
-}
-
 /*
  * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
  * We could reuse a single VMCS for all the L2 guests, but we also want the
@@ -6651,8 +6639,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct 
kvm_vcpu *vcpu) = {
[EXIT_REASON_EPT_VIOLATION]   = handle_ept_violation,
[EXIT_REASON_EPT_MISCONFIG]   = handle_ept_misconfig,
[EXIT_REASON_PAUSE_INSTRUCTION]   = handle_pause,
- 

Re: [PATCH v5 04/20] arm64: boot protocol documentation update for GICv3

2014-06-19 Thread Mark Rutland
Hi Marc,

On Thu, Jun 19, 2014 at 10:19:27AM +0100, Marc Zyngier wrote:
> Linux has some requirements that must be satisfied in order to boot
> on a system built with a GICv3.
> 
> Acked-by: Christoffer Dall 
> Signed-off-by: Marc Zyngier 
> ---
>  Documentation/arm64/booting.txt | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
> index 37fc4f6..e28ccec 100644
> --- a/Documentation/arm64/booting.txt
> +++ b/Documentation/arm64/booting.txt
> @@ -141,6 +141,12 @@ Before jumping into the kernel, the following conditions 
> must be met:
>the kernel image will be entered must be initialised by software at a
>higher exception level to prevent execution in an UNKNOWN state.
>  
> +  For systems with a GICv3 interrupt controller, it is expected that:
> +  - If EL3 is present, it must program ICC_SRE_EL3.Enable (bit 3) to
> +0b1 and ICC_SRE_EL3.SRE (bit 0) to 0b1.
> +  - If the kernel is entered at EL1, EL2 must set ICC_SRE_EL2.Enable
> +(bit 3) to 0b1 and ICC_SRE_EL2.SRE (bit 0) to 0b1.

Apologies for spotting this so late, but to me this sounds slightly
ambiguous. The use of "it is expected" doesn't read like a hard
requirement, and in the first point, it's ambiguous as to what "it" is.

I assume that if the GIC is communicated to the kernel as a GICv2 then
these points do not hold?

How about:

  For systems with a GICv3 interrupt controller, where the presence of
  GICv3 is communicated to the kernel:
  - If EL3 is present:
ICC_SRE_EL3.Enable (bit 3) must be initialised to 0b1.
ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1.
  - If the kernel is entered at EL1:
ICC_SRE_EL2.Enable (bit 3) must be initialised to 0b1.
ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1.

Thanks,
Mark.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Coupling between KVM_IRQFD and KVM_SET_GSI_ROUTING?

2014-06-19 Thread Eric Auger
Hi,

If I am not wrong I did not get any reply to this question. Can anyone
share his/her experience/knowledge on this gsi routing/irqfd use case?

Thank you in advance

Best Regards

Eric

On 06/17/2014 01:39 PM, Eric Auger wrote:
> Hello,
> 
> I have a question related to KVM_IRQFD and KVM_SET_GSI_ROUTING ioctl
> relationship.
> 
> When reading the KVM API documentation I do not understand there is any
> dependency between KVM_IRQFD and KVM_SET_GSI_ROUTING. According to the
> text it seems only the gsi field is used and interpreted as the irqchip pin.
> 
> However irqchip.c kvm_set_irq code relies on an existing and not dummy
> routing table.
> 
> My question is: does anyone agree on the fact the user-side must set a
> consistent routing table using KVM_SET_GSI_ROUTING before using
> KVM_IRQFD? The other alternative would have been to build a default
> identity GSI routing table in the kernel (gsi = irqchip.pin).
> 
> In the positive, shouldn't we clarify the KVM API documentation?
> 
> Thank you in advance
> 
> Best Regards
> 
> Eric
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/5] KVM: MMU: allow pinning spte translations (TDP-only)

2014-06-19 Thread Andi Kleen
> >+ * Failure to instantiate pages will abort guest entry.
> >+ *
> >+ * Page frames should be pinned with get_page in advance.
> >+ *
> >+ * Pinning is not guaranteed while executing as L2 guest.
> 
> Does this undermine security?

It should not. In the worst case it'll randomly lose PEBS records.

-Andi
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] ARM: KVM: add irqfd and irq routing support

2014-06-19 Thread Will Deacon
Hi all,

I'm currently adding VFIO support for kvmtool, so I'm interested in this
patch series (although actually from a PCI perspective).

Eric: can you CC me on future versions of this series please? Once things
start to stabilise, I can help with testing.

On Thu, Jun 05, 2014 at 03:39:50PM +0100, Christoffer Dall wrote:
> On Thu, Jun 05, 2014 at 03:15:15PM +0200, Eric Auger wrote:
> > On 06/05/2014 12:28 PM, Christoffer Dall wrote:
> > > On Mon, Jun 02, 2014 at 09:29:56AM +0200, Eric Auger wrote:
> > >> +  kvm_debug("Inject irqchip routed vIRQ %d\n",
> > >> +  e->irqchip.pin);
> > >> +  kvm_vgic_inject_irq(kvm, 0, spi, level);
> > >> +  /*
> > >> +   * toggling down vIRQ wire is directly handled in
> > >> +   * process_maintenance for this reason:
> > >> +   * irqfd_resampler_ack is called in
> > >> +   * process_maintenance which holds the dist lock.
> > >> +   * irqfd_resampler_ack calls kvm_set_irq
> > >> +   * which ends_up calling kvm_vgic_inject_irq.
> > >> +   * This later attempts to take the lock -> deadlock!
> > >> +   */
> > >
> > > Not sure I understand this comment.  What are we trying to achieve, are
> > > we using some sort of a workaround to avoid a deadlock?
> >
> > What I wanted to point out here is I would have preferred to handle both
> > levels 0 and 1 in a symmetrical manner. irqfd_resampler_ack (in eventfd)
> > is calling kvm_set_irq with level 0. This would be the preferred way to
> > toggle down the SPI at GIC input instead of doing this in
> > process_maintenance in a dirty manner. However this does not work because
> > irqfd_resampler_ack is called in process_maintenance (the place where
> > the EOI is analyzed). process_maintenance holds the dist lock and would
> > eventually call kvm_vgic_inject_irq, which also attempts to take the lock.
> >
> 
> I'm afraid that's too much of a hack.  There's an external mechanism to
> set an interrupt line to active (level=1) or inactive (level=0) and we
> must support both.
> 
> The fact that vgic_process_maintenance() can set the interrupt line to
> inactive is just something we exploit to properly handle level-triggered
> interrupts, but the main API to the VGIC must absolutely be supported.
> 
> Am I completely wrong here?
> 
> The locking issue can be solved by splitting up the locking into a finer
> granularity as needed or deferring the call to irqfd_resampler_ack()
> until after unlocking the distributor lock in kvm_vgic_sync_hwstate().

Why can't we do what PowerPC does for mpic and x86 does for IOAPIC and
simply drop the distributor lock across the call to kvm_notify_acked_irq?

Given that I think the eventfd callbacks can block, holding a spinlock isn't
safe anyway, regardless of the vgic re-entrancy issue.

Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] kvm: Implement PEBS virtualization

2014-06-19 Thread Paolo Bonzini

Il 10/06/2014 23:06, Marcelo Tosatti ha scritto:

> BTW how about general PMU migration? As far as I can tell there
> is no code to save/restore the state for that currently, right?

Paolo wrote support for it, recently. Paolo?


Yes, on the KVM side all that is needed is to special case MSR reads and 
writes that have side effects, for example:


case MSR_CORE_PERF_GLOBAL_STATUS:
if (msr_info->host_initiated) {
pmu->global_status = data;
return 0;
}
break; /* RO MSR */
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62 {
if (!msr_info->host_initiated)
pmu->global_status &= ~data;
pmu->global_ovf_ctrl = data;
return 0;
}
break;

Right now this is only needed for writes.

Userspace then can read/write these MSRs, and add them to the migration 
stream.  QEMU has code for that.


Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] kvm: Implement PEBS virtualization

2014-06-19 Thread Paolo Bonzini

Il 02/06/2014 21:57, Andi Kleen ha scritto:

> It would be a bigger concern if we expected virtual PMU migration to
> work, but I think it would be nice to update kvm_pmu_cpuid_update() to
> notice the presence/absence of the new CPUID bits, and then store that
> into per-VM kvm_pmu->pebs_allowed rather than relying only on the
> per-host perf_pebs_virtualization().

I hope at some point it can work. There shouldn't be any problems
with migrating to the same CPU model, in many cases (same event
and same PEBS format) it'll likely even work between models or
gracefully degrade.


The code is there in both kernel and QEMU, it's just very little tested.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] ARM: KVM: add irqfd and irq routing support

2014-06-19 Thread Eric Auger
On 06/19/2014 04:13 PM, Will Deacon wrote:
> Hi all,
> 
> I'm currently adding VFIO support for kvmtool, so I'm interested in this
> patch series (although actually from a PCI perspective).
> 
> Eric: can you CC me on future versions of this series please? Once things
> start to stabilise, I can help with testing.

Hi Will,

sure I will CC you.
> 
> On Thu, Jun 05, 2014 at 03:39:50PM +0100, Christoffer Dall wrote:
>> On Thu, Jun 05, 2014 at 03:15:15PM +0200, Eric Auger wrote:
>>> On 06/05/2014 12:28 PM, Christoffer Dall wrote:
 On Mon, Jun 02, 2014 at 09:29:56AM +0200, Eric Auger wrote:
> +  kvm_debug("Inject irqchip routed vIRQ %d\n",
> +  e->irqchip.pin);
> +  kvm_vgic_inject_irq(kvm, 0, spi, level);
> +  /*
> +   * toggling down vIRQ wire is directly handled in
> +   * process_maintenance for this reason:
> +   * irqfd_resampler_ack is called in
> +   * process_maintenance which holds the dist lock.
> +   * irqfd_resampler_ack calls kvm_set_irq
> +   * which ends_up calling kvm_vgic_inject_irq.
> +   * This later attempts to take the lock -> deadlock!
> +   */

 Not sure I understand this comment.  What are we trying to achieve, are
 we using some sort of a workaround to avoid a deadlock?
>>>
>>> What I wanted to point out here is I would have preferred to handle both
>>> levels 0 and 1 in a symmetrical manner. irqfd_resampler_ack (in eventfd)
>>> is calling kvm_set_irq with level 0. This would be the preferred way to
>>> toggle down the SPI at GIC input instead of doing this in
>>> process_maintenance in a dirty manner. However this does not work because
>>> irqfd_resampler_ack is called in process_maintenance (the place where
>>> the EOI is analyzed). process_maintenance holds the dist lock and would
>>> eventually call kvm_vgic_inject_irq, which also attempts to take the lock.
>>>
>>
>> I'm afraid that's too much of a hack.  There's an external mechanism to
>> set an interrupt line to active (level=1) or inactive (level=0) and we
>> must support both.
>>
>> The fact that vgic_process_maintenance() can set the interrupt line to
>> inactive is just something we exploit to properly handle level-triggered
>> interrupts, but the main API to the VGIC must absolutely be supported.
>>
>> Am I completely wrong here?
>>
>> The locking issue can be solved by splitting up the locking into a finer
>> granularity as needed or deferring the call to irqfd_resampler_ack()
>> until after unlocking the distributor lock in kvm_vgic_sync_hwstate().
> 
> Why can't we do what PowerPC does for mpic and x86 does for IOAPIC and
> simply drop the distributor lock across the call to kvm_notify_acked_irq?

Yes, I am about to release a new version for this RFC that uses a finer
granularity for the dist lock, as you and Christoffer suggested.

> 
> Given that I think the eventfd callbacks can block, holding a spinlock isn't
> safe anyway, regardless of the vgic re-entrancy issue.
yes you're fully right.

Best Regards

Eric
> 
> Will
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


BUG at mm/memory.c

2014-06-19 Thread Ortwin Glück
Hi,

I was hitting a BUG while running a couple of qemu 2.0 on a 3.15.0 kernel. KSM 
was running. This box uses NUMA with two E5 6-core Xeons.

Linux toaster 3.15.0 #1 SMP PREEMPT Thu Jun 12 14:05:12 CEST 2014 x86_64 
Intel(R) Xeon(R) CPU E5-2620 v2 @ 2.10GHz GenuineIntel GNU/Linux

Jun 17 16:59:47 toaster kernel: [ cut here ]
Jun 17 16:59:47 toaster kernel: kernel BUG at mm/memory.c:3924!
Jun 17 16:59:47 toaster kernel: invalid opcode:  [#1] PREEMPT SMP 
Jun 17 16:59:47 toaster kernel: Modules linked in: iTCO_wdt iTCO_vendor_support
Jun 17 16:59:47 toaster kernel: CPU: 14 PID: 13058 Comm: qemu-system-x86 Not 
tainted 3.15.0 #1
Jun 17 16:59:47 toaster kernel: Hardware name: ASUSTeK COMPUTER INC. Z9PE-D8 
WS/Z9PE-D8 WS, BIOS 5404 02/10/2014
Jun 17 16:59:47 toaster kernel: task: 880ffcef8000 ti: 88065873c000 
task.ti: 88065873c000
Jun 17 16:59:47 toaster kernel: RIP: 0010:[]  
[] handle_mm_fault+0xc92/0xdb0
Jun 17 16:59:47 toaster kernel: RSP: 0018:88065873f968  EFLAGS: 00010246
Jun 17 16:59:47 toaster kernel: RAX: 80092e0001e6 RBX: 7fa0d2c2 
RCX: 88065873f6f0
Jun 17 16:59:47 toaster kernel: RDX: 0100 RSI: 0009 
RDI: 004352da
Jun 17 16:59:47 toaster kernel: RBP: 88065873f9f8 R08:  
R09: 0d88
Jun 17 16:59:47 toaster kernel: R10: 0019 R11:  
R12: 880179943140
Jun 17 16:59:47 toaster kernel: R13: 0800 R14: 88069ac9d4b0 
R15: 88109a7a8a80
Jun 17 16:59:47 toaster kernel: FS:  7fa1ad61c700() 
GS:88089fd0() knlGS:
Jun 17 16:59:47 toaster kernel: CS:  0010 DS:  ES:  CR0: 
80050033
Jun 17 16:59:47 toaster kernel: CR2: 7f6a20743000 CR3: 0009582aa000 
CR4: 001427e0
Jun 17 16:59:47 toaster kernel: Stack:
Jun 17 16:59:47 toaster kernel: 8808df251b88 88049543a540 
0f08 7fa0d33e1000
Jun 17 16:59:47 toaster kernel: 88065873f9f8 0019 
8003076fd067 880958268418
Jun 17 16:59:47 toaster kernel: d33e1fff 0f08 
88069ac9d4b0 0001d2c2
Jun 17 16:59:47 toaster kernel: Call Trace:
Jun 17 16:59:47 toaster kernel: [] 
__get_user_pages+0x156/0x5f0
Jun 17 16:59:47 toaster kernel: [] 
__gfn_to_pfn_memslot+0x15c/0x3e0
Jun 17 16:59:47 toaster kernel: [] ? 
emulator_read_write+0x110/0x180
Jun 17 16:59:47 toaster kernel: [] __gfn_to_pfn+0x60/0x70
Jun 17 16:59:47 toaster kernel: [] gfn_to_pfn_async+0x1a/0x20
Jun 17 16:59:47 toaster kernel: [] try_async_pf+0x4a/0x230
Jun 17 16:59:47 toaster kernel: [] tdp_page_fault+0x103/0x1f0
Jun 17 16:59:47 toaster kernel: [] 
kvm_mmu_page_fault+0x31/0x100
Jun 17 16:59:47 toaster kernel: [] 
handle_ept_violation+0x96/0x180
Jun 17 16:59:47 toaster kernel: [] vmx_handle_exit+0xb5/0xa30
Jun 17 16:59:47 toaster kernel: [] ? 
vmx_handle_external_intr+0x66/0x70
Jun 17 16:59:47 toaster kernel: [] ? 
vmx_invpcid_supported+0x20/0x20
Jun 17 16:59:47 toaster kernel: [] 
kvm_arch_vcpu_ioctl_run+0xc45/0x1120
Jun 17 16:59:47 toaster kernel: [] ? 
kvm_arch_vcpu_load+0x4e/0x1e0
Jun 17 16:59:47 toaster kernel: [] kvm_vcpu_ioctl+0x2f4/0x580
Jun 17 16:59:47 toaster kernel: [] ? fsnotify+0x22c/0x2f0
Jun 17 16:59:47 toaster kernel: [] do_vfs_ioctl+0x83/0x510
Jun 17 16:59:47 toaster kernel: [] ? __fget+0x79/0xb0
Jun 17 16:59:47 toaster kernel: [] SyS_ioctl+0x4c/0x90
Jun 17 16:59:47 toaster kernel: [] 
system_call_fastpath+0x16/0x1b
Jun 17 16:59:47 toaster kernel: Code: 49 8b 55 00 e9 1c f4 ff ff 48 89 c7 e8 58 
26 fe ff e9 51 f6 ff ff f6 42 51 01 0f 85 c9 fc ff ff 41 bd 02 00 00 00 e9 47 
f6 ff ff <0f> 0b 48 89 d9 4c 89 f2 4c 89 e6 4c 89 ff 44 89 55 98 e8 07 b7 
Jun 17 16:59:47 toaster kernel: RIP  [] 
handle_mm_fault+0xc92/0xdb0
Jun 17 16:59:47 toaster kernel: RSP 
Jun 17 16:59:47 toaster kernel: kernel BUG at arch/x86/mm/pageattr.c:216!
Jun 17 16:59:47 toaster kernel: invalid opcode:  [#2] PREEMPT SMP 
Jun 17 16:59:47 toaster kernel: Modules linked in: iTCO_wdt iTCO_vendor_support
Jun 17 16:59:47 toaster kernel: CPU: 14 PID: 13058 Comm: qemu-system-x86 Not 
tainted 3.15.0 #1
Jun 17 16:59:47 toaster kernel: Hardware name: ASUSTeK COMPUTER INC. Z9PE-D8 
WS/Z9PE-D8 WS, BIOS 5404 02/10/2014
Jun 17 16:59:47 toaster kernel: task: 880ffcef8000 ti: 88065873c000 
task.ti: 88065873c000
Jun 17 16:59:47 toaster kernel: RIP: 0010:[]  
[] change_page_attr_set_clr+0x469/0x470
Jun 17 16:59:47 toaster kernel: RSP: 0018:88065873ec68  EFLAGS: 00010046
Jun 17 16:59:47 toaster kernel: RAX: 0046 RBX:  
RCX: 0005
Jun 17 16:59:47 toaster kernel: RDX:  RSI:  
RDI: 8000
Jun 17 16:59:47 toaster kernel: RBP: 88065873ed18 R08: 8000 
R09: 
Jun 17 16:59:47 toaster kernel: R10: 88016d95a000 R11: 0001 
R12: 
Jun 17 16:59:47 toaster kernel: R13:  R14: 

Re: BUG at mm/memory.c

2014-06-19 Thread Kirill A. Shutemov
On Thu, Jun 19, 2014 at 06:30:38PM +0200, Ortwin Glück wrote:
> Hi,
> 
> I was hitting a BUG while running a couple of qemu 2.0 on a 3.15.0 kernel. 
> KSM was running. This box uses NUMA with two E5 6-core Xeons.
> 
> Linux toaster 3.15.0 #1 SMP PREEMPT Thu Jun 12 14:05:12 CEST 2014 x86_64 
> Intel(R) Xeon(R) CPU E5-2620 v2 @ 2.10GHz GenuineIntel GNU/Linux
> 
> Jun 17 16:59:47 toaster kernel: [ cut here ]
> Jun 17 16:59:47 toaster kernel: kernel BUG at mm/memory.c:3924!

http://marc.info/?l=linux-kernel&m=140319579508104&w=2

-- 
 Kirill A. Shutemov
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH v5 10/12] hw/mips: malta: Add KVM support

2014-06-19 Thread Aurelien Jarno
On Tue, Jun 17, 2014 at 11:10:35PM +0100, James Hogan wrote:
> In KVM mode the bootrom is loaded and executed from the last 1MB of
> DRAM.

What is the reason for that? I am not opposed to that, but if it is
really needed, it means that loading a bootloader into the flash area
(for example YAMON) won't work and that this should be forbidden to the
user.

> Based on "[PATCH 12/12] KVM/MIPS: General KVM support and support for
> SMP Guests" by Sanjay Lal .
> 
> Signed-off-by: James Hogan 
> Reviewed-by: Aurelien Jarno 
> Cc: Peter Maydell 
> Cc: Sanjay Lal 
> ---
> Changes in v5:
>  - Kseg0 doesn't actually change size, so use cpu_mips_kseg0_to_phys()
>rather than having the KVM specific cpu_mips_kvm_um_kseg0_to_phys().
> 
> Changes in v3:
>  - Remove unnecessary includes, especially linux/kvm.h which isn't a
>good idea on non-Linux (Peter Maydell).
> 
> Changes in v2:
>  - Removal of cps / GIC / SMP support
>  - Minimal bootloader modified to execute safely from RAM
>  - Remove "Writing bootloader to final 1MB of RAM" printf
> ---
>  hw/mips/mips_malta.c | 73 
> ++--
>  1 file changed, 53 insertions(+), 20 deletions(-)
> 
> diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
> index f4a7d4712952..8bc5392b4223 100644
> --- a/hw/mips/mips_malta.c
> +++ b/hw/mips/mips_malta.c
> @@ -51,6 +51,7 @@
>  #include "sysemu/qtest.h"
>  #include "qemu/error-report.h"
>  #include "hw/empty_slot.h"
> +#include "sysemu/kvm.h"
>  
>  //#define DEBUG_BOARD_INIT
>  
> @@ -603,29 +604,31 @@ static void network_init(PCIBus *pci_bus)
>  */
>  
>  static void write_bootloader (CPUMIPSState *env, uint8_t *base,
> -  int64_t kernel_entry)
> +  int64_t run_addr, int64_t kernel_entry)
>  {
>  uint32_t *p;
>  
>  /* Small bootloader */
>  p = (uint32_t *)base;
> -stl_p(p++, 0x0bf00160);  /* j 
> 0x1fc00580 */
> +
> +stl_p(p++, 0x0800 |  /* j 
> 0x1fc00580 */
> + ((run_addr + 0x580) & 0x0fff) >> 2);
>  stl_p(p++, 0x);  /* nop */
>  
>  /* YAMON service vector */
> -stl_p(base + 0x500, 0xbfc00580);  /* start: */
> -stl_p(base + 0x504, 0xbfc0083c);  /* print_count: */
> -stl_p(base + 0x520, 0xbfc00580);  /* start: */
> -stl_p(base + 0x52c, 0xbfc00800);  /* flush_cache: */
> -stl_p(base + 0x534, 0xbfc00808);  /* print: */
> -stl_p(base + 0x538, 0xbfc00800);  /* reg_cpu_isr: */
> -stl_p(base + 0x53c, 0xbfc00800);  /* unred_cpu_isr: */
> -stl_p(base + 0x540, 0xbfc00800);  /* reg_ic_isr: */
> -stl_p(base + 0x544, 0xbfc00800);  /* unred_ic_isr: */
> -stl_p(base + 0x548, 0xbfc00800);  /* reg_esr: */
> -stl_p(base + 0x54c, 0xbfc00800);  /* unreg_esr: */
> -stl_p(base + 0x550, 0xbfc00800);  /* getchar: */
> -stl_p(base + 0x554, 0xbfc00800);  /* syscon_read: */
> +stl_p(base + 0x500, run_addr + 0x0580);  /* start: */
> +stl_p(base + 0x504, run_addr + 0x083c);  /* print_count: */
> +stl_p(base + 0x520, run_addr + 0x0580);  /* start: */
> +stl_p(base + 0x52c, run_addr + 0x0800);  /* flush_cache: */
> +stl_p(base + 0x534, run_addr + 0x0808);  /* print: */
> +stl_p(base + 0x538, run_addr + 0x0800);  /* reg_cpu_isr: */
> +stl_p(base + 0x53c, run_addr + 0x0800);  /* unred_cpu_isr: */
> +stl_p(base + 0x540, run_addr + 0x0800);  /* reg_ic_isr: */
> +stl_p(base + 0x544, run_addr + 0x0800);  /* unred_ic_isr: */
> +stl_p(base + 0x548, run_addr + 0x0800);  /* reg_esr: */
> +stl_p(base + 0x54c, run_addr + 0x0800);  /* unreg_esr: */
> +stl_p(base + 0x550, run_addr + 0x0800);  /* getchar: */
> +stl_p(base + 0x554, run_addr + 0x0800);  /* syscon_read: */
>  
>  
>  /* Second part of the bootloader */
> @@ -701,7 +704,7 @@ static void write_bootloader (CPUMIPSState *env, uint8_t 
> *base,
>  p = (uint32_t *) (base + 0x800);
>  stl_p(p++, 0x03e8); /* jr ra */
>  stl_p(p++, 0x2402); /* li v0,0 */
> -   /* 808 YAMON print */
> +/* 808 YAMON print */
>  stl_p(p++, 0x03e06821); /* move 
> t5,ra */
>  stl_p(p++, 0x00805821); /* move 
> t3,a0 */
>  stl_p(p++, 0x00a05021); /* move 
> t2,a1 */
> @@ -774,6 +777,7 @@ static int64_t load_kernel (void)
>  uint32_t *prom_buf;
>  long prom_size;
>  int prom_index = 0;
> +uint64_t (*xlate_to_kseg0) (void *opaque, uint64_t addr);
>  
>  #ifdef TARGET_WORDS_BIGENDIAN
>  big_endian = 1;
> @@ -788,6 +792,11 @@ static int64_t load_kernel (void)
>  loaderparams.kernel_filename);
>  exit(1);
>

Re: [PATCH v5 00/12] KVM Support for MIPS32 Processors

2014-06-19 Thread Aurelien Jarno
On Wed, Jun 18, 2014 at 05:00:47PM +0200, Paolo Bonzini wrote:
> Il 18/06/2014 00:10, James Hogan ha scritto:
> >The patchset depends on v4 of "target-mips: implement UserLocal
> >Register". I'm aiming for QEMU 2.1, hopefully it isn't too late to get
> >some final review.
> >
> >Thanks to everybody who has already taken part in review.
> >
> >This patchset implements KVM support for MIPS32 processors, using Trap &
> >Emulation.
> >
> >In KVM mode, CPU virtualization is handled via the kvm kernel module,
> >while system and I/O virtualization leverage the Malta model already
> >present in QEMU.
> >
> >Both Guest kernel and Guest Userspace execute in UM. The Guest address
> >space is as folows:
> >Guest User address space:   0x -> 0x4000
> >Guest Kernel Unmapped:  0x4000 -> 0x6000
> >Guest Kernel Mapped:0x6000 -> 0x8000
> >
> >As a result, Guest Usermode virtual memory is limited to 1GB.
> >
> >KVM support (by trap and emulate) was added to the Linux kernel in
> >v3.10. This patchset partly depends on MIPS KVM work which will land in
> >v3.16 (for example to save/restore the state of various registers and
> >the KVM Count/Compare timer).
> >
> >Changes in v5:
> >
> >Changes addressing review comments from v4 patchset, and to use the MIPS
> >KVM timer API added in v3.16.
> >
> >A git tag for this version of the patchset can also be found on github:
> >https://github.com/jahogan/qemu-kvm-mips.git kvm-mips-v5
> >
> > - Rebase on master + v4 of "target-mips: implement UserLocal Register".
> > - New patch ([01/12] target-mips: Reset CPU timer consistently) to
> >   address timer reset behaviour (reported by Paolo Bonzini).
> > - New patch ([08/12] target-mips: Call kvm_mips_reset_vcpu() from
> >   mips_cpu_reset()) and rename kvm_arch_reset_vcpu to
> >   kvm_mips_reset_vcpu, based on commit 50a2c6e55fa2 (kvm: reset state
> >   from the CPU's reset method).
> > - KSEG0 doesn't actually change size, so fix mask in
> >   cpu_mips_kseg0_to_phys() (patch 3) and use that instead of having the
> >   KVM specific cpu_mips_kvm_um_kseg0_to_phys() (patch 10).
> > - Fix typo in patch 9 subject (s/interupts/interrupts/).
> > - Rename kvm_mips_te_{put,get}_cp0_registers() functions to drop the
> >   "te_" since they're not really specific to T&E.
> > - Pass level through from kvm_arch_put_registers() to
> >   kvm_mips_put_cp0_registers() rather than hard coding it to
> >   KVM_PUT_FULL_STATE.
> > - Fix KVM_REG_MIPS_CP0_* definitions to set KVM_REG_MIPS and
> >   KVM_REG_SIZE_U32/KVM_REG_SIZE_U64 (using a macro).
> > - Remove unused KVM_REG_MIPS_CP0_* definitions for now.
> > - Correct type of kvm_mips_{get,put}_one_{,ul}reg() reg_id argument to
> >   uint64_t. Various high bits must be set to disambiguate the
> >   architecture and register size.
> > - Simplify register access functions slightly.
> > - Add register accessors for always-64-bit registers (rather than ulong
> >   registers). These are needed for virtual KVM registers for
> >   controlling the KVM Compare/Count timer.
> > - Save and restore KVM timer state with the rest of the state, and also
> >   when VM clock is started or stopped. When the KVM timer state is
> >   restored (or VM clock restarted) it is resumed with the stored count
> >   at the monotonic time when the VM clock was last stopped. If the VM
> >   clock hasn't been stopped it resumes from the monotonic time when the
> >   state was saved (i.e. as if the timer was never stopped).
> >   Changes since RFC patch on kernel KVM thread "[PATCH v2 00/23] MIPS:
> > KVM: Fixes and guest timer rewrite"):
> >- Simplified, removing extra state for storing VM time of
> >  save/restore, at the cost of losing/gaining time when VM gets
> >  stopped and started (Paolo Bonzini).
> > - Save and restore the UserLocal and HWREna CP0 registers.
> > - Improve get/put KVM register error handling with DPRINTFs and fall
> >   through so that getting/putting of all the registers is attempted
> >   even if one of them fails due to being unimplemented in the kernel.
> >
> >Changes in v4:
> >
> >Changes mostly addressing a few review comments from v3 patchset.
> >
> >A git tag for this version of the patchset can also be found on github:
> >https://github.com/jahogan/qemu-kvm-mips.git kvm-mips-v4
> >
> > - Rebase on v2.0.0-rc0.
> > - Use int32_t instead of int32 (which is for softfloat) in kvm register
> >   accessors (Andreas Färber).
> > - Use uint64_t instead of __u64 (which is really just for kernel
> >   headers) in the kvm register accessors (Andreas Färber).
> > - Cast pointer to uintptr_t rather than target_ulong in kvm register
> >   accessors.
> > - Remove some redundant casts in kvm register accessors.
> > - Add MAINTAINERS entry for MIPS KVM.
> >
> >Changes in v3:
> >
> >Changes mostly addressing review comments from v2 patchset.
> >
> >A git tag for this version of the patchset can also be found on github:
> >https://github.com/jahogan/qemu-kvm-mip

Re: [PATCH 4/4] kvm: Implement PEBS virtualization

2014-06-19 Thread Andi Kleen
> Userspace then can read/write these MSRs, and add them to the migration
> stream.  QEMU has code for that.

Thanks. The PEBS setup always redoes its state; it can be redone arbitrarily often.

So the only change needed would be to add the MSRs to some list in qemu?

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] perf: ignore LBR and offcore_rsp.

2014-06-19 Thread Andi Kleen
Andi Kleen  writes:
>>
>>> Signed-off-by: Kan Liang 
>>
>> And here I thought that Andi was of the opinion that if you set CPUID to
>> indicate a particular CPU you had better also handle all its MSRs.
>
> Yes, philosophically that would be the right way,
> but we needed a short term fix to stop things from crashing, and that
> was the simplest.

I should add there is another reason for this patch now,
and doing it in perf instead of somewhere else
(this should probably go into the description).

With PT on enabling LBR can #GP. So perf needs to handle
this case without crashing. This can happen independently
of any hypervisors.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 04/20] arm64: boot protocol documentation update for GICv3

2014-06-19 Thread Marc Zyngier
Hi Mark,

On 19/06/14 15:01, Mark Rutland wrote:
> Hi Marc,
> 
> On Thu, Jun 19, 2014 at 10:19:27AM +0100, Marc Zyngier wrote:
>> Linux has some requirements that must be satisfied in order to boot
>> on a system built with a GICv3.
>>
>> Acked-by: Christoffer Dall 
>> Signed-off-by: Marc Zyngier 
>> ---
>>  Documentation/arm64/booting.txt | 6 ++
>>  1 file changed, 6 insertions(+)
>>
>> diff --git a/Documentation/arm64/booting.txt 
>> b/Documentation/arm64/booting.txt
>> index 37fc4f6..e28ccec 100644
>> --- a/Documentation/arm64/booting.txt
>> +++ b/Documentation/arm64/booting.txt
>> @@ -141,6 +141,12 @@ Before jumping into the kernel, the following 
>> conditions must be met:
>>the kernel image will be entered must be initialised by software at a
>>higher exception level to prevent execution in an UNKNOWN state.
>>  
>> +  For systems with a GICv3 interrupt controller, it is expected that:
>> +  - If EL3 is present, it must program ICC_SRE_EL3.Enable (bit 3) to
>> +0b1 and ICC_SRE_EL3.SRE (bit 0) to 0b1.
>> +  - If the kernel is entered at EL1, EL2 must set ICC_SRE_EL2.Enable
>> +(bit 3) to 0b1 and ICC_SRE_EL2.SRE (bit 0) to 0b1.
> 
> Apologies for spotting this so late, but to me this sounds slightly
> ambiguous. The use of "it is expected" doesn't read like a hard
> requirement, and in the first point, it's ambiguous as to what "it" is.
> 
> I assume that if the GIC is communicated to the kernel as a GICv2 then
> these points do not hold?

The first point always holds, specially if the kernel is entered at EL2
(see patch #2 and the way we initialize System Registers in head.S). At
this stage, we haven't looked at DT yet, and must setup EL2
independently of what the platform will describe. The only source of
information we have is whether or not this CPU implements GICv3 System
Registers (id_aa64pfr0_el1).

Assuming EL3 doesn't set these two bits, you will end up trapping back
to EL3. You can hope that EL3 will do the right thing (do what is
described above and restart the offending instruction at EL2). If it
doesn't, you're dead.

> How about:
> 
>   For systems with a GICv3 interrupt controller, where the presence of
>   GICv3 is communicated to the kernel:
>   - If EL3 is present:
> ICC_SRE_EL3.Enable (bit 3) must be initialised to 0b1.
> ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1.
>   - If the kernel is entered at EL1:
> ICC_SRE_EL2.Enable (bit 3) must be initialised to 0b1.
> ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1.

I'm happy with that change, provided that we get rid of the ", where the
presence of GICv3 is communicated to the kernel".

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 4/5] KVM: MMU: reload request from GET_DIRTY_LOG path

2014-06-19 Thread Marcelo Tosatti
On Thu, Jun 19, 2014 at 11:17:19AM +0300, Gleb Natapov wrote:
> On Wed, Jun 18, 2014 at 08:12:07PM -0300, mtosa...@redhat.com wrote:
> > Reload remote vcpus MMU from GET_DIRTY_LOG codepath, before
> > deleting a pinned spte.
> > 
> > Signed-off-by: Marcelo Tosatti 
> > 
> > ---
> >  arch/x86/kvm/mmu.c |3 +++
> >  1 file changed, 3 insertions(+)
> > 
> > Index: kvm.pinned-sptes/arch/x86/kvm/mmu.c
> > ===
> > --- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.c2014-06-13 
> > 16:50:50.040140594 -0300
> > +++ kvm.pinned-sptes/arch/x86/kvm/mmu.c 2014-06-13 16:51:05.620104451 
> > -0300
> > @@ -1247,6 +1247,9 @@
> > spte &= ~SPTE_MMU_WRITEABLE;
> > spte = spte & ~PT_WRITABLE_MASK;
> >  
> > +   if (is_pinned_spte(spte))
> > +   mmu_reload_pinned_vcpus(kvm);
> > +
> Why write protect it at all? mmu_reload_pinned_vcpus() will unprotect it
> anyway
> on the next vmentry. Isn't it better to just report all pinned pages as dirty
> always?

That was the initial plan, however its awkward to stop vcpus, execute
get_dirty_log twice, and have pages marked as dirty on the second
execution.

That is, it is "incorrect" to report pages as dirty when they are
clean.

Moreover, if the number of pinned pages is larger than the dirty
threshold to stop VM and migrate, you'll never migrate. If vcpus are
in HLT and don't VM-enter immediately, the pages should not be refaulted
right away.

Do you think the optimization is worthwhile ?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/5] KVM: MMU: allow pinning spte translations (TDP-only)

2014-06-19 Thread Marcelo Tosatti
On Thu, Jun 19, 2014 at 11:01:06AM +0300, Avi Kivity wrote:
> 
> On 06/19/2014 02:12 AM, mtosa...@redhat.com wrote:
> >Allow vcpus to pin spte translations by:
> >
> >1) Creating a per-vcpu list of pinned ranges.
> >2) On mmu reload request:
> > - Fault ranges.
> > - Mark sptes with a pinned bit.
> > - Mark shadow pages as pinned.
> >
> >3) Then modify the following actions:
> > - Page age => skip spte flush.
> > - MMU notifiers => force mmu reload request (which kicks cpu out of
> > guest mode).
> > - GET_DIRTY_LOG => force mmu reload request.
> > - SLAB shrinker => skip shadow page deletion.
> >
> >TDP-only.
> >
> >+int kvm_mmu_register_pinned_range(struct kvm_vcpu *vcpu,
> >+  gfn_t base_gfn, unsigned long npages)
> >+{
> >+struct kvm_pinned_page_range *p;
> >+
> >+mutex_lock(&vcpu->arch.pinned_mmu_mutex);
> >+list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
> >+if (p->base_gfn == base_gfn && p->npages == npages) {
> >+mutex_unlock(&vcpu->arch.pinned_mmu_mutex);
> >+return -EEXIST;
> >+}
> >+}
> >+mutex_unlock(&vcpu->arch.pinned_mmu_mutex);
> >+
> >+if (vcpu->arch.nr_pinned_ranges >=
> >+KVM_MAX_PER_VCPU_PINNED_RANGE)
> >+return -ENOSPC;
> >+
> >+p = kzalloc(sizeof(struct kvm_pinned_page_range), GFP_KERNEL);
> >+if (!p)
> >+return -ENOMEM;
> >+
> >+vcpu->arch.nr_pinned_ranges++;
> >+
> >+trace_kvm_mmu_register_pinned_range(vcpu->vcpu_id, base_gfn, npages);
> >+
> >+INIT_LIST_HEAD(&p->link);
> >+p->base_gfn = base_gfn;
> >+p->npages = npages;
> >+mutex_lock(&vcpu->arch.pinned_mmu_mutex);
> >+list_add(&p->link, &vcpu->arch.pinned_mmu_pages);
> >+mutex_unlock(&vcpu->arch.pinned_mmu_mutex);
> >+kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
> >+
> >+return 0;
> >+}
> >+
> 
> What happens if ranges overlap (within a vcpu, cross-vcpu)?

The page(s) are faulted multiple times if ranges overlap within a vcpu.

I see no reason to disallow overlapping ranges. Do you?

> Or if a range overflows and wraps around 0? 

Pagefault fails on vm-entry -> KVM_REQ_TRIPLE_FAULT.

Will double check for overflows to make sure.

> Or if it does not refer to RAM?

User should have pinned the page(s) before with gfn_to_page / get_page,
which ensures it is guest RAM ? (hum, although it might be good to 
double check here as well).

> Looks like you're limiting the number of ranges, but not the number
> of pages, so a guest can lock all of its memory.

Yes. The page pinning at get_page time can also lock all of
guest memory.

> >+
> >+/*
> >+ * Pin KVM MMU page translations. This guarantees, for valid
> >+ * addresses registered by kvm_mmu_register_pinned_range (valid address
> >+ * meaning address which possesses sufficient information for fault to
> >+ * be resolved), valid translations exist while in guest mode and
> >+ * therefore no VM-exits due to faults will occur.
> >+ *
> >+ * Failure to instantiate pages will abort guest entry.
> >+ *
> >+ * Page frames should be pinned with get_page in advance.
> >+ *
> >+ * Pinning is not guaranteed while executing as L2 guest.
> 
> Does this undermine security?

PEBS writes should not be enabled when L2 guest is executing.

> >+static void kvm_mmu_pin_pages(struct kvm_vcpu *vcpu)
> >+{
> >+struct kvm_pinned_page_range *p;
> >+
> >+if (is_guest_mode(vcpu))
> >+return;
> >+
> >+if (!vcpu->arch.mmu.direct_map)
> >+return;
> >+
> >+ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
> >+
> >+mutex_lock(&vcpu->arch.pinned_mmu_mutex);
> 
> Is the mutex actually needed? It seems it's only taken in vcpu
> context, so the vcpu mutex should be sufficient.

Right. Actually the list_empty() access from kicker function might be unsafe.
Will double check.

> >+list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
> >+gfn_t gfn_offset;
> >+
> >+for (gfn_offset = 0; gfn_offset < p->npages; gfn_offset++) {
> >+gfn_t gfn = p->base_gfn + gfn_offset;
> >+int r;
> >+bool pinned = false;
> >+
> >+r = vcpu->arch.mmu.page_fault(vcpu, gfn << PAGE_SHIFT,
> >+ PFERR_WRITE_MASK, false,
> >+ true, &pinned);
> >+/* MMU notifier sequence window: retry */
> >+if (!r && !pinned)
> >+kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
> >+if (r) {
> >+kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
> >+break;
> >+}
> >+
> >+}
> >+}
> >+mutex_unlock(&vcpu->arch.pinned_mmu_mutex);
> >+}
> >+
> >  int kvm_mmu_load(struct kvm_vcpu *vcpu)
> >  {
> > int

Re: [RFC PATCH 1/1] Move two pinned pages to non-movable node in kvm.

2014-06-19 Thread Marcelo Tosatti
On Thu, Jun 19, 2014 at 12:20:32PM +0300, Gleb Natapov wrote:
> CCing Marcelo,
> 
> On Wed, Jun 18, 2014 at 02:50:44PM +0800, Tang Chen wrote:
> > Hi Gleb,
> > 
> > Thanks for the quick reply. Please see below.
> > 
> > On 06/18/2014 02:12 PM, Gleb Natapov wrote:
> > >On Wed, Jun 18, 2014 at 01:50:00PM +0800, Tang Chen wrote:
> > >>[Questions]
> > >>And by the way, would you guys please answer the following questions for 
> > >>me ?
> > >>
> > >>1. What's the ept identity pagetable for ?  Only one page is enough ?
> > >>
> > >>2. Is the ept identity pagetable only used in realmode ?
> > >>Can we free it once the guest is up (vcpu in protect mode)?
> > >>
> > >>3. Now, ept identity pagetable is allocated in qemu userspace.
> > >>Can we allocate it in kernel space ?
> > >What would be the benefit?
> > 
> > I think the benefit is we can hot-remove the host memory a kvm guest
> > is using.
> > 
> > For now, only memory in ZONE_MOVABLE can be migrated/hot-removed. And the
> > kernel
> > will never use ZONE_MOVABLE memory. So if we can allocate these two pages in
> > kernel space, we can pin them without any trouble. When doing memory
> > hot-remove,
> > the kernel will not try to migrate these two pages.
> But we can do that by other means, no? The patch you've sent for instance.
> 
> > 
> > >
> > >>
> > >>4. If I want to migrate these two pages, what do you think is the best 
> > >>way ?
> > >>
> > >I answered most of those here: 
> > >http://www.mail-archive.com/kvm@vger.kernel.org/msg103718.html
> > 
> > I'm sorry I must missed this email.
> > 
> > Seeing your advice, we can unpin these two pages and repin them in the next
> > EPT violation.
> > So about this problem, which solution would you prefer, allocate these two
> > pages in kernel
> > space, or migrate them before memory hot-remove ?
> > 
> > I think the first solution is simpler. But I'm not quite sure if there is
> > any other pages
> > pinned in memory. If we have the same problem with other kvm pages, I think
> > it is better to
> > solve it in the second way.
> > 
> > What do you think ?
> Remove pinning is preferable. In fact looks like for identity pagetable
> it should be trivial, just don't pin. APIC access page is a little bit
> more complicated since its physical address needs to be tracked to be
> updated in VMCS.

Yes, and there are new users of page pinning as well soon (see PEBS
threads on kvm-devel).

Was thinking of notifiers scheme. Perhaps:

->begin_page_unpin(struct page *page)
- Remove any possible access to page.

->end_page_unpin(struct page *page)
- Reinstantiate any possible access to page.

For KVM:

->begin_page_unpin()
- Remove APIC-access page address from VMCS.
  or
- Remove spte translation to pinned page.

- Put vcpu in state where no VM-entries are allowed.

->end_page_unpin()
- Setup APIC-access page, ...
- Allow vcpu to VM-entry.


Because allocating APIC access page from distant NUMA node can
be a performance problem, i believe.

I'd be happy to know why notifiers are overkill.


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 3/5] KVM: MMU: notifiers support for pinned sptes

2014-06-19 Thread Marcelo Tosatti
On Thu, Jun 19, 2014 at 09:48:50AM +0300, Gleb Natapov wrote:
> On Wed, Jun 18, 2014 at 08:12:06PM -0300, mtosa...@redhat.com wrote:
> > Request KVM_REQ_MMU_RELOAD when deleting sptes from MMU notifiers.
> > 
> > Keep pinned sptes intact if page aging.
> > 
> > Signed-off-by: Marcelo Tosatti 
> > 
> > ---
> >  arch/x86/kvm/mmu.c |   71 
> > ++---
> >  1 file changed, 62 insertions(+), 9 deletions(-)
> > 
> > Index: kvm.pinned-sptes/arch/x86/kvm/mmu.c
> > ===
> > --- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.c2014-06-18 
> > 17:28:24.339435654 -0300
> > +++ kvm.pinned-sptes/arch/x86/kvm/mmu.c 2014-06-18 17:29:32.510225755 
> > -0300
> > @@ -1184,6 +1184,42 @@
> > kvm_flush_remote_tlbs(vcpu->kvm);
> >  }
> >  
> > +static void ack_flush(void *_completed)
> > +{
> > +}
> > +
> > +static void mmu_reload_pinned_vcpus(struct kvm *kvm)
> > +{
> > +   int i, cpu, me;
> > +   cpumask_var_t cpus;
> > +   struct kvm_vcpu *vcpu;
> > +   unsigned int req = KVM_REQ_MMU_RELOAD;
> > +
> > +   zalloc_cpumask_var(&cpus, GFP_ATOMIC);
> > +
> > +   me = get_cpu();
> > +   kvm_for_each_vcpu(i, vcpu, kvm) {
> > +   if (list_empty(&vcpu->arch.pinned_mmu_pages))
> > +   continue;
> > +   kvm_make_request(req, vcpu);
> > +   cpu = vcpu->cpu;
> > +
> > +   /* Set ->requests bit before we read ->mode */
> > +   smp_mb();
> > +
> > +   if (cpus != NULL && cpu != -1 && cpu != me &&
> > + kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
> > +   cpumask_set_cpu(cpu, cpus);
> > +   }
> > +   if (unlikely(cpus == NULL))
> > +   smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
> > +   else if (!cpumask_empty(cpus))
> > +   smp_call_function_many(cpus, ack_flush, NULL, 1);
> > +   put_cpu();
> > +   free_cpumask_var(cpus);
> > +   return;
> > +}
> This is a c&p of make_all_cpus_request(), the only difference is checking
> of vcpu->arch.pinned_mmu_pages.  You can add make_some_cpus_request(..., bool 
> (*predicate)(struct kvm_vcpu *))
> to kvm_main.c and rewrite make_all_cpus_request() to use it instead.

Half-way through it I decided it was better to c&p.

Can change make_all_cpus_request() though if it makes more sense to you.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/5] KVM: MMU: allow pinning spte translations (TDP-only)

2014-06-19 Thread Marcelo Tosatti
On Thu, Jun 19, 2014 at 10:21:16AM +0300, Gleb Natapov wrote:
> On Wed, Jun 18, 2014 at 08:12:05PM -0300, mtosa...@redhat.com wrote:
> > Allow vcpus to pin spte translations by:
> > 
> > 1) Creating a per-vcpu list of pinned ranges.
> What if memory slot containing pinned range is going away?

->page_fault() should fail and guest abort. Will double check.

> > 2) On mmu reload request:
> > - Fault ranges.
> > - Mark sptes with a pinned bit.
> Should also be marked "dirty" as per SDM:
>  The three DS save area sections should be allocated from a non-paged pool, 
> and marked accessed and dirty

This (SDM text) is about guest pagetable AFAICS.

> > +   mutex_unlock(&vcpu->arch.pinned_mmu_mutex);
> > +
> > +   if (vcpu->arch.nr_pinned_ranges >=
> > +   KVM_MAX_PER_VCPU_PINNED_RANGE)
> > +   return -ENOSPC;
> Shouldn't we refuse to register pinned range if !TDP?

Sure.

> > +   /* MMU notifier sequence window: retry */
> > +   if (!r && !pinned)
> > +   kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
> > +   if (r) {
> > +   kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
> I do not think triple fault is appropriate here. The reasons for triple fault 
> are
> documented in SDM and this is not one of them. What about error exit to user 
> space?

Agree, will change.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH v5 10/12] hw/mips: malta: Add KVM support

2014-06-19 Thread Sanjay Lal

On Jun 19, 2014, at 9:27 AM, Aurelien Jarno  wrote:

> On Tue, Jun 17, 2014 at 11:10:35PM +0100, James Hogan wrote:
>> In KVM mode the bootrom is loaded and executed from the last 1MB of
>> DRAM.
> 
> What is the reason for that? I am not opposed to that, but if it is
> really needed, it means that loading a bootloader into the flash area
> (for example YAMON) won't work and that this should be forbidden to the
> user.
> 

In trap and emulate mode, both the kernel and userland run in user mode on the 
processor. Virtual addresses >= 0x80000000 are only accessible in kernel mode, 
and the default flash area (VA: 0xbfc00000/PA: 0x1fc00000) falls in this range.

We therefore decided to relocate the bootloader to the last 1MB of RAM.  This 
area is excluded from the RAM ranges supplied to the kernel, so it should not 
be accessible to the user.

Regards
Sanjay



signature.asc
Description: Message signed with OpenPGP using GPGMail


Re: [PATCH 4/4] kvm: Implement PEBS virtualization

2014-06-19 Thread Paolo Bonzini
> > Userspace then can read/write these MSRs, and add them to the migration
> > stream.  QEMU has code for that.
> 
> Thanks. The PEBS setup always redoes its state, can be arbitrarily often
> redone.
> 
> So the only change needed would be to add the MSRs to some list in qemu?

Yes, and also adding them to the migration stream if the MSRs do not
have the default (all-zero? need to look at the SDM) values.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 04/14] arm/arm64: KVM: wrap 64 bit MMIO accesses with two 32 bit ones

2014-06-19 Thread Chalamarla, Tirumalesh


-Original Message-
From: kvmarm-boun...@lists.cs.columbia.edu 
[mailto:kvmarm-boun...@lists.cs.columbia.edu] On Behalf Of Andre Przywara
Sent: Thursday, June 19, 2014 2:46 AM
To: linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu; 
kvm@vger.kernel.org
Cc: christoffer.d...@linaro.org
Subject: [PATCH 04/14] arm/arm64: KVM: wrap 64 bit MMIO accesses with two 32 
bit ones

Some GICv3 registers can and will be accessed as 64 bit registers.
Currently the register handling code can only deal with 32 bit accesses, so we 
do two consecutive calls to cover this.

Signed-off-by: Andre Przywara 
---
 virt/kvm/arm/vgic.c |   48 +---
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 4c6b212..b3cf4c7 
100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -906,6 +906,48 @@ static bool vgic_validate_access(const struct vgic_dist 
*dist,  }
 
 /*
+ * Call the respective handler function for the given range.
+ * We split up any 64 bit accesses into two consecutive 32 bit
+ * handler calls and merge the result afterwards.
+ */
+static bool call_range_handler(struct kvm_vcpu *vcpu,
+  struct kvm_exit_mmio *mmio,
+  unsigned long offset,
+  const struct mmio_range *range) {
+   u32 *data32 = (void *)mmio->data;
+   struct kvm_exit_mmio mmio32;
+   bool ret;
+
+   if (likely(mmio->len <= 4))
+   return range->handle_mmio(vcpu, mmio, offset);
+
+   /*
+* We assume that any access greater than 4 bytes is actually
+* 8 bytes long, caused by a 64-bit access
+*/
+
+   mmio32.len = 4;
+   mmio32.is_write = mmio->is_write;
+
+   mmio32.phys_addr = mmio->phys_addr + 4;
+   if (mmio->is_write)
+   *(u32 *)mmio32.data = data32[1];
+   ret = range->handle_mmio(vcpu, &mmio32, offset + 4);
+   if (!mmio->is_write)
+   data32[1] = *(u32 *)mmio32.data;
+
+   mmio32.phys_addr = mmio->phys_addr;
+   if (mmio->is_write)
+   *(u32 *)mmio32.data = data32[0];
+   ret |= range->handle_mmio(vcpu, &mmio32, offset);
+   if (!mmio->is_write)
+   data32[0] = *(u32 *)mmio32.data;
+
+   return ret;
+}

Any reason to use two 32 bits instead of one 64 bit. AArch32 on ARMv8 may be.

+
+/*
  * vgic_handle_mmio_range - handle an in-kernel MMIO access
  * @vcpu:  pointer to the vcpu performing the access
  * @run:   pointer to the kvm_run structure
@@ -936,10 +978,10 @@ static bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, 
struct kvm_run *run,
spin_lock(&vcpu->kvm->arch.vgic.lock);
offset -= range->base;
if (vgic_validate_access(dist, range, offset)) {
-   updated_state = range->handle_mmio(vcpu, mmio, offset);
+   updated_state = call_range_handler(vcpu, mmio, offset, range);
} else {
-   vgic_reg_access(mmio, NULL, offset,
-   ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+   if (!mmio->is_write)
+   memset(mmio->data, 0, mmio->len);

 What is the use of this memset.

updated_state = false;
}
spin_unlock(&vcpu->kvm->arch.vgic.lock);
--
1.7.9.5

___
kvmarm mailing list
kvm...@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   >