[PATCH] KVM: make the declaration of functions within 80 characters

2015-09-11 Thread Wei Yang
After 'commit 0b8ba4a2b658 ("KVM: fix checkpatch.pl errors in
kvm/coalesced_mmio.h")', the declarations of the two functions exceed 80
characters.

This patch reduces the TABs so that each line fits within 80 characters.

Signed-off-by: Wei Yang 
---
 virt/kvm/coalesced_mmio.h |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index 5cbf190..6bca74c 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -24,9 +24,9 @@ struct kvm_coalesced_mmio_dev {
 int kvm_coalesced_mmio_init(struct kvm *kvm);
 void kvm_coalesced_mmio_free(struct kvm *kvm);
 int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
-   

struct kvm_coalesced_mmio_zone *zone);
+   struct kvm_coalesced_mmio_zone *zone);
 int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
-   

struct kvm_coalesced_mmio_zone *zone);
+   struct kvm_coalesced_mmio_zone *zone);
 
 #else
 
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH] vfio/pci: Use kernel VPD access functions

2015-09-11 Thread Alex Williamson
The PCI VPD capability operates on a set of window registers in PCI
config space.  Writing to the address register triggers either a read
or write, depending on the setting of the PCI_VPD_ADDR_F bit within
the address register.  The data register provides either the source
for writes or the target for reads.

This model is susceptible to being broken by concurrent access, for
which the kernel has adopted a set of access functions to serialize
these registers.  Additionally, commits like 932c435caba8 ("PCI: Add
dev_flags bit to access VPD through function 0") and 7aa6ca4d39ed
("PCI: Add VPD function 0 quirk for Intel Ethernet devices") indicate
that VPD registers can be shared between functions on multifunction
devices creating dependencies between otherwise independent devices.

Fortunately it's quite easy to emulate the VPD registers, simply
storing copies of the address and data registers in memory and
triggering a VPD read or write on writes to the address register.
This allows vfio users to avoid seeing spurious register changes from
accesses on other devices and enables the use of shared quirks in the
host kernel.  We can theoretically still race with access through
sysfs, but the window of opportunity is much smaller.

Signed-off-by: Alex Williamson 
---

RFC - Is this something we should do?  Should we consider providing
similar emulation through PCI sysfs to allow lspci to also make use
of the vpd interfaces?

 drivers/vfio/pci/vfio_pci_config.c |   70 +++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/vfio_pci_config.c 
b/drivers/vfio/pci/vfio_pci_config.c
index ff75ca3..a8657ef 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -671,6 +671,73 @@ static int __init init_pci_cap_pm_perm(struct perm_bits 
*perm)
return 0;
 }
 
+static int vfio_vpd_config_write(struct vfio_pci_device *vdev, int pos,
+int count, struct perm_bits *perm,
+int offset, __le32 val)
+{
+   struct pci_dev *pdev = vdev->pdev;
+   __le16 *paddr = (__le16 *)(vdev->vconfig + pos - offset + PCI_VPD_ADDR);
+   __le32 *pdata = (__le32 *)(vdev->vconfig + pos - offset + PCI_VPD_DATA);
+   u16 addr;
+   u32 data;
+
+   /*
+* Write through to emulation.  If the write includes the upper byte
+* of PCI_VPD_ADDR, then the PCI_VPD_ADDR_F bit is written and we
+* have work to do.
+*/
+   count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
+   if (count < 0 || offset > PCI_VPD_ADDR + 1 ||
+   offset + count <= PCI_VPD_ADDR + 1)
+   return count;
+
+   addr = le16_to_cpu(*paddr);
+
+   if (addr & PCI_VPD_ADDR_F) {
+   data = le32_to_cpu(*pdata);
+   if (pci_write_vpd(pdev, addr & ~PCI_VPD_ADDR_F, 4, &data) != 4)
+   return count;
+   } else {
+   if (pci_read_vpd(pdev, addr, 4, &data) != 4)
+   return count;
+   *pdata = cpu_to_le32(data);
+   }
+
+   /*
+* Toggle PCI_VPD_ADDR_F in the emulated PCI_VPD_ADDR register to
+* signal completion.  If an error occurs above, we assume that not
+* toggling this bit will induce a driver timeout.
+*/
+   addr ^= PCI_VPD_ADDR_F;
+   *paddr = cpu_to_le16(addr);
+
+   return count;
+}
+
+/* Permissions for Vital Product Data capability */
+static int __init init_pci_cap_vpd_perm(struct perm_bits *perm)
+{
+   if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_VPD]))
+   return -ENOMEM;
+
+   perm->writefn = vfio_vpd_config_write;
+
+   /*
+* We always virtualize the next field so we can remove
+* capabilities from the chain if we want to.
+*/
+   p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
+
+   /*
+* Both the address and data registers are virtualized to
+* enable access through the pci_vpd_read/write functions
+*/
+   p_setw(perm, PCI_VPD_ADDR, (u16)ALL_VIRT, (u16)ALL_WRITE);
+   p_setd(perm, PCI_VPD_DATA, ALL_VIRT, ALL_WRITE);
+
+   return 0;
+}
+
 /* Permissions for PCI-X capability */
 static int __init init_pci_cap_pcix_perm(struct perm_bits *perm)
 {
@@ -790,6 +857,7 @@ void vfio_pci_uninit_perm_bits(void)
free_perm_bits(_perms[PCI_CAP_ID_BASIC]);
 
free_perm_bits(_perms[PCI_CAP_ID_PM]);
+   free_perm_bits(_perms[PCI_CAP_ID_VPD]);
free_perm_bits(_perms[PCI_CAP_ID_PCIX]);
free_perm_bits(_perms[PCI_CAP_ID_EXP]);
free_perm_bits(_perms[PCI_CAP_ID_AF]);
@@ -807,7 +875,7 @@ int __init vfio_pci_init_perm_bits(void)
 
/* Capabilities */
ret |= init_pci_cap_pm_perm(_perms[PCI_CAP_ID_PM]);
-   cap_perms[PCI_CAP_ID_VPD].writefn = vfio_raw_config_write;
+   ret |= 

Re: [PATCH 01/23] userfaultfd: linux/Documentation/vm/userfaultfd.txt

2015-09-11 Thread Michael Kerrisk (man-pages)
On 05/14/2015 07:30 PM, Andrea Arcangeli wrote:
> Add documentation.

Hi Andrea,

I do not recall... Did you write a man page also for this new system call?

Thanks,

Michael


> Signed-off-by: Andrea Arcangeli 
> ---
>  Documentation/vm/userfaultfd.txt | 140 
> +++
>  1 file changed, 140 insertions(+)
>  create mode 100644 Documentation/vm/userfaultfd.txt
> 
> diff --git a/Documentation/vm/userfaultfd.txt 
> b/Documentation/vm/userfaultfd.txt
> new file mode 100644
> index 000..c2f5145
> --- /dev/null
> +++ b/Documentation/vm/userfaultfd.txt
> @@ -0,0 +1,140 @@
> += Userfaultfd =
> +
> +== Objective ==
> +
> +Userfaults allow the implementation of on-demand paging from userland
> +and more generally they allow userland to take control of various memory
> +page faults, something otherwise only the kernel code could do.
> +
> +For example userfaults allow a proper and more optimal implementation
> +of the PROT_NONE+SIGSEGV trick.
> +
> +== Design ==
> +
> +Userfaults are delivered and resolved through the userfaultfd syscall.
> +
> +The userfaultfd (aside from registering and unregistering virtual
> +memory ranges) provides two primary functionalities:
> +
> +1) read/POLLIN protocol to notify a userland thread of the faults
> +   happening
> +
> +2) various UFFDIO_* ioctls that can manage the virtual memory regions
> +   registered in the userfaultfd that allows userland to efficiently
> +   resolve the userfaults it receives via 1) or to manage the virtual
> +   memory in the background
> +
> +The real advantage of userfaults if compared to regular virtual memory
> +management of mremap/mprotect is that the userfaults in all their
> +operations never involve heavyweight structures like vmas (in fact the
> +userfaultfd runtime load never takes the mmap_sem for writing).
> +
> +Vmas are not suitable for page- (or hugepage) granular fault tracking
> +when dealing with virtual address spaces that could span
> +Terabytes. Too many vmas would be needed for that.
> +
> +The userfaultfd once opened by invoking the syscall, can also be
> +passed using unix domain sockets to a manager process, so the same
> +manager process could handle the userfaults of a multitude of
> +different processes without them being aware about what is going on
> +(well of course unless they later try to use the userfaultfd
> +themselves on the same region the manager is already tracking, which
> +is a corner case that would currently return -EBUSY).
> +
> +== API ==
> +
> +When first opened the userfaultfd must be enabled invoking the
> +UFFDIO_API ioctl specifying a uffdio_api.api value set to UFFD_API (or
> +a later API version) which will specify the read/POLLIN protocol
> +userland intends to speak on the UFFD. The UFFDIO_API ioctl if
> +successful (i.e. if the requested uffdio_api.api is spoken also by the
> +running kernel), will return into uffdio_api.features and
> +uffdio_api.ioctls two 64bit bitmasks of respectively the activated
> +feature of the read(2) protocol and the generic ioctl available.
> +
> +Once the userfaultfd has been enabled the UFFDIO_REGISTER ioctl should
> +be invoked (if present in the returned uffdio_api.ioctls bitmask) to
> +register a memory range in the userfaultfd by setting the
> +uffdio_register structure accordingly. The uffdio_register.mode
> +bitmask will specify to the kernel which kind of faults to track for
> +the range (UFFDIO_REGISTER_MODE_MISSING would track missing
> +pages). The UFFDIO_REGISTER ioctl will return the
> +uffdio_register.ioctls bitmask of ioctls that are suitable to resolve
> +userfaults on the range registered. Not all ioctls will necessarily be
> +supported for all memory types depending on the underlying virtual
> +memory backend (anonymous memory vs tmpfs vs real filebacked
> +mappings).
> +
> +Userland can use the uffdio_register.ioctls to manage the virtual
> +address space in the background (to add or potentially also remove
> +memory from the userfaultfd registered range). This means a userfault
> +could be triggering just before userland maps in the background the
> +user-faulted page.
> +
> +The primary ioctl to resolve userfaults is UFFDIO_COPY. That
> +atomically copies a page into the userfault registered range and wakes
> +up the blocked userfaults (unless uffdio_copy.mode &
> +UFFDIO_COPY_MODE_DONTWAKE is set). Other ioctls work similarly to
> +UFFDIO_COPY.
> +
> +== QEMU/KVM ==
> +
> +QEMU/KVM is using the userfaultfd syscall to implement postcopy live
> +migration. Postcopy live migration is one form of memory
> +externalization consisting of a virtual machine running with part or
> +all of its memory residing on a different node in the cloud. The
> +userfaultfd abstraction is generic enough that not a single line of
> +KVM kernel code had to be modified in order to add postcopy live
> +migration to QEMU.
> +
> +Guest async page faults, FOLL_NOWAIT and all other GUP features work
> +just fine in 

[PATCH v2 13/22] KVM: ARM64: Add reset and access handlers for PMINTENSET and PMINTENCLR register

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Since the reset value of PMINTENSET and PMINTENCLR is UNKNOWN, use
reset_unknown for its reset handler. Add a new case to emulate writing
PMINTENSET or PMINTENCLR register.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.c | 34 ++
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 8307189..c3d4fb5 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -311,6 +311,18 @@ static bool access_pmu_regs(struct kvm_vcpu *vcpu,
vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
break;
}
+   case PMINTENSET_EL1: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   vcpu_sys_reg(vcpu, r->reg) |= val;
+   vcpu_sys_reg(vcpu, PMINTENCLR_EL1) |= val;
+   break;
+   }
+   case PMINTENCLR_EL1: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   vcpu_sys_reg(vcpu, r->reg) &= ~val;
+   vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
+   break;
+   }
case PMCR_EL0: {
/* Only update writeable bits of PMCR */
val = vcpu_sys_reg(vcpu, r->reg);
@@ -500,10 +512,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
/* PMINTENSET_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
- trap_raz_wi },
+ access_pmu_regs, reset_unknown, PMINTENSET_EL1 },
/* PMINTENCLR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
- trap_raz_wi },
+ access_pmu_regs, reset_unknown, PMINTENCLR_EL1 },
 
/* MAIR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
@@ -785,6 +797,18 @@ static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
vcpu_cp15(vcpu, c9_PMCNTENSET) &= ~val;
break;
}
+   case c9_PMINTENSET: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   vcpu_cp15(vcpu, r->reg) |= val;
+   vcpu_cp15(vcpu, c9_PMINTENCLR) |= val;
+   break;
+   }
+   case c9_PMINTENCLR: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   vcpu_cp15(vcpu, r->reg) &= ~val;
+   vcpu_cp15(vcpu, c9_PMINTENSET) &= ~val;
+   break;
+   }
case c9_PMCR: {
/* Only update writeable bits of PMCR */
val = vcpu_cp15(vcpu, r->reg);
@@ -871,8 +895,10 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_cp15_regs,
  reset_unknown_cp15, c9_PMXEVCNTR },
{ Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
-   { Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
-   { Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi },
+   { Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pmu_cp15_regs,
+ reset_unknown_cp15, c9_PMINTENSET },
+   { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pmu_cp15_regs,
+ reset_unknown_cp15, c9_PMINTENCLR },
 
{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 02/22] KVM: ARM64: Define PMU data structure for each vcpu

2015-09-11 Thread Marc Zyngier
On 11/09/15 09:54, Shannon Zhao wrote:
> From: Shannon Zhao 
> 
> Here we plan to support virtual PMU for guest by full software
> emulation, so define some basic structs and functions preparing for
> further steps. Define struct kvm_pmc for performance monitor counter and
> struct kvm_pmu for performance monitor unit for each vcpu. According to
> ARMv8 spec, the PMU contains at most 32(ARMV8_MAX_COUNTERS) counters.
> 
> Since this only supports ARM64 (or PMUv3), add a separate config symbol
> for it.
> 
> Signed-off-by: Shannon Zhao 
> ---
>  arch/arm64/include/asm/kvm_host.h |  2 ++
>  arch/arm64/include/asm/pmu.h  |  2 ++
>  arch/arm64/kvm/Kconfig|  8 
>  include/kvm/arm_pmu.h | 39 
> +++
>  4 files changed, 51 insertions(+)
>  create mode 100644 include/kvm/arm_pmu.h
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 2709db2..3c88873 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -42,6 +42,7 @@
>  
>  #include 
>  #include 
> +#include 
>  
>  #define KVM_VCPU_MAX_FEATURES 3
>  
> @@ -116,6 +117,7 @@ struct kvm_vcpu_arch {
>   /* VGIC state */
>   struct vgic_cpu vgic_cpu;
>   struct arch_timer_cpu timer_cpu;
> + struct kvm_pmu pmu;
>  
>   /*
>* Anything that is not used directly from assembly code goes
> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
> index b9f394a..95681e6 100644
> --- a/arch/arm64/include/asm/pmu.h
> +++ b/arch/arm64/include/asm/pmu.h
> @@ -19,6 +19,8 @@
>  #ifndef __ASM_PMU_H
>  #define __ASM_PMU_H
>  
> +#include 
> +

This include file seems out of place here, as I don't think anything in
this file is directly PMU related.

>  #define ARMV8_MAX_COUNTERS  32
>  #define ARMV8_COUNTER_MASK  (ARMV8_MAX_COUNTERS - 1)
>  
> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> index bfffe8f..3c7c58d 100644
> --- a/arch/arm64/kvm/Kconfig
> +++ b/arch/arm64/kvm/Kconfig
> @@ -31,6 +31,7 @@ config KVM
>   select KVM_VFIO
>   select HAVE_KVM_EVENTFD
>   select HAVE_KVM_IRQFD
> + select KVM_ARM_PMU
>   ---help---
> Support hosting virtualized guest machines.
>  
> @@ -52,4 +53,11 @@ config KVM_ARM_MAX_VCPUS
> large, so only choose a reasonable number that you expect to
> actually use.
>  
> +config KVM_ARM_PMU
> + bool
> + depends on KVM_ARM_HOST
> + ---help---
> +   Adds support for a virtual Performance Monitoring Unit (PMU) in
> +   virtual machines.
> +
>  endif # VIRTUALIZATION
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> new file mode 100644
> index 000..64af88a
> --- /dev/null
> +++ b/include/kvm/arm_pmu.h
> @@ -0,0 +1,39 @@
> +/*
> + * Copyright (C) 2015 Linaro Ltd.
> + * Author: Shannon Zhao 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see .
> + */
> +
> +#ifndef __ASM_ARM_KVM_PMU_H
> +#define __ASM_ARM_KVM_PMU_H
> +
> +#include 
> +
> +struct kvm_pmc {
> + u8 idx;/* index into the pmu->pmc array */
> + struct perf_event *perf_event;
> + struct kvm_vcpu *vcpu;
> +};
> +

Since you have a struct perf_event here, it makes more sense to #include
linux/perf.h here.

> +struct kvm_pmu {
> +#ifdef CONFIG_KVM_ARM_PMU
> + /* PMU IRQ Number per VCPU */
> + int irq_num;
> + /* IRQ pending flag */
> + bool irq_pending;
> + struct kvm_pmc pmc[ARMV8_MAX_COUNTERS];
> +#endif
> +};
> +
> +#endif
> 

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 02/22] KVM: ARM64: Define PMU data structure for each vcpu

2015-09-11 Thread Shannon Zhao


On 2015/9/11 17:10, Marc Zyngier wrote:
> On 11/09/15 09:54, Shannon Zhao wrote:
>> From: Shannon Zhao 
>>
>> Here we plan to support virtual PMU for guest by full software
>> emulation, so define some basic structs and functions preparing for
>> further steps. Define struct kvm_pmc for performance monitor counter and
>> struct kvm_pmu for performance monitor unit for each vcpu. According to
>> ARMv8 spec, the PMU contains at most 32(ARMV8_MAX_COUNTERS) counters.
>>
>> Since this only supports ARM64 (or PMUv3), add a separate config symbol
>> for it.
>>
>> Signed-off-by: Shannon Zhao 
>> ---
>>  arch/arm64/include/asm/kvm_host.h |  2 ++
>>  arch/arm64/include/asm/pmu.h  |  2 ++
>>  arch/arm64/kvm/Kconfig|  8 
>>  include/kvm/arm_pmu.h | 39 
>> +++
>>  4 files changed, 51 insertions(+)
>>  create mode 100644 include/kvm/arm_pmu.h
>>
>> diff --git a/arch/arm64/include/asm/kvm_host.h 
>> b/arch/arm64/include/asm/kvm_host.h
>> index 2709db2..3c88873 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -42,6 +42,7 @@
>>  
>>  #include 
>>  #include 
>> +#include 
>>  
>>  #define KVM_VCPU_MAX_FEATURES 3
>>  
>> @@ -116,6 +117,7 @@ struct kvm_vcpu_arch {
>>  /* VGIC state */
>>  struct vgic_cpu vgic_cpu;
>>  struct arch_timer_cpu timer_cpu;
>> +struct kvm_pmu pmu;
>>  
>>  /*
>>   * Anything that is not used directly from assembly code goes
>> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
>> index b9f394a..95681e6 100644
>> --- a/arch/arm64/include/asm/pmu.h
>> +++ b/arch/arm64/include/asm/pmu.h
>> @@ -19,6 +19,8 @@
>>  #ifndef __ASM_PMU_H
>>  #define __ASM_PMU_H
>>  
>> +#include 
>> +
> 
> This include file seems out of place here, as I don't think anything in
> this file is directly PMU related.
> 
>>  #define ARMV8_MAX_COUNTERS  32
>>  #define ARMV8_COUNTER_MASK  (ARMV8_MAX_COUNTERS - 1)
>>  
>> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
>> index bfffe8f..3c7c58d 100644
>> --- a/arch/arm64/kvm/Kconfig
>> +++ b/arch/arm64/kvm/Kconfig
>> @@ -31,6 +31,7 @@ config KVM
>>  select KVM_VFIO
>>  select HAVE_KVM_EVENTFD
>>  select HAVE_KVM_IRQFD
>> +select KVM_ARM_PMU
>>  ---help---
>>Support hosting virtualized guest machines.
>>  
>> @@ -52,4 +53,11 @@ config KVM_ARM_MAX_VCPUS
>>large, so only choose a reasonable number that you expect to
>>actually use.
>>  
>> +config KVM_ARM_PMU
>> +bool
>> +depends on KVM_ARM_HOST
>> +---help---
>> +  Adds support for a virtual Performance Monitoring Unit (PMU) in
>> +  virtual machines.
>> +
>>  endif # VIRTUALIZATION
>> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
>> new file mode 100644
>> index 000..64af88a
>> --- /dev/null
>> +++ b/include/kvm/arm_pmu.h
>> @@ -0,0 +1,39 @@
>> +/*
>> + * Copyright (C) 2015 Linaro Ltd.
>> + * Author: Shannon Zhao 
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program.  If not, see .
>> + */
>> +
>> +#ifndef __ASM_ARM_KVM_PMU_H
>> +#define __ASM_ARM_KVM_PMU_H
>> +
>> +#include 
>> +
>> +struct kvm_pmc {
>> +u8 idx;/* index into the pmu->pmc array */
>> +struct perf_event *perf_event;
>> +struct kvm_vcpu *vcpu;
>> +};
>> +
> 
> Since you have a struct perf_event here, it makes more sense to #include
> linux/perf.h here.
> 

Ok, will fix this.

>> +struct kvm_pmu {
>> +#ifdef CONFIG_KVM_ARM_PMU
>> +/* PMU IRQ Number per VCPU */
>> +int irq_num;
>> +/* IRQ pending flag */
>> +bool irq_pending;
>> +struct kvm_pmc pmc[ARMV8_MAX_COUNTERS];
>> +#endif
>> +};
>> +
>> +#endif
>>
> 
> Thanks,
> 
>   M.
> 

-- 
Shannon

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V4 3/4] kvm: fix zero length mmio searching

2015-09-11 Thread Paolo Bonzini


On 11/09/2015 05:17, Jason Wang wrote:
> + int len = r2->len ? r1->len : 0;
> +
>   if (r1->addr < r2->addr)
>   return -1;
> - if (r1->addr + r1->len > r2->addr + r2->len)
> + if (r1->addr + len > r2->addr + r2->len)
>   return 1;

Perhaps better:

gpa_t addr1 = r1->addr;
gpa_t addr2 = r2->addr;

if (addr1 < addr2)
return -1;

/* If r2->len == 0, match the exact address.  If r2->len != 0,
 * accept any overlapping write.  Any order is acceptable for
 * overlapping ranges, because kvm_io_bus_get_first_dev ensures
 * we process all of them.
 */
if (r2->len) {
addr1 += r1->len;
addr2 += r2->len;
}

if (addr1 > addr2)
return 1;

return 0;
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V4 3/4] kvm: fix zero length mmio searching

2015-09-11 Thread Cornelia Huck
On Fri, 11 Sep 2015 10:26:41 +0200
Paolo Bonzini  wrote:

> On 11/09/2015 05:17, Jason Wang wrote:
> > +   int len = r2->len ? r1->len : 0;
> > +
> > if (r1->addr < r2->addr)
> > return -1;
> > -   if (r1->addr + r1->len > r2->addr + r2->len)
> > +   if (r1->addr + len > r2->addr + r2->len)
> > return 1;
> 
> Perhaps better:
> 
>   gpa_t addr1 = r1->addr;
>   gpa_t addr2 = r2->addr;
> 
>   if (addr1 < addr2)
>   return -1;
> 
>   /* If r2->len == 0, match the exact address.  If r2->len != 0,
>* accept any overlapping write.  Any order is acceptable for
>* overlapping ranges, because kvm_io_bus_get_first_dev ensures
>* we process all of them.
>*/
>   if (r2->len) {
>   addr1 += r1->len;
>   addr2 += r2->len;
>   }
> 
>   if (addr1 > addr2)
>   return 1;
> 
>   return 0;
> 

+1 to documenting what the semantics are :)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V4 0/4] Fast MMIO eventfd fixes

2015-09-11 Thread Paolo Bonzini


On 11/09/2015 10:15, Michael S. Tsirkin wrote:
> I think we should add a capability for fast mmio.
> This way, userspace can avoid crashing buggy kernels.

I agree.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 10/22] KVM: ARM64: Add reset and access handlers for PMXEVCNTR register

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Since the reset value of PMXEVCNTR is UNKNOWN, use reset_unknown for
its reset handler. Add access handler which emulates writing and reading
PMXEVCNTR register. When reading PMXEVCNTR, call perf_event_read_value
to get the count value of the perf event.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.c | 41 +
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 59b7bc9..a51c8de 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -280,6 +280,12 @@ static bool access_pmu_regs(struct kvm_vcpu *vcpu,
 
if (p->is_write) {
switch (r->reg) {
+   case PMXEVCNTR_EL0: {
+   val = PMEVCNTR0_EL0 + vcpu_sys_reg(vcpu, PMSELR_EL0);
+   vcpu_sys_reg(vcpu, val) =
+ *vcpu_reg(vcpu, p->Rt) & 0xffffffffUL;
+   break;
+   }
case PMXEVTYPER_EL0: {
val = vcpu_sys_reg(vcpu, PMSELR_EL0);
kvm_pmu_set_counter_event_type(vcpu,
@@ -303,7 +309,17 @@ static bool access_pmu_regs(struct kvm_vcpu *vcpu,
break;
}
} else {
-   *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
+   switch (r->reg) {
+   case PMXEVCNTR_EL0: {
+   val = kvm_pmu_get_counter_value(vcpu,
+   vcpu_sys_reg(vcpu, PMSELR_EL0));
+   *vcpu_reg(vcpu, p->Rt) = val;
+   break;
+   }
+   default:
+   *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
+   break;
+   }
}
 
return true;
@@ -530,7 +546,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
  access_pmu_regs, reset_unknown, PMXEVTYPER_EL0 },
/* PMXEVCNTR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
- trap_raz_wi },
+ access_pmu_regs, reset_unknown, PMXEVCNTR_EL0 },
/* PMUSERENR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
  trap_raz_wi },
@@ -714,6 +730,12 @@ static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
 
if (p->is_write) {
switch (r->reg) {
+   case c9_PMXEVCNTR: {
+   val = c14_PMEVCNTR0 + vcpu_cp15(vcpu, c9_PMSELR);
+   vcpu_cp15(vcpu, val) =
+ *vcpu_reg(vcpu, p->Rt) & 0xffffffffUL;
+   break;
+   }
case c9_PMXEVTYPER: {
val = vcpu_cp15(vcpu, c9_PMSELR);
kvm_pmu_set_counter_event_type(vcpu,
@@ -737,7 +759,17 @@ static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
break;
}
} else {
-   *vcpu_reg(vcpu, p->Rt) = vcpu_cp15(vcpu, r->reg);
+   switch (r->reg) {
+   case c9_PMXEVCNTR: {
+   val = kvm_pmu_get_counter_value(vcpu,
+   vcpu_cp15(vcpu, c9_PMSELR));
+   *vcpu_reg(vcpu, p->Rt) = val;
+   break;
+   }
+   default:
+   *vcpu_reg(vcpu, p->Rt) = vcpu_cp15(vcpu, r->reg);
+   break;
+   }
}
 
return true;
@@ -785,7 +817,8 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_cp15_regs,
  reset_unknown_cp15, c9_PMXEVTYPER },
-   { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
+   { Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_cp15_regs,
+ reset_unknown_cp15, c9_PMXEVCNTR },
{ Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi },
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 08/22] KVM: ARM64: PMU: Add perf event map and introduce perf event creating function

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

When we use tools like perf on host, perf passes the event type and the
id of this event type category to kernel, then kernel will map them to
hardware event number and write this number to PMU PMEVTYPER_EL0
register. While we're trapping and emulating guest accesses to PMU
registers, we get the hardware event number and map it to the event type
and the id reversely. Then call perf_event kernel API to create an event
for it.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/include/asm/pmu.h |   2 +
 arch/arm64/kvm/Makefile  |   1 +
 include/kvm/arm_pmu.h|  15 +++
 virt/kvm/arm/pmu.c   | 240 +++
 4 files changed, 258 insertions(+)
 create mode 100644 virt/kvm/arm/pmu.c

diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
index 95681e6..42e7093 100644
--- a/arch/arm64/include/asm/pmu.h
+++ b/arch/arm64/include/asm/pmu.h
@@ -33,6 +33,8 @@
 #define ARMV8_PMCR_D   (1 << 3) /* CCNT counts every 64th cpu cycle */
 #define ARMV8_PMCR_X   (1 << 4) /* Export to ETM */
 #define ARMV8_PMCR_DP  (1 << 5) /* Disable CCNT if non-invasive debug*/
+/* Determines which PMCCNTR_EL0 bit generates an overflow */
+#define ARMV8_PMCR_LC  (1 << 6)
 #defineARMV8_PMCR_N_SHIFT  11   /* Number of counters 
supported */
 #defineARMV8_PMCR_N_MASK   0x1f
 #defineARMV8_PMCR_MASK 0x3f /* Mask for writable bits */
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index f90f4aa..78db4ee 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -27,3 +27,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
 kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
+kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 64af88a..387ec6f 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -36,4 +36,19 @@ struct kvm_pmu {
 #endif
 };
 
+#ifdef CONFIG_KVM_ARM_PMU
+unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
+   unsigned long select_idx);
+void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, unsigned long data,
+   unsigned long select_idx);
+#else
+unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
+   unsigned long select_idx)
+{
+   return 0;
+}
+void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, unsigned long data,
+   unsigned long select_idx) {}
+#endif
+
 #endif
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
new file mode 100644
index 000..0c7fe5c
--- /dev/null
+++ b/virt/kvm/arm/pmu.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2015 Linaro Ltd.
+ * Author: Shannon Zhao 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* PMU HW events mapping. */
+static struct kvm_pmu_hw_event_map {
+   unsigned eventsel;
+   unsigned event_type;
+} kvm_pmu_hw_events[] = {
+   [0] = { 0x11, PERF_COUNT_HW_CPU_CYCLES },
+   [1] = { 0x08, PERF_COUNT_HW_INSTRUCTIONS },
+   [2] = { 0x04, PERF_COUNT_HW_CACHE_REFERENCES },
+   [3] = { 0x03, PERF_COUNT_HW_CACHE_MISSES },
+   [4] = { 0x10, PERF_COUNT_HW_BRANCH_MISSES },
+};
+
+/* PMU HW cache events mapping. */
+static struct kvm_pmu_hw_cache_event_map {
+   unsigned eventsel;
+   unsigned cache_type;
+   unsigned cache_op;
+   unsigned cache_result;
+} kvm_pmu_hw_cache_events[] = {
+   [0] = { 0x12, PERF_COUNT_HW_CACHE_BPU, PERF_COUNT_HW_CACHE_OP_READ,
+ PERF_COUNT_HW_CACHE_RESULT_ACCESS },
+   [1] = { 0x12, PERF_COUNT_HW_CACHE_BPU, PERF_COUNT_HW_CACHE_OP_WRITE,
+ PERF_COUNT_HW_CACHE_RESULT_ACCESS },
+};
+
+static void kvm_pmu_set_evttyper(struct kvm_vcpu *vcpu, unsigned long idx,
+unsigned long val)
+{
+   if (!vcpu_mode_is_32bit(vcpu))
+   vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + idx) = val;
+   else
+   vcpu_cp15(vcpu, c14_PMEVTYPER0 + idx) = val;
+}
+
+static unsigned long kvm_pmu_get_evttyper(struct kvm_vcpu *vcpu,
+ 

[PATCH v2 12/22] KVM: ARM64: Add reset and access handlers for PMCNTENSET and PMCNTENCLR register

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Since the reset value of PMCNTENSET and PMCNTENCLR is UNKNOWN, use
reset_unknown for its reset handler. Add a new case to emulate writing
PMCNTENSET or PMCNTENCLR register.

When writing to PMCNTENSET, call perf_event_enable to enable the perf
event. When writing to PMCNTENCLR, call perf_event_disable to disable
the perf event.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.c | 46 +
 include/kvm/arm_pmu.h |  4 
 virt/kvm/arm/pmu.c| 48 +++
 3 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index f8d7de0..8307189 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -293,6 +293,24 @@ static bool access_pmu_regs(struct kvm_vcpu *vcpu,
   val);
break;
}
+   case PMCNTENSET_EL0: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   kvm_pmu_enable_counter(vcpu, val);
+   /*Value 1 of PMCNTENSET_EL0 and PMCNTENCLR_EL0 means
+* corresponding counter enabled */
+   vcpu_sys_reg(vcpu, r->reg) |= val;
+   vcpu_sys_reg(vcpu, PMCNTENCLR_EL0) |= val;
+   break;
+   }
+   case PMCNTENCLR_EL0: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   kvm_pmu_disable_counter(vcpu, val);
+   /*Value 0 of PMCNTENSET_EL0 and PMCNTENCLR_EL0 means
+* corresponding counter disabled */
+   vcpu_sys_reg(vcpu, r->reg) &= ~val;
+   vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
+   break;
+   }
case PMCR_EL0: {
/* Only update writeable bits of PMCR */
val = vcpu_sys_reg(vcpu, r->reg);
@@ -525,10 +543,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
  access_pmu_regs, reset_pmcr, PMCR_EL0, },
/* PMCNTENSET_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
- trap_raz_wi },
+ access_pmu_regs, reset_unknown, PMCNTENSET_EL0 },
/* PMCNTENCLR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
- trap_raz_wi },
+ access_pmu_regs, reset_unknown, PMCNTENCLR_EL0 },
/* PMOVSCLR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
  trap_raz_wi },
@@ -749,6 +767,24 @@ static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
   val);
break;
}
+   case c9_PMCNTENSET: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   kvm_pmu_enable_counter(vcpu, val);
+   /*Value 1 of PMCNTENSET_EL0 and PMCNTENCLR_EL0 means
+* corresponding counter enabled */
+   vcpu_cp15(vcpu, r->reg) |= val;
+   vcpu_cp15(vcpu, c9_PMCNTENCLR) |= val;
+   break;
+   }
+   case c9_PMCNTENCLR: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   kvm_pmu_disable_counter(vcpu, val);
+   /*Value 0 of PMCNTENSET_EL0 and PMCNTENCLR_EL0 means
+* corresponding counter disabled */
+   vcpu_cp15(vcpu, r->reg) &= ~val;
+   vcpu_cp15(vcpu, c9_PMCNTENSET) &= ~val;
+   break;
+   }
case c9_PMCR: {
/* Only update writeable bits of PMCR */
val = vcpu_cp15(vcpu, r->reg);
@@ -817,8 +853,10 @@ static const struct sys_reg_desc cp15_regs[] = {
/* PMU */
{ Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmu_cp15_regs,
  reset_pmcr, c9_PMCR },
-   { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
-   { Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 1), access_pmu_cp15_regs,
+ reset_unknown_cp15, c9_PMCNTENSET },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 2), access_pmu_cp15_regs,
+ reset_unknown_cp15, c9_PMCNTENCLR },
{ Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmu_cp15_regs,
  reset_unknown_cp15, c9_PMSELR },
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 387ec6f..59e70af 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -39,6 +39,8 @@ struct kvm_pmu {
 #ifdef CONFIG_KVM_ARM_PMU
 unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
   

[PATCH v2 05/22] KVM: ARM64: Add a helper for CP15 registers reset to UNKNOWN

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index d411e25..a0b3811 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -104,6 +104,14 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu,
vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
 }
 
+static inline void reset_unknown_cp15(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+   BUG_ON(!r->reg);
+   BUG_ON(r->reg >= NR_COPRO_REGS);
+   vcpu_cp15(vcpu, r->reg) = 0xdecafbad;
+}
+
 static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc 
*r)
 {
BUG_ON(!r->reg);
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3] ppc/spapr: Implement H_RANDOM hypercall in QEMU

2015-09-11 Thread Thomas Huth
The PAPR interface defines a hypercall to pass high-quality
hardware generated random numbers to guests. Recent kernels can
already provide this hypercall to the guest if the right hardware
random number generator is available. But in case the user wants
to use another source like EGD, or QEMU is running with an older
kernel, we should also have this call in QEMU, so that guests that
do not support virtio-rng yet can get good random numbers, too.

This patch now adds a new pseudo-device to QEMU that either
directly provides this hypercall to the guest or is able to
enable the in-kernel hypercall if available. The in-kernel
hypercall can be enabled with the use-kvm property, e.g.:

 qemu-system-ppc64 -device spapr-rng,use-kvm=true

For handling the hypercall in QEMU instead, a RngBackend is required
since the hypercall should provide "good" random data instead of
pseudo-random (like from a "simple" library function like rand()
or g_random_int()). Since there are multiple RngBackends available,
the user must select an appropriate backend via the "backend"
property of the device, e.g.:

 qemu-system-ppc64 -object rng-random,filename=/dev/hwrng,id=rng0 \
   -device spapr-rng,backend=rng0 ...

See http://wiki.qemu-project.org/Features-Done/VirtIORNG for
other examples of specifying RngBackends.

Signed-off-by: Thomas Huth 
---
 v3:
 - Completely reworked the patch set according to the discussion
   on the mailing list, so that the code is now encapsulated
   as a QEMU device in a separate file.

 hw/ppc/Makefile.objs   |   2 +-
 hw/ppc/spapr.c |   8 +++
 hw/ppc/spapr_rng.c | 178 +
 include/hw/ppc/spapr.h |   4 ++
 target-ppc/kvm.c   |   9 +++
 target-ppc/kvm_ppc.h   |   5 ++
 6 files changed, 205 insertions(+), 1 deletion(-)
 create mode 100644 hw/ppc/spapr_rng.c

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index c8ab06e..c1ffc77 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -3,7 +3,7 @@ obj-y += ppc.o ppc_booke.o
 # IBM pSeries (sPAPR)
 obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
-obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o
+obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
 obj-y += spapr_pci_vfio.o
 endif
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index bf0c64f..34e7d24 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -768,6 +768,14 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr,
 exit(1);
 }
 
+if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) {
+ret = spapr_rng_populate_dt(fdt);
+if (ret < 0) {
+fprintf(stderr, "couldn't setup rng device in fdt\n");
+exit(1);
+}
+}
+
 QLIST_FOREACH(phb, &spapr->phbs, list) {
 ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
 }
diff --git a/hw/ppc/spapr_rng.c b/hw/ppc/spapr_rng.c
new file mode 100644
index 000..d4923bc
--- /dev/null
+++ b/hw/ppc/spapr_rng.c
@@ -0,0 +1,178 @@
+/*
+ * QEMU sPAPR random number generator "device" for H_RANDOM hypercall
+ *
+ * Copyright 2015 Thomas Huth, Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/error-report.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/rng.h"
+#include "hw/ppc/spapr.h"
+#include "kvm_ppc.h"
+
+#define SPAPR_RNG(obj) \
+OBJECT_CHECK(sPAPRRngState, (obj), TYPE_SPAPR_RNG)
+
+typedef struct sPAPRRngState {
+/*< private >*/
+DeviceState ds;
+RngBackend *backend;
+bool use_kvm;
+} sPAPRRngState;
+
+typedef struct HRandomData {
+QemuSemaphore sem;
+union {
+uint64_t v64;
+uint8_t v8[8];
+} val;
+int received;
+} HRandomData;
+
+/* Callback function for the RngBackend */
+static void random_recv(void *dest, const void *src, size_t size)
+{
+HRandomData *hrdp = dest;
+
+if (src && size > 0) {
+assert(size + hrdp->received <= sizeof(hrdp->val.v8));
+memcpy(&hrdp->val.v8[hrdp->received], src, size);
+hrdp->received += size;
+}
+
+qemu_sem_post(&hrdp->sem);
+}
+
+/* Handler for the H_RANDOM hypercall */
+static target_ulong h_random(PowerPCCPU *cpu, sPAPRMachineState 

Re: [PATCH V4 0/4] Fast MMIO eventfd fixes

2015-09-11 Thread Jason Wang


On 09/11/2015 04:33 PM, Paolo Bonzini wrote:
>
> On 11/09/2015 10:15, Michael S. Tsirkin wrote:
>> I think we should add a capability for fast mmio.
>> This way, userspace can avoid crashing buggy kernels.
> I agree.
>
> Paolo

Right, then qemu will use datamatch eventfd if the kernel does not have the
capability.

Thanks

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 104271] Boot crashes on Lenovo W520 with KVM and Discrete Graphic enabled in BIOS

2015-09-11 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=104271

Hans Streibel  changed:

   What|Removed |Added

  Component|kvm |PCI
Version|unspecified |2.5
Product|Virtualization  |Drivers

--- Comment #5 from Hans Streibel  ---
Ok, will move it to Drivers/PCI.

Output of lspci -vvv now is in the appendix.
Output of dmesg is not included because that only shows output of a
successfully booted kernel. I even installed bootlogd but its log file does not
show error messages.

I made some pictures with my camera but I do not dare to attach those somewhat
big jpg files here. But if you really need them I can attach them.

The trouble starts (most of the time) right behind the line:
shpchp: Standard Hot Plug PCI Controller Driver Version: 0.4

Yes, disabling VT-d did help. Booting succeeded again.
Using "iommu=pt" however did not help. Same symptoms.

BTW, I wanted to express that my special notebook is not broken.
Not more broken than all (many/most) of the other W520s around.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 04/22] KVM: ARM64: Add reset and access handlers for PMCR_EL0 register

2015-09-11 Thread Marc Zyngier
On 11/09/15 09:54, Shannon Zhao wrote:
> From: Shannon Zhao 
> 
> Add reset handler which gets host value of PMCR_EL0 and make writable
> bits architecturally UNKNOWN. Add a common access handler for PMU
> registers which emulates writing and reading register and add emulation
> for PMCR.
> 
> Signed-off-by: Shannon Zhao 
> ---
>  arch/arm64/kvm/sys_regs.c | 76 
> +--
>  1 file changed, 74 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index c370b40..db1be44 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -33,6 +33,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  
> @@ -236,6 +237,48 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const 
> struct sys_reg_desc *r)
>   vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
>  }
>  
> +static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
> +{
> + u32 pmcr;
> +
> + asm volatile("mrs %0, pmcr_el0\n" : "=r" (pmcr));
> + /* Writable bits of PMCR_EL0 (ARMV8_PMCR_MASK) is reset to UNKNOWN*/
> + if (!vcpu_mode_is_32bit(vcpu))
> + vcpu_sys_reg(vcpu, r->reg) = (pmcr & ~ARMV8_PMCR_MASK)
> +  | (ARMV8_PMCR_MASK & 0xdecafbad);
> + else
> + vcpu_cp15(vcpu, r->reg) = (pmcr & ~ARMV8_PMCR_MASK)
> +   | (ARMV8_PMCR_MASK & 0xdecafbad);

I have some concerns about blindly reusing the top bits of the host's
PMCR_EL0 register, especially when it comes to PMCR_EL0.N. Given that
we're fully emulating the PMU, shouldn't we simply define how many
counters we're emulating?

> +}
> +
> +/* PMU registers accessor. */
> +static bool access_pmu_regs(struct kvm_vcpu *vcpu,
> + const struct sys_reg_params *p,
> + const struct sys_reg_desc *r)
> +{
> + unsigned long val;
> +
> + if (p->is_write) {
> + switch (r->reg) {
> + case PMCR_EL0: {
> + /* Only update writeable bits of PMCR */
> + val = vcpu_sys_reg(vcpu, r->reg);
> + val &= ~ARMV8_PMCR_MASK;
> + val |= *vcpu_reg(vcpu, p->Rt) & ARMV8_PMCR_MASK;
> + vcpu_sys_reg(vcpu, r->reg) = val;
> + break;
> + }
> + default:
> + vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
> + break;
> + }
> + } else {
> + *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
> + }
> +
> + return true;
> +}
> +
>  /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
>  #define DBG_BCR_BVR_WCR_WVR_EL1(n)   \
>   /* DBGBVRn_EL1 */   \
> @@ -427,7 +470,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>  
>   /* PMCR_EL0 */
>   { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
> -   trap_raz_wi },
> +   access_pmu_regs, reset_pmcr, PMCR_EL0, },
>   /* PMCNTENSET_EL0 */
>   { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
> trap_raz_wi },
> @@ -632,6 +675,34 @@ static const struct sys_reg_desc cp14_64_regs[] = {
>   { Op1( 0), CRm( 2), .access = trap_raz_wi },
>  };
>  
> +/* PMU CP15 registers accessor. */
> +static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
> +  const struct sys_reg_params *p,
> +  const struct sys_reg_desc *r)
> +{
> + unsigned long val;
> +
> + if (p->is_write) {
> + switch (r->reg) {
> + case c9_PMCR: {
> + /* Only update writeable bits of PMCR */
> + val = vcpu_cp15(vcpu, r->reg);
> + val &= ~ARMV8_PMCR_MASK;
> + val |= *vcpu_reg(vcpu, p->Rt) & ARMV8_PMCR_MASK;
> + vcpu_cp15(vcpu, r->reg) = val;
> + break;
> + }
> + default:
> + vcpu_cp15(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
> + break;
> + }
> + } else {
> + *vcpu_reg(vcpu, p->Rt) = vcpu_cp15(vcpu, r->reg);
> + }
> +
> + return true;
> +}
> +
>  /*
>   * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
>   * depending on the way they are accessed (as a 32bit or a 64bit
> @@ -660,7 +731,8 @@ static const struct sys_reg_desc cp15_regs[] = {
>   { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
>  
>   /* PMU */
> - { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi },
> + { Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmu_cp15_regs,
> +   reset_pmcr, c9_PMCR },
>   { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
>   

Re: [PATCH V4 1/4] kvm: factor out core eventfd assign/deassign logic

2015-09-11 Thread Cornelia Huck
On Fri, 11 Sep 2015 11:17:34 +0800
Jason Wang  wrote:

> This patch factors out core eventfd assign/deassign logic and leave
> the argument checking and bus index selection to callers.
> 
> Cc: Gleb Natapov 
> Cc: Paolo Bonzini 
> Signed-off-by: Jason Wang 
> ---
>  virt/kvm/eventfd.c | 83 
> --
>  1 file changed, 49 insertions(+), 34 deletions(-)
> 
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 9ff4193..163258d 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -771,40 +771,14 @@ static enum kvm_bus ioeventfd_bus_from_flags(__u32 
> flags)
>   return KVM_MMIO_BUS;
>  }
> 
> -static int
> -kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> +static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
> + enum kvm_bus bus_idx,
> + struct kvm_ioeventfd *args)
>  {
> - enum kvm_bus  bus_idx;
> - struct _ioeventfd*p;
> - struct eventfd_ctx   *eventfd;
> - int   ret;
> -
> - bus_idx = ioeventfd_bus_from_flags(args->flags);
> - /* must be natural-word sized, or 0 to ignore length */
> - switch (args->len) {
> - case 0:
> - case 1:
> - case 2:
> - case 4:
> - case 8:
> - break;
> - default:
> - return -EINVAL;
> - }
> 
> - /* check for range overflow */
> - if (args->addr + args->len < args->addr)
> - return -EINVAL;
> -
> - /* check for extra flags that we don't understand */
> - if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
> - return -EINVAL;
> -
> - /* ioeventfd with no length can't be combined with DATAMATCH */
> - if (!args->len &&
> - args->flags & (KVM_IOEVENTFD_FLAG_PIO |
> -KVM_IOEVENTFD_FLAG_DATAMATCH))
> - return -EINVAL;
> + struct eventfd_ctx *eventfd;
> + struct _ioeventfd *p;
> + int ret;
> 
>   eventfd = eventfd_ctx_fdget(args->fd);
>   if (IS_ERR(eventfd))
> @@ -873,14 +847,48 @@ fail:
>  }
> 
>  static int
> -kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)

You'll move this function to below the deassign function in patch 2.
Maybe do it already here?

>  {
>   enum kvm_bus  bus_idx;
> +
> + bus_idx = ioeventfd_bus_from_flags(args->flags);
> + /* must be natural-word sized, or 0 to ignore length */
> + switch (args->len) {
> + case 0:
> + case 1:
> + case 2:
> + case 4:
> + case 8:
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + /* check for range overflow */
> + if (args->addr + args->len < args->addr)
> + return -EINVAL;
> +
> + /* check for extra flags that we don't understand */
> + if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
> + return -EINVAL;
> +
> + /* ioeventfd with no length can't be combined with DATAMATCH */
> + if (!args->len &&
> + args->flags & (KVM_IOEVENTFD_FLAG_PIO |
> +KVM_IOEVENTFD_FLAG_DATAMATCH))
> + return -EINVAL;
> +
> + return kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
> +}
> +
> +static int
> +kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
> +struct kvm_ioeventfd *args)

While this file uses newline before function name quite often, putting
it on the same line seems more common - don't know which one the
maintainers prefer.

> +{
>   struct _ioeventfd*p, *tmp;
>   struct eventfd_ctx   *eventfd;
>   int   ret = -ENOENT;
> 
> - bus_idx = ioeventfd_bus_from_flags(args->flags);
>   eventfd = eventfd_ctx_fdget(args->fd);
>   if (IS_ERR(eventfd))
>   return PTR_ERR(eventfd);
> @@ -918,6 +926,13 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct 
> kvm_ioeventfd *args)
>   return ret;
>  }
> 
> +static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd 
> *args)
> +{
> + enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
> +
> + return kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
> +}
> +
>  int
>  kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>  {

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V4 1/4] kvm: factor out core eventfd assign/deassign logic

2015-09-11 Thread Paolo Bonzini


On 11/09/2015 09:39, Cornelia Huck wrote:
> > +static int
> > +kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
> > +  struct kvm_ioeventfd *args)
> 
> While this file uses newline before function name quite often, putting
> it on the same line seems more common - don't know which one the
> maintainers prefer.

I prefer it this way if it doesn't make the declaration one line longer,
which seems to be the case here.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V4 1/4] kvm: factor out core eventfd assign/deassign logic

2015-09-11 Thread Jason Wang


On 09/11/2015 03:39 PM, Cornelia Huck wrote:
> On Fri, 11 Sep 2015 11:17:34 +0800
> Jason Wang  wrote:
>
>> This patch factors out core eventfd assign/deassign logic and leave
>> the argument checking and bus index selection to callers.
>>
>> Cc: Gleb Natapov 
>> Cc: Paolo Bonzini 
>> Signed-off-by: Jason Wang 
>> ---
>>  virt/kvm/eventfd.c | 83 
>> --
>>  1 file changed, 49 insertions(+), 34 deletions(-)
>>
>> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
>> index 9ff4193..163258d 100644
>> --- a/virt/kvm/eventfd.c
>> +++ b/virt/kvm/eventfd.c
>> @@ -771,40 +771,14 @@ static enum kvm_bus ioeventfd_bus_from_flags(__u32 
>> flags)
>>  return KVM_MMIO_BUS;
>>  }
>>
>> -static int
>> -kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>> +static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
>> +enum kvm_bus bus_idx,
>> +struct kvm_ioeventfd *args)
>>  {
>> -enum kvm_bus  bus_idx;
>> -struct _ioeventfd*p;
>> -struct eventfd_ctx   *eventfd;
>> -int   ret;
>> -
>> -bus_idx = ioeventfd_bus_from_flags(args->flags);
>> -/* must be natural-word sized, or 0 to ignore length */
>> -switch (args->len) {
>> -case 0:
>> -case 1:
>> -case 2:
>> -case 4:
>> -case 8:
>> -break;
>> -default:
>> -return -EINVAL;
>> -}
>>
>> -/* check for range overflow */
>> -if (args->addr + args->len < args->addr)
>> -return -EINVAL;
>> -
>> -/* check for extra flags that we don't understand */
>> -if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
>> -return -EINVAL;
>> -
>> -/* ioeventfd with no length can't be combined with DATAMATCH */
>> -if (!args->len &&
>> -args->flags & (KVM_IOEVENTFD_FLAG_PIO |
>> -   KVM_IOEVENTFD_FLAG_DATAMATCH))
>> -return -EINVAL;
>> +struct eventfd_ctx *eventfd;
>> +struct _ioeventfd *p;
>> +int ret;
>>
>>  eventfd = eventfd_ctx_fdget(args->fd);
>>  if (IS_ERR(eventfd))
>> @@ -873,14 +847,48 @@ fail:
>>  }
>>
>>  static int
>> -kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>> +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> You'll move this function to below the deassign function in patch 2.
> Maybe do it already here?
>

Yes, this can reduce the changes for patch2.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 104271] Boot crashes on Lenovo W520 with KVM and Discrete Graphic enabled in BIOS

2015-09-11 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=104271

--- Comment #4 from Hans Streibel  ---
Created attachment 187321
  --> https://bugzilla.kernel.org/attachment.cgi?id=187321&action=edit
Output of lspci -vvv

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V4 2/4] kvm: fix double free for fast mmio eventfd

2015-09-11 Thread Cornelia Huck
On Fri, 11 Sep 2015 11:17:35 +0800
Jason Wang  wrote:

> We register wildcard mmio eventfd on two buses, one for KVM_MMIO_BUS
> and another is KVM_FAST_MMIO_BUS but with a single iodev
> instance. This will lead an issue: kvm_io_bus_destroy() knows nothing
> about the devices on two buses points to a single dev. Which will lead

s/points/pointing/

> double free[1] during exit. Fixing this by using allocate two

s/using allocate/allocating/

> instances of iodevs then register one on KVM_MMIO_BUS and another on
> KVM_FAST_MMIO_BUS.
> 
(...)

> @@ -929,8 +878,66 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus 
> bus_idx,
>  static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd 
> *args)
>  {
>   enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
> + int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
> +
> + if (!args->len)
> + kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);

I think it would be good to explicitly check for bus_idx ==
KVM_MMIO_BUS here.

> +
> + return ret;
> +}
> 
> - return kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
> +static int
> +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> +{
> + enum kvm_bus  bus_idx;
> + int ret;
> +
> + bus_idx = ioeventfd_bus_from_flags(args->flags);
> + /* must be natural-word sized, or 0 to ignore length */
> + switch (args->len) {
> + case 0:
> + case 1:
> + case 2:
> + case 4:
> + case 8:
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + /* check for range overflow */
> + if (args->addr + args->len < args->addr)
> + return -EINVAL;
> +
> + /* check for extra flags that we don't understand */
> + if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
> + return -EINVAL;
> +
> + /* ioeventfd with no length can't be combined with DATAMATCH */
> + if (!args->len &&
> + args->flags & (KVM_IOEVENTFD_FLAG_PIO |
> +KVM_IOEVENTFD_FLAG_DATAMATCH))
> + return -EINVAL;
> +
> + ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
> + if (ret)
> + goto fail;
> +
> + /* When length is ignored, MMIO is also put on a separate bus, for
> +  * faster lookups.
> +  */
> + if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) {

Ditto on a positive check for bus_idx == KVM_MMIO_BUS.

> + ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
> + if (ret < 0)
> + goto fast_fail;
> + }
> +
> + return 0;
> +
> +fast_fail:
> + kvm_deassign_ioeventfd(kvm, args);

Shouldn't you use kvm_deassign_ioeventfd_idx(kvm, bus_idx, args) here?

> +fail:
> + return ret;
>  }
> 
>  int

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V4 0/4] Fast MMIO eventfd fixes

2015-09-11 Thread Michael S. Tsirkin
On Fri, Sep 11, 2015 at 11:17:33AM +0800, Jason Wang wrote:
> Hi:
> 
> This series fixes two issues of fast mmio eventfd:
> 
> 1) A single iodev instance were registerd on two buses: KVM_MMIO_BUS
>and KVM_FAST_MMIO_BUS. This will cause double in
>ioeventfd_destructor()
> 2) A zero length iodev on KVM_MMIO_BUS will never be found but
>kvm_io_bus_cmp(). This will lead e.g the eventfd will be trapped by
>qemu instead of host.
> 
> 1 is fixed by allocating two instances of iodev. 2 is fixed by ignore
> the actual length if the length of iodev is zero in kvm_io_bus_cmp().
> 
> Please review.

I think we should add a capability for fast mmio.
This way, userspace can avoid crashing buggy kernels.

> Changes from V3:
> 
> - Don't do search on two buses when trying to do write on
>   KVM_MMIO_BUS. This fixes a small regression found by vmexit.flat.
> - Since we don't do search on two buses, change kvm_io_bus_cmp() to
>   let it can find zero length iodevs.
> - Fix the unnecessary lines in tracepoint patch.
> 
> Changes from V2:
> - Tweak styles and comment suggested by Cornelia.
> 
> Changes from v1:
> - change ioeventfd_bus_from_flags() to return KVM_FAST_MMIO_BUS when
>   needed to save lots of unnecessary changes.
> 
> Jason Wang (4):
>   kvm: factor out core eventfd assign/deassign logic
>   kvm: fix double free for fast mmio eventfd
>   kvm: fix zero length mmio searching
>   kvm: add tracepoint for fast mmio
> 
>  arch/x86/kvm/trace.h |  18 
>  arch/x86/kvm/vmx.c   |   1 +
>  arch/x86/kvm/x86.c   |   1 +
>  virt/kvm/eventfd.c   | 124 
> ++-
>  virt/kvm/kvm_main.c  |   4 +-
>  5 files changed, 96 insertions(+), 52 deletions(-)
> 
> -- 
> 2.1.4
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 01/22] ARM64: Move PMU register related defines to asm/pmu.h

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

To use the ARMv8 PMU related register defines from the KVM code,
we move the relevant definitions to asm/pmu.h header file.

Signed-off-by: Anup Patel 
Signed-off-by: Shannon Zhao 
---
 arch/arm64/include/asm/pmu.h   | 45 ++
 arch/arm64/kernel/perf_event.c | 35 
 2 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
index b7710a5..b9f394a 100644
--- a/arch/arm64/include/asm/pmu.h
+++ b/arch/arm64/include/asm/pmu.h
@@ -19,6 +19,51 @@
 #ifndef __ASM_PMU_H
 #define __ASM_PMU_H
 
+#define ARMV8_MAX_COUNTERS  32
+#define ARMV8_COUNTER_MASK  (ARMV8_MAX_COUNTERS - 1)
+
+/*
+ * Per-CPU PMCR: config reg
+ */
+#define ARMV8_PMCR_E   (1 << 0) /* Enable all counters */
+#define ARMV8_PMCR_P   (1 << 1) /* Reset all counters */
+#define ARMV8_PMCR_C   (1 << 2) /* Cycle counter reset */
+#define ARMV8_PMCR_D   (1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV8_PMCR_X   (1 << 4) /* Export to ETM */
+#define ARMV8_PMCR_DP  (1 << 5) /* Disable CCNT if non-invasive debug*/
+#defineARMV8_PMCR_N_SHIFT  11   /* Number of counters 
supported */
+#defineARMV8_PMCR_N_MASK   0x1f
+#defineARMV8_PMCR_MASK 0x3f /* Mask for writable bits */
+
+/*
+ * PMCNTEN: counters enable reg
+ */
+#defineARMV8_CNTEN_MASK0x  /* Mask for writable 
bits */
+
+/*
+ * PMINTEN: counters interrupt enable reg
+ */
+#defineARMV8_INTEN_MASK0x  /* Mask for writable 
bits */
+
+/*
+ * PMOVSR: counters overflow flag status reg
+ */
+#defineARMV8_OVSR_MASK 0x  /* Mask for writable 
bits */
+#defineARMV8_OVERFLOWED_MASK   ARMV8_OVSR_MASK
+
+/*
+ * PMXEVTYPER: Event selection reg
+ */
+#defineARMV8_EVTYPE_MASK   0xc80003ff  /* Mask for writable 
bits */
+#defineARMV8_EVTYPE_EVENT  0x3ff   /* Mask for EVENT bits 
*/
+
+/*
+ * Event filters for PMUv3
+ */
+#defineARMV8_EXCLUDE_EL1   (1 << 31)
+#defineARMV8_EXCLUDE_EL0   (1 << 30)
+#defineARMV8_INCLUDE_EL2   (1 << 27)
+
 #ifdef CONFIG_HW_PERF_EVENTS
 
 /* The events for a given PMU register set. */
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index b31e9a4..96538d7 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -824,9 +824,6 @@ static const unsigned 
armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 #defineARMV8_IDX_COUNTER0  1
 #defineARMV8_IDX_COUNTER_LAST  (ARMV8_IDX_CYCLE_COUNTER + 
cpu_pmu->num_events - 1)
 
-#defineARMV8_MAX_COUNTERS  32
-#defineARMV8_COUNTER_MASK  (ARMV8_MAX_COUNTERS - 1)
-
 /*
  * ARMv8 low level PMU access
  */
@@ -837,38 +834,6 @@ static const unsigned 
armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 #defineARMV8_IDX_TO_COUNTER(x) \
(((x) - ARMV8_IDX_COUNTER0) & ARMV8_COUNTER_MASK)
 
-/*
- * Per-CPU PMCR: config reg
- */
-#define ARMV8_PMCR_E   (1 << 0) /* Enable all counters */
-#define ARMV8_PMCR_P   (1 << 1) /* Reset all counters */
-#define ARMV8_PMCR_C   (1 << 2) /* Cycle counter reset */
-#define ARMV8_PMCR_D   (1 << 3) /* CCNT counts every 64th cpu cycle */
-#define ARMV8_PMCR_X   (1 << 4) /* Export to ETM */
-#define ARMV8_PMCR_DP  (1 << 5) /* Disable CCNT if non-invasive debug*/
-#defineARMV8_PMCR_N_SHIFT  11   /* Number of counters 
supported */
-#defineARMV8_PMCR_N_MASK   0x1f
-#defineARMV8_PMCR_MASK 0x3f /* Mask for writable bits */
-
-/*
- * PMOVSR: counters overflow flag status reg
- */
-#defineARMV8_OVSR_MASK 0x  /* Mask for writable 
bits */
-#defineARMV8_OVERFLOWED_MASK   ARMV8_OVSR_MASK
-
-/*
- * PMXEVTYPER: Event selection reg
- */
-#defineARMV8_EVTYPE_MASK   0xc80003ff  /* Mask for writable 
bits */
-#defineARMV8_EVTYPE_EVENT  0x3ff   /* Mask for EVENT bits 
*/
-
-/*
- * Event filters for PMUv3
- */
-#defineARMV8_EXCLUDE_EL1   (1 << 31)
-#defineARMV8_EXCLUDE_EL0   (1 << 30)
-#defineARMV8_INCLUDE_EL2   (1 << 27)
-
 static inline u32 armv8pmu_pmcr_read(void)
 {
u32 val;
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 17/22] KVM: ARM64: Add reset and access handlers for PMSWINC register

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Add access handler which emulates writing and reading PMSWINC
register and add support for creating software increment event.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.c | 18 +-
 include/kvm/arm_pmu.h |  2 ++
 virt/kvm/arm/pmu.c| 33 +
 3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index b47cd0b..24d00a0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -335,6 +335,11 @@ static bool access_pmu_regs(struct kvm_vcpu *vcpu,
vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~val;
break;
}
+   case PMSWINC_EL0: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   kvm_pmu_software_increment(vcpu, val);
+   break;
+   }
case PMCR_EL0: {
/* Only update writeable bits of PMCR */
val = vcpu_sys_reg(vcpu, r->reg);
@@ -364,6 +369,8 @@ static bool access_pmu_regs(struct kvm_vcpu *vcpu,
*vcpu_reg(vcpu, p->Rt) = val;
break;
}
+   case PMSWINC_EL0:
+   return read_zero(vcpu, p);
default:
*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
break;
@@ -576,7 +583,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
  access_pmu_regs, reset_unknown, PMOVSCLR_EL0 },
/* PMSWINC_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
- trap_raz_wi },
+ access_pmu_regs, reset_unknown, PMSWINC_EL0 },
/* PMSELR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
  access_pmu_regs, reset_unknown, PMSELR_EL0 },
@@ -833,6 +840,11 @@ static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
vcpu_cp15(vcpu, c9_PMOVSSET) &= ~val;
break;
}
+   case c9_PMSWINC: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   kvm_pmu_software_increment(vcpu, val);
+   break;
+   }
case c9_PMCR: {
/* Only update writeable bits of PMCR */
val = vcpu_cp15(vcpu, r->reg);
@@ -862,6 +874,8 @@ static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
*vcpu_reg(vcpu, p->Rt) = val;
break;
}
+   case c9_PMSWINC:
+   return read_zero(vcpu, p);
default:
*vcpu_reg(vcpu, p->Rt) = vcpu_cp15(vcpu, r->reg);
break;
@@ -907,6 +921,8 @@ static const struct sys_reg_desc cp15_regs[] = {
  reset_unknown_cp15, c9_PMCNTENCLR },
{ Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmu_cp15_regs,
  reset_unknown_cp15, c9_PMOVSCLR },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmu_cp15_regs,
+ reset_unknown_cp15, c9_PMSWINC },
{ Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmu_cp15_regs,
  reset_unknown_cp15, c9_PMSELR },
{ Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmu_cp15_regs,
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 59e70af..1a27357 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -41,6 +41,7 @@ unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
unsigned long select_idx);
 void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, unsigned long val);
 void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, unsigned long val);
+void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, unsigned long val);
 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, unsigned long data,
unsigned long select_idx);
 #else
@@ -51,6 +52,7 @@ unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
 }
 void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, unsigned long val) {}
 void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, unsigned long val) {}
+void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, unsigned long val) {}
 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, unsigned long data,
unsigned long select_idx) {}
 #endif
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index c6cdc4e..5f5a483 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -163,6 +163,35 @@ void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, 
unsigned long val)
 }
 
 /**
+ * kvm_pmu_software_increment - do software increment
+ * @vcpu: The vcpu pointer
+ * @val: the value guest writes to PMSWINC register
+ */
+void kvm_pmu_software_increment(struct 

[PATCH v2 19/22] KVM: ARM64: Add PMU overflow interrupt routing

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

When calling perf_event_create_kernel_counter to create perf_event,
assign an overflow handler. Then when perf event overflows, set
irq_pending and call kvm_vcpu_kick() to sync the interrupt.

Signed-off-by: Shannon Zhao 
---
 arch/arm/kvm/arm.c|  3 +++
 include/kvm/arm_pmu.h |  2 ++
 virt/kvm/arm/pmu.c| 54 ++-
 3 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index bc738d2..ee5d667 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -547,6 +548,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
preempt_enable();
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
+   kvm_pmu_sync_hwstate(vcpu);
continue;
}
 
@@ -591,6 +593,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
+   kvm_pmu_sync_hwstate(vcpu);
 
ret = handle_exit(vcpu, run, ret);
}
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 1a27357..5f1ea2b 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -37,6 +37,7 @@ struct kvm_pmu {
 };
 
 #ifdef CONFIG_KVM_ARM_PMU
+void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu);
 unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
unsigned long select_idx);
 void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, unsigned long val);
@@ -45,6 +46,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, 
unsigned long val);
 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, unsigned long data,
unsigned long select_idx);
 #else
+void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {}
 unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
unsigned long select_idx)
 {
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 5f5a483..9e75372 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* PMU HW events mapping. */
 static struct kvm_pmu_hw_event_map {
@@ -90,6 +91,56 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu,
 }
 
 /**
+ * kvm_pmu_sync_hwstate - sync pmu state for cpu
+ * @vcpu: The vcpu pointer
+ *
+ * Inject virtual PMU IRQ if IRQ is pending for this cpu.
+ */
+void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
+{
+   struct kvm_pmu *pmu = >arch.pmu;
+
+   if (pmu->irq_pending && (pmu->irq_num != -1)) {
+   kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, pmu->irq_num, 1);
+   pmu->irq_pending = false;
+   }
+}
+
+/**
+ * When perf event overflows, set irq_pending and call kvm_vcpu_kick() to 
inject
+ * the interrupt.
+ */
+static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+   struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+   struct kvm_vcpu *vcpu = pmc->vcpu;
+   struct kvm_pmu *pmu = >arch.pmu;
+   int idx = pmc->idx;
+
+   if (!vcpu_mode_is_32bit(vcpu)) {
+   if ((vcpu_sys_reg(vcpu, PMINTENSET_EL1) >> idx) & 0x1) {
+   __set_bit(idx,
+   (unsigned long *)_sys_reg(vcpu, PMOVSSET_EL0));
+   __set_bit(idx,
+   (unsigned long *)_sys_reg(vcpu, PMOVSCLR_EL0));
+   pmu->irq_pending = true;
+   kvm_vcpu_kick(vcpu);
+   }
+   } else {
+   if ((vcpu_cp15(vcpu, c9_PMINTENSET) >> idx) & 0x1) {
+   __set_bit(idx,
+   (unsigned long *)_cp15(vcpu, c9_PMOVSSET));
+   __set_bit(idx,
+   (unsigned long *)_cp15(vcpu, c9_PMOVSCLR));
+   pmu->irq_pending = true;
+   kvm_vcpu_kick(vcpu);
+   }
+   }
+}
+
+/**
  * kvm_pmu_get_counter_value - get PMU counter value
  * @vcpu: The vcpu pointer
  * @select_idx: The counter index
@@ -311,7 +362,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, 
unsigned long data,
/* The initial sample period (overflow count) of an event. */
attr.sample_period = (-counter) & (((u64)1 << overflow_bit) - 1);
 
-   event = perf_event_create_kernel_counter(, -1, current, NULL, pmc);
+   event = perf_event_create_kernel_counter(, -1, current,
+   

[PATCH v2 18/22] KVM: ARM64: Add access handlers for PMEVCNTRn and PMEVTYPERn register

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Add access handler which emulates writing and reading PMEVCNTRn and
PMEVTYPERn.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.c | 164 ++
 1 file changed, 164 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 24d00a0..cf2cb47 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -395,6 +395,20 @@ static bool access_pmu_regs(struct kvm_vcpu *vcpu,
{ Op0(0b10), Op1(0b000), CRn(0b), CRm((n)), Op2(0b111), \
  trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }
 
+/* Macro to expand the PMEVCNTRn_EL0 register */
+#define PMU_PMEVCNTR_EL0(n)\
+   /* PMEVCNTRn_EL0 */ \
+   { Op0(0b11), Op1(0b011), CRn(0b1110),   \
+ CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_regs, reset_unknown, (PMEVCNTR0_EL0 + n), }
+
+/* Macro to expand the PMEVTYPERn_EL0 register */
+#define PMU_PMEVTYPER_EL0(n)   \
+   /* PMEVTYPERn_EL0 */\
+   { Op0(0b11), Op1(0b011), CRn(0b1110),   \
+ CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_regs, reset_unknown, (PMEVTYPER0_EL0 + n), }
+
 /*
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -616,6 +630,74 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b), Op2(0b011),
  NULL, reset_unknown, TPIDRRO_EL0 },
 
+   /* PMEVCNTRn_EL0 */
+   PMU_PMEVCNTR_EL0(0),
+   PMU_PMEVCNTR_EL0(1),
+   PMU_PMEVCNTR_EL0(2),
+   PMU_PMEVCNTR_EL0(3),
+   PMU_PMEVCNTR_EL0(4),
+   PMU_PMEVCNTR_EL0(5),
+   PMU_PMEVCNTR_EL0(6),
+   PMU_PMEVCNTR_EL0(7),
+   PMU_PMEVCNTR_EL0(8),
+   PMU_PMEVCNTR_EL0(9),
+   PMU_PMEVCNTR_EL0(10),
+   PMU_PMEVCNTR_EL0(11),
+   PMU_PMEVCNTR_EL0(12),
+   PMU_PMEVCNTR_EL0(13),
+   PMU_PMEVCNTR_EL0(14),
+   PMU_PMEVCNTR_EL0(15),
+   PMU_PMEVCNTR_EL0(16),
+   PMU_PMEVCNTR_EL0(17),
+   PMU_PMEVCNTR_EL0(18),
+   PMU_PMEVCNTR_EL0(19),
+   PMU_PMEVCNTR_EL0(20),
+   PMU_PMEVCNTR_EL0(21),
+   PMU_PMEVCNTR_EL0(22),
+   PMU_PMEVCNTR_EL0(23),
+   PMU_PMEVCNTR_EL0(24),
+   PMU_PMEVCNTR_EL0(25),
+   PMU_PMEVCNTR_EL0(26),
+   PMU_PMEVCNTR_EL0(27),
+   PMU_PMEVCNTR_EL0(28),
+   PMU_PMEVCNTR_EL0(29),
+   PMU_PMEVCNTR_EL0(30),
+   /* PMEVTYPERn_EL0 */
+   PMU_PMEVTYPER_EL0(0),
+   PMU_PMEVTYPER_EL0(1),
+   PMU_PMEVTYPER_EL0(2),
+   PMU_PMEVTYPER_EL0(3),
+   PMU_PMEVTYPER_EL0(4),
+   PMU_PMEVTYPER_EL0(5),
+   PMU_PMEVTYPER_EL0(6),
+   PMU_PMEVTYPER_EL0(7),
+   PMU_PMEVTYPER_EL0(8),
+   PMU_PMEVTYPER_EL0(9),
+   PMU_PMEVTYPER_EL0(10),
+   PMU_PMEVTYPER_EL0(11),
+   PMU_PMEVTYPER_EL0(12),
+   PMU_PMEVTYPER_EL0(13),
+   PMU_PMEVTYPER_EL0(14),
+   PMU_PMEVTYPER_EL0(15),
+   PMU_PMEVTYPER_EL0(16),
+   PMU_PMEVTYPER_EL0(17),
+   PMU_PMEVTYPER_EL0(18),
+   PMU_PMEVTYPER_EL0(19),
+   PMU_PMEVTYPER_EL0(20),
+   PMU_PMEVTYPER_EL0(21),
+   PMU_PMEVTYPER_EL0(22),
+   PMU_PMEVTYPER_EL0(23),
+   PMU_PMEVTYPER_EL0(24),
+   PMU_PMEVTYPER_EL0(25),
+   PMU_PMEVTYPER_EL0(26),
+   PMU_PMEVTYPER_EL0(27),
+   PMU_PMEVTYPER_EL0(28),
+   PMU_PMEVTYPER_EL0(29),
+   PMU_PMEVTYPER_EL0(30),
+   /* PMCCFILTR_EL0 */
+   { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b), Op2(0b111),
+ access_pmu_regs, reset_unknown, PMCCFILTR_EL0, },
+
/* DACR32_EL2 */
{ Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b), Op2(0b000),
  NULL, reset_unknown, DACR32_EL2 },
@@ -885,6 +967,20 @@ static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
return true;
 }
 
+/* Macro to expand the PMEVCNTRn register */
+#define PMU_PMEVCNTR(n)
\
+   /* PMEVCNTRn */ \
+   { Op1(0), CRn(0b1110),  \
+ CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_cp15_regs, reset_unknown_cp15, (c14_PMEVCNTR0 + n), }
+
+/* Macro to expand the PMEVTYPERn register */
+#define PMU_PMEVTYPER(n)   \
+   /* PMEVTYPERn */\
+   { Op1(0), CRn(0b1110),  \
+ CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
+ access_pmu_cp15_regs, reset_unknown_cp15, (c14_PMEVTYPER0 + n), }
+
 /*
  * Trapped 

[PATCH v2 09/22] KVM: ARM64: Add reset and access handlers for PMXEVTYPER register

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Since the reset value of PMXEVTYPER is UNKNOWN, use reset_unknown or
reset_unknown_cp15 for its reset handler. Add access handler which
emulates writing and reading PMXEVTYPER register. When writing to
PMXEVTYPER, call kvm_pmu_set_counter_event_type to create a perf_event
for the selected event type.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index b3bc717..59b7bc9 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -280,6 +280,13 @@ static bool access_pmu_regs(struct kvm_vcpu *vcpu,
 
if (p->is_write) {
switch (r->reg) {
+   case PMXEVTYPER_EL0: {
+   val = vcpu_sys_reg(vcpu, PMSELR_EL0);
+   kvm_pmu_set_counter_event_type(vcpu,
+  *vcpu_reg(vcpu, p->Rt),
+  val);
+   break;
+   }
case PMCR_EL0: {
/* Only update writeable bits of PMCR */
val = vcpu_sys_reg(vcpu, r->reg);
@@ -520,7 +527,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
  trap_raz_wi },
/* PMXEVTYPER_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
- trap_raz_wi },
+ access_pmu_regs, reset_unknown, PMXEVTYPER_EL0 },
/* PMXEVCNTR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
  trap_raz_wi },
@@ -707,6 +714,13 @@ static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
 
if (p->is_write) {
switch (r->reg) {
+   case c9_PMXEVTYPER: {
+   val = vcpu_cp15(vcpu, c9_PMSELR);
+   kvm_pmu_set_counter_event_type(vcpu,
+  *vcpu_reg(vcpu, p->Rt),
+  val);
+   break;
+   }
case c9_PMCR: {
/* Only update writeable bits of PMCR */
val = vcpu_cp15(vcpu, r->reg);
@@ -769,7 +783,8 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmu_cp15_regs,
  reset_pmceid, c9_PMCEID1 },
{ Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
-   { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
+   { Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_cp15_regs,
+ reset_unknown_cp15, c9_PMXEVTYPER },
{ Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 22/22] KVM: ARM64: Add a new kvm ARM PMU device

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Add a new kvm device type KVM_DEV_TYPE_ARM_PMU_V3 for ARM PMU. Implement
the kvm_device_ops for it.

Signed-off-by: Shannon Zhao 
---
 Documentation/virtual/kvm/devices/arm-pmu.txt | 15 +
 arch/arm64/include/uapi/asm/kvm.h |  3 +
 include/linux/kvm_host.h  |  1 +
 include/uapi/linux/kvm.h  |  2 +
 virt/kvm/arm/pmu.c| 88 +++
 virt/kvm/kvm_main.c   |  4 ++
 6 files changed, 113 insertions(+)
 create mode 100644 Documentation/virtual/kvm/devices/arm-pmu.txt

diff --git a/Documentation/virtual/kvm/devices/arm-pmu.txt 
b/Documentation/virtual/kvm/devices/arm-pmu.txt
new file mode 100644
index 000..49481c4
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/arm-pmu.txt
@@ -0,0 +1,15 @@
+ARM Virtual Performance Monitor Unit (vPMU)
+===
+
+Device types supported:
+  KVM_DEV_TYPE_ARM_PMU_V3 ARM Performance Monitor Unit v3
+
+Instantiate one PMU instance per VCPU through this API.
+
+Groups:
+  KVM_DEV_ARM_PMU_GRP_IRQ
+  Attributes:
+A value describing the interrupt number of PMU overflow interrupt.
+
+  Errors:
+-EINVAL: Value set is out of the expected range
diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index d268320..9960889 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -171,6 +171,9 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CTRL  4
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT   0
 
+/* Device Control API: ARM PMU */
+#define KVM_DEV_ARM_PMU_GRP_IRQ0
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT 24
 #define KVM_ARM_IRQ_TYPE_MASK  0xff
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 05e99b8..9136dec 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1129,6 +1129,7 @@ extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
+extern struct kvm_device_ops kvm_arm_pmu_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 716ad4a..591d185 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1018,6 +1018,8 @@ enum kvm_device_type {
 #define KVM_DEV_TYPE_FLIC  KVM_DEV_TYPE_FLIC
KVM_DEV_TYPE_ARM_VGIC_V3,
 #define KVM_DEV_TYPE_ARM_VGIC_V3   KVM_DEV_TYPE_ARM_VGIC_V3
+   KVM_DEV_TYPE_ARM_PMU_V3,
+#defineKVM_DEV_TYPE_ARM_PMU_V3 KVM_DEV_TYPE_ARM_PMU_V3
KVM_DEV_TYPE_MAX,
 };
 
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 8eccac9..d9f2e5b 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -410,3 +411,90 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, 
unsigned long data,
}
pmc->perf_event = event;
 }
+
+static int kvm_arm_pmu_set_irq(struct kvm *kvm, int irq)
+{
+   int j;
+   struct kvm_vcpu *vcpu;
+
+   kvm_for_each_vcpu(j, vcpu, kvm) {
+   struct kvm_pmu *pmu = >arch.pmu;
+
+   kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
+   pmu->irq_num = irq;
+   }
+
+   return 0;
+}
+
+static int kvm_arm_pmu_create(struct kvm_device *dev, u32 type)
+{
+   int i, j;
+   struct kvm_vcpu *vcpu;
+   struct kvm *kvm = dev->kvm;
+
+   kvm_for_each_vcpu(j, vcpu, kvm) {
+   struct kvm_pmu *pmu = >arch.pmu;
+
+   memset(pmu, 0, sizeof(*pmu));
+   for (i = 0; i < ARMV8_MAX_COUNTERS; i++) {
+   pmu->pmc[i].idx = i;
+   pmu->pmc[i].vcpu = vcpu;
+   }
+   pmu->irq_num = -1;
+   }
+
+   return 0;
+}
+
+static void kvm_arm_pmu_destroy(struct kvm_device *dev)
+{
+   kfree(dev);
+}
+
+static int kvm_arm_pmu_set_attr(struct kvm_device *dev,
+   struct kvm_device_attr *attr)
+{
+   switch (attr->group) {
+   case KVM_DEV_ARM_PMU_GRP_IRQ: {
+   int __user *uaddr = (int __user *)(long)attr->addr;
+   int reg;
+
+   if (get_user(reg, uaddr))
+   return -EFAULT;
+
+   if (reg < VGIC_NR_SGIS || reg > dev->kvm->arch.vgic.nr_irqs)
+   return -EINVAL;
+
+   return kvm_arm_pmu_set_irq(dev->kvm, reg);
+   }
+   }
+
+   return -ENXIO;
+}
+
+static int kvm_arm_pmu_get_attr(struct kvm_device *dev,
+   struct kvm_device_attr *attr)
+{
+   return 0;
+}
+
+static int kvm_arm_pmu_has_attr(struct kvm_device *dev,
+  

[PATCH v2 02/22] KVM: ARM64: Define PMU data structure for each vcpu

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Here we plan to support virtual PMU for guest by full software
emulation, so define some basic structs and functions preparing for
further steps. Define struct kvm_pmc for performance monitor counter and
struct kvm_pmu for performance monitor unit for each vcpu. According to
ARMv8 spec, the PMU contains at most 32(ARMV8_MAX_COUNTERS) counters.

Since this only supports ARM64 (or PMUv3), add a separate config symbol
for it.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/include/asm/kvm_host.h |  2 ++
 arch/arm64/include/asm/pmu.h  |  2 ++
 arch/arm64/kvm/Kconfig|  8 
 include/kvm/arm_pmu.h | 39 +++
 4 files changed, 51 insertions(+)
 create mode 100644 include/kvm/arm_pmu.h

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 2709db2..3c88873 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -42,6 +42,7 @@
 
 #include 
 #include 
+#include 
 
 #define KVM_VCPU_MAX_FEATURES 3
 
@@ -116,6 +117,7 @@ struct kvm_vcpu_arch {
/* VGIC state */
struct vgic_cpu vgic_cpu;
struct arch_timer_cpu timer_cpu;
+   struct kvm_pmu pmu;
 
/*
 * Anything that is not used directly from assembly code goes
diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
index b9f394a..95681e6 100644
--- a/arch/arm64/include/asm/pmu.h
+++ b/arch/arm64/include/asm/pmu.h
@@ -19,6 +19,8 @@
 #ifndef __ASM_PMU_H
 #define __ASM_PMU_H
 
+#include 
+
 #define ARMV8_MAX_COUNTERS  32
 #define ARMV8_COUNTER_MASK  (ARMV8_MAX_COUNTERS - 1)
 
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index bfffe8f..3c7c58d 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -31,6 +31,7 @@ config KVM
select KVM_VFIO
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
+   select KVM_ARM_PMU
---help---
  Support hosting virtualized guest machines.
 
@@ -52,4 +53,11 @@ config KVM_ARM_MAX_VCPUS
  large, so only choose a reasonable number that you expect to
  actually use.
 
+config KVM_ARM_PMU
+   bool
+   depends on KVM_ARM_HOST
+   ---help---
+ Adds support for a virtual Performance Monitoring Unit (PMU) in
+ virtual machines.
+
 endif # VIRTUALIZATION
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
new file mode 100644
index 000..64af88a
--- /dev/null
+++ b/include/kvm/arm_pmu.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2015 Linaro Ltd.
+ * Author: Shannon Zhao 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see .
+ */
+
+#ifndef __ASM_ARM_KVM_PMU_H
+#define __ASM_ARM_KVM_PMU_H
+
+#include 
+
+struct kvm_pmc {
+   u8 idx;/* index into the pmu->pmc array */
+   struct perf_event *perf_event;
+   struct kvm_vcpu *vcpu;
+};
+
+struct kvm_pmu {
+#ifdef CONFIG_KVM_ARM_PMU
+   /* PMU IRQ Number per VCPU */
+   int irq_num;
+   /* IRQ pending flag */
+   bool irq_pending;
+   struct kvm_pmc pmc[ARMV8_MAX_COUNTERS];
+#endif
+};
+
+#endif
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 07/22] KVM: ARM64: Add reset and access handlers for PMCEID0 and PMCEID1 register

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Add reset handler which gets host value of PMCEID0 or PMCEID1. Since
write action to PMCEID0 or PMCEID1 is ignored, add a new case for this.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.c | 36 
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 24b8972..b3bc717 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -251,6 +251,26 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct 
sys_reg_desc *r)
  | (ARMV8_PMCR_MASK & 0xdecafbad);
 }
 
+static void reset_pmceid(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+   u32 pmceid;
+
+   if (r->reg == PMCEID0_EL0 || r->reg == c9_PMCEID0) {
+   asm volatile("mrs %0, pmceid0_el0\n" : "=r" (pmceid));
+   if (!vcpu_mode_is_32bit(vcpu))
+   vcpu_sys_reg(vcpu, r->reg) = pmceid;
+   else
+   vcpu_cp15(vcpu, r->reg) = pmceid;
+   } else {
+   /* PMCEID1_EL0 or c9_PMCEID1 */
+   asm volatile("mrs %0, pmceid1_el0\n" : "=r" (pmceid));
+   if (!vcpu_mode_is_32bit(vcpu))
+   vcpu_sys_reg(vcpu, r->reg) = pmceid;
+   else
+   vcpu_cp15(vcpu, r->reg) = pmceid;
+   }
+}
+
 /* PMU registers accessor. */
 static bool access_pmu_regs(struct kvm_vcpu *vcpu,
const struct sys_reg_params *p,
@@ -268,6 +288,9 @@ static bool access_pmu_regs(struct kvm_vcpu *vcpu,
vcpu_sys_reg(vcpu, r->reg) = val;
break;
}
+   case PMCEID0_EL0:
+   case PMCEID1_EL0:
+   return ignore_write(vcpu, p);
default:
vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
break;
@@ -488,10 +511,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
  access_pmu_regs, reset_unknown, PMSELR_EL0 },
/* PMCEID0_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
- trap_raz_wi },
+ access_pmu_regs, reset_pmceid, PMCEID0_EL0 },
/* PMCEID1_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
- trap_raz_wi },
+ access_pmu_regs, reset_pmceid, PMCEID1_EL0 },
/* PMCCNTR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
  trap_raz_wi },
@@ -692,6 +715,9 @@ static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
vcpu_cp15(vcpu, r->reg) = val;
break;
}
+   case c9_PMCEID0:
+   case c9_PMCEID1:
+   return ignore_write(vcpu, p);
default:
vcpu_cp15(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
break;
@@ -738,8 +764,10 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmu_cp15_regs,
  reset_unknown_cp15, c9_PMSELR },
-   { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
-   { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmu_cp15_regs,
+ reset_pmceid, c9_PMCEID0 },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmu_cp15_regs,
+ reset_pmceid, c9_PMCEID1 },
{ Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 15/22] KVM: ARM64: Add a helper for CP15 registers reset to specified value

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index a0b3811..a476b1b 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -119,6 +119,14 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const 
struct sys_reg_desc *r
vcpu_sys_reg(vcpu, r->reg) = r->val;
 }
 
+static inline void reset_val_cp15(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+   BUG_ON(!r->reg);
+   BUG_ON(r->reg >= NR_SYS_REGS);
+   vcpu_cp15(vcpu, r->reg) = r->val;
+}
+
 static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
  const struct sys_reg_desc *i2)
 {
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 14/22] KVM: ARM64: Add reset and access handlers for PMOVSSET and PMOVSCLR register

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Since the reset value of PMOVSSET and PMOVSCLR is UNKNOWN, use
reset_unknown for its reset handler. Add a new case to emulate writing
PMOVSSET or PMOVSCLR register.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.c | 33 ++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c3d4fb5..11fc183 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -323,6 +323,18 @@ static bool access_pmu_regs(struct kvm_vcpu *vcpu,
vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
break;
}
+   case PMOVSSET_EL0: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   vcpu_sys_reg(vcpu, r->reg) |= val;
+   vcpu_sys_reg(vcpu, PMOVSCLR_EL0) |= val;
+   break;
+   }
+   case PMOVSCLR_EL0: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   vcpu_sys_reg(vcpu, r->reg) &= ~val;
+   vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~val;
+   break;
+   }
case PMCR_EL0: {
/* Only update writeable bits of PMCR */
val = vcpu_sys_reg(vcpu, r->reg);
@@ -561,7 +573,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
  access_pmu_regs, reset_unknown, PMCNTENCLR_EL0 },
/* PMOVSCLR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
- trap_raz_wi },
+ access_pmu_regs, reset_unknown, PMOVSCLR_EL0 },
/* PMSWINC_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
  trap_raz_wi },
@@ -588,7 +600,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
  trap_raz_wi },
/* PMOVSSET_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
- trap_raz_wi },
+ access_pmu_regs, reset_unknown, PMOVSSET_EL0 },
 
/* TPIDR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b), Op2(0b010),
@@ -809,6 +821,18 @@ static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
vcpu_cp15(vcpu, c9_PMINTENSET) &= ~val;
break;
}
+   case c9_PMOVSSET: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   vcpu_cp15(vcpu, r->reg) |= val;
+   vcpu_cp15(vcpu, c9_PMOVSCLR) |= val;
+   break;
+   }
+   case c9_PMOVSCLR: {
+   val = *vcpu_reg(vcpu, p->Rt);
+   vcpu_cp15(vcpu, r->reg) &= ~val;
+   vcpu_cp15(vcpu, c9_PMOVSSET) &= ~val;
+   break;
+   }
case c9_PMCR: {
/* Only update writeable bits of PMCR */
val = vcpu_cp15(vcpu, r->reg);
@@ -881,7 +905,8 @@ static const struct sys_reg_desc cp15_regs[] = {
  reset_unknown_cp15, c9_PMCNTENSET },
{ Op1( 0), CRn( 9), CRm(12), Op2( 2), access_pmu_cp15_regs,
  reset_unknown_cp15, c9_PMCNTENCLR },
-   { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmu_cp15_regs,
+ reset_unknown_cp15, c9_PMOVSCLR },
{ Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmu_cp15_regs,
  reset_unknown_cp15, c9_PMSELR },
{ Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmu_cp15_regs,
@@ -899,6 +924,8 @@ static const struct sys_reg_desc cp15_regs[] = {
  reset_unknown_cp15, c9_PMINTENSET },
{ Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pmu_cp15_regs,
  reset_unknown_cp15, c9_PMINTENCLR },
+   { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmu_cp15_regs,
+ reset_unknown_cp15, c9_PMOVSSET },
 
{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 06/22] KVM: ARM64: Add reset and access handlers for PMSELR register

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Since the reset value of PMSELR_EL0 is UNKNOWN, use reset_unknown for
its reset handler. As it doesn't need to deal with the accessing action
specially, it uses default case to emulate writing and reading PMSELR
register.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index db1be44..24b8972 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -485,7 +485,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
  trap_raz_wi },
/* PMSELR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
- trap_raz_wi },
+ access_pmu_regs, reset_unknown, PMSELR_EL0 },
/* PMCEID0_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
  trap_raz_wi },
@@ -736,7 +736,8 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
-   { Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmu_cp15_regs,
+ reset_unknown_cp15, c9_PMSELR },
{ Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 21/22] KVM: ARM64: Free perf event of PMU when destroying vcpu

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

When KVM frees VCPU, it needs to free the perf_event of PMU.

Signed-off-by: Shannon Zhao 
---
 arch/arm/kvm/arm.c|  1 +
 include/kvm/arm_pmu.h |  2 ++
 virt/kvm/arm/pmu.c| 21 +
 3 files changed, 24 insertions(+)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ee5d667..a7215c0 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -257,6 +257,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
kvm_vgic_vcpu_destroy(vcpu);
+   kvm_pmu_vcpu_destroy(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index d74e183..2b6fcb1 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -38,6 +38,7 @@ struct kvm_pmu {
 
 #ifdef CONFIG_KVM_ARM_PMU
 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
+void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu);
 unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
unsigned long select_idx);
@@ -48,6 +49,7 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, 
unsigned long data,
unsigned long select_idx);
 #else
 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {}
+void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {}
 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {}
 unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
unsigned long select_idx)
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 8476d5e..8eccac9 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -109,6 +109,27 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
 }
 
 /**
+ * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
+ * @vcpu: The vcpu pointer
+ *
+ */
+void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+   int i;
+   struct kvm_pmu *pmu = &vcpu->arch.pmu;
+
+   for (i = 0; i < ARMV8_MAX_COUNTERS; i++) {
+   struct kvm_pmc *pmc = &pmu->pmc[i];
+
+   if (pmc->perf_event) {
+   perf_event_disable(pmc->perf_event);
+   perf_event_release_kernel(pmc->perf_event);
+   pmc->perf_event = NULL;
+   }
+   }
+}
+
+/**
  * kvm_pmu_sync_hwstate - sync pmu state for cpu
  * @vcpu: The vcpu pointer
  *
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 00/22] KVM: ARM64: Add guest PMU support

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

This patchset adds guest PMU support for KVM on ARM64. It takes a
trap-and-emulate approach. When the guest wants to monitor an event, the
access is trapped by KVM, and KVM calls the perf_event API to create a perf
event and calls the relevant perf_event APIs to get the count value of the event.

Use perf to test this patchset in guest. When using "perf list", it
shows the list of the hardware events and hardware cache events perf
supports. Then use "perf stat -e EVENT" to monitor some event. For
example, use "perf stat -e cycles" to count cpu cycles and
"perf stat -e cache-misses" to count cache misses.

Below are the outputs of "perf stat -r 5 sleep 5" when running in host
and guest.

Host:
 Performance counter stats for 'sleep 5' (5 runs):

  0.551428  task-clock (msec) #0.000 CPUs utilized  
  ( +-  0.91% )
 1  context-switches  #0.002 M/sec
 0  cpu-migrations#0.000 K/sec
48  page-faults   #0.088 M/sec  
  ( +-  1.05% )
   1150265  cycles#2.086 GHz
  ( +-  0.92% )
 stalled-cycles-frontend
 stalled-cycles-backend
526398  instructions  #0.46  insns per cycle
  ( +-  0.89% )
 branches
  9485  branch-misses #   17.201 M/sec  
  ( +-  2.35% )

   5.000831616 seconds time elapsed 
 ( +-  0.00% )

Guest:
 Performance counter stats for 'sleep 5' (5 runs):

  0.730868  task-clock (msec) #0.000 CPUs utilized  
  ( +-  1.13% )
 1  context-switches  #0.001 M/sec
 0  cpu-migrations#0.000 K/sec
48  page-faults   #0.065 M/sec  
  ( +-  0.42% )
   1642982  cycles#2.248 GHz
  ( +-  1.04% )
 stalled-cycles-frontend
 stalled-cycles-backend
637964  instructions  #0.39  insns per cycle
  ( +-  0.65% )
 branches
 10377  branch-misses #   14.198 M/sec  
  ( +-  1.09% )

   5.001289068 seconds time elapsed 
 ( +-  0.00% )

This patchset can be fetched from [1] and the relevant QEMU version for
test can be fetched from [2].

Thanks,
Shannon

[1] https://git.linaro.org/people/shannon.zhao/linux-mainline.git  
KVM_ARM64_PMU_v2
[2] https://git.linaro.org/people/shannon.zhao/qemu.git  PMU_v2

Shannon Zhao (22):
  ARM64: Move PMU register related defines to asm/pmu.h
  KVM: ARM64: Define PMU data structure for each vcpu
  KVM: ARM64: Add offset defines for PMU registers
  KVM: ARM64: Add reset and access handlers for PMCR_EL0 register
  KVM: ARM64: Add a helper for CP15 registers reset to UNKNOWN
  KVM: ARM64: Add reset and access handlers for PMSELR register
  KVM: ARM64: Add reset and access handlers for PMCEID0 and PMCEID1
register
  KVM: ARM64: PMU: Add perf event map and introduce perf event creating
function
  KVM: ARM64: Add reset and access handlers for PMXEVTYPER register
  KVM: ARM64: Add reset and access handlers for PMXEVCNTR register
  KVM: ARM64: Add reset and access handlers for PMCCNTR register
  KVM: ARM64: Add reset and access handlers for PMCNTENSET and
PMCNTENCLR register
  KVM: ARM64: Add reset and access handlers for PMINTENSET and
PMINTENCLR register
  KVM: ARM64: Add reset and access handlers for PMOVSSET and PMOVSCLR
register
  KVM: ARM64: Add a helper for CP15 registers reset to specified value
  KVM: ARM64: Add reset and access handlers for PMUSERENR register
  KVM: ARM64: Add reset and access handlers for PMSWINC register
  KVM: ARM64: Add access handlers for PMEVCNTRn and PMEVTYPERn register
  KVM: ARM64: Add PMU overflow interrupt routing
  KVM: ARM64: Reset PMU state when resetting vcpu
  KVM: ARM64: Free perf event of PMU when destroying vcpu
  KVM: ARM64: Add a new kvm ARM PMU device

 Documentation/virtual/kvm/devices/arm-pmu.txt |  15 +
 arch/arm/kvm/arm.c|   4 +
 arch/arm64/include/asm/kvm_asm.h  |  59 ++-
 arch/arm64/include/asm/kvm_host.h |   2 +
 arch/arm64/include/asm/pmu.h  |  49 +++
 arch/arm64/include/uapi/asm/kvm.h |   3 +
 arch/arm64/kernel/perf_event.c|  35 --
 arch/arm64/kvm/Kconfig|   8 +
 arch/arm64/kvm/Makefile   |   1 +
 arch/arm64/kvm/reset.c|   3 +
 arch/arm64/kvm/sys_regs.c | 490 +++--
 arch/arm64/kvm/sys_regs.h |  16 +
 include/kvm/arm_pmu.h |  66 
 include/linux/kvm_host.h

[PATCH v2 11/22] KVM: ARM64: Add reset and access handlers for PMCCNTR register

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Since the reset value of PMCCNTR is UNKNOWN, use reset_unknown for its
reset handler. Add a new case to emulate reading to PMCCNTR register.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.c | 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index a51c8de..f8d7de0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -310,6 +310,12 @@ static bool access_pmu_regs(struct kvm_vcpu *vcpu,
}
} else {
switch (r->reg) {
+   case PMCCNTR_EL0: {
+   val = kvm_pmu_get_counter_value(vcpu,
+   ARMV8_MAX_COUNTERS - 1);
+   *vcpu_reg(vcpu, p->Rt) = val;
+   break;
+   }
case PMXEVCNTR_EL0: {
val = kvm_pmu_get_counter_value(vcpu,
vcpu_sys_reg(vcpu, PMSELR_EL0));
@@ -540,7 +546,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
  access_pmu_regs, reset_pmceid, PMCEID1_EL0 },
/* PMCCNTR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
- trap_raz_wi },
+ access_pmu_regs, reset_unknown, PMCCNTR_EL0 },
/* PMXEVTYPER_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
  access_pmu_regs, reset_unknown, PMXEVTYPER_EL0 },
@@ -760,6 +766,12 @@ static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
}
} else {
switch (r->reg) {
+   case c9_PMCCNTR: {
+   val = kvm_pmu_get_counter_value(vcpu,
+   ARMV8_MAX_COUNTERS - 1);
+   *vcpu_reg(vcpu, p->Rt) = val;
+   break;
+   }
case c9_PMXEVCNTR: {
val = kvm_pmu_get_counter_value(vcpu,
vcpu_cp15(vcpu, c9_PMSELR));
@@ -814,7 +826,8 @@ static const struct sys_reg_desc cp15_regs[] = {
  reset_pmceid, c9_PMCEID0 },
{ Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmu_cp15_regs,
  reset_pmceid, c9_PMCEID1 },
-   { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
+   { Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_cp15_regs,
+ reset_unknown_cp15, c9_PMCCNTR },
{ Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_cp15_regs,
  reset_unknown_cp15, c9_PMXEVTYPER },
{ Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_cp15_regs,
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 03/22] KVM: ARM64: Add offset defines for PMU registers

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

We are about to trap and emulate accesses to each PMU register
individually. This adds the context offsets for the AArch64 PMU
registers and their AArch32 counterparts.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/include/asm/kvm_asm.h | 59 +++-
 1 file changed, 52 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 3c5fe68..3a1df48 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -56,14 +56,36 @@
 #define DBGWVR15_EL1   86
 #define MDCCINT_EL187  /* Monitor Debug Comms Channel Interrupt Enable 
Reg */
 
+/* Performance Monitors Registers */
+#define PMCR_EL0   88  /* Control Register */
+#define PMOVSSET_EL0   89  /* Overflow Flag Status Set Register */
+#define PMOVSCLR_EL0   90  /* Overflow Flag Status Clear Register */
+#define PMSELR_EL0 91  /* Event Counter Selection Register */
+#define PMCEID0_EL0    92  /* Common Event Identification Register 0 */
+#define PMCEID1_EL0    93  /* Common Event Identification Register 1 */
+#define PMEVCNTR0_EL0  94  /* Event Counter Register (0-30) */
+#define PMEVCNTR30_EL0 124
+#define PMCCNTR_EL0    125 /* Cycle Counter Register */
+#define PMEVTYPER0_EL0 126 /* Event Type Register (0-30) */
+#define PMEVTYPER30_EL0    156
+#define PMCCFILTR_EL0  157 /* Cycle Count Filter Register */
+#define PMXEVCNTR_EL0  158 /* Selected Event Count Register */
+#define PMXEVTYPER_EL0 159 /* Selected Event Type Register */
+#define PMCNTENSET_EL0 160 /* Count Enable Set Register */
+#define PMCNTENCLR_EL0 161 /* Count Enable Clear Register */
+#define PMINTENSET_EL1 162 /* Interrupt Enable Set Register */
+#define PMINTENCLR_EL1 163 /* Interrupt Enable Clear Register */
+#define PMUSERENR_EL0  164 /* User Enable Register */
+#define PMSWINC_EL0    165 /* Software Increment Register */
+
 /* 32bit specific registers. Keep them at the end of the range */
-#defineDACR32_EL2  88  /* Domain Access Control Register */
-#defineIFSR32_EL2  89  /* Instruction Fault Status Register */
-#defineFPEXC32_EL2 90  /* Floating-Point Exception Control 
Register */
-#defineDBGVCR32_EL291  /* Debug Vector Catch Register */
-#defineTEECR32_EL1 92  /* ThumbEE Configuration Register */
-#defineTEEHBR32_EL193  /* ThumbEE Handler Base Register */
-#defineNR_SYS_REGS 94
+#defineDACR32_EL2  166 /* Domain Access Control Register */
+#defineIFSR32_EL2  167 /* Instruction Fault Status Register */
+#defineFPEXC32_EL2 168 /* Floating-Point Exception Control 
Register */
+#defineDBGVCR32_EL2169 /* Debug Vector Catch Register */
+#defineTEECR32_EL1 170 /* ThumbEE Configuration Register */
+#defineTEEHBR32_EL1171 /* ThumbEE Handler Base Register */
+#defineNR_SYS_REGS 172
 
 /* 32bit mapping */
 #define c0_MPIDR   (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
@@ -85,6 +107,24 @@
 #define c6_IFAR(c6_DFAR + 1)   /* Instruction Fault Address 
Register */
 #define c7_PAR (PAR_EL1 * 2)   /* Physical Address Register */
 #define c7_PAR_high(c7_PAR + 1)/* PAR top 32 bits */
+
+/* Performance Monitors*/
+#define c9_PMCR        (PMCR_EL0 * 2)
+#define c9_PMOVSSET    (PMOVSSET_EL0 * 2)
+#define c9_PMOVSCLR    (PMOVSCLR_EL0 * 2)
+#define c9_PMCCNTR (PMCCNTR_EL0 * 2)
+#define c9_PMSELR  (PMSELR_EL0 * 2)
+#define c9_PMCEID0 (PMCEID0_EL0 * 2)
+#define c9_PMCEID1 (PMCEID1_EL0 * 2)
+#define c9_PMXEVCNTR   (PMXEVCNTR_EL0 * 2)
+#define c9_PMXEVTYPER  (PMXEVTYPER_EL0 * 2)
+#define c9_PMCNTENSET  (PMCNTENSET_EL0 * 2)
+#define c9_PMCNTENCLR  (PMCNTENCLR_EL0 * 2)
+#define c9_PMINTENSET  (PMINTENSET_EL1 * 2)
+#define c9_PMINTENCLR  (PMINTENCLR_EL1 * 2)
+#define c9_PMUSERENR   (PMUSERENR_EL0 * 2)
+#define c9_PMSWINC (PMSWINC_EL0 * 2)
+
 #define c10_PRRR   (MAIR_EL1 * 2)  /* Primary Region Remap Register */
 #define c10_NMRR   (c10_PRRR + 1)  /* Normal Memory Remap Register */
 #define c12_VBAR   (VBAR_EL1 * 2)  /* Vector Base Address Register */
@@ -96,6 +136,11 @@
 #define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
 #define c14_CNTKCTL(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
 
+/* Performance Monitors*/
+#define c14_PMEVCNTR0  (PMEVCNTR0_EL0 * 2)
+#define c14_PMEVTYPER0 (PMEVTYPER0_EL0 * 2)
+#define c14_PMCCFILTR  (PMCCFILTR_EL0 * 2)
+
 #define cp14_DBGDSCRext(MDSCR_EL1 * 2)
 #define cp14_DBGBCR0   (DBGBCR0_EL1 * 2)
 #define cp14_DBGBVR0   (DBGBVR0_EL1 * 2)
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v2 04/22] KVM: ARM64: Add reset and access handlers for PMCR_EL0 register

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Add reset handler which gets host value of PMCR_EL0 and make writable
bits architecturally UNKNOWN. Add a common access handler for PMU
registers which emulates writing and reading register and add emulation
for PMCR.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.c | 76 +--
 1 file changed, 74 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c370b40..db1be44 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -236,6 +237,48 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const 
struct sys_reg_desc *r)
vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
 }
 
+static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+   u32 pmcr;
+
+   asm volatile("mrs %0, pmcr_el0\n" : "=r" (pmcr));
+   /* Writable bits of PMCR_EL0 (ARMV8_PMCR_MASK) is reset to UNKNOWN*/
+   if (!vcpu_mode_is_32bit(vcpu))
+   vcpu_sys_reg(vcpu, r->reg) = (pmcr & ~ARMV8_PMCR_MASK)
+| (ARMV8_PMCR_MASK & 0xdecafbad);
+   else
+   vcpu_cp15(vcpu, r->reg) = (pmcr & ~ARMV8_PMCR_MASK)
+ | (ARMV8_PMCR_MASK & 0xdecafbad);
+}
+
+/* PMU registers accessor. */
+static bool access_pmu_regs(struct kvm_vcpu *vcpu,
+   const struct sys_reg_params *p,
+   const struct sys_reg_desc *r)
+{
+   unsigned long val;
+
+   if (p->is_write) {
+   switch (r->reg) {
+   case PMCR_EL0: {
+   /* Only update writeable bits of PMCR */
+   val = vcpu_sys_reg(vcpu, r->reg);
+   val &= ~ARMV8_PMCR_MASK;
+   val |= *vcpu_reg(vcpu, p->Rt) & ARMV8_PMCR_MASK;
+   vcpu_sys_reg(vcpu, r->reg) = val;
+   break;
+   }
+   default:
+   vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+   break;
+   }
+   } else {
+   *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
+   }
+
+   return true;
+}
+
 /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
 #define DBG_BCR_BVR_WCR_WVR_EL1(n) \
/* DBGBVRn_EL1 */   \
@@ -427,7 +470,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
/* PMCR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
- trap_raz_wi },
+ access_pmu_regs, reset_pmcr, PMCR_EL0, },
/* PMCNTENSET_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
  trap_raz_wi },
@@ -632,6 +675,34 @@ static const struct sys_reg_desc cp14_64_regs[] = {
{ Op1( 0), CRm( 2), .access = trap_raz_wi },
 };
 
+/* PMU CP15 registers accessor. */
+static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
+const struct sys_reg_params *p,
+const struct sys_reg_desc *r)
+{
+   unsigned long val;
+
+   if (p->is_write) {
+   switch (r->reg) {
+   case c9_PMCR: {
+   /* Only update writeable bits of PMCR */
+   val = vcpu_cp15(vcpu, r->reg);
+   val &= ~ARMV8_PMCR_MASK;
+   val |= *vcpu_reg(vcpu, p->Rt) & ARMV8_PMCR_MASK;
+   vcpu_cp15(vcpu, r->reg) = val;
+   break;
+   }
+   default:
+   vcpu_cp15(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+   break;
+   }
+   } else {
+   *vcpu_reg(vcpu, p->Rt) = vcpu_cp15(vcpu, r->reg);
+   }
+
+   return true;
+}
+
 /*
  * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
  * depending on the way they are accessed (as a 32bit or a 64bit
@@ -660,7 +731,8 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
 
/* PMU */
-   { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi },
+   { Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmu_cp15_regs,
+ reset_pmcr, c9_PMCR },
{ Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 20/22] KVM: ARM64: Reset PMU state when resetting vcpu

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/reset.c |  3 +++
 include/kvm/arm_pmu.h  |  2 ++
 virt/kvm/arm/pmu.c | 18 ++
 3 files changed, 23 insertions(+)

diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 0b43265..ee2c2e9 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -107,5 +107,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
/* Reset timer */
kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
 
+   /* Reset PMU */
+   kvm_pmu_vcpu_reset(vcpu);
+
return 0;
 }
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 5f1ea2b..d74e183 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -37,6 +37,7 @@ struct kvm_pmu {
 };
 
 #ifdef CONFIG_KVM_ARM_PMU
+void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu);
 unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
unsigned long select_idx);
@@ -46,6 +47,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, 
unsigned long val);
 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, unsigned long data,
unsigned long select_idx);
 #else
+void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {}
 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {}
 unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
unsigned long select_idx)
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 9e75372..8476d5e 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -91,6 +91,24 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu,
 }
 
 /**
+ * kvm_pmu_vcpu_reset - reset pmu state for cpu
+ * @vcpu: The vcpu pointer
+ *
+ */
+void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
+{
+   int i;
+   struct kvm_pmu *pmu = >arch.pmu;
+
+   for (i = 0; i < ARMV8_MAX_COUNTERS; i++) {
+   kvm_pmu_stop_counter(vcpu, i);
+   pmu->pmc[i].idx = i;
+   pmu->pmc[i].vcpu = vcpu;
+   }
+   pmu->irq_pending = false;
+}
+
+/**
  * kvm_pmu_sync_hwstate - sync pmu state for cpu
  * @vcpu: The vcpu pointer
  *
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 16/22] KVM: ARM64: Add reset and access handlers for PMUSERENR register

2015-09-11 Thread Shannon Zhao
From: Shannon Zhao 

Since the reset value of PMUSERENR is zero, use reset_val(_cp15) with
zero for its reset handler.

Signed-off-by: Shannon Zhao 
---
 arch/arm64/kvm/sys_regs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 11fc183..b47cd0b 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -597,7 +597,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
  access_pmu_regs, reset_unknown, PMXEVCNTR_EL0 },
/* PMUSERENR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
- trap_raz_wi },
+ access_pmu_regs, reset_unknown, PMUSERENR_EL0 },
/* PMOVSSET_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
  access_pmu_regs, reset_unknown, PMOVSSET_EL0 },
@@ -919,7 +919,8 @@ static const struct sys_reg_desc cp15_regs[] = {
  reset_unknown_cp15, c9_PMXEVTYPER },
{ Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_cp15_regs,
  reset_unknown_cp15, c9_PMXEVCNTR },
-   { Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
+   { Op1( 0), CRn( 9), CRm(14), Op2( 0), access_pmu_cp15_regs,
+ reset_val_cp15,  c9_PMUSERENR, 0 },
{ Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pmu_cp15_regs,
  reset_unknown_cp15, c9_PMINTENSET },
{ Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pmu_cp15_regs,
-- 
2.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V4 2/4] kvm: fix double free for fast mmio eventfd

2015-09-11 Thread Jason Wang


On 09/11/2015 03:46 PM, Cornelia Huck wrote:
> On Fri, 11 Sep 2015 11:17:35 +0800
> Jason Wang  wrote:
>
>> We register wildcard mmio eventfd on two buses, one for KVM_MMIO_BUS
>> and another is KVM_FAST_MMIO_BUS but with a single iodev
>> instance. This will lead an issue: kvm_io_bus_destroy() knows nothing
>> about the devices on two buses points to a single dev. Which will lead
> s/points/pointing/

Will fix this in V5.

>> double free[1] during exit. Fixing this by using allocate two
> s/using allocate/allocating/

Will fix this in V5.

>
>> instances of iodevs then register one on KVM_MMIO_BUS and another on
>> KVM_FAST_MMIO_BUS.
>>
> (...)
>
>> @@ -929,8 +878,66 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum 
>> kvm_bus bus_idx,
>>  static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd 
>> *args)
>>  {
>>  enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
>> +int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
>> +
>> +if (!args->len)
>> +kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
> I think it would be good to explicitly check for bus_idx ==
> KVM_MMIO_BUS here.

Ok.

>
>> +
>> +return ret;
>> +}
>>
>> -return kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
>> +static int
>> +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>> +{
>> +enum kvm_bus  bus_idx;
>> +int ret;
>> +
>> +bus_idx = ioeventfd_bus_from_flags(args->flags);
>> +/* must be natural-word sized, or 0 to ignore length */
>> +switch (args->len) {
>> +case 0:
>> +case 1:
>> +case 2:
>> +case 4:
>> +case 8:
>> +break;
>> +default:
>> +return -EINVAL;
>> +}
>> +
>> +/* check for range overflow */
>> +if (args->addr + args->len < args->addr)
>> +return -EINVAL;
>> +
>> +/* check for extra flags that we don't understand */
>> +if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
>> +return -EINVAL;
>> +
>> +/* ioeventfd with no length can't be combined with DATAMATCH */
>> +if (!args->len &&
>> +args->flags & (KVM_IOEVENTFD_FLAG_PIO |
>> +   KVM_IOEVENTFD_FLAG_DATAMATCH))
>> +return -EINVAL;
>> +
>> +ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
>> +if (ret)
>> +goto fail;
>> +
>> +/* When length is ignored, MMIO is also put on a separate bus, for
>> + * faster lookups.
>> + */
>> +if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) {
> Ditto on a positive check for bus_idx == KVM_MMIO_BUS.

I was thinking maybe this should be done in a separate patch on top.
What's your opinion?

>> +ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
>> +if (ret < 0)
>> +goto fast_fail;
>> +}
>> +
>> +return 0;
>> +
>> +fast_fail:
>> +kvm_deassign_ioeventfd(kvm, args);
> Shouldn't you use kvm_deassign_ioeventfd(kvm, bus_idx, args) here?

Actually, it's the same. (The deassign of fast mmio will return -ENOENT
and will be ignored.) But I admit that doing what you suggested here is better.
Will do this.

Thanks

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V4 3/4] kvm: fix zero length mmio searching

2015-09-11 Thread Jason Wang


On 09/11/2015 04:31 PM, Cornelia Huck wrote:
> On Fri, 11 Sep 2015 10:26:41 +0200
> Paolo Bonzini  wrote:
>
>> On 11/09/2015 05:17, Jason Wang wrote:
>>> +   int len = r2->len ? r1->len : 0;
>>> +
>>> if (r1->addr < r2->addr)
>>> return -1;
>>> -   if (r1->addr + r1->len > r2->addr + r2->len)
>>> +   if (r1->addr + len > r2->addr + r2->len)
>>> return 1;
>> Perhaps better:
>>
>>  gpa_t addr1 = r1->addr;
>>  gpa_t addr2 = r2->addr;
>>
>>  if (addr1 < addr2)
>>  return -1;
>>
>>  /* If r2->len == 0, match the exact address.  If r2->len != 0,
>>   * accept any overlapping write.  Any order is acceptable for
>>   * overlapping ranges, because kvm_io_bus_get_first_dev ensures
>>   * we process all of them.
>>   */
>>  if (r2->len) {
>>  addr1 += r1->len;
>>  addr2 += r2->len;
>>  }
>>
>>  if (addr1 > addr2)
>>  return 1;
>>
>>  return 0;
>>
> +1 to documenting what the semantics are :)
>

Right, better. Will fix this in V5.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] vfio: Whitelist PCI bridges

2015-09-11 Thread Alex Williamson
When determining whether a group is viable, we already allow devices
bound to pcieport.  Generalize this to include any PCI bridge device.

Signed-off-by: Alex Williamson 
---
 drivers/vfio/vfio.c |   31 +--
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 563c510..1c0f98c 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -438,16 +439,33 @@ static struct vfio_device *vfio_group_get_device(struct 
vfio_group *group,
 }
 
 /*
- * Whitelist some drivers that we know are safe (no dma) or just sit on
- * a device.  It's not always practical to leave a device within a group
- * driverless as it could get re-bound to something unsafe.
+ * Some drivers, like pci-stub, are only used to prevent other drivers from
+ * claiming a device and are therefore perfectly legitimate for a user owned
+ * group.  The pci-stub driver has no dependencies on DMA or the IOVA mapping
+ * of the device, but it does prevent the user from having direct access to
+ * the device, which is useful in some circumstances.
+ *
+ * We also assume that we can include PCI interconnect devices, ie. bridges.
+ * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
+ * then all of the downstream devices will be part of the same IOMMU group as
+ * the bridge.  Thus, if placing the bridge into the user owned IOVA space
+ * breaks anything, it only does so for user owned devices downstream.  Note
+ * that error notification via MSI can be affected for platforms that handle
+ * MSI within the same IOVA space as DMA.
  */
-static const char * const vfio_driver_whitelist[] = { "pci-stub", "pcieport" };
+static const char * const vfio_driver_whitelist[] = { "pci-stub" };
 
-static bool vfio_whitelisted_driver(struct device_driver *drv)
+static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
 {
int i;
 
+   if (dev_is_pci(dev)) {
+   struct pci_dev *pdev = to_pci_dev(dev);
+
+   if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
+   return true;
+   }
+
for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
if (!strcmp(drv->name, vfio_driver_whitelist[i]))
return true;
@@ -462,6 +480,7 @@ static bool vfio_whitelisted_driver(struct device_driver 
*drv)
  *  - driver-less
  *  - bound to a vfio driver
  *  - bound to a whitelisted driver
+ *  - a PCI interconnect device
  *
  * We use two methods to determine whether a device is bound to a vfio
  * driver.  The first is to test whether the device exists in the vfio
@@ -486,7 +505,7 @@ static int vfio_dev_viable(struct device *dev, void *data)
}
mutex_unlock(&group->unbound_lock);
 
-   if (!ret || !drv || vfio_whitelisted_driver(drv))
+   if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
return 0;
 
device = vfio_group_get_device(group, dev);

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 05/22] KVM: ARM64: Add a helper for CP15 registers reset to UNKNOWN

2015-09-11 Thread Marc Zyngier
On 11/09/15 09:54, Shannon Zhao wrote:
> From: Shannon Zhao 
> 
> Signed-off-by: Shannon Zhao 
> ---
>  arch/arm64/kvm/sys_regs.h | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
> index d411e25..a0b3811 100644
> --- a/arch/arm64/kvm/sys_regs.h
> +++ b/arch/arm64/kvm/sys_regs.h
> @@ -104,6 +104,14 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu,
>   vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
>  }
>  
> +static inline void reset_unknown_cp15(struct kvm_vcpu *vcpu,
> +   const struct sys_reg_desc *r)
> +{

Oh, and given that you are using this as a function pointer in the next
patch, why is it an inline function?

Please merge this with patch #6.

M.
-- 
Jazz is not dead. It just smells funny...
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 07/22] KVM: ARM64: Add reset and access handlers for PMCEID0 and PMCEID1 register

2015-09-11 Thread Marc Zyngier
On 11/09/15 09:55, Shannon Zhao wrote:
> From: Shannon Zhao 
> 
> Add reset handler which gets host value of PMCEID0 or PMCEID1. Since
> write action to PMCEID0 or PMCEID1 is ignored, add a new case for this.
> 
> Signed-off-by: Shannon Zhao 
> ---
>  arch/arm64/kvm/sys_regs.c | 36 
>  1 file changed, 32 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 24b8972..b3bc717 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -251,6 +251,26 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const 
> struct sys_reg_desc *r)
> | (ARMV8_PMCR_MASK & 0xdecafbad);
>  }
>  
> +static void reset_pmceid(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
> +{
> + u32 pmceid;
> +
> + if (r->reg == PMCEID0_EL0 || r->reg == c9_PMCEID0) {
> + asm volatile("mrs %0, pmceid0_el0\n" : "=r" (pmceid));

Careful here. mrs always acts on a 64bit quantity, even if the register is
internally 32bit. I'd rather you use a u64 variable.

> + if (!vcpu_mode_is_32bit(vcpu))
> + vcpu_sys_reg(vcpu, r->reg) = pmceid;
> + else
> + vcpu_cp15(vcpu, r->reg) = pmceid;
> + } else {
> + /* PMCEID1_EL0 or c9_PMCEID1 */
> + asm volatile("mrs %0, pmceid1_el0\n" : "=r" (pmceid));
> + if (!vcpu_mode_is_32bit(vcpu))
> + vcpu_sys_reg(vcpu, r->reg) = pmceid;
> + else
> + vcpu_cp15(vcpu, r->reg) = pmceid;

Maybe we could have a helper for this kind of sequence:

static void vcpu_sysreg_write(vcpu, const struct sys_reg_desc *r, u64 val)
{
if (!vcpu_mode_is_32_bit(vcpu))
vcpu_sys_reg(vcpu, r->reg) = val;
else
vcpu_cp15(vcpu, r->reg) = lower_32_bit(val);
}

> + }
> +}
> +
>  /* PMU registers accessor. */
>  static bool access_pmu_regs(struct kvm_vcpu *vcpu,
>   const struct sys_reg_params *p,
> @@ -268,6 +288,9 @@ static bool access_pmu_regs(struct kvm_vcpu *vcpu,
>   vcpu_sys_reg(vcpu, r->reg) = val;
>   break;
>   }
> + case PMCEID0_EL0:
> + case PMCEID1_EL0:
> + return ignore_write(vcpu, p);
>   default:
>   vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
>   break;
> @@ -488,10 +511,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
> access_pmu_regs, reset_unknown, PMSELR_EL0 },
>   /* PMCEID0_EL0 */
>   { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
> -   trap_raz_wi },
> +   access_pmu_regs, reset_pmceid, PMCEID0_EL0 },
>   /* PMCEID1_EL0 */
>   { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
> -   trap_raz_wi },
> +   access_pmu_regs, reset_pmceid, PMCEID1_EL0 },
>   /* PMCCNTR_EL0 */
>   { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
> trap_raz_wi },
> @@ -692,6 +715,9 @@ static bool access_pmu_cp15_regs(struct kvm_vcpu *vcpu,
>   vcpu_cp15(vcpu, r->reg) = val;
>   break;
>   }
> + case c9_PMCEID0:
> + case c9_PMCEID1:
> + return ignore_write(vcpu, p);
>   default:
>   vcpu_cp15(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
>   break;
> @@ -738,8 +764,10 @@ static const struct sys_reg_desc cp15_regs[] = {
>   { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
>   { Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmu_cp15_regs,
> reset_unknown_cp15, c9_PMSELR },
> - { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
> - { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
> + { Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmu_cp15_regs,
> +   reset_pmceid, c9_PMCEID0 },
> + { Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmu_cp15_regs,
> +   reset_pmceid, c9_PMCEID1 },
>   { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
>   { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
>   { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
> 


-- 
Jazz is not dead. It just smells funny...
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 06/17] KVM: Make struct kvm_irq_routing_table accessible

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> Move struct kvm_irq_routing_table from irqchip.c to kvm_host.h,
> so we can use it outside of irqchip.c.
> 
> Signed-off-by: Feng Wu 
> ---
>  include/linux/kvm_host.h | 14 ++
>  virt/kvm/irqchip.c   | 10 --
>  2 files changed, 14 insertions(+), 10 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 5ac8d21..5f183fb 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -328,6 +328,20 @@ struct kvm_kernel_irq_routing_entry {
>   struct hlist_node link;
>  };
>  
> +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
> +
> +struct kvm_irq_routing_table {
> + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
> + u32 nr_rt_entries;
> + /*
> +  * Array indexed by gsi. Each entry contains list of irq chips
> +  * the gsi is connected to.
> +  */
> + struct hlist_head map[0];
> +};
> +
> +#endif
> +
>  #ifndef KVM_PRIVATE_MEM_SLOTS
>  #define KVM_PRIVATE_MEM_SLOTS 0
>  #endif
> diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
> index 21c1424..2cf45d3 100644
> --- a/virt/kvm/irqchip.c
> +++ b/virt/kvm/irqchip.c
> @@ -31,16 +31,6 @@
>  #include 
>  #include "irq.h"
>  
> -struct kvm_irq_routing_table {
> - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
> - u32 nr_rt_entries;
> - /*
> -  * Array indexed by gsi. Each entry contains list of irq chips
> -  * the gsi is connected to.
> -  */
> - struct hlist_head map[0];
> -};
> -
>  int kvm_irq_map_gsi(struct kvm *kvm,
>   struct kvm_kernel_irq_routing_entry *entries, int gsi)
>  {
> 

Reviewed-by: Paolo Bonzini 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 6/8] arm/arm64: KVM: Add forwarded physical interrupts documentation

2015-09-11 Thread Andre Przywara
Hi Christoffer,

(actually you are not supposed to reply during your holidays!)

On 09/09/15 09:49, Christoffer Dall wrote:
> On Tue, Sep 8, 2015 at 6:57 PM, Andre Przywara  wrote:
>> Hi Eric,
>>
>> thanks for your answer.
>>
>> On 08/09/15 09:43, Eric Auger wrote:
>>> Hi Andre,
>>> On 09/07/2015 01:25 PM, Andre Przywara wrote:
 Hi,

 firstly: this text is really great, thanks for coming up with that.
 See below for some information I got from tracing the host which I
 cannot make sense of


 On 04/09/15 20:40, Christoffer Dall wrote:
> Forwarded physical interrupts on arm/arm64 is a tricky concept and the
> way we deal with them is not apparently easy to understand by reading
> various specs.
>
> Therefore, add a proper documentation file explaining the flow and
> rationale of the behavior of the vgic.
>
> Some of this text was contributed by Marc Zyngier and edited by me.
> Omissions and errors are all mine.
>
> Signed-off-by: Christoffer Dall 
> ---
>  Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt | 181 
> +
>  1 file changed, 181 insertions(+)
>  create mode 100644 Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
>
> diff --git a/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt 
> b/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
> new file mode 100644
> index 000..24b6f28
> --- /dev/null
> +++ b/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
> @@ -0,0 +1,181 @@
> +KVM/ARM VGIC Forwarded Physical Interrupts
> +==
> +
> +The KVM/ARM code implements software support for the ARM Generic
> +Interrupt Controller's (GIC's) hardware support for virtualization by
> +allowing software to inject virtual interrupts to a VM, which the guest
> +OS sees as regular interrupts.  The code is famously known as the VGIC.
> +
> +Some of these virtual interrupts, however, correspond to physical
> +interrupts from real physical devices.  One example could be the
> +architected timer, which itself supports virtualization, and therefore
> +lets a guest OS program the hardware device directly to raise an
> +interrupt at some point in time.  When such an interrupt is raised, the
> +host OS initially handles the interrupt and must somehow signal this
> +event as a virtual interrupt to the guest.  Another example could be a
> +passthrough device, where the physical interrupts are initially handled
> +by the host, but the device driver for the device lives in the guest OS
> +and KVM must therefore somehow inject a virtual interrupt on behalf of
> +the physical one to the guest OS.
> +
> +These virtual interrupts corresponding to a physical interrupt on the
> +host are called forwarded physical interrupts, but are also sometimes
> +referred to as 'virtualized physical interrupts' and 'mapped interrupts'.
> +
> +Forwarded physical interrupts are handled slightly differently compared
> +to virtual interrupts generated purely by a software emulated device.
> +
> +
> +The HW bit
> +--
> +Virtual interrupts are signalled to the guest by programming the List
> +Registers (LRs) on the GIC before running a VCPU.  The LR is programmed
> +with the virtual IRQ number and the state of the interrupt (Pending,
> +Active, or Pending+Active).  When the guest ACKs and EOIs a virtual
> +interrupt, the LR state moves from Pending to Active, and finally to
> +inactive.
> +
> +The LRs include an extra bit, called the HW bit.  When this bit is set,
> +KVM must also program an additional field in the LR, the physical IRQ
> +number, to link the virtual with the physical IRQ.
> +
> +When the HW bit is set, KVM must EITHER set the Pending OR the Active
> +bit, never both at the same time.
> +
> +Setting the HW bit causes the hardware to deactivate the physical
> +interrupt on the physical distributor when the guest deactivates the
> +corresponding virtual interrupt.
> +
> +
> +Forwarded Physical Interrupts Life Cycle
> +
> +
> +The state of forwarded physical interrupts is managed in the following 
> way:
> +
> +  - The physical interrupt is acked by the host, and becomes active on
> +the physical distributor (*).
> +  - KVM sets the LR.Pending bit, because this is the only way the GICV
> +interface is going to present it to the guest.
> +  - LR.Pending will stay set as long as the guest has not acked the 
> interrupt.
> +  - LR.Pending transitions to LR.Active on the guest read of the IAR, as
> +expected.
> +  - On guest EOI, the *physical distributor* active bit gets cleared,
> +   

Re: [PATCH v7 15/17] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> This patch updates the Posted-Interrupts Descriptor when vCPU
> is blocked.
> 
> pre-block:
> - Add the vCPU to the blocked per-CPU list
> - Set 'NV' to POSTED_INTR_WAKEUP_VECTOR
> 
> post-block:
> - Remove the vCPU from the per-CPU list
> 
> Signed-off-by: Feng Wu 
> ---
>  arch/x86/include/asm/kvm_host.h |   5 ++
>  arch/x86/kvm/vmx.c  | 151 
> 
>  arch/x86/kvm/x86.c  |  55 ---
>  include/linux/kvm_host.h|   3 +
>  virt/kvm/kvm_main.c |   3 +
>  5 files changed, 207 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 22269b4..32af275 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -554,6 +554,8 @@ struct kvm_vcpu_arch {
>*/
>   bool write_fault_to_shadow_pgtable;
>  
> + bool halted;
> +
>   /* set at EPT violation at this point */
>   unsigned long exit_qualification;
>  
> @@ -868,6 +870,9 @@ struct kvm_x86_ops {
>  
>   void (*pi_clear_sn)(struct kvm_vcpu *vcpu);
>   void (*pi_set_sn)(struct kvm_vcpu *vcpu);
> +
> + int (*pi_pre_block)(struct kvm_vcpu *vcpu);
> + void (*pi_post_block)(struct kvm_vcpu *vcpu);

Just pre_block/post_block please.  Also, please document the return
value of pre_block.

> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index ef93fdc..fc7f222 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5869,7 +5869,13 @@ int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
>  {
>   ++vcpu->stat.halt_exits;
>   if (irqchip_in_kernel(vcpu->kvm)) {
> - vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
> + /* Handle posted-interrupt when vCPU is to be halted */
> + if (!kvm_x86_ops->pi_pre_block ||
> + (kvm_x86_ops->pi_pre_block &&

No need to test kvm_x86_ops->pi_pre_block again.

> + kvm_x86_ops->pi_pre_block(vcpu) == 0)) {
> + vcpu->arch.halted = true;
> + vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
> + }
>   return 1;
>   } else {
>   vcpu->run->exit_reason = KVM_EXIT_HLT;
> @@ -6518,6 +6524,21 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>   kvm_vcpu_reload_apic_access_page(vcpu);
>   }
>  
> + /*
> +  * Since posted-interrupts can be set by VT-d HW now, in this
> +  * case, KVM_REQ_EVENT is not set. We move the following
> +  * operations out of the if statement.
> +  */

Just "KVM_REQ_EVENT is not set when posted interrupts are set by VT-d
hardware, so we have to update RVI unconditionally", please.

Could we skip this (in a future patch) if PI.ON=0?

> + if (kvm_lapic_enabled(vcpu)) {
> + /*
> +  * Update architecture specific hints for APIC
> +  * virtual interrupt delivery.
> +  */
> + if (kvm_x86_ops->hwapic_irr_update)
> + kvm_x86_ops->hwapic_irr_update(vcpu,
> + kvm_lapic_find_highest_irr(vcpu));
> + }
> +
>   if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
>   kvm_apic_accept_events(vcpu);
>   if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
> @@ -6534,13 +6555,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>   kvm_x86_ops->enable_irq_window(vcpu);
>  
>   if (kvm_lapic_enabled(vcpu)) {
> - /*
> -  * Update architecture specific hints for APIC
> -  * virtual interrupt delivery.
> -  */
> - if (kvm_x86_ops->hwapic_irr_update)
> - kvm_x86_ops->hwapic_irr_update(vcpu,
> - kvm_lapic_find_highest_irr(vcpu));
>   update_cr8_intercept(vcpu);
>   kvm_lapic_sync_to_vapic(vcpu);
>   }
> @@ -6711,10 +6725,31 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
>  
>   for (;;) {
>   if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
> - !vcpu->arch.apf.halted)
> + !vcpu->arch.apf.halted) {
> + /*
> +  * For some cases, we can get here with
> +  * vcpu->arch.halted being true.
> +  */

Which cases?

Paolo

> + if (kvm_x86_ops->pi_post_block && vcpu->arch.halted) {
> + kvm_x86_ops->pi_post_block(vcpu);
> + vcpu->arch.halted = false;
> + }
> +
>   r = vcpu_enter_guest(vcpu);
> - else
> + } else {
>   r = vcpu_block(kvm, vcpu);
> +
> + /*
> +  * 

Re: [PATCH v2 08/22] KVM: ARM64: PMU: Add perf event map and introduce perf event creating function

2015-09-11 Thread Shannon Zhao



On 2015/9/11 19:04, Marc Zyngier wrote:

On 11/09/15 09:55, Shannon Zhao wrote:

From: Shannon Zhao 

When we use tools like perf on host, perf passes the event type and the
id of this event type category to kernel, then kernel will map them to
hardware event number and write this number to PMU PMEVTYPER_EL0
register. While we're trapping and emulating guest accesses to PMU
registers, we get the hardware event number and map it to the event type
and the id reversely. Then call perf_event kernel API to create an event
for it.

Signed-off-by: Shannon Zhao 
---
  arch/arm64/include/asm/pmu.h |   2 +
  arch/arm64/kvm/Makefile  |   1 +
  include/kvm/arm_pmu.h|  15 +++
  virt/kvm/arm/pmu.c   | 240 +++
  4 files changed, 258 insertions(+)
  create mode 100644 virt/kvm/arm/pmu.c

diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
index 95681e6..42e7093 100644
--- a/arch/arm64/include/asm/pmu.h
+++ b/arch/arm64/include/asm/pmu.h
@@ -33,6 +33,8 @@
  #define ARMV8_PMCR_D  (1 << 3) /* CCNT counts every 64th cpu cycle */
  #define ARMV8_PMCR_X  (1 << 4) /* Export to ETM */
  #define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
+/* Determines which PMCCNTR_EL0 bit generates an overflow */
+#define ARMV8_PMCR_LC  (1 << 6)
  #define   ARMV8_PMCR_N_SHIFT  11   /* Number of counters 
supported */
  #define   ARMV8_PMCR_N_MASK   0x1f
  #define   ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index f90f4aa..78db4ee 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -27,3 +27,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
  kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
+kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 64af88a..387ec6f 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -36,4 +36,19 @@ struct kvm_pmu {
  #endif
  };

+#ifdef CONFIG_KVM_ARM_PMU
+unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
+   unsigned long select_idx);
+void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, unsigned long data,
+   unsigned long select_idx);
+#else
+unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
+   unsigned long select_idx)
+{
+   return 0;
+}
+void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, unsigned long data,
+   unsigned long select_idx) {}
+#endif
+
  #endif
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
new file mode 100644
index 000..0c7fe5c
--- /dev/null
+++ b/virt/kvm/arm/pmu.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2015 Linaro Ltd.
+ * Author: Shannon Zhao 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* PMU HW events mapping. */
+static struct kvm_pmu_hw_event_map {
+   unsigned eventsel;
+   unsigned event_type;
+} kvm_pmu_hw_events[] = {
+   [0] = { 0x11, PERF_COUNT_HW_CPU_CYCLES },
+   [1] = { 0x08, PERF_COUNT_HW_INSTRUCTIONS },
+   [2] = { 0x04, PERF_COUNT_HW_CACHE_REFERENCES },
+   [3] = { 0x03, PERF_COUNT_HW_CACHE_MISSES },
+   [4] = { 0x10, PERF_COUNT_HW_BRANCH_MISSES },


How about using enum armv8_pmuv3_perf_types here?


+};
+
+/* PMU HW cache events mapping. */
+static struct kvm_pmu_hw_cache_event_map {
+   unsigned eventsel;
+   unsigned cache_type;
+   unsigned cache_op;
+   unsigned cache_result;
+} kvm_pmu_hw_cache_events[] = {
+   [0] = { 0x12, PERF_COUNT_HW_CACHE_BPU, PERF_COUNT_HW_CACHE_OP_READ,
+ PERF_COUNT_HW_CACHE_RESULT_ACCESS },
+   [1] = { 0x12, PERF_COUNT_HW_CACHE_BPU, PERF_COUNT_HW_CACHE_OP_WRITE,
+ PERF_COUNT_HW_CACHE_RESULT_ACCESS },
+};
+
+static void kvm_pmu_set_evttyper(struct kvm_vcpu *vcpu, unsigned long idx,
+unsigned long val)
+{
+   if (!vcpu_mode_is_32bit(vcpu))
+   vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + idx) = 

Re: [PATCH kvmtool] Make static libc and guest-init functionality optional.

2015-09-11 Thread Dimitri John Ledkov
On 11 September 2015 at 13:47, Andre Przywara  wrote:
> Hi Dimitri,
>
> thanks for sharing this patch and sorry for the delay.

No worries, I have a few more patches to send, polishing them for release.

>
> (CC:ing Will)
>
> On 04/09/15 13:04, Dimitri John Ledkov wrote:
>> If one typically only boots full disk-images, one wouldn't necessarily
>> want to statically link glibc, for the guest-init feature of the
>> kvmtool. As statically linked glibc triggers heavy security
>> maintenance.
>
> I like the idea of making guest-init optional, and actually was bitten
> by this annoying static libc requirement once before.
> Some comments below:
>

\o/

>>
>> Signed-off-by: Dimitri John Ledkov 
>> ---
>>  Makefile| 11 ++-
>>  builtin-run.c   |  7 +++
>>  builtin-setup.c |  7 +++
>>  3 files changed, 20 insertions(+), 5 deletions(-)
>>
>> diff --git a/Makefile b/Makefile
>> index 1534e6f..42a629a 100644
>> --- a/Makefile
>> +++ b/Makefile
>> @@ -34,8 +34,6 @@ bindir_SQ = $(subst ','\'',$(bindir))
>>  PROGRAM  := lkvm
>>  PROGRAM_ALIAS := vm
>>
>> -GUEST_INIT := guest/init
>> -
>>  OBJS += builtin-balloon.o
>>  OBJS += builtin-debug.o
>>  OBJS += builtin-help.o
>> @@ -279,8 +277,12 @@ ifeq ($(LTO),1)
>>   endif
>>  endif
>>
>> -ifneq ($(call try-build,$(SOURCE_STATIC),,-static),y)
>> -$(error No static libc found. Please install glibc-static package.)
>> +ifeq ($(call try-build,$(SOURCE_STATIC),,-static),y)
>> + CFLAGS  += -DCONFIG_HAS_LIBC
>
> The name CONFIG_HAS_LIBC seems a bit misleading to me, so at least this
> symbol should read CONFIG_HAS_STATIC_LIBC. But I'd prefer to have it
> named after it's user instead: CONFIG_GUEST_INIT (or the like), since
> this is what it protects in the code.
>

OK, sounds good. I am bad at naming things =) this looks good.


>> + GUEST_INIT := guest/init
>> + GUEST_OBJS = guest/guest_init.o
>> +else
>> + NOTFOUND+= static-libc
>>  endif
>>
>>  ifeq (y,$(ARCH_WANT_LIBFDT))
>> @@ -356,7 +358,6 @@ c_flags   = -Wp,-MD,$(depfile) $(CFLAGS)
>>  # $(OTHEROBJS) are things that do not get substituted like this.
>>  #
>>  STATIC_OBJS = $(patsubst %.o,%.static.o,$(OBJS) $(OBJS_STATOPT))
>> -GUEST_OBJS = guest/guest_init.o
>>
>>  $(PROGRAM)-static:  $(STATIC_OBJS) $(OTHEROBJS) $(GUEST_INIT)
>>   $(E) "  LINK" $@
>> diff --git a/builtin-run.c b/builtin-run.c
>> index 1ee75ad..0f67471 100644
>> --- a/builtin-run.c
>> +++ b/builtin-run.c
>> @@ -59,8 +59,13 @@ static int  kvm_run_wrapper;
>>
>>  bool do_debug_print = false;
>>
>> +#ifdef CONFIG_HAS_LIBC
>>  extern char _binary_guest_init_start;
>>  extern char _binary_guest_init_size;
>> +#else
>> +static char _binary_guest_init_start=0;
>> +static char _binary_guest_init_size=0;
>> +#endif
>>
>>  static const char * const run_usage[] = {
>>   "lkvm run [] []",
>> @@ -354,6 +359,8 @@ static int kvm_setup_guest_init(struct kvm *kvm)
>>   char *data;
>>
>>   /* Setup /virt/init */
>> + if (!_binary_guest_init_size)
>> + die("Guest init not compiled");
>
> I wonder if comparing with 0 is safe in every case. I appreciate not
> spoiling the code with #ifdefs, but putting one around here seems
> cleaner to me (especially if you look at the error message).

Ok, I can put the #ifdef here as well. Note that the non-extern
declaration will still be needed in the code above, as otherwise the
build fails to link without static-libc.

>
>>   size = (size_t)&_binary_guest_init_size;
>>   data = (char *)&_binary_guest_init_start;
>>   snprintf(tmp, PATH_MAX, "%s%s/virt/init", kvm__get_dir(), rootfs);
>> diff --git a/builtin-setup.c b/builtin-setup.c
>> index 8b45c56..d77e5e0 100644
>> --- a/builtin-setup.c
>> +++ b/builtin-setup.c
>> @@ -16,8 +16,13 @@
>>  #include 
>>  #include 
>>
>> +#ifdef CONFIG_HAS_LIBC
>>  extern char _binary_guest_init_start;
>>  extern char _binary_guest_init_size;
>> +#else
>> +static char _binary_guest_init_start=0;
>> +static char _binary_guest_init_size=0;
>> +#endif
>>
>>  static const char *instance_name;
>>
>> @@ -131,6 +136,8 @@ static int copy_init(const char *guestfs_name)
>>   int fd, ret;
>>   char *data;
>>
>> + if (!_binary_guest_init_size)
>> + die("Guest init not compiled");
>
> Same as above.

Ack.

>
> Cheers,
> Andre.
>
>>   size = (size_t)&_binary_guest_init_size;
>>   data = (char *)&_binary_guest_init_start;
>>   snprintf(path, PATH_MAX, "%s%s/virt/init", kvm__get_dir(), 
>> guestfs_name);
>>

-- 
Regards,

Dimitri.
Pura Vida!

https://clearlinux.org
Open Source Technology Center
Intel Corporation (UK) Ltd. - Co. Reg. #1134945 - Pipers Way, Swindon SN3 1RJ.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 08/22] KVM: ARM64: PMU: Add perf event map and introduce perf event creating function

2015-09-11 Thread Marc Zyngier
On 11/09/15 14:35, Shannon Zhao wrote:
> 
> 
> On 2015/9/11 19:04, Marc Zyngier wrote:

[...]

>> Having had a chat with Will, it appears that a much better solution
>> would be to ask perf to use raw events instead of trying to map things
>> to perf events (which the guest has already done).
>>
>> See drivers/oprofile/oprofile_perf.c::op_perf_setup().
>>
>> Thoughts?
>>
> 
> Yeah, directly using a PERF_TYPE_RAW event looks simpler. But should we
> check whether the value of the event number written to PMXEVTYPER_EL0 is
> valid? Or is that guaranteed by the guest?

I don't think we need to check for anything. Userspace is allowed to
request any event and the PMU will count it if this event exists, or
won't if it doesn't.

We shouldn't be doing more validation than that, if only because most
events are microarchitectural, and they are mostly undocumented.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 02/17] KVM: Add some helper functions for Posted-Interrupts

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> This patch adds some helper functions to manipulate the
> Posted-Interrupts Descriptor.
> 
> Signed-off-by: Feng Wu 
> ---
>  arch/x86/kvm/vmx.c | 26 ++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 271dd70..316f9bf 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -443,6 +443,8 @@ struct nested_vmx {
>  };
>  
>  #define POSTED_INTR_ON  0
> +#define POSTED_INTR_SN  1
> +
>  /* Posted-Interrupt Descriptor */
>  struct pi_desc {
>   u32 pir[8]; /* Posted interrupt requested */
> @@ -483,6 +485,30 @@ static int pi_test_and_set_pir(int vector, struct 
> pi_desc *pi_desc)
>   return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
>  }
>  
> +static void pi_clear_sn(struct pi_desc *pi_desc)
> +{
> + return clear_bit(POSTED_INTR_SN,
> + (unsigned long *)&pi_desc->control);
> +}
> +
> +static void pi_set_sn(struct pi_desc *pi_desc)
> +{
> + return set_bit(POSTED_INTR_SN,
> + (unsigned long *)&pi_desc->control);
> +}
> +
> +static int pi_test_on(struct pi_desc *pi_desc)
> +{
> + return test_bit(POSTED_INTR_ON,
> + (unsigned long *)&pi_desc->control);
> +}
> +
> +static int pi_test_sn(struct pi_desc *pi_desc)
> +{
> + return test_bit(POSTED_INTR_SN,
> + (unsigned long *)&pi_desc->control);
> +}
> +
>  struct vcpu_vmx {
>   struct kvm_vcpu   vcpu;
>   unsigned long host_rsp;
> 

Reviewed-by: Paolo Bonzini 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 12/17] KVM: Implement IRQ bypass consumer callbacks for x86

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> + struct kvm_kernel_irqfd *irqfd =
> + container_of(cons, struct kvm_kernel_irqfd, consumer);
> +
> + irqfd->producer = prod;

This assignment should be under "if (kvm_x86_ops->update_pi_irte)".

> + return kvm_arch_update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 1);
> +}
> +
> +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
> +   struct irq_bypass_producer *prod)
> +{
> + int ret;
> + struct kvm_kernel_irqfd *irqfd =
> + container_of(cons, struct kvm_kernel_irqfd, consumer);
> +
> + irqfd->producer = NULL;

And here it should be like:

if (!kvm_x86_ops->update_pi_irte) {
WARN_ON(irqfd->producer != NULL);
return;
}

WARN_ON(irqfd->producer != prod);
irqfd->producer = NULL;

Paolo

> +
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 16/17] KVM: Warn if 'SN' is set during posting interrupts by software

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> Currently, we don't support urgent interrupt, all interrupts
> are recognized as non-urgent interrupt, so we cannot post
> interrupts when 'SN' is set.
> 
> If the vcpu is in guest mode, it cannot have been scheduled out,
> and that's the only case when SN is set currently, warning if
> SN is set.
> 
> Signed-off-by: Feng Wu 
> ---
>  arch/x86/kvm/vmx.c | 16 
>  1 file changed, 16 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 64e35ea..eb640a1 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4494,6 +4494,22 @@ static inline bool 
> kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
>  {
>  #ifdef CONFIG_SMP
>   if (vcpu->mode == IN_GUEST_MODE) {
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> + /*
> +  * Currently, we don't support urgent interrupt,
> +  * all interrupts are recognized as non-urgent
> +  * interrupt, so we cannot post interrupts when
> +  * 'SN' is set.
> +  *
> +  * If the vcpu is in guest mode, it means it is
> +  * running instead of being scheduled out and
> +  * waiting in the run queue, and that's the only
> +  * case when 'SN' is set currently, warning if
> +  * 'SN' is set.
> +  */
> + WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
> +
>   apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
>   POSTED_INTR_VECTOR);
>   return true;
> 

Reviewed-by: Paolo Bonzini 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 3/4] irqchip: GIC: Convert to EOImode == 1

2015-09-11 Thread Julien Grall



On 11/09/2015 11:54, Ian Campbell wrote:

On Thu, 2015-09-10 at 17:23 +0100, Julien Grall wrote:

I applied the two patches on top of linus/master and I'm able to boot
correctly on X-gene. Thank you!


Perhaps we should replicate this approach in Xen and get rid of
  PLATFORM_QUIRK_GIC_64K_STRIDE?


I was thinking of doing it. But I wasn't sure whether it was worth having
such an "ugly" patch compared to the quirk.


Regards,

--
Julien Grall
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 17/17] iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> Enable VT-d Posted-Interrupts and add a command line
> parameter for it.
> 
> Signed-off-by: Feng Wu 
> ---
>  Documentation/kernel-parameters.txt |  1 +
>  drivers/iommu/irq_remapping.c   | 12 
>  2 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/kernel-parameters.txt 
> b/Documentation/kernel-parameters.txt
> index 1d6f045..52aca36 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -1547,6 +1547,7 @@ bytes respectively. Such letter suffixes can also be 
> entirely omitted.
>   nosid   disable Source ID checking
>   no_x2apic_optout
>   BIOS x2APIC opt-out request will be ignored
> + nopost  disable Interrupt Posting
>  
>   iomem=  Disable strict checking of access to MMIO memory
>   strict  regions from userspace.
> diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
> index 2d99930..d8c3997 100644
> --- a/drivers/iommu/irq_remapping.c
> +++ b/drivers/iommu/irq_remapping.c
> @@ -22,7 +22,7 @@ int irq_remap_broken;
>  int disable_sourceid_checking;
>  int no_x2apic_optout;
>  
> -int disable_irq_post = 1;
> +int disable_irq_post = 0;
>  
>  static int disable_irq_remap;
>  static struct irq_remap_ops *remap_ops;
> @@ -58,14 +58,18 @@ static __init int setup_irqremap(char *str)
>   return -EINVAL;
>  
>   while (*str) {
> - if (!strncmp(str, "on", 2))
> + if (!strncmp(str, "on", 2)) {
>   disable_irq_remap = 0;
> - else if (!strncmp(str, "off", 3))
> + disable_irq_post = 0;
> + } else if (!strncmp(str, "off", 3)) {
>   disable_irq_remap = 1;
> - else if (!strncmp(str, "nosid", 5))
> + disable_irq_post = 1;
> + } else if (!strncmp(str, "nosid", 5))
>   disable_sourceid_checking = 1;
>   else if (!strncmp(str, "no_x2apic_optout", 16))
>   no_x2apic_optout = 1;
> + else if (!strncmp(str, "nopost", 6))
> + disable_irq_post = 1;
>  
>   str += strcspn(str, ",");
>   while (*str == ',')
> 

Reviewed-by: Paolo Bonzini 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH v3] ppc/spapr: Implement H_RANDOM hypercall in QEMU

2015-09-11 Thread Eric Blake
On 09/11/2015 03:17 AM, Thomas Huth wrote:
> The PAPR interface defines a hypercall to pass high-quality
> hardware generated random numbers to guests. Recent kernels can
> already provide this hypercall to the guest if the right hardware
> random number generator is available. But in case the user wants
> to use another source like EGD, or QEMU is running with an older
> kernel, we should also have this call in QEMU, so that guests that
> do not support virtio-rng yet can get good random numbers, too.
> 
> This patch now adds a new pseude-device to QEMU that either

s/pseude/pseudo/

> directly provides this hypercall to the guest or is able to
> enable the in-kernel hypercall if available. The in-kernel
> hypercall can be enabled with the use-kvm property, e.g.:
> 
>  qemu-system-ppc64 -device spapr-rng,use-kvm=true
> 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [PATCH v7 11/17] KVM: Define two weak arch callbacks for irq bypass manager

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> Define two weak arch callbacks so that archs that don't need
> them don't need to define them.
> 
> Signed-off-by: Feng Wu 
> ---
>  virt/kvm/eventfd.c | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index d7a230f..f3050b9 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -256,6 +256,16 @@ static void irqfd_update(struct kvm *kvm, struct 
> kvm_kernel_irqfd *irqfd)
>   write_seqcount_end(&irqfd->irq_entry_sc);
>  }
>  
> +void __attribute__((weak)) kvm_arch_irq_bypass_stop(
> + struct irq_bypass_consumer *cons)
> +{
> +}
> +
> +void __attribute__((weak)) kvm_arch_irq_bypass_start(
> + struct irq_bypass_consumer *cons)
> +{
> +}
> +
>  static int
>  kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>  {
> 

This would belong in the patch that adds CONFIG_KVM_HAVE_IRQ_BYPASS
(and the functions should be under "#ifdef CONFIG_KVM_HAVE_IRQ_BYPASS").

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 04/17] KVM: Get Posted-Interrupts descriptor address from 'struct kvm_vcpu'

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> Define an interface to get PI descriptor address from the vCPU structure.
> 
> Signed-off-by: Feng Wu 

See the later review: this interface and the one in patch 5 are too
low-level.

Paolo

> ---
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/vmx.c  | 11 +++
>  2 files changed, 13 insertions(+)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index af11bca..d50c1d3 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -858,6 +858,8 @@ struct kvm_x86_ops {
>   void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
>  struct kvm_memory_slot *slot,
>  gfn_t offset, unsigned long mask);
> +
> + u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
>   /* pmu operations of sub-arch */
>   const struct kvm_pmu_ops *pmu_ops;
>  };
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 316f9bf..81a995c 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -610,6 +610,10 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu 
> *vcpu)
>  #define FIELD64(number, name)[number] = VMCS12_OFFSET(name), \
>   [number##_HIGH] = VMCS12_OFFSET(name)+4
>  
> +struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
> +{
> + return &(to_vmx(vcpu)->pi_desc);
> +}
>  
>  static unsigned long shadow_read_only_fields[] = {
>   /*
> @@ -4487,6 +4491,11 @@ static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu 
> *vcpu)
>   return;
>  }
>  
> +static u64 vmx_get_pi_desc_addr(struct kvm_vcpu *vcpu)
> +{
> + return __pa((u64)vcpu_to_pi_desc(vcpu));
> +}
> +
>  /*
>   * Set up the vmcs's constant host-state fields, i.e., host-state fields that
>   * will not change in the lifetime of the guest.
> @@ -10460,6 +10469,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
>   .flush_log_dirty = vmx_flush_log_dirty,
>   .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
>  
> + .get_pi_desc_addr = vmx_get_pi_desc_addr,
> +
>   .pmu_ops = &intel_pmu_ops,
>  };
>  
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 14/17] KVM: Update Posted-Interrupts Descriptor when vCPU is preempted

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> This patch updates the Posted-Interrupts Descriptor when vCPU
> is preempted.
> 
> sched out:
> - Set 'SN' to suppress future non-urgent interrupts posted for
> the vCPU.
> 
> sched in:
> - Clear 'SN'
> - Change NDST if vCPU is scheduled to a different CPU
> - Set 'NV' to POSTED_INTR_VECTOR
> 
> Signed-off-by: Feng Wu 
> ---
>  arch/x86/kvm/vmx.c | 51 +++
>  1 file changed, 51 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 234f720..9c87064 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -45,6 +45,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "trace.h"
>  #include "pmu.h"
> @@ -2001,10 +2002,60 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int 
> cpu)
>   vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
>   vmx->loaded_vmcs->cpu = cpu;
>   }
> +
> + if (irq_remapping_cap(IRQ_POSTING_CAP)) {
> + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
> + struct pi_desc old, new;
> + unsigned int dest;
> +
> + do {
> + old.control = new.control = pi_desc->control;
> +
> + /*
> +  * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
> +  * are two possible cases:
> +  * 1. After running 'pi_pre_block', context switch
> +  *happened. For this case, 'sn' was set in
> +  *vmx_vcpu_put(), so we need to clear it here.
> +  * 2. After running 'pi_pre_block', we were blocked,
> +  *and woken up by some other guy. For this case,
> +  *we don't need to do anything, 'pi_post_block'
> +  *will do everything for us. However, we cannot
> +  *check whether it is case #1 or case #2 here
> +  *(maybe, not needed), so we also clear sn here,
> +  *I think it is not a big deal.
> +  */
> + if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
> + if (vcpu->cpu != cpu) {
> + dest = cpu_physical_id(cpu);
> +
> + if (x2apic_enabled())
> + new.ndst = dest;
> + else
> + new.ndst = (dest << 8) & 0xFF00;
> + }
> +
> + /* set 'NV' to 'notification vector' */
> + new.nv = POSTED_INTR_VECTOR;
> + }
> +
> + /* Allow posting non-urgent interrupts */
> + new.sn = 0;
> + } while (cmpxchg(&pi_desc->control, old.control,
> + new.control) != old.control);
> + }
>  }
>  
>  static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
>  {
> + if (irq_remapping_cap(IRQ_POSTING_CAP)) {
> + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
> +
> + /* Set SN when the vCPU is preempted */
> + if (vcpu->preempted)
> + pi_set_sn(pi_desc);
> + }
> +
>   __vmx_load_host_state(to_vmx(vcpu));
>   if (!vmm_exclusive) {
>   __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
> 

Please make this separate functions vmx_vcpu_pi_load and vmx_vcpu_pi_put.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 3/4] irqchip: GIC: Convert to EOImode == 1

2015-09-11 Thread Ian Campbell
On Thu, 2015-09-10 at 17:23 +0100, Julien Grall wrote:
> I applied the two patches on top of linus/master and I'm able to boot
> correctly on X-gene. Thank you!

Perhaps we should replicate this approach in Xen and get rid of
 PLATFORM_QUIRK_GIC_64K_STRIDE?

Ian.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 03/17] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> This patch defines a new interface kvm_intr_is_single_vcpu(),
> which can return whether the interrupt is for single-CPU or not.
> 
> It is used by VT-d PI, since now we only support single-CPU
> interrupts, For lowest-priority interrupts, if user configures
> it via /proc/irq or uses irqbalance to make it single-CPU, we
> can use PI to deliver the interrupts to it. Full functionality
> of lowest-priority support will be added later.
> 
> Signed-off-by: Feng Wu 
> ---
>  arch/x86/include/asm/kvm_host.h |  3 +++
>  arch/x86/kvm/irq_comm.c | 24 
>  2 files changed, 27 insertions(+)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 49ec903..af11bca 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm,
>  int x86_set_memory_region(struct kvm *kvm,
> const struct kvm_userspace_memory_region *mem);
>  
> +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
> +  struct kvm_vcpu **dest_vcpu);
> +
>  #endif /* _ASM_X86_KVM_HOST_H */
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 9efff9e..a9572a13 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -297,6 +297,30 @@ out:
>   return r;
>  }
>  
> +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
> +  struct kvm_vcpu **dest_vcpu)
> +{
> + int i, r = 0;
> + struct kvm_vcpu *vcpu;
> +
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + if (!kvm_apic_present(vcpu))
> + continue;
> +
> + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
> + irq->dest_id, irq->dest_mode))
> + continue;
> +
> + r++;

if (++r == 2)
return false;

> + *dest_vcpu = vcpu;
> + }
> +
> + if (r == 1)
> + return true;
> + else
> + return false;

... then just "return r == 1;" is enough here.

This could also be optimized to treat APIC_DEST_NOSHORT specially.  Get
the mda, and if it has a single physical CPU check that it has
kvm_apic_present(vcpu) set.  Otherwise fall back to the slow path.

Paolo

> +}
> +
>  #define IOAPIC_ROUTING_ENTRY(irq) \
>   { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
> .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V4 2/4] kvm: fix double free for fast mmio eventfd

2015-09-11 Thread Cornelia Huck
On Fri, 11 Sep 2015 17:25:45 +0800
Jason Wang  wrote:

> On 09/11/2015 03:46 PM, Cornelia Huck wrote:
> > On Fri, 11 Sep 2015 11:17:35 +0800
> > Jason Wang  wrote:

> >> +
> >> +  /* When length is ignored, MMIO is also put on a separate bus, for
> >> +   * faster lookups.
> >> +   */
> >> +  if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) {
> > Ditto on a positive check for bus_idx == KVM_MMIO_BUS.
> 
> I was thinking maybe this should be done in a separate patch on top.
> What's your opinion?

The check is an independent issue, an extra patch is fine (current
usage does not trigger any problems).

> 
> >> +  ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
> >> +  if (ret < 0)
> >> +  goto fast_fail;
> >> +  }

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 10/17] KVM: x86: Update IRTE for posted-interrupts

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> +int kvm_arch_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
> + uint32_t guest_irq, bool set)

Please move all of this code to a vmx.c callback instead of adding
get_pi_desc_addr. Check if this makes the pi_set_sn and pi_clear_sn
callbacks superfluous.

Paolo

> + if (!irq_remapping_cap(IRQ_POSTING_CAP))
> + return 0;


> + idx = srcu_read_lock(&kvm->irq_srcu);
> + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
> + BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
> +
> + hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
> + if (e->type != KVM_IRQ_ROUTING_MSI)
> + continue;
> + /*
> +  * VT-d PI cannot support posting multicast/broadcast
> +  * interrupts to a VCPU, we still use interrupt remapping
> +  * for these kind of interrupts.
> +  *
> +  * For lowest-priority interrupts, we only support
> +  * those with single CPU as the destination, e.g. user
> +  * configures the interrupts via /proc/irq or uses
> +  * irqbalance to make the interrupts single-CPU.
> +  *
> +  * We will support full lowest-priority interrupt later.
> +  *
> +  */
> +
> + kvm_set_msi_irq(e, &irq);
> + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
> + continue;
> +
> + vcpu_info.pi_desc_addr = kvm_x86_ops->get_pi_desc_addr(vcpu);
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 08/22] KVM: ARM64: PMU: Add perf event map and introduce perf event creating function

2015-09-11 Thread Marc Zyngier
On 11/09/15 09:55, Shannon Zhao wrote:
> From: Shannon Zhao 
> 
> When we use tools like perf on host, perf passes the event type and the
> id of this event type category to kernel, then kernel will map them to
> hardware event number and write this number to PMU PMEVTYPER_EL0
> register. While we're trapping and emulating guest accesses to PMU
> registers, we get the hardware event number and map it to the event type
> and the id reversely. Then call perf_event kernel API to create an event
> for it.
> 
> Signed-off-by: Shannon Zhao 
> ---
>  arch/arm64/include/asm/pmu.h |   2 +
>  arch/arm64/kvm/Makefile  |   1 +
>  include/kvm/arm_pmu.h|  15 +++
>  virt/kvm/arm/pmu.c   | 240 
> +++
>  4 files changed, 258 insertions(+)
>  create mode 100644 virt/kvm/arm/pmu.c
> 
> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
> index 95681e6..42e7093 100644
> --- a/arch/arm64/include/asm/pmu.h
> +++ b/arch/arm64/include/asm/pmu.h
> @@ -33,6 +33,8 @@
>  #define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
>  #define ARMV8_PMCR_X (1 << 4) /* Export to ETM */
>  #define ARMV8_PMCR_DP(1 << 5) /* Disable CCNT if 
> non-invasive debug*/
> +/* Determines which PMCCNTR_EL0 bit generates an overflow */
> +#define ARMV8_PMCR_LC(1 << 6)
>  #define  ARMV8_PMCR_N_SHIFT  11   /* Number of counters 
> supported */
>  #define  ARMV8_PMCR_N_MASK   0x1f
>  #define  ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index f90f4aa..78db4ee 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -27,3 +27,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
> +kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> index 64af88a..387ec6f 100644
> --- a/include/kvm/arm_pmu.h
> +++ b/include/kvm/arm_pmu.h
> @@ -36,4 +36,19 @@ struct kvm_pmu {
>  #endif
>  };
>  
> +#ifdef CONFIG_KVM_ARM_PMU
> +unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
> + unsigned long select_idx);
> +void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, unsigned long 
> data,
> + unsigned long select_idx);
> +#else
> +unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
> + unsigned long select_idx)
> +{
> + return 0;
> +}
> +void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, unsigned long 
> data,
> + unsigned long select_idx) {}
> +#endif
> +
>  #endif
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> new file mode 100644
> index 000..0c7fe5c
> --- /dev/null
> +++ b/virt/kvm/arm/pmu.c
> @@ -0,0 +1,240 @@
> +/*
> + * Copyright (C) 2015 Linaro Ltd.
> + * Author: Shannon Zhao 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/* PMU HW events mapping. */
> +static struct kvm_pmu_hw_event_map {
> + unsigned eventsel;
> + unsigned event_type;
> +} kvm_pmu_hw_events[] = {
> + [0] = { 0x11, PERF_COUNT_HW_CPU_CYCLES },
> + [1] = { 0x08, PERF_COUNT_HW_INSTRUCTIONS },
> + [2] = { 0x04, PERF_COUNT_HW_CACHE_REFERENCES },
> + [3] = { 0x03, PERF_COUNT_HW_CACHE_MISSES },
> + [4] = { 0x10, PERF_COUNT_HW_BRANCH_MISSES },

How about using enum armv8_pmuv3_perf_types here?

> +};
> +
> +/* PMU HW cache events mapping. */
> +static struct kvm_pmu_hw_cache_event_map {
> + unsigned eventsel;
> + unsigned cache_type;
> + unsigned cache_op;
> + unsigned cache_result;
> +} kvm_pmu_hw_cache_events[] = {
> + [0] = { 0x12, PERF_COUNT_HW_CACHE_BPU, PERF_COUNT_HW_CACHE_OP_READ,
> +   PERF_COUNT_HW_CACHE_RESULT_ACCESS },
> + [1] = { 0x12, PERF_COUNT_HW_CACHE_BPU, PERF_COUNT_HW_CACHE_OP_WRITE,
> +   PERF_COUNT_HW_CACHE_RESULT_ACCESS },
> +};
> +
> +static void kvm_pmu_set_evttyper(struct kvm_vcpu *vcpu, unsigned long 

Re: [PATCH v2 05/22] KVM: ARM64: Add a helper for CP15 registers reset to UNKNOWN

2015-09-11 Thread Marc Zyngier
On 11/09/15 09:54, Shannon Zhao wrote:
> From: Shannon Zhao 
> 
> Signed-off-by: Shannon Zhao 
> ---
>  arch/arm64/kvm/sys_regs.h | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
> index d411e25..a0b3811 100644
> --- a/arch/arm64/kvm/sys_regs.h
> +++ b/arch/arm64/kvm/sys_regs.h
> @@ -104,6 +104,14 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu,
>   vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
>  }
>  
> +static inline void reset_unknown_cp15(struct kvm_vcpu *vcpu,
> +   const struct sys_reg_desc *r)
> +{
> + BUG_ON(!r->reg);
> + BUG_ON(r->reg >= NR_COPRO_REGS);
> + vcpu_cp15(vcpu, r->reg) = 0xdecafbad;

Given how many times we use this constant, maybe we should consider
having a #define for it?

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 13/17] KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> +void kvm_arch_irqfd_init(struct kvm_kernel_irqfd *irqfd)
> +{
> + irqfd->arch_update = kvm_arch_update_pi_irte;
> +}
> +
>  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 5f183fb..f4005dc 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -34,6 +34,8 @@
>  
>  #include 
>  
> +struct kvm_kernel_irqfd;
> +
>  /*
>   * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
>   * in kvm, other bits are visible for userspace which are defined in
> @@ -1145,6 +1147,15 @@ extern struct kvm_device_ops kvm_xics_ops;
>  extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
>  extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
>  
> +#ifdef __KVM_HAVE_ARCH_IRQFD_INIT
> +void kvm_arch_irqfd_init(struct kvm_kernel_irqfd *irqfd);
> +#else
> +static inline void kvm_arch_irqfd_init(struct kvm_kernel_irqfd *irqfd)
> +{
> + irqfd->arch_update = NULL;
> +}
> +#endif
> +
>  #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
>  
>  static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
> diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
> index 0c1de05..b7aab52 100644
> --- a/include/linux/kvm_irqfd.h
> +++ b/include/linux/kvm_irqfd.h
> @@ -66,6 +66,8 @@ struct kvm_kernel_irqfd {
>   struct work_struct shutdown;
>   struct irq_bypass_consumer consumer;
>   struct irq_bypass_producer *producer;
> + int (*arch_update)(struct kvm *kvm, unsigned int host_irq,
> +uint32_t guest_irq, bool set);
>  };
>  
>  #endif /* __LINUX_KVM_IRQFD_H */
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index f3050b9..b2d9066 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -288,6 +288,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>   INIT_LIST_HEAD(&irqfd->list);
>   INIT_WORK(&irqfd->inject, irqfd_inject);
>   INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
> + kvm_arch_irqfd_init(irqfd);
>   seqcount_init(&irqfd->irq_entry_sc);
>  
>   f = fdget(args->fd);
> @@ -580,13 +581,22 @@ kvm_irqfd_release(struct kvm *kvm)
>   */
>  void kvm_irq_routing_update(struct kvm *kvm)
>  {
> + int ret;
>   struct kvm_kernel_irqfd *irqfd;
>  
>   spin_lock_irq(&kvm->irqfds.lock);
>  
> - list_for_each_entry(irqfd, &kvm->irqfds.items, list)
> + list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
>   irqfd_update(kvm, irqfd);
>  
> + if (irqfd->arch_update && irqfd->producer) {

With the changes I suggested in the previous message, you only need to
check "if (irqfd->producer)" here.  Then you can remove
kvm_arch_irqfd_init and just put the new "if (irqfd->producer)" under
"#ifdef CONFIG_KVM_HAVE_IRQ_BYPASS".

Just rename kvm_arch_update_pi_irte to kvm_arch_update_irqfd_routing.

Paolo

> + ret = irqfd->arch_update(
> + irqfd->kvm, irqfd->producer->irq,
> + irqfd->gsi, 1);
> + WARN_ON(ret);
> + }
> + }
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 07/17] KVM: make kvm_set_msi_irq() public

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> Make kvm_set_msi_irq() public, we can use this function outside.
> 
> Signed-off-by: Feng Wu 
> ---
>  arch/x86/include/asm/kvm_host.h | 4 
>  arch/x86/kvm/irq_comm.c | 4 ++--
>  2 files changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index c4f99f1..82d0709 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -175,6 +175,8 @@ enum {
>   */
>  #define KVM_APIC_PV_EOI_PENDING  1
>  
> +struct kvm_kernel_irq_routing_entry;
> +
>  /*
>   * We don't want allocation failures within the mmu code, so we preallocate
>   * enough memory for a single page fault in a cache.
> @@ -1212,4 +1214,6 @@ int x86_set_memory_region(struct kvm *kvm,
>  bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
>struct kvm_vcpu **dest_vcpu);
>  
> +void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
> +  struct kvm_lapic_irq *irq);
>  #endif /* _ASM_X86_KVM_HOST_H */
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index a9572a13..1319c60 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -91,8 +91,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
> kvm_lapic *src,
>   return r;
>  }
>  
> -static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
> -struct kvm_lapic_irq *irq)
> +void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
> +  struct kvm_lapic_irq *irq)
>  {
>   trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
>  
> 

Reviewed-by: Paolo Bonzini 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v7 08/17] vfio: Select IRQ_BYPASS_MANAGER for vfio PCI devices

2015-09-11 Thread Paolo Bonzini


On 25/08/2015 10:50, Feng Wu wrote:
> Enable irq bypass manager for vfio PCI devices.
> 
> Signed-off-by: Feng Wu 
> ---
>  drivers/vfio/pci/Kconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
> index 579d83b..02912f1 100644
> --- a/drivers/vfio/pci/Kconfig
> +++ b/drivers/vfio/pci/Kconfig
> @@ -2,6 +2,7 @@ config VFIO_PCI
>   tristate "VFIO support for PCI devices"
>   depends on VFIO && PCI && EVENTFD
>   select VFIO_VIRQFD
> + select IRQ_BYPASS_MANAGER
>   help
> Support for the PCI VFIO bus driver.  This is required to make
> use of PCI drivers using the VFIO framework.
> 

Might as well squash it into patch 9.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 3/4] irqchip: GIC: Convert to EOImode == 1

2015-09-11 Thread Marc Zyngier
On 11/09/15 11:59, Julien Grall wrote:
> 
> 
> On 11/09/2015 11:54, Ian Campbell wrote:
>> On Thu, 2015-09-10 at 17:23 +0100, Julien Grall wrote:
>>> I applied the two patches on top of linus/master and I'm able to boot
>>> correctly on X-gene. Thank you!
>>
>> Perhaps we should replicate this approach in Xen and get rid of
>>   PLATFORM_QUIRK_GIC_64K_STRIDE?
> 
> I was thinking to do it. But, I wasn't sure if it was worth to get a 
> such "ugly" patch compare to the quirk.

It is not a quirk. It is actually recommended in the SBSA spec. The
patch is ugly because we can't do the right thing on the one platform
that actually implemented ARM's own recommendation (we can't tell the
bloody firmware to stop overriding our DT).

I would otherwise have added a "arm,use-sbsa-aliasing" property (or
something similar) instead of trying to guess things...

M.
-- 
Jazz is not dead. It just smells funny...
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] vfio/pci: Use kernel VPD access functions

2015-09-11 Thread Rustad, Mark D
Alex,

> On Sep 11, 2015, at 11:16 AM, Alex Williamson  
> wrote:
> 
> RFC - Is this something we should do?

Superficially this looks pretty good. I need to think harder to be sure of the 
details.

> Should we consider providing
> similar emulation through PCI sysfs to allow lspci to also make use
> of the vpd interfaces?

It looks to me like lspci already uses the vpd attribute in sysfs to access 
VPD, so maybe nothing more than this is needed. No doubt lspci can be coerced 
into accessing VPD directly, but is that really worth going after? I'm not so 
sure.

An strace of lspci accessing a device with VPD shows me:

write(1, "\tCapabilities: [e0] Vital Produc"..., 39 Capabilities: [e0] 
Vital Product Data
) = 39
open("/sys/bus/pci/devices/:02:00.0/vpd", O_RDONLY) = 4
^^^ accesses to this should be safe, I 
think

pread(4, "\202", 1, 0)  = 1
pread(4, "\10\0", 2, 1) = 2
pread(4, "PVL Dell", 8, 3)  = 8
write(1, "\t\tProduct Name: PVL Dell\n", 25 Product Name: PVL Dell
) = 25

and so forth.

--
Mark Rustad, Networking Division, Intel Corporation



signature.asc
Description: Message signed with OpenPGP using GPGMail


Re: [PATCH v4 3/4] irqchip: GIC: Convert to EOImode == 1

2015-09-11 Thread Julien Grall
On 11/09/15 12:09, Marc Zyngier wrote:
> On 11/09/15 11:59, Julien Grall wrote:
>>
>>
>> On 11/09/2015 11:54, Ian Campbell wrote:
>>> On Thu, 2015-09-10 at 17:23 +0100, Julien Grall wrote:
>>>> I applied the two patches on top of linus/master and I'm able to boot
>>>> correctly on X-gene. Thank you!
>>>
>>> Perhaps we should replicate this approach in Xen and get rid of
>>>   PLATFORM_QUIRK_GIC_64K_STRIDE?
>>
>> I was thinking to do it. But, I wasn't sure if it was worth to get a 
>> such "ugly" patch compare to the quirk.
> 
> It is not a quirk. It is actually recommended in the SBSA spec. The
> patch is ugly because we can't do the right thing on the one platform
> that actually implemented ARM's own recommendation (we can't tell the
> bloody firmware to stop overriding our DT).
> 
> I would otherwise have added a "arm,use-sbsa-aliasing" property (or
> something similar) instead of trying to guess things...

I will look into porting this patch to Xen.

Regards,

-- 
Julien Grall
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH kvmtool] Make static libc and guest-init functionality optional.

2015-09-11 Thread Andre Przywara
Hi Dimitri,

thanks for sharing this patch and sorry for the delay.

(CC:ing Will)

On 04/09/15 13:04, Dimitri John Ledkov wrote:
> If one typically only boots full disk-images, one wouldn't necessarily
> want to statically link glibc, for the guest-init feature of the
> kvmtool. As statically linked glibc triggers heavy security
> maintenance.

I like the idea of making guest-init optional, and actually was bitten
by this annoying static libc requirement once before.
Some comments below:

> 
> Signed-off-by: Dimitri John Ledkov 
> ---
>  Makefile| 11 ++-
>  builtin-run.c   |  7 +++
>  builtin-setup.c |  7 +++
>  3 files changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/Makefile b/Makefile
> index 1534e6f..42a629a 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -34,8 +34,6 @@ bindir_SQ = $(subst ','\'',$(bindir))
>  PROGRAM  := lkvm
>  PROGRAM_ALIAS := vm
>  
> -GUEST_INIT := guest/init
> -
>  OBJS += builtin-balloon.o
>  OBJS += builtin-debug.o
>  OBJS += builtin-help.o
> @@ -279,8 +277,12 @@ ifeq ($(LTO),1)
>   endif
>  endif
>  
> -ifneq ($(call try-build,$(SOURCE_STATIC),,-static),y)
> -$(error No static libc found. Please install glibc-static package.)
> +ifeq ($(call try-build,$(SOURCE_STATIC),,-static),y)
> + CFLAGS  += -DCONFIG_HAS_LIBC

The name CONFIG_HAS_LIBC seems a bit misleading to me, so at least this
symbol should read CONFIG_HAS_STATIC_LIBC. But I'd prefer to have it
named after its user instead: CONFIG_GUEST_INIT (or the like), since
this is what it protects in the code.

> + GUEST_INIT := guest/init
> + GUEST_OBJS = guest/guest_init.o
> +else
> + NOTFOUND+= static-libc
>  endif
>  
>  ifeq (y,$(ARCH_WANT_LIBFDT))
> @@ -356,7 +358,6 @@ c_flags   = -Wp,-MD,$(depfile) $(CFLAGS)
>  # $(OTHEROBJS) are things that do not get substituted like this.
>  #
>  STATIC_OBJS = $(patsubst %.o,%.static.o,$(OBJS) $(OBJS_STATOPT))
> -GUEST_OBJS = guest/guest_init.o
>  
>  $(PROGRAM)-static:  $(STATIC_OBJS) $(OTHEROBJS) $(GUEST_INIT)
>   $(E) "  LINK" $@
> diff --git a/builtin-run.c b/builtin-run.c
> index 1ee75ad..0f67471 100644
> --- a/builtin-run.c
> +++ b/builtin-run.c
> @@ -59,8 +59,13 @@ static int  kvm_run_wrapper;
>  
>  bool do_debug_print = false;
>  
> +#ifdef CONFIG_HAS_LIBC
>  extern char _binary_guest_init_start;
>  extern char _binary_guest_init_size;
> +#else
> +static char _binary_guest_init_start=0;
> +static char _binary_guest_init_size=0;
> +#endif
>  
>  static const char * const run_usage[] = {
>   "lkvm run [] []",
> @@ -354,6 +359,8 @@ static int kvm_setup_guest_init(struct kvm *kvm)
>   char *data;
>  
>   /* Setup /virt/init */
> + if (!_binary_guest_init_size)
> + die("Guest init not compiled");

I wonder if comparing with 0 is safe in every case. I appreciate not
spoiling the code with #ifdefs, but putting one around here seems
cleaner to me (especially if you look at the error message).

>   size = (size_t)&_binary_guest_init_size;
>   data = (char *)&_binary_guest_init_start;
>   snprintf(tmp, PATH_MAX, "%s%s/virt/init", kvm__get_dir(), rootfs);
> diff --git a/builtin-setup.c b/builtin-setup.c
> index 8b45c56..d77e5e0 100644
> --- a/builtin-setup.c
> +++ b/builtin-setup.c
> @@ -16,8 +16,13 @@
>  #include 
>  #include 
>  
> +#ifdef CONFIG_HAS_LIBC
>  extern char _binary_guest_init_start;
>  extern char _binary_guest_init_size;
> +#else
> +static char _binary_guest_init_start=0;
> +static char _binary_guest_init_size=0;
> +#endif
>  
>  static const char *instance_name;
>  
> @@ -131,6 +136,8 @@ static int copy_init(const char *guestfs_name)
>   int fd, ret;
>   char *data;
>  
> + if (!_binary_guest_init_size)
> + die("Guest init not compiled");

Same as above.

Cheers,
Andre.

>   size = (size_t)&_binary_guest_init_size;
>   data = (char *)&_binary_guest_init_start;
>   snprintf(path, PATH_MAX, "%s%s/virt/init", kvm__get_dir(), 
> guestfs_name);
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 kvmtool] Make static libc and guest-init functionality optional.

2015-09-11 Thread Dimitri John Ledkov
If one typically only boots full disk-images, one wouldn't necessarily
want to statically link glibc for the guest-init feature of
kvmtool, as a statically linked glibc triggers heavy security
maintenance.

Signed-off-by: Dimitri John Ledkov 
---
 Changes since v1:
 - rename CONFIG_HAS_LIBC to CONFIG_GUEST_INIT for clarity
 - use more ifdefs, instead of runtime check of _binary_guest_init_size==0
 
 Makefile| 11 ++-
 builtin-run.c   |  6 ++
 builtin-setup.c |  6 ++
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index 1534e6f..bc6059c 100644
--- a/Makefile
+++ b/Makefile
@@ -34,8 +34,6 @@ bindir_SQ = $(subst ','\'',$(bindir))
 PROGRAM:= lkvm
 PROGRAM_ALIAS := vm
 
-GUEST_INIT := guest/init
-
 OBJS   += builtin-balloon.o
 OBJS   += builtin-debug.o
 OBJS   += builtin-help.o
@@ -279,8 +277,12 @@ ifeq ($(LTO),1)
endif
 endif
 
-ifneq ($(call try-build,$(SOURCE_STATIC),,-static),y)
-$(error No static libc found. Please install glibc-static package.)
+ifeq ($(call try-build,$(SOURCE_STATIC),,-static),y)
+   CFLAGS  += -DCONFIG_GUEST_INIT
+   GUEST_INIT := guest/init
+   GUEST_OBJS = guest/guest_init.o
+else
+   NOTFOUND+= static-libc
 endif
 
 ifeq (y,$(ARCH_WANT_LIBFDT))
@@ -356,7 +358,6 @@ c_flags = -Wp,-MD,$(depfile) $(CFLAGS)
 # $(OTHEROBJS) are things that do not get substituted like this.
 #
 STATIC_OBJS = $(patsubst %.o,%.static.o,$(OBJS) $(OBJS_STATOPT))
-GUEST_OBJS = guest/guest_init.o
 
 $(PROGRAM)-static:  $(STATIC_OBJS) $(OTHEROBJS) $(GUEST_INIT)
$(E) "  LINK" $@
diff --git a/builtin-run.c b/builtin-run.c
index 1ee75ad..e27acd6 100644
--- a/builtin-run.c
+++ b/builtin-run.c
@@ -59,8 +59,10 @@ static int  kvm_run_wrapper;
 
 bool do_debug_print = false;
 
+#ifdef CONFIG_GUEST_INIT
 extern char _binary_guest_init_start;
 extern char _binary_guest_init_size;
+#endif
 
 static const char * const run_usage[] = {
"lkvm run [] []",
@@ -347,6 +349,7 @@ void kvm_run_help(void)
 
 static int kvm_setup_guest_init(struct kvm *kvm)
 {
+#ifdef CONFIG_GUEST_INIT
const char *rootfs = kvm->cfg.custom_rootfs_name;
char tmp[PATH_MAX];
size_t size;
@@ -367,6 +370,9 @@ static int kvm_setup_guest_init(struct kvm *kvm)
close(fd);
 
return 0;
+#else
+   die("Guest init not compiled");
+#endif
 }
 
 static int kvm_run_set_sandbox(struct kvm *kvm)
diff --git a/builtin-setup.c b/builtin-setup.c
index 8b45c56..ff796c3 100644
--- a/builtin-setup.c
+++ b/builtin-setup.c
@@ -16,8 +16,10 @@
 #include 
 #include 
 
+#ifdef CONFIG_GUEST_INIT
 extern char _binary_guest_init_start;
 extern char _binary_guest_init_size;
+#endif
 
 static const char *instance_name;
 
@@ -126,6 +128,7 @@ static const char *guestfs_symlinks[] = {
 
 static int copy_init(const char *guestfs_name)
 {
+#ifdef CONFIG_GUEST_INIT
char path[PATH_MAX];
size_t size;
int fd, ret;
@@ -144,6 +147,9 @@ static int copy_init(const char *guestfs_name)
close(fd);
 
return 0;
+#else
+   die("Guest init not compiled");
+#endif
 }
 
 static int copy_passwd(const char *guestfs_name)
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html