[PATCH 1/1] KVM: arm/arm64: arch_timer: Fix TimerValue TVAL calculation in KVM

2019-03-29 Thread Wei Huang
Recently the generic timer test of kvm-unit-tests failed to complete
(stalled) when a physical timer is being used. This issue is caused by
an incorrect update of CNT_CVAL when TimerValue is accessed, introduced
by commit 84135d3d18da ("KVM: arm/arm64: consolidate arch timer trap
handlers"). According to the Arm ARM, the read/write behavior of
accesses to the TimerValue registers is expected to be:

  * READ:  TimerValue = CompareValue - (Counter - Offset)
  * WRITE: CompareValue = (Counter - Offset) + Sign(TimerValue)

This patch fixes the TVAL read/write code path according to the
specification.
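
As an illustration only (not part of the patch), the two architectural
formulas map to the following sketch. The helper names and the cval/cnt/off
parameters are made up for the example; they stand for the compare value
(cnt_cval), the current physical counter (kvm_phys_timer_read()) and the
counter offset (cntvoff) used in the hunks below:

static u64 tval_read(u64 cval, u64 cnt, u64 off)
{
	/* TimerValue = CompareValue - (Counter - Offset) */
	return cval - (cnt - off);
}

static u64 tval_write(u64 tval, u64 cnt, u64 off)
{
	/*
	 * CompareValue = (Counter - Offset) + Sign(TimerValue);
	 * TVAL is architecturally a signed 32-bit value, hence the cast.
	 */
	return (cnt - off) + (s64)(s32)tval;
}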

Signed-off-by: Wei Huang 
---
 virt/kvm/arm/arch_timer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 3417f2dbc366..d43308dc3617 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -812,7 +812,7 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
 
switch (treg) {
case TIMER_REG_TVAL:
-   val = kvm_phys_timer_read() - timer->cntvoff - timer->cnt_cval;
+   val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff;
break;
 
case TIMER_REG_CTL:
@@ -858,7 +858,7 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
 {
switch (treg) {
case TIMER_REG_TVAL:
-   timer->cnt_cval = val - kvm_phys_timer_read() - timer->cntvoff;
+   timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + val;
break;
 
case TIMER_REG_CTL:
-- 
2.14.5



Re: host stalls when qemu-system-aarch64 with kvm and pflash

2017-04-07 Thread Wei Huang


On 03/30/2017 05:51 AM, Marc Zyngier wrote:
> On 29/03/17 19:56, Christoffer Dall wrote:
>> On Tue, Mar 28, 2017 at 01:24:15PM -0700, Radha Mohan wrote:
>>> On Tue, Mar 28, 2017 at 1:16 PM, Christoffer Dall  wrote:
 Hi Radha,

 On Tue, Mar 28, 2017 at 12:58:24PM -0700, Radha Mohan wrote:
> Hi,
> I am seeing an issue with qemu-system-aarch64 when using pflash
> (booting kernel via UEFI bios).
>
> Host kernel: 4.11.0-rc3-next-20170323
> Qemu version: v2.9.0-rc1
>
> Command used:
> ./aarch64-softmmu/qemu-system-aarch64 -cpu host -enable-kvm -M
> virt,gic_version=3 -nographic -smp 1 -m 2048 -drive
> if=none,id=hd0,file=/root/zesty-server-cloudimg-arm64.img,id=0 -device
> virtio-blk-device,drive=hd0 -pflash /root/flash0.img -pflash
> /root/flash1.img
>
>
> As soon as the guest kernel boots, the host starts to stall and prints
> the messages below. The system never recovers: I can neither power off
> the guest nor the host, so I have to resort to an external power reset
> of the host.
>
> ==
> [  116.199077] NMI watchdog: BUG: soft lockup - CPU#25 stuck for 23s!
> [kworker/25:1:454]
> [  116.206901] Modules linked in: binfmt_misc nls_iso8859_1 aes_ce_blk
> shpchp crypto_simd gpio_keys cryptd aes_ce_cipher ghash_ce sha2_ce
> sha1_ce uio_pdrv_genirq uio autofs4 btrfs raid10 rai
> d456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor
> raid6_pq libcrc32c raid1 raid0 multipath linear ast i2c_algo_bit ttm
> drm_kms_helper syscopyarea sysfillrect sysimgblt fb_s
> ys_fops drm nicvf ahci nicpf libahci thunder_bgx thunder_xcv
> mdio_thunder mdio_cavium
>
> [  116.206995] CPU: 25 PID: 454 Comm: kworker/25:1 Not tainted
> 4.11.0-rc3-next-20170323 #1
> [  116.206997] Hardware name: www.cavium.com crb-1s/crb-1s, BIOS 0.3 Feb 
> 23 2017
> [  116.207010] Workqueue: events netstamp_clear
> [  116.207015] task: 801f906b5400 task.stack: 801f901a4000
> [  116.207020] PC is at smp_call_function_many+0x284/0x2e8
> [  116.207023] LR is at smp_call_function_many+0x244/0x2e8
> [  116.207026] pc : [] lr : []
> pstate: 8145
> [  116.207028] sp : 801f901a7be0
> [  116.207030] x29: 801f901a7be0 x28: 09139000
> [  116.207036] x27: 09139434 x26: 0080
> [  116.207041] x25:  x24: 081565d0
> [  116.207047] x23: 0001 x22: 08e11e00
> [  116.207052] x21: 801f6d5cff00 x20: 801f6d5cff08
> [  116.207057] x19: 09138e38 x18: 0a03
> [  116.207063] x17: b77c9028 x16: 082e81d8
> [  116.207068] x15: 3d0d6dd44d08 x14: 0036312196549b4a
> [  116.207073] x13: 58dabe4c x12: 0018
> [  116.207079] x11: 366e2f04 x10: 09f0
> [  116.207084] x9 : 801f901a7d30 x8 : 0002
> [  116.207089] x7 :  x6 : 
> [  116.207095] x5 :  x4 : 0020
> [  116.207100] x3 : 0020 x2 : 
> [  116.207105] x1 : 801f6d682578 x0 : 0003
>
> [  150.443116] INFO: rcu_sched self-detected stall on CPU
> [  150.448261]  25-...: (14997 ticks this GP)
> idle=47a/141/0 softirq=349/349 fqs=7495
> [  150.451115] INFO: rcu_sched detected stalls on CPUs/tasks:
> [  150.451123]  25-...: (14997 ticks this GP)
> idle=47a/141/0 softirq=349/349 fqs=7495
> [  150.451124]  (detected by 13, t=15002 jiffies, g=805, c=804, q=8384)
> [  150.451136] Task dump for CPU 25:
> [  150.451138] kworker/25:1R  running task0   454  2 
> 0x0002
> [  150.451155] Workqueue: events netstamp_clear
> [  150.451158] Call trace:
> [  150.451164] [] __switch_to+0x90/0xa8
> [  150.451172] [] static_key_slow_inc+0x128/0x138
> [  150.451175] [] static_key_enable+0x34/0x60
> [  150.451178] [] netstamp_clear+0x68/0x80
> [  150.451181] [] process_one_work+0x158/0x478
> [  150.451183] [] worker_thread+0x50/0x4a8
> [  150.451187] [] kthread+0x108/0x138
> [  150.451190] [] ret_from_fork+0x10/0x50
> [  150.477451]   (t=15008 jiffies g=805 c=804 q=8384)
> [  150.482242] Task dump for CPU 25:
> [  150.482245] kworker/25:1R  running task0   454  2 
> 0x0002
> [  150.482259] Workqueue: events netstamp_clear
> [  150.482264] Call trace:
> [  150.482271] [] dump_backtrace+0x0/0x2b0
> [  150.482277] [] show_stack+0x24/0x30
> [  150.482281] [] sched_show_task+0x128/0x178
> [  150.482285] [] dump_cpu_task+0x48/0x58
> [  150.482288] [] rcu_dump_cpu_stacks+0xa0/0xe8
> [  150.482297] [] rcu_check_callbacks+0x774/0x938
> [  150.482305] [] update_process_times+0x34/0x60
> [  

Re: [PATCH kvm-unit-tests v2 2/2] arm/pmu: don't run tcg tests

2016-12-09 Thread Wei Huang


On 12/08/2016 11:05 AM, Andrew Jones wrote:
> The TCG PMU is barely implemented for ARM and not at all implemented
> for AArch64. Let's not bother running the TCG-only tests yet. We'll
> likely move them to a new TCG-only unittests.cfg at some point before
> re-enabling them too.
> 
> Signed-off-by: Andrew Jones 

It is always safe to disable the TCG tests, as proposed in this patch.
However, I don't think we need it, because the PMU will behave correctly
after your PATCH 1:

1. Under TCG AArch32 mode, get_pmu_version() returns 2. This is
acceptable for pmu-tcg-icount-1 and pmu-tcg-icount-256, so we should
allow the tests to proceed.
2. Under TCG AArch64, get_pmu_version() returns 0, so pmu-tcg-icount-1
and pmu-tcg-icount-256 will skip because pmu_probe() returns FALSE. As
long as there isn't an error, most people will be OK with seeing a SKIP
message.

Thanks,
-Wei

> ---
>  arm/unittests.cfg | 20 ++--
>  1 file changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/arm/unittests.cfg b/arm/unittests.cfg
> index 044d97c9e73d..65f9c4c0b9eb 100644
> --- a/arm/unittests.cfg
> +++ b/arm/unittests.cfg
> @@ -65,15 +65,15 @@ file = pmu.flat
>  groups = pmu
>  
>  # Test PMU support (TCG) with -icount IPC=1
> -[pmu-tcg-icount-1]
> -file = pmu.flat
> -extra_params = -icount 0 -append '1'
> -groups = pmu
> -accel = tcg
> +#[pmu-tcg-icount-1]
> +#file = pmu.flat
> +#extra_params = -icount 0 -append '1'
> +#groups = pmu
> +#accel = tcg
>  
>  # Test PMU support (TCG) with -icount IPC=256
> -[pmu-tcg-icount-256]
> -file = pmu.flat
> -extra_params = -icount 8 -append '256'
> -groups = pmu
> -accel = tcg
> +#[pmu-tcg-icount-256]
> +#file = pmu.flat
> +#extra_params = -icount 8 -append '256'
> +#groups = pmu
> +#accel = tcg
> 


Re: [PATCH kvm-unit-tests v2 1/2] arm/pmu: fix probe on AArch64

2016-12-09 Thread Wei Huang
Reviewed-by: Wei Huang <w...@redhat.com>. I also tested it on a real
machine and it works.

Thanks,
-Wei

On 12/08/2016 11:05 AM, Andrew Jones wrote:
> The spec for ID_DFR0_EL1 says "In an AArch64-only implementation,
> this register is UNKNOWN." Indeed ThunderX just returns zero when
> that register is read. This means we can't rely on a non-zero
> value to determine if we can test the PMU. For AArch64 we need to
> read ID_AA64DFR0_EL1. This patch has the side effect of no longer
> running PMU tests on TCG for AArch64. That's actually another fix,
> though, as TCG chooses not to implement a PMU for AArch64 at this
> time. The only way it worked before was probing the wrong register
> and proceeding even though the version was 2, which is not a valid
> version for AArch64. When TCG eventually implements a PMU things
> should "just work".
> 
> Signed-off-by: Andrew Jones <drjo...@redhat.com>
> ---
>  arm/pmu.c | 37 ++---
>  1 file changed, 22 insertions(+), 15 deletions(-)
> 
> diff --git a/arm/pmu.c b/arm/pmu.c
> index a39dae43c99e..c4d5c97dbf87 100644
> --- a/arm/pmu.c
> +++ b/arm/pmu.c
> @@ -28,15 +28,15 @@
>  #define PMU_PMCR_IMP_SHIFT 24
>  #define PMU_PMCR_IMP_MASK  0xff
>  
> -#define ID_DFR0_PERFMON_SHIFT 24
> -#define ID_DFR0_PERFMON_MASK  0xf
> -
> -#define PMU_CYCLE_IDX 31
> +#define PMU_CYCLE_IDX  31
>  
>  #define NR_SAMPLES 10
>  
>  static unsigned int pmu_version;
>  #if defined(__arm__)
> +#define ID_DFR0_PERFMON_SHIFT 24
> +#define ID_DFR0_PERFMON_MASK  0xf
> +
>  #define PMCR __ACCESS_CP15(c9, 0, c12, 0)
>  #define ID_DFR0  __ACCESS_CP15(c0, 0, c1, 2)
>  #define PMSELR   __ACCESS_CP15(c9, 0, c12, 5)
> @@ -50,6 +50,11 @@ static inline uint32_t get_pmcr(void) { return 
> read_sysreg(PMCR); }
>  static inline void set_pmcr(uint32_t v) { write_sysreg(v, PMCR); }
>  static inline void set_pmcntenset(uint32_t v) { write_sysreg(v, PMCNTENSET); 
> }
>  
> +static inline uint8_t get_pmu_version(void)
> +{
> + return (get_id_dfr0() >> ID_DFR0_PERFMON_SHIFT) & ID_DFR0_PERFMON_MASK;
> +}
> +
>  static inline uint64_t get_pmccntr(void)
>  {
>   if (pmu_version == 0x3)
> @@ -95,7 +100,10 @@ static inline void precise_instrs_loop(int loop, uint32_t 
> pmcr)
>   : "cc");
>  }
>  #elif defined(__aarch64__)
> -static inline uint32_t get_id_dfr0(void) { return read_sysreg(id_dfr0_el1); }
> +#define ID_AA64DFR0_PERFMON_SHIFT 8
> +#define ID_AA64DFR0_PERFMON_MASK  0xf
> +
> +static inline uint32_t get_id_aa64dfr0(void) { return 
> read_sysreg(id_aa64dfr0_el1); }
>  static inline uint32_t get_pmcr(void) { return read_sysreg(pmcr_el0); }
>  static inline void set_pmcr(uint32_t v) { write_sysreg(v, pmcr_el0); }
>  static inline uint64_t get_pmccntr(void) { return read_sysreg(pmccntr_el0); }
> @@ -103,6 +111,12 @@ static inline void set_pmccntr(uint64_t v) { 
> write_sysreg(v, pmccntr_el0); }
>  static inline void set_pmcntenset(uint32_t v) { write_sysreg(v, 
> pmcntenset_el0); }
>  static inline void set_pmccfiltr(uint32_t v) { write_sysreg(v, 
> pmccfiltr_el0); }
>  
> +static inline uint8_t get_pmu_version(void)
> +{
> + uint8_t ver = (get_id_aa64dfr0() >> ID_AA64DFR0_PERFMON_SHIFT) & 
> ID_AA64DFR0_PERFMON_MASK;
> + return ver == 1 ? 3 : ver;
> +}
> +
>  /*
>   * Extra instructions inserted by the compiler would be difficult to 
> compensate
>   * for, so hand assemble everything between, and including, the PMCR accesses
> @@ -256,16 +270,9 @@ static bool check_cpi(int cpi)
>  /* Return FALSE if no PMU found, otherwise return TRUE */
>  bool pmu_probe(void)
>  {
> - uint32_t dfr0;
> -
> - /* probe pmu version */
> - dfr0 = get_id_dfr0();
> - pmu_version = (dfr0 >> ID_DFR0_PERFMON_SHIFT) & ID_DFR0_PERFMON_MASK;
> -
> - if (pmu_version)
> - report_info("PMU version: %d", pmu_version);
> -
> - return pmu_version;
> + pmu_version = get_pmu_version();
> + report_info("PMU version: %d", pmu_version);
> + return pmu_version != 0 && pmu_version != 0xf;
>  }
>  
>  int main(int argc, char *argv[])
> 


[kvm-unit-tests PATCH v14 4/5] arm: pmu: Check cycle count increases

2016-12-06 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
even for the smallest delta of two subsequent reads.

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
---
 arm/pmu.c | 66 +++
 1 file changed, 66 insertions(+)

diff --git a/arm/pmu.c b/arm/pmu.c
index bf6ac69..d9ff19d 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -18,6 +18,9 @@
 #include "asm/sysreg.h"
 #include "asm/processor.h"
 
+#define PMU_PMCR_E (1 << 0)
+#define PMU_PMCR_C (1 << 2)
+#define PMU_PMCR_LC(1 << 6)
 #define PMU_PMCR_N_SHIFT   11
 #define PMU_PMCR_N_MASK0x1f
 #define PMU_PMCR_ID_SHIFT  16
@@ -28,16 +31,47 @@
 #define ID_DFR0_PERFMON_SHIFT 24
 #define ID_DFR0_PERFMON_MASK  0xf
 
+#define PMU_CYCLE_IDX 31
+
+#define NR_SAMPLES 10
+
 static unsigned int pmu_version;
 #if defined(__arm__)
 #define PMCR __ACCESS_CP15(c9, 0, c12, 0)
 #define ID_DFR0  __ACCESS_CP15(c0, 0, c1, 2)
+#define PMSELR   __ACCESS_CP15(c9, 0, c12, 5)
+#define PMXEVTYPER   __ACCESS_CP15(c9, 0, c13, 1)
+#define PMCNTENSET   __ACCESS_CP15(c9, 0, c12, 1)
+#define PMCCNTR32__ACCESS_CP15(c9, 0, c13, 0)
+#define PMCCNTR64__ACCESS_CP15_64(0, c9)
 
 static inline uint32_t get_id_dfr0(void) { return read_sysreg(ID_DFR0); }
 static inline uint32_t get_pmcr(void) { return read_sysreg(PMCR); }
+static inline void set_pmcr(uint32_t v) { write_sysreg(v, PMCR); }
+static inline void set_pmcntenset(uint32_t v) { write_sysreg(v, PMCNTENSET); }
+
+static inline uint64_t get_pmccntr(void)
+{
+   if (pmu_version == 0x3)
+   return read_sysreg(PMCCNTR64);
+   else
+   return read_sysreg(PMCCNTR32);
+}
+
+/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
+static inline void set_pmccfiltr(uint32_t value)
+{
+   write_sysreg(PMU_CYCLE_IDX, PMSELR);
+   write_sysreg(value, PMXEVTYPER);
+   isb();
+}
 #elif defined(__aarch64__)
 static inline uint32_t get_id_dfr0(void) { return read_sysreg(id_dfr0_el1); }
 static inline uint32_t get_pmcr(void) { return read_sysreg(pmcr_el0); }
+static inline void set_pmcr(uint32_t v) { write_sysreg(v, pmcr_el0); }
+static inline uint64_t get_pmccntr(void) { return read_sysreg(pmccntr_el0); }
+static inline void set_pmcntenset(uint32_t v) { write_sysreg(v, 
pmcntenset_el0); }
+static inline void set_pmccfiltr(uint32_t v) { write_sysreg(v, pmccfiltr_el0); 
}
 #endif
 
 /*
@@ -63,6 +97,37 @@ static bool check_pmcr(void)
return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
 }
 
+/*
+ * Ensure that the cycle counter progresses between back-to-back reads.
+ */
+static bool check_cycles_increase(void)
+{
+   bool success = true;
+
+   /* init before event access, this test only cares about cycle count */
+   set_pmcntenset(1 << PMU_CYCLE_IDX);
+   set_pmccfiltr(0); /* count cycles in EL0, EL1, but not EL2 */
+
+   set_pmcr(get_pmcr() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E);
+
+   for (int i = 0; i < NR_SAMPLES; i++) {
+   uint64_t a, b;
+
+   a = get_pmccntr();
+   b = get_pmccntr();
+
+   if (a >= b) {
+   printf("Read %"PRId64" then %"PRId64".\n", a, b);
+   success = false;
+   break;
+   }
+   }
+
+   set_pmcr(get_pmcr() & ~PMU_PMCR_E);
+
+   return success;
+}
+
 /* Return FALSE if no PMU found, otherwise return TRUE */
 bool pmu_probe(void)
 {
@@ -88,6 +153,7 @@ int main(void)
report_prefix_push("pmu");
 
report("Control register", check_pmcr());
+   report("Monotonically increasing cycle count", check_cycles_increase());
 
return report_summary();
 }
-- 
1.8.3.1



[kvm-unit-tests PATCH v14 2/5] arm: Add support for read_sysreg() and write_sysreg()

2016-12-06 Thread Wei Huang
This patch adds two new macros to support read/write operations on ARMv7
and ARMv8 system registers. As part of the change, xstr() is revised to
support variable arguments. With it, an ARMv7 system register can be defined
with __ACCESS_CP15() or __ACCESS_CP15_64(), depending on whether it is
32-bit or 64-bit. get_mpidr() is rewritten with the new macros.
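
For illustration, once this patch is applied a CP15 register can be defined
and accessed as below. MPIDR is the example from the patch itself; PMCCNTR64
uses the encoding from the PMU patches later in this series:

#define MPIDR     __ACCESS_CP15(c0, 0, c0, 5)
#define PMCCNTR64 __ACCESS_CP15_64(0, c9)

static inline unsigned int get_mpidr(void)
{
	/* expands to: mrc p15, 0, %0, c0, c0, 5 */
	return read_sysreg(MPIDR);
}

static inline void set_pmccntr64(uint64_t v)
{
	/* expands to: mcrr p15, 0, %Q0, %R0, c9 */
	write_sysreg(v, PMCCNTR64);
}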

Suggested-by: Andrew Jones <drjo...@redhat.com>
Signed-off-by: Wei Huang <w...@redhat.com>
---
 lib/arm/asm/processor.h   |  6 +++---
 lib/arm/asm/sysreg.h  | 19 +++
 lib/arm64/asm/processor.h | 11 ---
 lib/arm64/asm/sysreg.h| 26 ++
 lib/libcflat.h|  4 ++--
 5 files changed, 54 insertions(+), 12 deletions(-)
 create mode 100644 lib/arm64/asm/sysreg.h

diff --git a/lib/arm/asm/processor.h b/lib/arm/asm/processor.h
index f25e7ee..c831749 100644
--- a/lib/arm/asm/processor.h
+++ b/lib/arm/asm/processor.h
@@ -6,6 +6,7 @@
  * This work is licensed under the terms of the GNU LGPL, version 2.
  */
 #include 
+#include 
 
 enum vector {
EXCPTN_RST,
@@ -33,11 +34,10 @@ static inline unsigned long current_cpsr(void)
 
 #define current_mode() (current_cpsr() & MODE_MASK)
 
+#define MPIDR __ACCESS_CP15(c0, 0, c0, 5)
 static inline unsigned int get_mpidr(void)
 {
-   unsigned int mpidr;
-   asm volatile("mrc p15, 0, %0, c0, c0, 5" : "=r" (mpidr));
-   return mpidr;
+   return read_sysreg(MPIDR);
 }
 
 /* Only support Aff0 for now, up to 4 cpus */
diff --git a/lib/arm/asm/sysreg.h b/lib/arm/asm/sysreg.h
index 3e1ad3a..02dbe3d 100644
--- a/lib/arm/asm/sysreg.h
+++ b/lib/arm/asm/sysreg.h
@@ -34,4 +34,23 @@
 #define CR_AFE (1 << 29)   /* Access flag enable   */
 #define CR_TE  (1 << 30)   /* Thumb exception enable   */
 
+#ifndef __ASSEMBLY__
+#include 
+
+#define __ACCESS_CP15(CRn, Op1, CRm, Op2)  \
+   "mrc", "mcr", xstr(p15, Op1, %0, CRn, CRm, Op2), u32
+#define __ACCESS_CP15_64(Op1, CRm) \
+   "mrrc", "mcrr", xstr(p15, Op1, %Q0, %R0, CRm), u64
+
+#define __read_sysreg(r, w, c, t) ({   \
+   t __val;\
+   asm volatile(r " " c : "=r" (__val));   \
+   __val;  \
+   })
+#define read_sysreg(...) __read_sysreg(__VA_ARGS__)
+
+#define __write_sysreg(v, r, w, c, t)   asm volatile(w " " c : : "r" ((t)(v)))
+#define write_sysreg(v, ...)__write_sysreg(v, __VA_ARGS__)
+#endif /* !__ASSEMBLY__ */
+
 #endif /* _ASMARM_SYSREG_H_ */
diff --git a/lib/arm64/asm/processor.h b/lib/arm64/asm/processor.h
index 84d5c7c..ed59ad2 100644
--- a/lib/arm64/asm/processor.h
+++ b/lib/arm64/asm/processor.h
@@ -19,6 +19,7 @@
 #ifndef __ASSEMBLY__
 #include 
 #include 
+#include 
 
 enum vector {
EL1T_SYNC,
@@ -66,14 +67,10 @@ static inline unsigned long current_level(void)
return el & 0xc;
 }
 
-#define DEFINE_GET_SYSREG32(reg)   \
-static inline unsigned int get_##reg(void) \
-{  \
-   unsigned int reg;   \
-   asm volatile("mrs %0, " #reg "_el1" : "=r" (reg));  \
-   return reg; \
+static inline unsigned int get_mpidr(void)
+{
+   return read_sysreg(mpidr_el1);
 }
-DEFINE_GET_SYSREG32(mpidr)
 
 /* Only support Aff0 for now, gicv2 only */
 #define mpidr_to_cpu(mpidr) ((int)((mpidr) & 0xff))
diff --git a/lib/arm64/asm/sysreg.h b/lib/arm64/asm/sysreg.h
new file mode 100644
index 000..05b9fcb
--- /dev/null
+++ b/lib/arm64/asm/sysreg.h
@@ -0,0 +1,26 @@
+/*
+ * Ripped off from arch/arm64/include/asm/sysreg.h
+ *
+ * Copyright (C) 2016, Red Hat Inc, Andrew Jones <drjo...@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.
+ */
+#ifndef _ASMARM64_SYSREG_H_
+#define _ASMARM64_SYSREG_H_
+
+#ifndef __ASSEMBLY__
+#include 
+
+#define read_sysreg(r) ({  \
+   u64 __val;  \
+   asm volatile("mrs %0, " xstr(r) : "=r" (__val));\
+   __val;  \
+})
+
+#define write_sysreg(v, r) do {\
+   u64 __val = (u64)v; \
+   asm volatile("msr " xstr(r) ", %x0" : : "rZ" (__val));  \
+} while (0)
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASMARM64_SYSREG_H_ */
diff --git a/lib/libcflat.h b/lib/libcflat.h
index c622198..c3fa4f2 100644
--- a/lib/libcflat.h
+++ b/lib/libc

[kvm-unit-tests PATCH v14 1/5] arm: rename cp15.h to sysreg.h

2016-12-06 Thread Wei Huang
To prepare for future support of ARMv8 system registers, rename the cp15.h
file to sysreg.h, with _ASMARM_CP15_H_ renamed to _ASMARM_SYSREG_H_ in the
header file.

Signed-off-by: Wei Huang <w...@redhat.com>
---
 arm/cstart.S | 2 +-
 lib/arm/asm/{cp15.h => sysreg.h} | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)
 rename lib/arm/asm/{cp15.h => sysreg.h} (94%)

diff --git a/arm/cstart.S b/arm/cstart.S
index 3943867..9822fb7 100644
--- a/arm/cstart.S
+++ b/arm/cstart.S
@@ -9,7 +9,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 
 #define THREAD_START_SP ((THREAD_SIZE - S_FRAME_SIZE * 8) & ~7)
 
diff --git a/lib/arm/asm/cp15.h b/lib/arm/asm/sysreg.h
similarity index 94%
rename from lib/arm/asm/cp15.h
rename to lib/arm/asm/sysreg.h
index 7690a48..3e1ad3a 100644
--- a/lib/arm/asm/cp15.h
+++ b/lib/arm/asm/sysreg.h
@@ -1,5 +1,5 @@
-#ifndef _ASMARM_CP15_H_
-#define _ASMARM_CP15_H_
+#ifndef _ASMARM_SYSREG_H_
+#define _ASMARM_SYSREG_H_
 /*
  * From the Linux kernel arch/arm/include/asm/cp15.h
  *
@@ -34,4 +34,4 @@
 #define CR_AFE (1 << 29)   /* Access flag enable   */
 #define CR_TE  (1 << 30)   /* Thumb exception enable   */
 
-#endif /* _ASMARM_CP15_H_ */
+#endif /* _ASMARM_SYSREG_H_ */
-- 
1.8.3.1



[kvm-unit-tests PATCH v14 5/5] arm: pmu: Add CPI checking

2016-12-06 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Calculate the number of cycles per instruction (CPI) implied by ARM
PMU cycle counter values. The code includes a strict checking facility
intended for the -icount option in TCG mode, as set in the configuration file.
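
In outline, the strict check drives a known number of instructions with the
cycle counter reset and verifies each measurement against the expected CPI.
A simplified sketch of the idea (the actual check_cpi() below also prints
each sample and handles the non-strict case); the helper name is made up,
the called functions are the ones added in this series:

static bool check_cpi_sketch(int cpi, uint32_t pmcr)
{
	for (unsigned int i = 4; i < 300; i += 32) {
		set_pmccntr(0);
		measure_instrs(i, pmcr);	/* executes exactly i instructions */
		if (get_pmccntr() != (uint64_t)cpi * i)
			return false;		/* every measurement must match the CPI */
	}
	return true;
}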

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
---
 arm/pmu.c | 133 +-
 arm/unittests.cfg |  14 ++
 2 files changed, 146 insertions(+), 1 deletion(-)

diff --git a/arm/pmu.c b/arm/pmu.c
index d9ff19d..a39dae4 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -58,6 +58,14 @@ static inline uint64_t get_pmccntr(void)
return read_sysreg(PMCCNTR32);
 }
 
+static inline void set_pmccntr(uint64_t value)
+{
+   if (pmu_version == 0x3)
+   write_sysreg(value, PMCCNTR64);
+   else
+   write_sysreg(value & 0xffffffff, PMCCNTR32);
+}
+
 /* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
 static inline void set_pmccfiltr(uint32_t value)
 {
@@ -65,13 +73,56 @@ static inline void set_pmccfiltr(uint32_t value)
write_sysreg(value, PMXEVTYPER);
isb();
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting. isb instructions were inserted to make sure
+ * pmccntr read after this function returns the exact instructions executed in
+ * the controlled block. Total instrs = isb + mcr + 2*loop = 2 + 2*loop.
+ */
+static inline void precise_instrs_loop(int loop, uint32_t pmcr)
+{
+   asm volatile(
+   "   mcr p15, 0, %[pmcr], c9, c12, 0\n"
+   "   isb\n"
+   "1: subs%[loop], %[loop], #1\n"
+   "   bgt 1b\n"
+   "   mcr p15, 0, %[z], c9, c12, 0\n"
+   "   isb\n"
+   : [loop] "+r" (loop)
+   : [pmcr] "r" (pmcr), [z] "r" (0)
+   : "cc");
+}
 #elif defined(__aarch64__)
 static inline uint32_t get_id_dfr0(void) { return read_sysreg(id_dfr0_el1); }
 static inline uint32_t get_pmcr(void) { return read_sysreg(pmcr_el0); }
 static inline void set_pmcr(uint32_t v) { write_sysreg(v, pmcr_el0); }
 static inline uint64_t get_pmccntr(void) { return read_sysreg(pmccntr_el0); }
+static inline void set_pmccntr(uint64_t v) { write_sysreg(v, pmccntr_el0); }
 static inline void set_pmcntenset(uint32_t v) { write_sysreg(v, 
pmcntenset_el0); }
 static inline void set_pmccfiltr(uint32_t v) { write_sysreg(v, pmccfiltr_el0); 
}
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting. isb instructions are inserted to make sure
+ * pmccntr read after this function returns the exact instructions executed
+ * in the controlled block. Total instrs = isb + msr + 2*loop = 2 + 2*loop.
+ */
+static inline void precise_instrs_loop(int loop, uint32_t pmcr)
+{
+   asm volatile(
+   "   msr pmcr_el0, %[pmcr]\n"
+   "   isb\n"
+   "1: subs%[loop], %[loop], #1\n"
+   "   b.gt1b\n"
+   "   msr pmcr_el0, xzr\n"
+   "   isb\n"
+   : [loop] "+r" (loop)
+   : [pmcr] "r" (pmcr)
+   : "cc");
+}
 #endif
 
 /*
@@ -128,6 +179,80 @@ static bool check_cycles_increase(void)
return success;
 }
 
+/*
+ * Execute a known number of guest instructions. Only even instruction counts
+ * greater than or equal to 4 are supported by the in-line assembly code. The
+ * control register (PMCR_EL0) is initialized with the provided value (allowing
+ * for example for the cycle counter or event counters to be reset). At the end
+ * of the exact instruction loop, zero is written to PMCR_EL0 to disable
+ * counting, allowing the cycle counter or event counters to be read at the
+ * leisure of the calling code.
+ */
+static void measure_instrs(int num, uint32_t pmcr)
+{
+   int loop = (num - 2) / 2;
+
+   assert(num >= 4 && ((num - 2) % 2 == 0));
+   precise_instrs_loop(loop, pmcr);
+}
+
+/*
+ * Measure cycle counts for various known instruction counts. Ensure that the
+ * cycle counter progresses (similar to check_cycles_increase() but with more
+ * instructions and using reset and stop controls). If supplied a positive,
+ * nonzero CPI parameter, it also strictly checks that every measurement 
matches
+ * it. Strict CPI checking is used to test -icount mode.
+ */
+static bool check_cpi(int cpi)
+{
+   uint32_t pmcr = get_pmcr() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E;
+
+   /* init before event access, this test only cares about cycle count */
+   set_pmc

[kvm-unit-tests PATCH v14 3/5] arm: Add PMU test

2016-12-06 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Beginning with a simple sanity check of the control register, add
a unit test for the ARM Performance Monitors Unit (PMU).

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
---
 arm/Makefile.common |  3 +-
 arm/pmu.c   | 93 +
 arm/unittests.cfg   |  5 +++
 3 files changed, 100 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

diff --git a/arm/Makefile.common b/arm/Makefile.common
index f37b5c2..5da2fdd 100644
--- a/arm/Makefile.common
+++ b/arm/Makefile.common
@@ -12,7 +12,8 @@ endif
 tests-common = \
$(TEST_DIR)/selftest.flat \
$(TEST_DIR)/spinlock-test.flat \
-   $(TEST_DIR)/pci-test.flat
+   $(TEST_DIR)/pci-test.flat \
+   $(TEST_DIR)/pmu.flat
 
 all: test_cases
 
diff --git a/arm/pmu.c b/arm/pmu.c
new file mode 100644
index 000..bf6ac69
--- /dev/null
+++ b/arm/pmu.c
@@ -0,0 +1,93 @@
+/*
+ * Test the ARM Performance Monitors Unit (PMU).
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2016, Red Hat Inc, Wei Huang <w...@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License version 2.1 and
+ * only version 2.1 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ */
+#include "libcflat.h"
+#include "asm/barrier.h"
+#include "asm/sysreg.h"
+#include "asm/processor.h"
+
+#define PMU_PMCR_N_SHIFT   11
+#define PMU_PMCR_N_MASK0x1f
+#define PMU_PMCR_ID_SHIFT  16
+#define PMU_PMCR_ID_MASK   0xff
+#define PMU_PMCR_IMP_SHIFT 24
+#define PMU_PMCR_IMP_MASK  0xff
+
+#define ID_DFR0_PERFMON_SHIFT 24
+#define ID_DFR0_PERFMON_MASK  0xf
+
+static unsigned int pmu_version;
+#if defined(__arm__)
+#define PMCR __ACCESS_CP15(c9, 0, c12, 0)
+#define ID_DFR0  __ACCESS_CP15(c0, 0, c1, 2)
+
+static inline uint32_t get_id_dfr0(void) { return read_sysreg(ID_DFR0); }
+static inline uint32_t get_pmcr(void) { return read_sysreg(PMCR); }
+#elif defined(__aarch64__)
+static inline uint32_t get_id_dfr0(void) { return read_sysreg(id_dfr0_el1); }
+static inline uint32_t get_pmcr(void) { return read_sysreg(pmcr_el0); }
+#endif
+
+/*
+ * As a simple sanity check on the PMCR_EL0, ensure the implementer field isn't
+ * null. Also print out a couple other interesting fields for diagnostic
+ * purposes. For example, as of fall 2016, QEMU TCG mode doesn't implement
+ * event counters and therefore reports zero event counters, but hopefully
+ * support for at least the instructions event will be added in the future and
+ * the reported number of event counters will become nonzero.
+ */
+static bool check_pmcr(void)
+{
+   uint32_t pmcr;
+
+   pmcr = get_pmcr();
+
+   report_info("PMU implementer/ID code/counters: 0x%x(\"%c\")/0x%x/%d",
+   (pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK,
+   ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) ? : ' ',
+   (pmcr >> PMU_PMCR_ID_SHIFT) & PMU_PMCR_ID_MASK,
+   (pmcr >> PMU_PMCR_N_SHIFT) & PMU_PMCR_N_MASK);
+
+   return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
+}
+
+/* Return FALSE if no PMU found, otherwise return TRUE */
+bool pmu_probe(void)
+{
+   uint32_t dfr0;
+
+   /* probe pmu version */
+   dfr0 = get_id_dfr0();
+   pmu_version = (dfr0 >> ID_DFR0_PERFMON_SHIFT) & ID_DFR0_PERFMON_MASK;
+
+   if (pmu_version)
+   report_info("PMU version: %d", pmu_version);
+
+   return pmu_version;
+}
+
+int main(void)
+{
+   if (!pmu_probe()) {
+   printf("No PMU found, test skipped...\n");
+   return report_summary();
+   }
+
+   report_prefix_push("pmu");
+
+   report("Control register", check_pmcr());
+
+   return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index ae32a42..816f494 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -58,3 +58,8 @@ groups = selftest
 [pci-test]
 file = pci-test.flat
 groups = pci
+
+# Test PMU support
+[pmu]
+file = pmu.flat
+groups = pmu
-- 
1.8.3.1



[kvm-unit-tests PATCH v14 0/5] ARM PMU tests

2016-12-06 Thread Wei Huang
Changes from v13:
* Rename cp15.h to sysreg.h for ARMv7 and add a new file sysreg.h for ARMv8
* Add macros for read_sysreg() and write_sysreg(). CP15 registers can be
  defined with __ACCESS_CP15() or __ACCESS_CP15_64(). sysreg.h (ARMv8) was
  from Drew's GIC testing code, which can be leveraged when his GIC testing
  code is imported.
* Rewrite the PMU testing code based on the new macros. All get_xxx() and
  set_xxx() functions are defined in pmu.c based on read_sysreg() and
  write_sysreg(), so code parsing tools, like cscope, can parse them easily.
* Minor fixes inside pmu.c (printf formatting, the pmu_probe() function),
  based on Andre's comments.

Note:
Current KVM code has bugs in handling PMCCFILTR write. A fix (see below) is
required for this unit testing code to work correctly under KVM mode.
Link: https://lists.cs.columbia.edu/pipermail/kvmarm/2016-November/022134.html.

Thanks,
-Wei

Christopher Covington (3):
  arm: Add PMU test
  arm: pmu: Check cycle count increases
  arm: pmu: Add CPI checking

Wei Huang (2):
  arm: rename cp15.h to sysreg.h
  arm: Add support for read_sysreg() and write_sysreg()

 arm/Makefile.common  |   3 +-
 arm/cstart.S |   2 +-
 arm/pmu.c| 290 +++
 arm/unittests.cfg|  19 +++
 lib/arm/asm/processor.h  |   6 +-
 lib/arm/asm/{cp15.h => sysreg.h} |  25 +++-
 lib/arm64/asm/processor.h|  11 +-
 lib/arm64/asm/sysreg.h   |  26 
 lib/libcflat.h   |   4 +-
 9 files changed, 369 insertions(+), 17 deletions(-)
 create mode 100644 arm/pmu.c
 rename lib/arm/asm/{cp15.h => sysreg.h} (68%)
 create mode 100644 lib/arm64/asm/sysreg.h

-- 
1.8.3.1



Re: [kvm-unit-tests PATCH v13 4/4] arm: pmu: Add CPI checking

2016-12-01 Thread Wei Huang


On 12/01/2016 02:27 PM, Andre Przywara wrote:
> Hi,
> 
> On 01/12/16 05:16, Wei Huang wrote:
>> From: Christopher Covington <c...@codeaurora.org>
>>
>> Calculate the numbers of cycles per instruction (CPI) implied by ARM
>> PMU cycle counter values. The code includes a strict checking facility
>> intended for the -icount option in TCG mode in the configuration file.
>>
>> Signed-off-by: Christopher Covington <c...@codeaurora.org>
>> Signed-off-by: Wei Huang <w...@redhat.com>
>> Reviewed-by: Andrew Jones <drjo...@redhat.com>
>> ---
>>  arm/pmu.c | 123 
>> +-
>>  arm/unittests.cfg |  14 +++
>>  2 files changed, 136 insertions(+), 1 deletion(-)
>>
>> diff --git a/arm/pmu.c b/arm/pmu.c
>> index 3566a27..29d7c2c 100644
>> --- a/arm/pmu.c
>> +++ b/arm/pmu.c
>> @@ -69,6 +69,27 @@ static inline void set_pmccfiltr(uint32_t value)
>>  set_pmxevtyper(value);
>>  isb();
>>  }
>> +
>> +/*
>> + * Extra instructions inserted by the compiler would be difficult to 
>> compensate
>> + * for, so hand assemble everything between, and including, the PMCR 
>> accesses
>> + * to start and stop counting. isb instructions were inserted to make sure
>> + * pmccntr read after this function returns the exact instructions executed 
>> in
>> + * the controlled block. Total instrs = isb + mcr + 2*loop = 2 + 2*loop.
>> + */
>> +static inline void precise_instrs_loop(int loop, uint32_t pmcr)
>> +{
>> +asm volatile(
>> +"   mcr p15, 0, %[pmcr], c9, c12, 0\n"
>> +"   isb\n"
>> +"1: subs%[loop], %[loop], #1\n"
>> +"   bgt 1b\n"
>> +"   mcr p15, 0, %[z], c9, c12, 0\n"
>> +"   isb\n"
>> +: [loop] "+r" (loop)
>> +: [pmcr] "r" (pmcr), [z] "r" (0)
>> +: "cc");
>> +}
>>  #elif defined(__aarch64__)
>>  DEFINE_GET_SYSREG32(pmcr, el0)
>>  DEFINE_SET_SYSREG32(pmcr, el0)
>> @@ -77,6 +98,27 @@ DEFINE_GET_SYSREG64(pmccntr, el0);
>>  DEFINE_SET_SYSREG64(pmccntr, el0);
>>  DEFINE_SET_SYSREG32(pmcntenset, el0);
>>  DEFINE_SET_SYSREG32(pmccfiltr, el0);
>> +
>> +/*
>> + * Extra instructions inserted by the compiler would be difficult to 
>> compensate
>> + * for, so hand assemble everything between, and including, the PMCR 
>> accesses
>> + * to start and stop counting. isb instructions are inserted to make sure
>> + * pmccntr read after this function returns the exact instructions executed
>> + * in the controlled block. Total instrs = isb + msr + 2*loop = 2 + 2*loop.
>> + */
>> +static inline void precise_instrs_loop(int loop, uint32_t pmcr)
>> +{
>> +asm volatile(
>> +"   msr pmcr_el0, %[pmcr]\n"
>> +"   isb\n"
>> +"1: subs%[loop], %[loop], #1\n"
>> +"   b.gt1b\n"
>> +"   msr pmcr_el0, xzr\n"
>> +"   isb\n"
>> +: [loop] "+r" (loop)
>> +: [pmcr] "r" (pmcr)
>> +: "cc");
>> +}
>>  #endif
>>  
>>  /*
>> @@ -134,6 +176,79 @@ static bool check_cycles_increase(void)
>>  return success;
>>  }
>>  
>> +/*
>> + * Execute a known number of guest instructions. Only even instruction 
>> counts
>> + * greater than or equal to 4 are supported by the in-line assembly code. 
>> The
>> + * control register (PMCR_EL0) is initialized with the provided value 
>> (allowing
>> + * for example for the cycle counter or event counters to be reset). At the 
>> end
>> + * of the exact instruction loop, zero is written to PMCR_EL0 to disable
>> + * counting, allowing the cycle counter or event counters to be read at the
>> + * leisure of the calling code.
>> + */
>> +static void measure_instrs(int num, uint32_t pmcr)
>> +{
>> +int loop = (num - 2) / 2;
>> +
>> +assert(num >= 4 && ((num - 2) % 2 == 0));
>> +precise_instrs_loop(loop, pmcr);
>> +}
>> +
>> +/*
>> + * Measure cycle counts for various known instruction counts. Ensure that 
>> the
>> + * cycle counter progresses (similar to check_cycles_increase() but with 
>> more
>> + * instructions and using reset and stop controls). If supplied a positive,
>> + * nonzero CPI parameter, also strictly 

Re: [Qemu-devel] [kvm-unit-tests PATCH v13 3/4] arm: pmu: Check cycle count increases

2016-12-01 Thread Wei Huang


On 12/01/2016 03:18 AM, Andrew Jones wrote:
> On Wed, Nov 30, 2016 at 11:16:41PM -0600, Wei Huang wrote:
>> From: Christopher Covington <c...@codeaurora.org>
>>
>> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
>> even for the smallest delta of two subsequent reads.
>>
>> Signed-off-by: Christopher Covington <c...@codeaurora.org>
>> Signed-off-by: Wei Huang <w...@redhat.com>
>> Reviewed-by: Andrew Jones <drjo...@redhat.com>
>> ---
>>  arm/pmu.c | 94 
>> +++
>>  1 file changed, 94 insertions(+)
>>
>> diff --git a/arm/pmu.c b/arm/pmu.c
>> index 1fe2b1a..3566a27 100644
>> --- a/arm/pmu.c
>> +++ b/arm/pmu.c
>> @@ -16,6 +16,9 @@
>>  #include "asm/barrier.h"
>>  #include "asm/processor.h"
>>  
>> +#define PMU_PMCR_E (1 << 0)
>> +#define PMU_PMCR_C (1 << 2)
>> +#define PMU_PMCR_LC(1 << 6)
>>  #define PMU_PMCR_N_SHIFT   11
>>  #define PMU_PMCR_N_MASK0x1f
>>  #define PMU_PMCR_ID_SHIFT  16
>> @@ -23,10 +26,57 @@
>>  #define PMU_PMCR_IMP_SHIFT 24
>>  #define PMU_PMCR_IMP_MASK  0xff
>>  
>> +#define ID_DFR0_PERFMON_SHIFT 24
>> +#define ID_DFR0_PERFMON_MASK  0xf
>> +
>> +#define PMU_CYCLE_IDX 31
>> +
>> +#define NR_SAMPLES 10
>> +
>> +static unsigned int pmu_version;
>>  #if defined(__arm__)
>>  DEFINE_GET_SYSREG32(pmcr, 0, c9, c12, 0)
>> +DEFINE_SET_SYSREG32(pmcr, 0, c9, c12, 0)
>> +DEFINE_GET_SYSREG32(id_dfr0, 0, c0, c1, 2)
>> +DEFINE_SET_SYSREG32(pmselr, 0, c9, c12, 5)
>> +DEFINE_SET_SYSREG32(pmxevtyper, 0, c9, c13, 1)
>> +DEFINE_GET_SYSREG32(pmccntr32, 0, c9, c13, 0)
>> +DEFINE_SET_SYSREG32(pmccntr32, 0, c9, c13, 0)
>> +DEFINE_GET_SYSREG64(pmccntr64, 0, c9)
>> +DEFINE_SET_SYSREG64(pmccntr64, 0, c9)
>> +DEFINE_SET_SYSREG32(pmcntenset, 0, c9, c12, 1)
> 
> Seeing how we get lots of redundant looking lines, I think instead
> of defining DEFINE_SET/GET_SYSREG32/64, we should instead have
> 
> DEFINE_SYSREG32/64  ... creates both get_ and set_
> DEFINE_SYSREG32/64_RO   ... creates just get_

I don't like the naming. I think we can create a new macro named
DEFINE_GET_SET_SYSREG32/64. I know it is boring, but readers should get
the idea easily.
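
Something along these lines (purely a naming sketch on top of the v13
macros quoted above, not tested):

#define DEFINE_GET_SET_SYSREG32(name, opc1, crn, crm, opc2)	\
	DEFINE_GET_SYSREG32(name, opc1, crn, crm, opc2)		\
	DEFINE_SET_SYSREG32(name, opc1, crn, crm, opc2)

/* e.g. DEFINE_GET_SET_SYSREG32(pmcr, 0, c9, c12, 0) would then provide
 * both get_pmcr() and set_pmcr(). */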

> 
>> +
>> +static inline uint64_t get_pmccntr(void)
>> +{
>> +if (pmu_version == 0x3)
>> +return get_pmccntr64();
>> +else
>> +return get_pmccntr32();
>> +}
>> +
>> +static inline void set_pmccntr(uint64_t value)
>> +{
>> +if (pmu_version == 0x3)
>> +set_pmccntr64(value);
>> +else
>> +set_pmccntr32(value & 0xffffffff);
>> +}
> 
> So the two accessors above are exceptional, which is why we don't
> use SYSREG for them. These can have uint64_t for there external
> interface. We can't require 'unsigned long' or 'unsigned long long'
> 
>> +
>> +/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
>> +static inline void set_pmccfiltr(uint32_t value)
>> +{
>> +set_pmselr(PMU_CYCLE_IDX);
>> +set_pmxevtyper(value);
>> +isb();
>> +}
>>  #elif defined(__aarch64__)
>>  DEFINE_GET_SYSREG32(pmcr, el0)
>> +DEFINE_SET_SYSREG32(pmcr, el0)
>> +DEFINE_GET_SYSREG32(id_dfr0, el1)
>> +DEFINE_GET_SYSREG64(pmccntr, el0);
>> +DEFINE_SET_SYSREG64(pmccntr, el0);
>> +DEFINE_SET_SYSREG32(pmcntenset, el0);
>> +DEFINE_SET_SYSREG32(pmccfiltr, el0);
>>  #endif
>>  
>>  /*
>> @@ -52,11 +102,55 @@ static bool check_pmcr(void)
>>  return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
>>  }
>>  
>> +/*
>> + * Ensure that the cycle counter progresses between back-to-back reads.
>> + */
>> +static bool check_cycles_increase(void)
>> +{
>> +bool success = true;
>> +
>> +/* init before event access, this test only cares about cycle count */
>> +set_pmcntenset(1 << PMU_CYCLE_IDX);
>> +set_pmccfiltr(0); /* count cycles in EL0, EL1, but not EL2 */
>> +set_pmccntr(0);
>> +
>> +set_pmcr(get_pmcr() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E);
>> +
>> +for (int i = 0; i < NR_SAMPLES; i++) {
>> +uint64_t a, b;
>> +
>> +a = get_pmccntr();
>> +b = get_pmccntr();
>> +
>> +if (a >= b) {
>> +p

Re: [kvm-unit-tests PATCH v13 3/4] arm: pmu: Check cycle count increases

2016-12-01 Thread Wei Huang


On 12/01/2016 05:27 AM, Andre Przywara wrote:
> Hi,
> 
> On 01/12/16 05:16, Wei Huang wrote:
>> From: Christopher Covington <c...@codeaurora.org>
>>
>> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
>> even for the smallest delta of two subsequent reads.
>>
>> Signed-off-by: Christopher Covington <c...@codeaurora.org>
>> Signed-off-by: Wei Huang <w...@redhat.com>
>> Reviewed-by: Andrew Jones <drjo...@redhat.com>
>> ---
>>  arm/pmu.c | 94 
>> +++
>>  1 file changed, 94 insertions(+)
>>
>> diff --git a/arm/pmu.c b/arm/pmu.c
>> index 1fe2b1a..3566a27 100644
>> --- a/arm/pmu.c
>> +++ b/arm/pmu.c
>> @@ -16,6 +16,9 @@
>>  #include "asm/barrier.h"
>>  #include "asm/processor.h"
>>  
>> +#define PMU_PMCR_E (1 << 0)
>> +#define PMU_PMCR_C (1 << 2)
>> +#define PMU_PMCR_LC(1 << 6)
>>  #define PMU_PMCR_N_SHIFT   11
>>  #define PMU_PMCR_N_MASK0x1f
>>  #define PMU_PMCR_ID_SHIFT  16
>> @@ -23,10 +26,57 @@
>>  #define PMU_PMCR_IMP_SHIFT 24
>>  #define PMU_PMCR_IMP_MASK  0xff
>>  
>> +#define ID_DFR0_PERFMON_SHIFT 24
>> +#define ID_DFR0_PERFMON_MASK  0xf
>> +
>> +#define PMU_CYCLE_IDX 31
>> +
>> +#define NR_SAMPLES 10
>> +
>> +static unsigned int pmu_version;
>>  #if defined(__arm__)
>>  DEFINE_GET_SYSREG32(pmcr, 0, c9, c12, 0)
>> +DEFINE_SET_SYSREG32(pmcr, 0, c9, c12, 0)
>> +DEFINE_GET_SYSREG32(id_dfr0, 0, c0, c1, 2)
>> +DEFINE_SET_SYSREG32(pmselr, 0, c9, c12, 5)
>> +DEFINE_SET_SYSREG32(pmxevtyper, 0, c9, c13, 1)
>> +DEFINE_GET_SYSREG32(pmccntr32, 0, c9, c13, 0)
>> +DEFINE_SET_SYSREG32(pmccntr32, 0, c9, c13, 0)
>> +DEFINE_GET_SYSREG64(pmccntr64, 0, c9)
>> +DEFINE_SET_SYSREG64(pmccntr64, 0, c9)
>> +DEFINE_SET_SYSREG32(pmcntenset, 0, c9, c12, 1)
>> +
>> +static inline uint64_t get_pmccntr(void)
>> +{
>> +if (pmu_version == 0x3)
>> +return get_pmccntr64();
>> +else
>> +return get_pmccntr32();
>> +}
>> +
>> +static inline void set_pmccntr(uint64_t value)
>> +{
>> +if (pmu_version == 0x3)
>> +set_pmccntr64(value);
>> +else
>> +set_pmccntr32(value & 0xffffffff);
>> +}
>> +
>> +/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
>> +static inline void set_pmccfiltr(uint32_t value)
>> +{
>> +set_pmselr(PMU_CYCLE_IDX);
>> +set_pmxevtyper(value);
>> +isb();
>> +}
>>  #elif defined(__aarch64__)
>>  DEFINE_GET_SYSREG32(pmcr, el0)
>> +DEFINE_SET_SYSREG32(pmcr, el0)
>> +DEFINE_GET_SYSREG32(id_dfr0, el1)
>> +DEFINE_GET_SYSREG64(pmccntr, el0);
>> +DEFINE_SET_SYSREG64(pmccntr, el0);
>> +DEFINE_SET_SYSREG32(pmcntenset, el0);
>> +DEFINE_SET_SYSREG32(pmccfiltr, el0);
>>  #endif
>>  
>>  /*
>> @@ -52,11 +102,55 @@ static bool check_pmcr(void)
>>  return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
>>  }
>>  
>> +/*
>> + * Ensure that the cycle counter progresses between back-to-back reads.
>> + */
>> +static bool check_cycles_increase(void)
>> +{
>> +bool success = true;
>> +
>> +/* init before event access, this test only cares about cycle count */
>> +set_pmcntenset(1 << PMU_CYCLE_IDX);
>> +set_pmccfiltr(0); /* count cycles in EL0, EL1, but not EL2 */
>> +set_pmccntr(0);
> 
> Why do we need this? Shouldn't PMU_PMCR_C below take care of that?

PMU_PMCR_C does reset the cycle counter, so I can remove this one.

> 
>> +
>> +set_pmcr(get_pmcr() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E);
>> +
>> +for (int i = 0; i < NR_SAMPLES; i++) {
>> +uint64_t a, b;
>> +
>> +a = get_pmccntr();
>> +b = get_pmccntr();
>> +
>> +if (a >= b) {
>> +printf("Read %"PRId64" then %"PRId64".\n", a, b);
>> +success = false;
>> +break;
>> +}
>> +}
>> +
>> +set_pmcr(get_pmcr() & ~PMU_PMCR_E);
>> +
>> +return success;
>> +}
>> +
>> +void pmu_init(void)
> 
> Mmh, this function doesn't really initialize anything, does it?
> Should it be named pmu_available() or pmu_version() or the like?
> 

This function used to contain cycle counter confi

Re: [Qemu-devel] [kvm-unit-tests PATCH v13 1/4] arm: Define macros for accessing system registers

2016-12-01 Thread Wei Huang


On 12/01/2016 02:59 AM, Andrew Jones wrote:
> 
> Should this be From: Andre?
> 
> On Wed, Nov 30, 2016 at 11:16:39PM -0600, Wei Huang wrote:
>> This patch defines four macros to assist creating system register
>> accessors under both ARMv7 and AArch64:
>>* DEFINE_GET_SYSREG32(name, ...)
>>* DEFINE_SET_SYSREG32(name, ...)
>>* DEFINE_GET_SYSREG64(name, ...)
>>* DEFINE_SET_SYSREG64(name, ...)
>> These macros are translated to inline functions with consistent naming,
>> get_##name() and set_##name(), which can be used by C code directly.
>>
>> Signed-off-by: Andre Przywara <andre.przyw...@arm.com>
>> Signed-off-by: Wei Huang <w...@redhat.com>
>> ---
>>  lib/arm/asm/processor.h   | 37 -
>>  lib/arm64/asm/processor.h | 35 ---
>>  2 files changed, 60 insertions(+), 12 deletions(-)
>>
>> diff --git a/lib/arm/asm/processor.h b/lib/arm/asm/processor.h
>> index f25e7ee..3ca6b42 100644
>> --- a/lib/arm/asm/processor.h
>> +++ b/lib/arm/asm/processor.h
>> @@ -33,13 +33,40 @@ static inline unsigned long current_cpsr(void)
>>  
>>  #define current_mode() (current_cpsr() & MODE_MASK)
>>  
>> -static inline unsigned int get_mpidr(void)
>> -{
>> -unsigned int mpidr;
>> -asm volatile("mrc p15, 0, %0, c0, c0, 5" : "=r" (mpidr));
>> -return mpidr;
>> +#define DEFINE_GET_SYSREG32(name, opc1, crn, crm, opc2) 
>> \
>> +static inline uint32_t get_##name(void) 
>> \
>> +{   \
>> +uint32_t reg;   \
>> +asm volatile("mrc p15, " #opc1 ", %0, " #crn ", " #crm ", " \
>> + #opc2 : "=r" (reg));   \
>> +return reg; \
>> +}
>> +
>> +#define DEFINE_SET_SYSREG32(name, opc1, crn, crm, opc2) 
>> \
>> +static inline void set_##name(uint32_t value)   
>> \
>> +{   \
>> +asm volatile("mcr p15, " #opc1 ", %0, " #crn ", " #crm ", " \
>> + #opc2 :: "r" (value)); \
>^ nit: no space here, checkpatch would complain

Which checkpatch script are you using? I didn't find one in
kvm-unit-tests. I tried the kernel's checkpatch script, but it didn't
complain about anything in this patch.

>> +}
>> +




Re: [Qemu-devel] [kvm-unit-tests PATCH v11 1/3] arm: Add PMU test

2016-11-30 Thread Wei Huang


On 11/25/2016 08:26 AM, Andrew Jones wrote:
> On Fri, Nov 25, 2016 at 12:32:24PM +, Andre Przywara wrote:
>> Hi Drew,
>>
>> 
>>
>> On 23/11/16 17:15, Andrew Jones wrote:
> +
> +#if defined(__arm__)

 I guess you should use the arch specific header files we have in place
 for that (lib/arm{.64}/asm/processor.h). Also there are sysreg read
 wrappers (at least for arm64) in there already, can't we base this
 function on them: DEFINE_GET_SYSREG32(pmcr, el0)?
 (Requires a small change to get rid of the forced "_el1" suffix)

 We should wait for the GIC series to be merged, as this contains some
 changes in this area.
>>>
>>> As this unit test is the only consumer of PMC registers so far, then
>>> I'd prefer the defines and accessors stay here for now. Once we see
>>> a use in other unit tests then we can move some of it out.
>>
>> Well, I was more thinking of something like below.
>> I am fine with keeping the PMU sysregs private to pmu.c, but we can still
>> use the sysreg wrappers, can't we?
>> This is on top of Wei's series, so doesn't have your SYSREG32/64
>> unification, but I leave this as an exercise to the reader.
>> There is some churn in pmu.c below due to the change of _write to
>> set_, but the rest looks like simplification to me.
>>
>> Does that make sense?
> 
> Ah, now I see what you mean, and I think I like that. The question is
> whether or not I like my SYSREG macros :-) I see value in having the
> asm's easy to read (open-coded), as well as value in making sure we
> only have to review sysreg functions once. Let's ask for Wei's and
> Cov's votes. If they like the SYSREG direction, then they can vote
> with another version of this series :-)

Let us use the SYSREG macros then, because they make coding easier. V13 has
been sent. I think this PMU patchset is a bit bloated now, so hopefully
this is the last version. After it is accepted, we can always come back
and refactor the SYSREG r/w further (if needed).

Thanks,
-Wei

> 
> Thanks,
> drew
> 
>>
>> Cheers,
>> Andre.
>>
>> ---
>>  arm/pmu.c | 159 
>> +-
>>  lib/arm/asm/processor.h   |  34 --
>>  lib/arm64/asm/processor.h |  23 ++-
>>  3 files changed, 92 insertions(+), 124 deletions(-)
>>
>> diff --git a/arm/pmu.c b/arm/pmu.c
>> index f667676..f0ad02a 100644
>> --- a/arm/pmu.c
>> +++ b/arm/pmu.c
>> @@ -14,6 +14,7 @@
>>   */
>>  #include "libcflat.h"
>>  #include "asm/barrier.h"
>> +#include "asm/processor.h"
>>  
>>  #define PMU_PMCR_E (1 << 0)
>>  #define PMU_PMCR_C (1 << 2)
>> @@ -33,78 +34,42 @@
>>  #define NR_SAMPLES 10
>>  
>>  static unsigned int pmu_version;
>> -#if defined(__arm__)
>> -static inline uint32_t pmcr_read(void)
>> -{
>> -uint32_t ret;
>> -
>> -asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
>> -return ret;
>> -}
>> -
>> -static inline void pmcr_write(uint32_t value)
>> -{
>> -asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
>> -isb();
>> -}
>>  
>> -static inline void pmselr_write(uint32_t value)
>> -{
>> -asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
>> -isb();
>> -}
>> -
>> -static inline void pmxevtyper_write(uint32_t value)
>> -{
>> -asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
>> -}
>> -
>> -static inline uint64_t pmccntr_read(void)
>> +#if defined(__arm__)
>> +DEFINE_GET_SYSREG32(pmcr, 0, c9, c12, 0)
>> +DEFINE_SET_SYSREG32(pmcr, 0, c9, c12, 0)
>> +DEFINE_GET_SYSREG32(id_dfr0, 0, c0, c1, 2)
>> +DEFINE_SET_SYSREG32(pmselr, 0, c9, c12, 5)
>> +DEFINE_SET_SYSREG32(pmxevtyper, 0, c9, c13, 1)
>> +DEFINE_GET_SYSREG32(pmccntr32, 0, c9, c13, 0)
>> +DEFINE_SET_SYSREG32(pmccntr32, 0, c9, c13, 0)
>> +DEFINE_GET_SYSREG64(pmccntr64, 0, c9)
>> +DEFINE_SET_SYSREG64(pmccntr64, 0, c9)
>> +DEFINE_SET_SYSREG32(pmcntenset, 0, c9, c12, 1)
>> +
>> +static inline uint64_t get_pmccntr(void)
>>  {
>> -uint32_t lo, hi = 0;
>> -
>>  if (pmu_version == 0x3)
>> -asm volatile("mrrc p15, 0, %0, %1, c9" : "=r" (lo), "=r" (hi));
>> +return get_pmccntr32();
>>  else
>> -asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (lo));
>> -
>> -return ((uint64_t)hi << 32) | lo;
>> +return get_pmccntr64();
>>  }
>>  
>> -static inline void pmccntr_write(uint64_t value)
>> +static inline void set_pmccntr(uint64_t value)
>>  {
>> -uint32_t lo, hi;
>> -
>> -lo = value & 0xffffffff;
>> -hi = (value >> 32) & 0xffffffff;
>> -
>>  if (pmu_version == 0x3)
>> -asm volatile("mcrr p15, 0, %0, %1, c9" : : "r" (lo), "r" (hi));
>> +set_pmccntr64(value);
>>  else
>> -asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (lo));
>> +set_pmccntr64(value & 0xffffffff);
>>  }
>> -
>> -static inline void pmcntenset_write(uint32_t value)
>> -{
>> -asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value));
>> -}
>> -
>>  /* PMCCFILTR is an 

[kvm-unit-tests PATCH v13 3/4] arm: pmu: Check cycle count increases

2016-11-30 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
even for the smallest delta of two subsequent reads.

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
---
 arm/pmu.c | 94 +++
 1 file changed, 94 insertions(+)

diff --git a/arm/pmu.c b/arm/pmu.c
index 1fe2b1a..3566a27 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -16,6 +16,9 @@
 #include "asm/barrier.h"
 #include "asm/processor.h"
 
+#define PMU_PMCR_E (1 << 0)
+#define PMU_PMCR_C (1 << 2)
+#define PMU_PMCR_LC(1 << 6)
 #define PMU_PMCR_N_SHIFT   11
 #define PMU_PMCR_N_MASK0x1f
 #define PMU_PMCR_ID_SHIFT  16
@@ -23,10 +26,57 @@
 #define PMU_PMCR_IMP_SHIFT 24
 #define PMU_PMCR_IMP_MASK  0xff
 
+#define ID_DFR0_PERFMON_SHIFT 24
+#define ID_DFR0_PERFMON_MASK  0xf
+
+#define PMU_CYCLE_IDX 31
+
+#define NR_SAMPLES 10
+
+static unsigned int pmu_version;
 #if defined(__arm__)
 DEFINE_GET_SYSREG32(pmcr, 0, c9, c12, 0)
+DEFINE_SET_SYSREG32(pmcr, 0, c9, c12, 0)
+DEFINE_GET_SYSREG32(id_dfr0, 0, c0, c1, 2)
+DEFINE_SET_SYSREG32(pmselr, 0, c9, c12, 5)
+DEFINE_SET_SYSREG32(pmxevtyper, 0, c9, c13, 1)
+DEFINE_GET_SYSREG32(pmccntr32, 0, c9, c13, 0)
+DEFINE_SET_SYSREG32(pmccntr32, 0, c9, c13, 0)
+DEFINE_GET_SYSREG64(pmccntr64, 0, c9)
+DEFINE_SET_SYSREG64(pmccntr64, 0, c9)
+DEFINE_SET_SYSREG32(pmcntenset, 0, c9, c12, 1)
+
+static inline uint64_t get_pmccntr(void)
+{
+   if (pmu_version == 0x3)
+   return get_pmccntr64();
+   else
+   return get_pmccntr32();
+}
+
+static inline void set_pmccntr(uint64_t value)
+{
+   if (pmu_version == 0x3)
+   set_pmccntr64(value);
+   else
+   set_pmccntr32(value & 0xffffffff);
+}
+
+/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
+static inline void set_pmccfiltr(uint32_t value)
+{
+   set_pmselr(PMU_CYCLE_IDX);
+   set_pmxevtyper(value);
+   isb();
+}
 #elif defined(__aarch64__)
 DEFINE_GET_SYSREG32(pmcr, el0)
+DEFINE_SET_SYSREG32(pmcr, el0)
+DEFINE_GET_SYSREG32(id_dfr0, el1)
+DEFINE_GET_SYSREG64(pmccntr, el0);
+DEFINE_SET_SYSREG64(pmccntr, el0);
+DEFINE_SET_SYSREG32(pmcntenset, el0);
+DEFINE_SET_SYSREG32(pmccfiltr, el0);
 #endif
 
 /*
@@ -52,11 +102,55 @@ static bool check_pmcr(void)
return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
 }
 
+/*
+ * Ensure that the cycle counter progresses between back-to-back reads.
+ */
+static bool check_cycles_increase(void)
+{
+   bool success = true;
+
+   /* init before event access, this test only cares about cycle count */
+   set_pmcntenset(1 << PMU_CYCLE_IDX);
+   set_pmccfiltr(0); /* count cycles in EL0, EL1, but not EL2 */
+   set_pmccntr(0);
+
+   set_pmcr(get_pmcr() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E);
+
+   for (int i = 0; i < NR_SAMPLES; i++) {
+   uint64_t a, b;
+
+   a = get_pmccntr();
+   b = get_pmccntr();
+
+   if (a >= b) {
+   printf("Read %"PRId64" then %"PRId64".\n", a, b);
+   success = false;
+   break;
+   }
+   }
+
+   set_pmcr(get_pmcr() & ~PMU_PMCR_E);
+
+   return success;
+}
+
+void pmu_init(void)
+{
+   uint32_t dfr0;
+
+   /* probe pmu version */
+   dfr0 = get_id_dfr0();
+   pmu_version = (dfr0 >> ID_DFR0_PERFMON_SHIFT) & ID_DFR0_PERFMON_MASK;
+   report_info("PMU version: %d", pmu_version);
+}
+
 int main(void)
 {
report_prefix_push("pmu");
 
+   pmu_init();
report("Control register", check_pmcr());
+   report("Monotonically increasing cycle count", check_cycles_increase());
 
return report_summary();
 }
-- 
1.8.3.1



[kvm-unit-tests PATCH v13 4/4] arm: pmu: Add CPI checking

2016-11-30 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Calculate the number of cycles per instruction (CPI) implied by ARM
PMU cycle counter values. The code includes a strict checking facility
intended for the -icount option in TCG mode, as set in the configuration file.

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
---
 arm/pmu.c | 123 +-
 arm/unittests.cfg |  14 +++
 2 files changed, 136 insertions(+), 1 deletion(-)

diff --git a/arm/pmu.c b/arm/pmu.c
index 3566a27..29d7c2c 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -69,6 +69,27 @@ static inline void set_pmccfiltr(uint32_t value)
set_pmxevtyper(value);
isb();
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting. isb instructions were inserted to make sure
+ * pmccntr read after this function returns the exact instructions executed in
+ * the controlled block. Total instrs = isb + mcr + 2*loop = 2 + 2*loop.
+ */
+static inline void precise_instrs_loop(int loop, uint32_t pmcr)
+{
+   asm volatile(
+   "   mcr p15, 0, %[pmcr], c9, c12, 0\n"
+   "   isb\n"
+   "1: subs%[loop], %[loop], #1\n"
+   "   bgt 1b\n"
+   "   mcr p15, 0, %[z], c9, c12, 0\n"
+   "   isb\n"
+   : [loop] "+r" (loop)
+   : [pmcr] "r" (pmcr), [z] "r" (0)
+   : "cc");
+}
 #elif defined(__aarch64__)
 DEFINE_GET_SYSREG32(pmcr, el0)
 DEFINE_SET_SYSREG32(pmcr, el0)
@@ -77,6 +98,27 @@ DEFINE_GET_SYSREG64(pmccntr, el0);
 DEFINE_SET_SYSREG64(pmccntr, el0);
 DEFINE_SET_SYSREG32(pmcntenset, el0);
 DEFINE_SET_SYSREG32(pmccfiltr, el0);
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting. isb instructions are inserted to make sure
+ * pmccntr read after this function returns the exact instructions executed
+ * in the controlled block. Total instrs = isb + msr + 2*loop = 2 + 2*loop.
+ */
+static inline void precise_instrs_loop(int loop, uint32_t pmcr)
+{
+   asm volatile(
+   "   msr pmcr_el0, %[pmcr]\n"
+   "   isb\n"
+   "1: subs%[loop], %[loop], #1\n"
+   "   b.gt1b\n"
+   "   msr pmcr_el0, xzr\n"
+   "   isb\n"
+   : [loop] "+r" (loop)
+   : [pmcr] "r" (pmcr)
+   : "cc");
+}
 #endif
 
 /*
@@ -134,6 +176,79 @@ static bool check_cycles_increase(void)
return success;
 }
 
+/*
+ * Execute a known number of guest instructions. Only even instruction counts
+ * greater than or equal to 4 are supported by the in-line assembly code. The
+ * control register (PMCR_EL0) is initialized with the provided value (allowing
+ * for example for the cycle counter or event counters to be reset). At the end
+ * of the exact instruction loop, zero is written to PMCR_EL0 to disable
+ * counting, allowing the cycle counter or event counters to be read at the
+ * leisure of the calling code.
+ */
+static void measure_instrs(int num, uint32_t pmcr)
+{
+   int loop = (num - 2) / 2;
+
+   assert(num >= 4 && ((num - 2) % 2 == 0));
+   precise_instrs_loop(loop, pmcr);
+}
+
+/*
+ * Measure cycle counts for various known instruction counts. Ensure that the
+ * cycle counter progresses (similar to check_cycles_increase() but with more
+ * instructions and using reset and stop controls). If supplied a positive,
+ * nonzero CPI parameter, also strictly check that every measurement matches
+ * it. Strict CPI checking is used to test -icount mode.
+ */
+static bool check_cpi(int cpi)
+{
+   uint32_t pmcr = get_pmcr() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E;
+
+   /* init before event access, this test only cares about cycle count */
+   set_pmcntenset(1 << PMU_CYCLE_IDX);
+   set_pmccfiltr(0); /* count cycles in EL0, EL1, but not EL2 */
+
+   if (cpi > 0)
+   printf("Checking for CPI=%d.\n", cpi);
+   printf("instrs : cycles0 cycles1 ...\n");
+
+   for (unsigned int i = 4; i < 300; i += 32) {
+   uint64_t avg, sum = 0;
+
+   printf("%d :", i);
+   for (int j = 0; j < NR_SAMPLES; j++) {
+   uint64_t cycles;
+
+   set_pmccntr(0);
+   measure_instrs(i, pmcr);
+   cycles = get_pmccntr();
+   printf(" %"PRId64"", cycles);
+
+ 

[kvm-unit-tests PATCH v13 0/4] ARM PMU tests

2016-11-30 Thread Wei Huang
Changes from v12:
* Define macros for system register accessors
* Re-write PMU code using the newly-defined macros
* Code tested under both AArch32 and AArch64 modes

Note:
1) Current KVM code has bugs in handling PMCCFILTR write. A fix (see
below) is required for this unit testing code to work correctly under
KVM mode.
https://lists.cs.columbia.edu/pipermail/kvmarm/2016-November/022134.html.

Thanks,
-Wei

Christopher Covington (3):
  arm: Add PMU test
  arm: pmu: Check cycle count increases
  arm: pmu: Add CPI checking

Wei Huang (1):
  arm: Define macros for accessing system registers

 arm/Makefile.common   |   3 +-
 arm/pmu.c | 277 ++
 arm/unittests.cfg |  19 
 lib/arm/asm/processor.h   |  37 ++-
 lib/arm64/asm/processor.h |  35 --
 5 files changed, 358 insertions(+), 13 deletions(-)
 create mode 100644 arm/pmu.c

-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v13 2/4] arm: Add PMU test

2016-11-30 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Beginning with a simple sanity check of the control register, add
a unit test for the ARM Performance Monitors Unit (PMU). PMU registers
are read using the newly defined macros.

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
---
 arm/Makefile.common |  3 ++-
 arm/pmu.c   | 62 +
 arm/unittests.cfg   |  5 +
 3 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

diff --git a/arm/Makefile.common b/arm/Makefile.common
index f37b5c2..5da2fdd 100644
--- a/arm/Makefile.common
+++ b/arm/Makefile.common
@@ -12,7 +12,8 @@ endif
 tests-common = \
$(TEST_DIR)/selftest.flat \
$(TEST_DIR)/spinlock-test.flat \
-   $(TEST_DIR)/pci-test.flat
+   $(TEST_DIR)/pci-test.flat \
+   $(TEST_DIR)/pmu.flat
 
 all: test_cases
 
diff --git a/arm/pmu.c b/arm/pmu.c
new file mode 100644
index 000..1fe2b1a
--- /dev/null
+++ b/arm/pmu.c
@@ -0,0 +1,62 @@
+/*
+ * Test the ARM Performance Monitors Unit (PMU).
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License version 2.1 and
+ * only version 2.1 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ */
+#include "libcflat.h"
+#include "asm/barrier.h"
+#include "asm/processor.h"
+
+#define PMU_PMCR_N_SHIFT   11
+#define PMU_PMCR_N_MASK0x1f
+#define PMU_PMCR_ID_SHIFT  16
+#define PMU_PMCR_ID_MASK   0xff
+#define PMU_PMCR_IMP_SHIFT 24
+#define PMU_PMCR_IMP_MASK  0xff
+
+#if defined(__arm__)
+DEFINE_GET_SYSREG32(pmcr, 0, c9, c12, 0)
+#elif defined(__aarch64__)
+DEFINE_GET_SYSREG32(pmcr, el0)
+#endif
+
+/*
+ * As a simple sanity check on the PMCR_EL0, ensure the implementer field isn't
+ * null. Also print out a couple other interesting fields for diagnostic
+ * purposes. For example, as of fall 2016, QEMU TCG mode doesn't implement
+ * event counters and therefore reports zero event counters, but hopefully
+ * support for at least the instructions event will be added in the future and
+ * the reported number of event counters will become nonzero.
+ */
+static bool check_pmcr(void)
+{
+   uint32_t pmcr;
+
+   pmcr = get_pmcr();
+
+   report_info("PMU implementer/ID code/counters: 0x%x(\"%c\")/0x%x/%d",
+   (pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK,
+   ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) ? : ' ',
+   (pmcr >> PMU_PMCR_ID_SHIFT) & PMU_PMCR_ID_MASK,
+   (pmcr >> PMU_PMCR_N_SHIFT) & PMU_PMCR_N_MASK);
+
+   return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
+}
+
+int main(void)
+{
+   report_prefix_push("pmu");
+
+   report("Control register", check_pmcr());
+
+   return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index ae32a42..816f494 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -58,3 +58,8 @@ groups = selftest
 [pci-test]
 file = pci-test.flat
 groups = pci
+
+# Test PMU support
+[pmu]
+file = pmu.flat
+groups = pmu
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v13 1/4] arm: Define macros for accessing system registers

2016-11-30 Thread Wei Huang
This patch defines four macros to assist in creating system register
accessors under both ARMv7 and AArch64:
   * DEFINE_GET_SYSREG32(name, ...)
   * DEFINE_SET_SYSREG32(name, ...)
   * DEFINE_GET_SYSREG64(name, ...)
   * DEFINE_SET_SYSREG64(name, ...)
These macros are translated to inline functions with consistent naming,
get_##name() and set_##name(), which can be used by C code directly.
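
As a quick illustration (a minimal sketch; the PMU accessors themselves are
added in the later patches of this series), the AArch64 64-bit pair for
PMCCNTR_EL0 would be declared and used like this:

  DEFINE_GET_SYSREG64(pmccntr, el0);  /* expands to get_pmccntr() */
  DEFINE_SET_SYSREG64(pmccntr, el0);  /* expands to set_pmccntr() */

  set_pmccntr(0);                     /* reset the cycle counter */
  uint64_t cycles = get_pmccntr();    /* read it back */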

Signed-off-by: Andre Przywara <andre.przyw...@arm.com>
Signed-off-by: Wei Huang <w...@redhat.com>
---
 lib/arm/asm/processor.h   | 37 -
 lib/arm64/asm/processor.h | 35 ---
 2 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/lib/arm/asm/processor.h b/lib/arm/asm/processor.h
index f25e7ee..3ca6b42 100644
--- a/lib/arm/asm/processor.h
+++ b/lib/arm/asm/processor.h
@@ -33,13 +33,40 @@ static inline unsigned long current_cpsr(void)
 
 #define current_mode() (current_cpsr() & MODE_MASK)
 
-static inline unsigned int get_mpidr(void)
-{
-   unsigned int mpidr;
-   asm volatile("mrc p15, 0, %0, c0, c0, 5" : "=r" (mpidr));
-   return mpidr;
+#define DEFINE_GET_SYSREG32(name, opc1, crn, crm, opc2)  \
+static inline uint32_t get_##name(void)  \
+{  \
+   uint32_t reg;   \
+   asm volatile("mrc p15, " #opc1 ", %0, " #crn ", " #crm ", " \
+#opc2 : "=r" (reg));   \
+   return reg; \
+}
+
+#define DEFINE_SET_SYSREG32(name, opc1, crn, crm, opc2)  \
+static inline void set_##name(uint32_t value)  \
+{  \
+   asm volatile("mcr p15, " #opc1 ", %0, " #crn ", " #crm ", " \
+#opc2 :: "r" (value)); \
+}
+
+#define DEFINE_GET_SYSREG64(name, opc, crm)\
+static inline uint64_t get_##name(void)  \
+{  \
+   uint32_t lo, hi;\
+   asm volatile("mrrc p15, " #opc ", %0, %1, " #crm\
+: "=r" (lo), "=r" (hi));   \
+   return (uint64_t)hi << 32 | lo; \
+}
+
+#define DEFINE_SET_SYSREG64(name, opc, crm)\
+static inline void set_##name(uint64_t value)  \
+{  \
+   asm volatile("mcrr p15, " #opc ", %0, %1, " #crm\
+:: "r" (value & 0xffffffff), "r" (value >> 32));   \
 }
 
+DEFINE_GET_SYSREG32(mpidr, 0, c0, c0, 5)
+
 /* Only support Aff0 for now, up to 4 cpus */
 #define mpidr_to_cpu(mpidr) ((int)((mpidr) & 0xff))
 
diff --git a/lib/arm64/asm/processor.h b/lib/arm64/asm/processor.h
index 84d5c7c..dfa75eb 100644
--- a/lib/arm64/asm/processor.h
+++ b/lib/arm64/asm/processor.h
@@ -66,14 +66,35 @@ static inline unsigned long current_level(void)
return el & 0xc;
 }
 
-#define DEFINE_GET_SYSREG32(reg)   \
-static inline unsigned int get_##reg(void) \
-{  \
-   unsigned int reg;   \
-   asm volatile("mrs %0, " #reg "_el1" : "=r" (reg));  \
-   return reg; \
+#define DEFINE_GET_SYSREG32(reg, el)   \
+static inline uint32_t get_##reg(void) \
+{  \
+   uint32_t reg;   \
+   asm volatile("mrs %0, " #reg "_" #el : "=r" (reg)); \
+   return reg; \
 }
-DEFINE_GET_SYSREG32(mpidr)
+
+#define DEFINE_SET_SYSREG32(reg, el)   \
+static inline void set_##reg(uint32_t value)   \
+{  \
+   asm volatile("msr " #reg "_" #el ", %0" :: "r" (value));\
+}
+
+#define DEFINE_GET_SYSREG64(reg, el)   \
+static inline uint64_t get_##reg(void)

Re: Unable to use perf in VM

2016-11-30 Thread Wei Huang


On 11/30/2016 07:37 AM, Marc Zyngier wrote:
> On 30/11/16 11:48, Marc Zyngier wrote:
>> + Shannon
>>
>> On 29/11/16 22:04, Itaru Kitayama wrote:
>>> Hi,
>>>
>>> In a VM (virsh controlled, KVM acceleration enabled) on a recent
>>> kvmarm kernel host, I find I am unable to use perf to obtain
>>> performance statistics for a complex task like kernel build.
>>> (I've verified this is seen with a Fedora 25 VM and host combination
>>> as well)
>>> APM folks CC'ed think this might be caused by a bug in the core PMU 
>>> framework code, thus I'd like to have experts opinion on this issue.
>>>
>>> [root@localhost linux]# perf stat -B make
>>>CHK include/config/kernel.release
>>> [  119.617684] git[1144]: undefined instruction: pc=fc000808ff30
>>> [  119.623040] Code: 51000442 92401042 d51b9ca2 d5033fdf (d53b9d40)
>>> [  119.627607] Internal error: undefined instruction: 0 [#1] SMP
>>
>> [...]
>>
>> In a VM running mainline hosted on an AMD Seattle box:
>>
>>  Performance counter stats for 'make':
>>
>> 1526089.499304  task-clock:u (msec)   #0.932 CPUs utilized   
>>
>>  0  context-switches:u#0.000 K/sec   
>>
>>  0  cpu-migrations:u  #0.000 K/sec   
>>
>>   29527793  page-faults:u #0.019 M/sec   
>>
>>  2913174122673  cycles:u  #1.909 GHz 
>>
>>  2365040892322  instructions:u#0.81  insn per cycle  
>>
>>  branches:u   
>>
>>32049215378  branch-misses:u   #0.00% of all branches 
>>
>>
>> 1637.531444837 seconds time elapsed
>>
>> Running the same host kernel on a Mustang system, the guest explodes
>> in the way you reported. The failing instruction always seems to be
>> an access to pmxevcntr_el0 (I've seen both reads and writes).
>>
>> Funnily enough, it dies if you try any HW event other than cycles
>> ("perf stat -e cycles ls" works, and "perf stat -e instructions ls"
>> explodes). Which would tend to indicate that we're screwing up
>> the counter selection, but I have no proof of that (specially that
>> the Seattle guest is working just as expected).
> 
> It turns out that we *don't* inject an undef. It seems to be generated
> locally at EL1.
> 
> Still digging.

Just FYI: I saw it on Mustang before. My initial thought was that it was
HW-related, but I had no proof. I am interested to see your findings...

> 
>   M.
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [Qemu-devel] [kvm-unit-tests PATCH v11 1/3] arm: Add PMU test

2016-11-23 Thread Wei Huang


On 11/23/2016 11:15 AM, Andrew Jones wrote:
> On Wed, Nov 23, 2016 at 01:16:08PM +, Andre Przywara wrote:
>> Hi,
>>
>> On 22/11/16 18:29, Wei Huang wrote:
>>> From: Christopher Covington <c...@codeaurora.org>
>>>
>>> Beginning with a simple sanity check of the control register, add
>>> a unit test for the ARM Performance Monitors Unit (PMU).
>>
>> Mmh, the output of this is a bit confusing. How about to join some
>> information? I changed it to give me:
>> INFO: pmu: PMU implementer/ID code: "A"(0x41)/0x0
>> INFO: pmu: Event counters:  0
>> PASS: pmu: Control register
>>
>> ... by using the newly introduced report_info() to make it look nicer.
> 
> Agreed. That would look nicer and make good use of report_info. Let's
> do that.

I have adjusted v12 to use report_info(), with all PMU PMCR fields
printed on the same line. The implementer info is printed as hex first,
then as its ASCII representation, to match the MIDR table in the ARM manual:

INFO: pmu: PMU implementer/ID code/counters: 0x41("A")/0x1/6
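
(For reference, those numbers would decode from a hypothetical PMCR value
of 0x41013000 as: implementer = (pmcr >> 24) & 0xff = 0x41 ('A'),
ID code = (pmcr >> 16) & 0xff = 0x1, counters = (pmcr >> 11) & 0x1f = 6.)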


> 
>>
>>>
>>> Signed-off-by: Christopher Covington <c...@codeaurora.org>
>>> Signed-off-by: Wei Huang <w...@redhat.com>
>>> Reviewed-by: Andrew Jones <drjo...@redhat.com>
>>> ---
>>>  arm/Makefile.common |  3 ++-
>>>  arm/pmu.c   | 74 
>>> +
>>>  arm/unittests.cfg   |  5 
>>>  3 files changed, 81 insertions(+), 1 deletion(-)
>>>  create mode 100644 arm/pmu.c
>>>
>>> diff --git a/arm/Makefile.common b/arm/Makefile.common
>>> index f37b5c2..5da2fdd 100644
>>> --- a/arm/Makefile.common
>>> +++ b/arm/Makefile.common
>>> @@ -12,7 +12,8 @@ endif
>>>  tests-common = \
>>> $(TEST_DIR)/selftest.flat \
>>> $(TEST_DIR)/spinlock-test.flat \
>>> -   $(TEST_DIR)/pci-test.flat
>>> +   $(TEST_DIR)/pci-test.flat \
>>> +   $(TEST_DIR)/pmu.flat
>>>  
>>>  all: test_cases
>>>  
>>> diff --git a/arm/pmu.c b/arm/pmu.c
>>> new file mode 100644
>>> index 000..9d9c53b
>>> --- /dev/null
>>> +++ b/arm/pmu.c
>>> @@ -0,0 +1,74 @@
>>> +/*
>>> + * Test the ARM Performance Monitors Unit (PMU).
>>> + *
>>> + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify it
>>> + * under the terms of the GNU Lesser General Public License version 2.1 and
>>> + * only version 2.1 as published by the Free Software Foundation.
>>> + *
>>> + * This program is distributed in the hope that it will be useful, but 
>>> WITHOUT
>>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>>> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 
>>> License
>>> + * for more details.
>>> + */
>>> +#include "libcflat.h"
>>> +#include "asm/barrier.h"
>>> +
>>> +#define PMU_PMCR_N_SHIFT   11
>>> +#define PMU_PMCR_N_MASK0x1f
>>> +#define PMU_PMCR_ID_SHIFT  16
>>> +#define PMU_PMCR_ID_MASK   0xff
>>> +#define PMU_PMCR_IMP_SHIFT 24
>>> +#define PMU_PMCR_IMP_MASK  0xff
>>> +
>>> +#if defined(__arm__)
>>
>> I guess you should use the arch specific header files we have in place
>> for that (lib/arm{.64}/asm/processor.h). Also there are sysreg read
>> wrappers (at least for arm64) in there already, can't we base this
>> function on them: DEFINE_GET_SYSREG32(pmcr, el0)?
>> (Requires a small change to get rid of the forced "_el1" suffix)
>>
>> We should wait for the GIC series to be merged, as this contains some
>> changes in this area.
> 
> As this unit test is the only consumer of PMC registers so far, then
> I'd prefer the defines and accessors stay here for now. Once we see
> a use in other unit tests then we can move some of it out.

I left accessors in-place. We can always come back to refactor them later.

> 
>>
>>> +static inline uint32_t pmcr_read(void)
>>> +{
>>> +   uint32_t ret;
>>> +
>>> +   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
>>> +   return ret;
>>> +}
>>> +#elif defined(__aarch64__)
>>> +static inline uint32_t pmcr_read(void)
>>> +{
>>> +   uint32_t ret;
>>> +
>>> +   asm volatile("mrs %0, pmcr_e

[kvm-unit-tests PATCH v12 3/3] arm: pmu: Add CPI checking

2016-11-23 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Calculate the number of cycles per instruction (CPI) implied by ARM
PMU cycle counter values. The code includes a strict checking facility
intended for the -icount option in TCG mode in the configuration file.

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
---
 arm/pmu.c | 123 +-
 arm/unittests.cfg |  14 +++
 2 files changed, 136 insertions(+), 1 deletion(-)

diff --git a/arm/pmu.c b/arm/pmu.c
index 3ae6545..f05d00d 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -104,6 +104,27 @@ static inline uint32_t id_dfr0_read(void)
asm volatile("mrc p15, 0, %0, c0, c1, 2" : "=r" (val));
return val;
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting. isb instructions were inserted to make sure
+ * pmccntr read after this function returns the exact instructions executed in
+ * the controlled block. Total instrs = isb + mcr + 2*loop = 2 + 2*loop.
+ */
+static inline void precise_instrs_loop(int loop, uint32_t pmcr)
+{
+   asm volatile(
+   "   mcr p15, 0, %[pmcr], c9, c12, 0\n"
+   "   isb\n"
+   "1: subs %[loop], %[loop], #1\n"
+   "   bgt 1b\n"
+   "   mcr p15, 0, %[z], c9, c12, 0\n"
+   "   isb\n"
+   : [loop] "+r" (loop)
+   : [pmcr] "r" (pmcr), [z] "r" (0)
+   : "cc");
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -150,6 +171,27 @@ static inline uint32_t id_dfr0_read(void)
asm volatile("mrs %0, id_dfr0_el1" : "=r" (id));
return id;
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting. isb instructions are inserted to make sure
+ * pmccntr read after this function returns the exact instructions executed
+ * in the controlled block. Total instrs = isb + msr + 2*loop = 2 + 2*loop.
+ */
+static inline void precise_instrs_loop(int loop, uint32_t pmcr)
+{
+   asm volatile(
+   "   msr pmcr_el0, %[pmcr]\n"
+   "   isb\n"
+   "1: subs %[loop], %[loop], #1\n"
+   "   b.gt 1b\n"
+   "   msr pmcr_el0, xzr\n"
+   "   isb\n"
+   : [loop] "+r" (loop)
+   : [pmcr] "r" (pmcr)
+   : "cc");
+}
 #endif
 
 /*
@@ -207,6 +249,79 @@ static bool check_cycles_increase(void)
return success;
 }
 
+/*
+ * Execute a known number of guest instructions. Only even instruction counts
+ * greater than or equal to 4 are supported by the in-line assembly code. The
+ * control register (PMCR_EL0) is initialized with the provided value (allowing
+ * for example for the cycle counter or event counters to be reset). At the end
+ * of the exact instruction loop, zero is written to PMCR_EL0 to disable
+ * counting, allowing the cycle counter or event counters to be read at the
+ * leisure of the calling code.
+ */
+static void measure_instrs(int num, uint32_t pmcr)
+{
+   int loop = (num - 2) / 2;
+
+   assert(num >= 4 && ((num - 2) % 2 == 0));
+   precise_instrs_loop(loop, pmcr);
+}
+
+/*
+ * Measure cycle counts for various known instruction counts. Ensure that the
+ * cycle counter progresses (similar to check_cycles_increase() but with more
+ * instructions and using reset and stop controls). If supplied a positive,
+ * nonzero CPI parameter, also strictly check that every measurement matches
+ * it. Strict CPI checking is used to test -icount mode.
+ */
+static bool check_cpi(int cpi)
+{
+   uint32_t pmcr = pmcr_read() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E;
+
+   /* init before event access, this test only cares about cycle count */
+   pmcntenset_write(1 << PMU_CYCLE_IDX);
+   pmccfiltr_write(0); /* count cycles in EL0, EL1, but not EL2 */
+
+   if (cpi > 0)
+   printf("Checking for CPI=%d.\n", cpi);
+   printf("instrs : cycles0 cycles1 ...\n");
+
+   for (unsigned int i = 4; i < 300; i += 32) {
+   uint64_t avg, sum = 0;
+
+   printf("%d :", i);
+   for (int j = 0; j < NR_SAMPLES; j++) {
+   uint64_t cycles;
+
+   pmccntr_write(0);
+   measure_instrs(i, pmcr);
+   cycles = pmccntr_read();
+   printf(" %"P

[kvm-unit-tests PATCH v12 2/3] arm: pmu: Check cycle count increases

2016-11-23 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
even for the smallest delta of two subsequent reads.
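
In outline, the check boils down to two back-to-back counter reads inside a
sampling loop (a minimal sketch of the loop body in the patch):

  a = pmccntr_read();
  b = pmccntr_read();
  /* success requires b > a; a >= b is reported as a failure */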

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
---
 arm/pmu.c | 156 ++
 1 file changed, 156 insertions(+)

diff --git a/arm/pmu.c b/arm/pmu.c
index 98ebea4..3ae6545 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -15,6 +15,9 @@
 #include "libcflat.h"
 #include "asm/barrier.h"
 
+#define PMU_PMCR_E (1 << 0)
+#define PMU_PMCR_C (1 << 2)
+#define PMU_PMCR_LC(1 << 6)
 #define PMU_PMCR_N_SHIFT   11
 #define PMU_PMCR_N_MASK0x1f
 #define PMU_PMCR_ID_SHIFT  16
@@ -22,6 +25,14 @@
 #define PMU_PMCR_IMP_SHIFT 24
 #define PMU_PMCR_IMP_MASK  0xff
 
+#define ID_DFR0_PERFMON_SHIFT 24
+#define ID_DFR0_PERFMON_MASK  0xf
+
+#define PMU_CYCLE_IDX 31
+
+#define NR_SAMPLES 10
+
+static unsigned int pmu_version;
 #if defined(__arm__)
 static inline uint32_t pmcr_read(void)
 {
@@ -30,6 +41,69 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
+   isb();
+}
+
+static inline void pmselr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
+   isb();
+}
+
+static inline void pmxevtyper_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
+}
+
+static inline uint64_t pmccntr_read(void)
+{
+   uint32_t lo, hi = 0;
+
+   if (pmu_version == 0x3)
+   asm volatile("mrrc p15, 0, %0, %1, c9" : "=r" (lo), "=r" (hi));
+   else
+   asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (lo));
+
+   return ((uint64_t)hi << 32) | lo;
+}
+
+static inline void pmccntr_write(uint64_t value)
+{
+   uint32_t lo, hi;
+
+   lo = value & 0xffffffff;
+   hi = (value >> 32) & 0xffffffff;
+
+   if (pmu_version == 0x3)
+   asm volatile("mcrr p15, 0, %0, %1, c9" : : "r" (lo), "r" (hi));
+   else
+   asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (lo));
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value));
+}
+
+/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
+static inline void pmccfiltr_write(uint32_t value)
+{
+   pmselr_write(PMU_CYCLE_IDX);
+   pmxevtyper_write(value);
+   isb();
+}
+
+static inline uint32_t id_dfr0_read(void)
+{
+   uint32_t val;
+
+   asm volatile("mrc p15, 0, %0, c0, c1, 2" : "=r" (val));
+   return val;
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -38,6 +112,44 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("msr pmcr_el0, %0" : : "r" (value));
+   isb();
+}
+
+static inline uint64_t pmccntr_read(void)
+{
+   uint64_t cycles;
+
+   asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
+   return cycles;
+}
+
+static inline void pmccntr_write(uint64_t value)
+{
+   asm volatile("msr pmccntr_el0, %0" : : "r" (value));
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("msr pmcntenset_el0, %0" : : "r" (value));
+}
+
+static inline void pmccfiltr_write(uint32_t value)
+{
+   asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
+   isb();
+}
+
+static inline uint32_t id_dfr0_read(void)
+{
+   uint32_t id;
+
+   asm volatile("mrs %0, id_dfr0_el1" : "=r" (id));
+   return id;
+}
 #endif
 
 /*
@@ -63,11 +175,55 @@ static bool check_pmcr(void)
return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
 }
 
+/*
+ * Ensure that the cycle counter progresses between back-to-back reads.
+ */
+static bool check_cycles_increase(void)
+{
+   bool success = true;
+
+   /* init before event access, this test only cares about cycle count */
+   pmcntenset_write(1 << PMU_CYCLE_IDX);
+   pmccfiltr_write(0); /* count cycles in EL0, EL1, but not EL2 */
+   pmccntr_write(0);
+
+   pmcr_write(pmcr_read() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E);
+
+   for (int i = 0; i < NR_SAMPLES; i++) {

[kvm-unit-tests PATCH v12 1/3] arm: Add PMU test

2016-11-23 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Beginning with a simple sanity check of the control register, add
a unit test for the ARM Performance Monitors Unit (PMU).

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
---
 arm/Makefile.common |  3 ++-
 arm/pmu.c   | 73 +
 arm/unittests.cfg   |  5 
 3 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

diff --git a/arm/Makefile.common b/arm/Makefile.common
index f37b5c2..5da2fdd 100644
--- a/arm/Makefile.common
+++ b/arm/Makefile.common
@@ -12,7 +12,8 @@ endif
 tests-common = \
$(TEST_DIR)/selftest.flat \
$(TEST_DIR)/spinlock-test.flat \
-   $(TEST_DIR)/pci-test.flat
+   $(TEST_DIR)/pci-test.flat \
+   $(TEST_DIR)/pmu.flat
 
 all: test_cases
 
diff --git a/arm/pmu.c b/arm/pmu.c
new file mode 100644
index 000..98ebea4
--- /dev/null
+++ b/arm/pmu.c
@@ -0,0 +1,73 @@
+/*
+ * Test the ARM Performance Monitors Unit (PMU).
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License version 2.1 and
+ * only version 2.1 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ */
+#include "libcflat.h"
+#include "asm/barrier.h"
+
+#define PMU_PMCR_N_SHIFT   11
+#define PMU_PMCR_N_MASK0x1f
+#define PMU_PMCR_ID_SHIFT  16
+#define PMU_PMCR_ID_MASK   0xff
+#define PMU_PMCR_IMP_SHIFT 24
+#define PMU_PMCR_IMP_MASK  0xff
+
+#if defined(__arm__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
+   return ret;
+}
+#elif defined(__aarch64__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
+   return ret;
+}
+#endif
+
+/*
+ * As a simple sanity check on the PMCR_EL0, ensure the implementer field isn't
+ * null. Also print out a couple other interesting fields for diagnostic
+ * purposes. For example, as of fall 2016, QEMU TCG mode doesn't implement
+ * event counters and therefore reports zero event counters, but hopefully
+ * support for at least the instructions event will be added in the future and
+ * the reported number of event counters will become nonzero.
+ */
+static bool check_pmcr(void)
+{
+   uint32_t pmcr;
+
+   pmcr = pmcr_read();
+
+   report_info("PMU implementer/ID code/counters: 0x%x(\"%c\")/0x%x/%d",
+   (pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK,
+   ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) ? : ' ',
+   (pmcr >> PMU_PMCR_ID_SHIFT) & PMU_PMCR_ID_MASK,
+   (pmcr >> PMU_PMCR_N_SHIFT) & PMU_PMCR_N_MASK);
+
+   return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
+}
+
+int main(void)
+{
+   report_prefix_push("pmu");
+
+   report("Control register", check_pmcr());
+
+   return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index ae32a42..816f494 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -58,3 +58,8 @@ groups = selftest
 [pci-test]
 file = pci-test.flat
 groups = pci
+
+# Test PMU support
+[pmu]
+file = pmu.flat
+groups = pmu
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v12 0/3] ARM PMU tests

2016-11-23 Thread Wei Huang
Changes from v11:
* Use report_info() to report PMU HW related info (implementer, id code, ...)
* Print PMU PMCR info in the same line

Note:
1) Current KVM code has bugs in handling PMCCFILTR write. A fix (see
below) is required for this unit testing code to work correctly under
KVM mode.
https://lists.cs.columbia.edu/pipermail/kvmarm/2016-November/022134.html.

Thanks,
-Wei

Christopher Covington (3):
  arm: Add PMU test
  arm: pmu: Check cycle count increases
  arm: pmu: Add CPI checking

 arm/Makefile.common |   3 +-
 arm/pmu.c   | 350 
 arm/unittests.cfg   |  19 +++
 3 files changed, 371 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH v11 1/3] arm: Add PMU test

2016-11-23 Thread Wei Huang


On 11/23/2016 07:16 AM, Andre Przywara wrote:
> Hi,
> 
> On 22/11/16 18:29, Wei Huang wrote:
>> From: Christopher Covington <c...@codeaurora.org>
>>
>> Beginning with a simple sanity check of the control register, add
>> a unit test for the ARM Performance Monitors Unit (PMU).
> 
> Mmh, the output of this is a bit confusing. How about to join some
> information? I changed it to give me:
> INFO: pmu: PMU implementer/ID code: "A"(0x41)/0x0
> INFO: pmu: Event counters:  0
> PASS: pmu: Control register
> 
> ... by using the newly introduced report_info() to make it look nicer.
> 
>>
>> Signed-off-by: Christopher Covington <c...@codeaurora.org>
>> Signed-off-by: Wei Huang <w...@redhat.com>
>> Reviewed-by: Andrew Jones <drjo...@redhat.com>
>> ---
>>  arm/Makefile.common |  3 ++-
>>  arm/pmu.c   | 74 
>> +
>>  arm/unittests.cfg   |  5 
>>  3 files changed, 81 insertions(+), 1 deletion(-)
>>  create mode 100644 arm/pmu.c
>>
>> diff --git a/arm/Makefile.common b/arm/Makefile.common
>> index f37b5c2..5da2fdd 100644
>> --- a/arm/Makefile.common
>> +++ b/arm/Makefile.common
>> @@ -12,7 +12,8 @@ endif
>>  tests-common = \
>>  $(TEST_DIR)/selftest.flat \
>>  $(TEST_DIR)/spinlock-test.flat \
>> -$(TEST_DIR)/pci-test.flat
>> +$(TEST_DIR)/pci-test.flat \
>> +$(TEST_DIR)/pmu.flat
>>  
>>  all: test_cases
>>  
>> diff --git a/arm/pmu.c b/arm/pmu.c
>> new file mode 100644
>> index 000..9d9c53b
>> --- /dev/null
>> +++ b/arm/pmu.c
>> @@ -0,0 +1,74 @@
>> +/*
>> + * Test the ARM Performance Monitors Unit (PMU).
>> + *
>> + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU Lesser General Public License version 2.1 and
>> + * only version 2.1 as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, but 
>> WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 
>> License
>> + * for more details.
>> + */
>> +#include "libcflat.h"
>> +#include "asm/barrier.h"
>> +
>> +#define PMU_PMCR_N_SHIFT   11
>> +#define PMU_PMCR_N_MASK0x1f
>> +#define PMU_PMCR_ID_SHIFT  16
>> +#define PMU_PMCR_ID_MASK   0xff
>> +#define PMU_PMCR_IMP_SHIFT 24
>> +#define PMU_PMCR_IMP_MASK  0xff
>> +
>> +#if defined(__arm__)
> 
> I guess you should use the arch specific header files we have in place
> for that (lib/arm{.64}/asm/processor.h). Also there are sysreg read
> wrappers (at least for arm64) in there already, can't we base this
> function on them: DEFINE_GET_SYSREG32(pmcr, el0)?
> (Requires a small change to get rid of the forced "_el1" suffix)
> 
> We should wait for the GIC series to be merged, as this contains some
> changes in this area.

We planned to add it after this series is merged. However, if the GIC
series has similar support, we can piggy-back on it.

> 
>> +static inline uint32_t pmcr_read(void)
>> +{
>> +uint32_t ret;
>> +
>> +asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
>> +return ret;
>> +}
>> +#elif defined(__aarch64__)
>> +static inline uint32_t pmcr_read(void)
>> +{
>> +uint32_t ret;
>> +
>> +asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
>> +return ret;
>> +}
>> +#endif
>> +
>> +/*
>> + * As a simple sanity check on the PMCR_EL0, ensure the implementer field 
>> isn't
>> + * null. Also print out a couple other interesting fields for diagnostic
>> + * purposes. For example, as of fall 2016, QEMU TCG mode doesn't implement
>> + * event counters and therefore reports zero event counters, but hopefully
>> + * support for at least the instructions event will be added in the future 
>> and
>> + * the reported number of event counters will become nonzero.
>> + */
>> +static bool check_pmcr(void)
>> +{
>> +uint32_t pmcr;
>> +
>> +pmcr = pmcr_read();
>> +
>> +printf("PMU implementer: %c\n",
>> +   (pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK);
> 
> If this register reads as zero, the output is mangled (si

[kvm-unit-tests PATCH v11 1/3] arm: Add PMU test

2016-11-22 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Beginning with a simple sanity check of the control register, add
a unit test for the ARM Performance Monitors Unit (PMU).

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
---
 arm/Makefile.common |  3 ++-
 arm/pmu.c   | 74 +
 arm/unittests.cfg   |  5 
 3 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

diff --git a/arm/Makefile.common b/arm/Makefile.common
index f37b5c2..5da2fdd 100644
--- a/arm/Makefile.common
+++ b/arm/Makefile.common
@@ -12,7 +12,8 @@ endif
 tests-common = \
$(TEST_DIR)/selftest.flat \
$(TEST_DIR)/spinlock-test.flat \
-   $(TEST_DIR)/pci-test.flat
+   $(TEST_DIR)/pci-test.flat \
+   $(TEST_DIR)/pmu.flat
 
 all: test_cases
 
diff --git a/arm/pmu.c b/arm/pmu.c
new file mode 100644
index 000..9d9c53b
--- /dev/null
+++ b/arm/pmu.c
@@ -0,0 +1,74 @@
+/*
+ * Test the ARM Performance Monitors Unit (PMU).
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License version 2.1 and
+ * only version 2.1 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ */
+#include "libcflat.h"
+#include "asm/barrier.h"
+
+#define PMU_PMCR_N_SHIFT   11
+#define PMU_PMCR_N_MASK0x1f
+#define PMU_PMCR_ID_SHIFT  16
+#define PMU_PMCR_ID_MASK   0xff
+#define PMU_PMCR_IMP_SHIFT 24
+#define PMU_PMCR_IMP_MASK  0xff
+
+#if defined(__arm__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
+   return ret;
+}
+#elif defined(__aarch64__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
+   return ret;
+}
+#endif
+
+/*
+ * As a simple sanity check on the PMCR_EL0, ensure the implementer field isn't
+ * null. Also print out a couple other interesting fields for diagnostic
+ * purposes. For example, as of fall 2016, QEMU TCG mode doesn't implement
+ * event counters and therefore reports zero event counters, but hopefully
+ * support for at least the instructions event will be added in the future and
+ * the reported number of event counters will become nonzero.
+ */
+static bool check_pmcr(void)
+{
+   uint32_t pmcr;
+
+   pmcr = pmcr_read();
+
+   printf("PMU implementer: %c\n",
+  (pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK);
+   printf("Identification code: 0x%x\n",
+  (pmcr >> PMU_PMCR_ID_SHIFT) & PMU_PMCR_ID_MASK);
+   printf("Event counters:  %d\n",
+  (pmcr >> PMU_PMCR_N_SHIFT) & PMU_PMCR_N_MASK);
+
+   return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
+}
+
+int main(void)
+{
+   report_prefix_push("pmu");
+
+   report("Control register", check_pmcr());
+
+   return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index ae32a42..816f494 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -58,3 +58,8 @@ groups = selftest
 [pci-test]
 file = pci-test.flat
 groups = pci
+
+# Test PMU support
+[pmu]
+file = pmu.flat
+groups = pmu
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v11 3/3] arm: pmu: Add CPI checking

2016-11-22 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Calculate the number of cycles per instruction (CPI) implied by ARM
PMU cycle counter values. The code includes a strict checking facility
intended for the -icount option in TCG mode in the configuration file.

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
---
 arm/pmu.c | 123 +-
 arm/unittests.cfg |  14 +++
 2 files changed, 136 insertions(+), 1 deletion(-)

diff --git a/arm/pmu.c b/arm/pmu.c
index 176b070..f667676 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -104,6 +104,27 @@ static inline uint32_t id_dfr0_read(void)
asm volatile("mrc p15, 0, %0, c0, c1, 2" : "=r" (val));
return val;
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting. isb instructions were inserted to make sure
+ * pmccntr read after this function returns the exact instructions executed in
+ * the controlled block. Total instrs = isb + mcr + 2*loop = 2 + 2*loop.
+ */
+static inline void precise_instrs_loop(int loop, uint32_t pmcr)
+{
+   asm volatile(
+   "   mcr p15, 0, %[pmcr], c9, c12, 0\n"
+   "   isb\n"
+   "1: subs %[loop], %[loop], #1\n"
+   "   bgt 1b\n"
+   "   mcr p15, 0, %[z], c9, c12, 0\n"
+   "   isb\n"
+   : [loop] "+r" (loop)
+   : [pmcr] "r" (pmcr), [z] "r" (0)
+   : "cc");
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -150,6 +171,27 @@ static inline uint32_t id_dfr0_read(void)
asm volatile("mrs %0, id_dfr0_el1" : "=r" (id));
return id;
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting. isb instructions are inserted to make sure
+ * pmccntr read after this function returns the exact instructions executed
+ * in the controlled block. Total instrs = isb + msr + 2*loop = 2 + 2*loop.
+ */
+static inline void precise_instrs_loop(int loop, uint32_t pmcr)
+{
+   asm volatile(
+   "   msr pmcr_el0, %[pmcr]\n"
+   "   isb\n"
+   "1: subs %[loop], %[loop], #1\n"
+   "   b.gt 1b\n"
+   "   msr pmcr_el0, xzr\n"
+   "   isb\n"
+   : [loop] "+r" (loop)
+   : [pmcr] "r" (pmcr)
+   : "cc");
+}
 #endif
 
 /*
@@ -208,6 +250,79 @@ static bool check_cycles_increase(void)
return success;
 }
 
+/*
+ * Execute a known number of guest instructions. Only even instruction counts
+ * greater than or equal to 4 are supported by the in-line assembly code. The
+ * control register (PMCR_EL0) is initialized with the provided value (allowing
+ * for example for the cycle counter or event counters to be reset). At the end
+ * of the exact instruction loop, zero is written to PMCR_EL0 to disable
+ * counting, allowing the cycle counter or event counters to be read at the
+ * leisure of the calling code.
+ */
+static void measure_instrs(int num, uint32_t pmcr)
+{
+   int loop = (num - 2) / 2;
+
+   assert(num >= 4 && ((num - 2) % 2 == 0));
+   precise_instrs_loop(loop, pmcr);
+}
+
+/*
+ * Measure cycle counts for various known instruction counts. Ensure that the
+ * cycle counter progresses (similar to check_cycles_increase() but with more
+ * instructions and using reset and stop controls). If supplied a positive,
+ * nonzero CPI parameter, also strictly check that every measurement matches
+ * it. Strict CPI checking is used to test -icount mode.
+ */
+static bool check_cpi(int cpi)
+{
+   uint32_t pmcr = pmcr_read() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E;
+
+   /* init before event access, this test only cares about cycle count */
+   pmcntenset_write(1 << PMU_CYCLE_IDX);
+   pmccfiltr_write(0); /* count cycles in EL0, EL1, but not EL2 */
+
+   if (cpi > 0)
+   printf("Checking for CPI=%d.\n", cpi);
+   printf("instrs : cycles0 cycles1 ...\n");
+
+   for (unsigned int i = 4; i < 300; i += 32) {
+   uint64_t avg, sum = 0;
+
+   printf("%d :", i);
+   for (int j = 0; j < NR_SAMPLES; j++) {
+   uint64_t cycles;
+
+   pmccntr_write(0);
+   measure_instrs(i, pmcr);
+   cycles = pmccntr_read();
+   printf(" %"PRId64"", cycles);
+
+   

[kvm-unit-tests PATCH v11 0/3] ARM PMU tests

2016-11-22 Thread Wei Huang
Changes from v10:
* Change the name of loop test function to precise_instrs_loop()
* Minor comment fixes to measure_instrs() and to explain isb() in loop funcs 

Note:
1) Current KVM code has bugs in handling PMCCFILTR write. A fix (see
below) is required for this unit testing code to work correctly under
KVM mode.
https://lists.cs.columbia.edu/pipermail/kvmarm/2016-November/022134.html.

Thanks,
-Wei

Christopher Covington (3):
  arm: Add PMU test
  arm: pmu: Check cycle count increases
  arm: pmu: Add CPI checking

 arm/Makefile.common |   3 +-
 arm/pmu.c   | 351 
 arm/unittests.cfg   |  19 +++
 3 files changed, 372 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH v10 3/3] arm: pmu: Add CPI checking

2016-11-21 Thread Wei Huang


On 11/21/2016 03:40 PM, Christopher Covington wrote:
> Hi Wei,
> 
> On 11/21/2016 03:24 PM, Wei Huang wrote:
>> From: Christopher Covington <c...@codeaurora.org>
> 
> I really appreciate your work on these patches. If for any or all of these
> you have more lines added/modified than me (or using any other better
> metric), please make sure to change the author to be you with
> `git commit --amend --reset-author` or equivalent.

Sure, I will if needed. Regarding your comments below, I will fix the
patch series after Drew's comments, if any.

> 
>> Calculate the numbers of cycles per instruction (CPI) implied by ARM
>> PMU cycle counter values. The code includes a strict checking facility
>> intended for the -icount option in TCG mode in the configuration file.
>>
>> Signed-off-by: Christopher Covington <c...@codeaurora.org>
>> Signed-off-by: Wei Huang <w...@redhat.com>
>> ---
>>  arm/pmu.c | 119 
>> +-
>>  arm/unittests.cfg |  14 +++
>>  2 files changed, 132 insertions(+), 1 deletion(-)
>>
>> diff --git a/arm/pmu.c b/arm/pmu.c
>> index 176b070..129ef1e 100644
>> --- a/arm/pmu.c
>> +++ b/arm/pmu.c
>> @@ -104,6 +104,25 @@ static inline uint32_t id_dfr0_read(void)
>>  asm volatile("mrc p15, 0, %0, c0, c1, 2" : "=r" (val));
>>  return val;
>>  }
>> +
>> +/*
>> + * Extra instructions inserted by the compiler would be difficult to 
>> compensate
>> + * for, so hand assemble everything between, and including, the PMCR 
>> accesses
>> + * to start and stop counting. Total cycles = isb + mcr + 2*loop = 2 + 
>> 2*loop.
   
I will change the comment above to "Total instrs".

>> + */
>> +static inline void precise_cycles_loop(int loop, uint32_t pmcr)
> 
> Nit: I would call this precise_instrs_loop. How many cycles it takes is
> IMPLEMENTATION DEFINED.

You are right. The cycle count indeed depends on the design. Will fix.

> 
>> +{
>> +asm volatile(
>> +"   mcr p15, 0, %[pmcr], c9, c12, 0\n"
>> +"   isb\n"
>> +"1: subs%[loop], %[loop], #1\n"
>> +"   bgt 1b\n"
> 
> Is there any chance we might need an isb here, to prevent the stop from 
> happening
> before or during the loop? Where ISBs are required, the Linux best practice 
> is to

In theory, I think this can happen if the mcr is executed before all loop
instructions have completed, causing pmccntr_read() to miss some cycles. But
QEMU TCG mode doesn't support out-of-order execution, so the test
condition, "cpi > 0 && cycles != i * cpi", will never be TRUE. Because
cpi==0 in KVM, this same test condition won't be TRUE under KVM mode either.
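
For illustration only, the variant being asked about would add a barrier
between the loop and the write that stops counting, e.g. on AArch64:

  "1: subs %[loop], %[loop], #1\n"
  "   b.gt 1b\n"
  "   isb\n"                  /* hypothetical: retire the loop */
  "   msr pmcr_el0, xzr\n"    /* before counting is disabled */
  "   isb\n"

This is not what the patch does, and it would also change the total
instruction count of the measured block.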

> diligently comment why they are needed. Perhaps it would be a good habit to
> carry over into kvm-unit-tests.

Agreed. Most isb() instructions were added following CP15 writes (not
after all CP15 writes, but at selected locations). We tried to follow what
the Linux kernel does in perf_event.c. If you feel that any isb() location
needs a special comment, I will be more than happy to add it.


___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v10 1/3] arm: Add PMU test

2016-11-21 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Beginning with a simple sanity check of the control register, add
a unit test for the ARM Performance Monitors Unit (PMU).

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
---
 arm/Makefile.common |  3 ++-
 arm/pmu.c   | 74 +
 arm/unittests.cfg   |  5 
 3 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

diff --git a/arm/Makefile.common b/arm/Makefile.common
index f37b5c2..5da2fdd 100644
--- a/arm/Makefile.common
+++ b/arm/Makefile.common
@@ -12,7 +12,8 @@ endif
 tests-common = \
$(TEST_DIR)/selftest.flat \
$(TEST_DIR)/spinlock-test.flat \
-   $(TEST_DIR)/pci-test.flat
+   $(TEST_DIR)/pci-test.flat \
+   $(TEST_DIR)/pmu.flat
 
 all: test_cases
 
diff --git a/arm/pmu.c b/arm/pmu.c
new file mode 100644
index 000..9d9c53b
--- /dev/null
+++ b/arm/pmu.c
@@ -0,0 +1,74 @@
+/*
+ * Test the ARM Performance Monitors Unit (PMU).
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License version 2.1 and
+ * only version 2.1 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ */
+#include "libcflat.h"
+#include "asm/barrier.h"
+
+#define PMU_PMCR_N_SHIFT   11
+#define PMU_PMCR_N_MASK0x1f
+#define PMU_PMCR_ID_SHIFT  16
+#define PMU_PMCR_ID_MASK   0xff
+#define PMU_PMCR_IMP_SHIFT 24
+#define PMU_PMCR_IMP_MASK  0xff
+
+#if defined(__arm__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
+   return ret;
+}
+#elif defined(__aarch64__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
+   return ret;
+}
+#endif
+
+/*
+ * As a simple sanity check on the PMCR_EL0, ensure the implementer field isn't
+ * null. Also print out a couple other interesting fields for diagnostic
+ * purposes. For example, as of fall 2016, QEMU TCG mode doesn't implement
+ * event counters and therefore reports zero event counters, but hopefully
+ * support for at least the instructions event will be added in the future and
+ * the reported number of event counters will become nonzero.
+ */
+static bool check_pmcr(void)
+{
+   uint32_t pmcr;
+
+   pmcr = pmcr_read();
+
+   printf("PMU implementer: %c\n",
+  (pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK);
+   printf("Identification code: 0x%x\n",
+  (pmcr >> PMU_PMCR_ID_SHIFT) & PMU_PMCR_ID_MASK);
+   printf("Event counters:  %d\n",
+  (pmcr >> PMU_PMCR_N_SHIFT) & PMU_PMCR_N_MASK);
+
+   return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
+}
+
+int main(void)
+{
+   report_prefix_push("pmu");
+
+   report("Control register", check_pmcr());
+
+   return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index ae32a42..816f494 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -58,3 +58,8 @@ groups = selftest
 [pci-test]
 file = pci-test.flat
 groups = pci
+
+# Test PMU support
+[pmu]
+file = pmu.flat
+groups = pmu
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v10 2/3] arm: pmu: Check cycle count increases

2016-11-21 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
even for the smallest delta of two subsequent reads.

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
---
 arm/pmu.c | 156 ++
 1 file changed, 156 insertions(+)

diff --git a/arm/pmu.c b/arm/pmu.c
index 9d9c53b..176b070 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -15,6 +15,9 @@
 #include "libcflat.h"
 #include "asm/barrier.h"
 
+#define PMU_PMCR_E (1 << 0)
+#define PMU_PMCR_C (1 << 2)
+#define PMU_PMCR_LC(1 << 6)
 #define PMU_PMCR_N_SHIFT   11
 #define PMU_PMCR_N_MASK0x1f
 #define PMU_PMCR_ID_SHIFT  16
@@ -22,6 +25,14 @@
 #define PMU_PMCR_IMP_SHIFT 24
 #define PMU_PMCR_IMP_MASK  0xff
 
+#define ID_DFR0_PERFMON_SHIFT 24
+#define ID_DFR0_PERFMON_MASK  0xf
+
+#define PMU_CYCLE_IDX 31
+
+#define NR_SAMPLES 10
+
+static unsigned int pmu_version;
 #if defined(__arm__)
 static inline uint32_t pmcr_read(void)
 {
@@ -30,6 +41,69 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
+   isb();
+}
+
+static inline void pmselr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
+   isb();
+}
+
+static inline void pmxevtyper_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
+}
+
+static inline uint64_t pmccntr_read(void)
+{
+   uint32_t lo, hi = 0;
+
+   if (pmu_version == 0x3)
+   asm volatile("mrrc p15, 0, %0, %1, c9" : "=r" (lo), "=r" (hi));
+   else
+   asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (lo));
+
+   return ((uint64_t)hi << 32) | lo;
+}
+
+static inline void pmccntr_write(uint64_t value)
+{
+   uint32_t lo, hi;
+
+   lo = value & 0xffffffff;
+   hi = (value >> 32) & 0xffffffff;
+
+   if (pmu_version == 0x3)
+   asm volatile("mcrr p15, 0, %0, %1, c9" : : "r" (lo), "r" (hi));
+   else
+   asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (lo));
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value));
+}
+
+/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
+static inline void pmccfiltr_write(uint32_t value)
+{
+   pmselr_write(PMU_CYCLE_IDX);
+   pmxevtyper_write(value);
+   isb();
+}
+
+static inline uint32_t id_dfr0_read(void)
+{
+   uint32_t val;
+
+   asm volatile("mrc p15, 0, %0, c0, c1, 2" : "=r" (val));
+   return val;
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -38,6 +112,44 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("msr pmcr_el0, %0" : : "r" (value));
+   isb();
+}
+
+static inline uint64_t pmccntr_read(void)
+{
+   uint64_t cycles;
+
+   asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
+   return cycles;
+}
+
+static inline void pmccntr_write(uint64_t value)
+{
+   asm volatile("msr pmccntr_el0, %0" : : "r" (value));
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("msr pmcntenset_el0, %0" : : "r" (value));
+}
+
+static inline void pmccfiltr_write(uint32_t value)
+{
+   asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
+   isb();
+}
+
+static inline uint32_t id_dfr0_read(void)
+{
+   uint32_t id;
+
+   asm volatile("mrs %0, id_dfr0_el1" : "=r" (id));
+   return id;
+}
 #endif
 
 /*
@@ -64,11 +176,55 @@ static bool check_pmcr(void)
return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
 }
 
+/*
+ * Ensure that the cycle counter progresses between back-to-back reads.
+ */
+static bool check_cycles_increase(void)
+{
+   bool success = true;
+
+   /* init before event access, this test only cares about cycle count */
+   pmcntenset_write(1 << PMU_CYCLE_IDX);
+   pmccfiltr_write(0); /* count cycles in EL0, EL1, but not EL2 */
+   pmccntr_write(0);
+
+   pmcr_write(pmcr_read() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E);
+
+   for (int i = 0; i < NR_SAMPLES; i++) {
+   uint64_t a, b;
+
+  

[kvm-unit-tests PATCH v9 3/3] arm: pmu: Add CPI checking

2016-11-18 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Calculate the number of cycles per instruction (CPI) implied by ARM
PMU cycle counter values. The code includes a strict checking facility
intended for the -icount option in TCG mode in the configuration file.

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
---
 arm/pmu.c | 111 +-
 arm/unittests.cfg |  14 +++
 2 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/arm/pmu.c b/arm/pmu.c
index fa87de4..b36c4fb 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -104,6 +104,25 @@ static inline uint32_t id_dfr0_read(void)
asm volatile("mrc p15, 0, %0, c0, c1, 2" : "=r" (val));
return val;
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting.
+ */
+static inline void loop(int i, uint32_t pmcr)
+{
+   asm volatile(
+   "   mcr p15, 0, %[pmcr], c9, c12, 0\n"
+   "   isb\n"
+   "1: subs %[i], %[i], #1\n"
+   "   bgt 1b\n"
+   "   mcr p15, 0, %[z], c9, c12, 0\n"
+   "   isb\n"
+   : [i] "+r" (i)
+   : [pmcr] "r" (pmcr), [z] "r" (0)
+   : "cc");
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -150,6 +169,25 @@ static inline uint32_t id_dfr0_read(void)
asm volatile("mrs %0, id_dfr0_el1" : "=r" (id));
return id;
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting.
+ */
+static inline void loop(int i, uint32_t pmcr)
+{
+   asm volatile(
+   "   msr pmcr_el0, %[pmcr]\n"
+   "   isb\n"
+   "1: subs %[i], %[i], #1\n"
+   "   b.gt 1b\n"
+   "   msr pmcr_el0, xzr\n"
+   "   isb\n"
+   : [i] "+r" (i)
+   : [pmcr] "r" (pmcr)
+   : "cc");
+}
 #endif
 
 /*
@@ -204,6 +242,71 @@ static bool check_cycles_increase(void)
return success;
 }
 
+/*
+ * Execute a known number of guest instructions. Only odd instruction counts
+ * greater than or equal to 3 are supported by the in-line assembly code. The
+ * control register (PMCR_EL0) is initialized with the provided value (allowing
+ * for example for the cycle counter or event counters to be reset). At the end
+ * of the exact instruction loop, zero is written to PMCR_EL0 to disable
+ * counting, allowing the cycle counter or event counters to be read at the
+ * leisure of the calling code.
+ */
+static void measure_instrs(int num, uint32_t pmcr)
+{
+   int i = (num - 1) / 2;
+
+   assert(num >= 3 && ((num - 1) % 2 == 0));
+   loop(i, pmcr);
+}
+
+/*
+ * Measure cycle counts for various known instruction counts. Ensure that the
+ * cycle counter progresses (similar to check_cycles_increase() but with more
+ * instructions and using reset and stop controls). If supplied a positive,
+ * nonzero CPI parameter, also strictly check that every measurement matches
+ * it. Strict CPI checking is used to test -icount mode.
+ */
+static bool check_cpi(int cpi)
+{
+   uint32_t pmcr = pmcr_read() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E;
+   
+   if (cpi > 0)
+   printf("Checking for CPI=%d.\n", cpi);
+   printf("instrs : cycles0 cycles1 ...\n");
+
+   for (unsigned int i = 3; i < 300; i += 32) {
+   uint64_t avg, sum = 0;
+
+   printf("%d :", i);
+   for (int j = 0; j < NR_SAMPLES; j++) {
+   uint64_t cycles;
+
+   pmccntr_write(0);
+   measure_instrs(i, pmcr);
+   cycles = pmccntr_read();
+   printf(" %"PRId64"", cycles);
+
+   /*
+* The cycles taken by the loop above should fit in
+* 32 bits easily. We check the upper 32 bits of the
+* cycle counter to make sure there is no surprise.
+*/
+   if (!cycles || (cpi > 0 && cycles != i * cpi) ||
+   (cycles & 0xffffffff00000000)) {
+   printf("\n");
+   return false;
+   }
+
+   sum += cycles;
+   }
+   avg = sum / NR_SAMPLES;
+   printf(" sum=%"PRId64" a

[kvm-unit-tests PATCH v9 2/3] arm: pmu: Check cycle count increases

2016-11-18 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
even for the smallest delta of two subsequent reads.

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
---
 arm/pmu.c | 156 ++
 1 file changed, 156 insertions(+)

diff --git a/arm/pmu.c b/arm/pmu.c
index 9d9c53b..fa87de4 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -15,6 +15,9 @@
 #include "libcflat.h"
 #include "asm/barrier.h"
 
+#define PMU_PMCR_E (1 << 0)
+#define PMU_PMCR_C (1 << 2)
+#define PMU_PMCR_LC    (1 << 6)
 #define PMU_PMCR_N_SHIFT   11
 #define PMU_PMCR_N_MASK0x1f
 #define PMU_PMCR_ID_SHIFT  16
@@ -22,6 +25,14 @@
 #define PMU_PMCR_IMP_SHIFT 24
 #define PMU_PMCR_IMP_MASK  0xff
 
+#define ID_DFR0_PERFMON_SHIFT 24
+#define ID_DFR0_PERFMON_MASK  0xf
+
+#define PMU_CYCLE_IDX 31
+
+#define NR_SAMPLES 10
+
+static unsigned int pmu_version;
 #if defined(__arm__)
 static inline uint32_t pmcr_read(void)
 {
@@ -30,6 +41,69 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
+   isb();
+}
+
+static inline void pmselr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
+   isb();
+}
+
+static inline void pmxevtyper_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
+}
+
+static inline uint64_t pmccntr_read(void)
+{
+   uint32_t lo, hi = 0;
+
+   if (pmu_version == 0x3)
+   asm volatile("mrrc p15, 0, %0, %1, c9" : "=r" (lo), "=r" (hi));
+   else
+   asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (lo));
+
+   return ((uint64_t)hi << 32) | lo;
+}
+
+static inline void pmccntr_write(uint64_t value)
+{
+   uint32_t lo, hi;
+
+   lo = value & 0xffffffff;
+   hi = (value >> 32) & 0xffffffff;
+
+   if (pmu_version == 0x3)
+   asm volatile("mcrr p15, 0, %0, %1, c9" : : "r" (lo), "r" (hi));
+   else
+   asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (lo));
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value));
+}
+
+/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
+static inline void pmccfiltr_write(uint32_t value)
+{
+   pmselr_write(PMU_CYCLE_IDX);
+   pmxevtyper_write(value);
+   isb();
+}
+
+static inline uint32_t id_dfr0_read(void)
+{
+   uint32_t val;
+
+   asm volatile("mrc p15, 0, %0, c0, c1, 2" : "=r" (val));
+   return val;
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -38,6 +112,44 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("msr pmcr_el0, %0" : : "r" (value));
+   isb();
+}
+
+static inline uint64_t pmccntr_read(void)
+{
+   uint64_t cycles;
+
+   asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
+   return cycles;
+}
+
+static inline void pmccntr_write(uint64_t value)
+{
+   asm volatile("msr pmccntr_el0, %0" : : "r" (value));
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("msr pmcntenset_el0, %0" : : "r" (value));
+}
+
+static inline void pmccfiltr_write(uint32_t value)
+{
+   asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
+   isb();
+}
+
+static inline uint32_t id_dfr0_read(void)
+{
+   uint32_t id;
+
+   asm volatile("mrs %0, id_dfr0_el1" : "=r" (id));
+   return id;
+}
 #endif
 
 /*
@@ -64,11 +176,55 @@ static bool check_pmcr(void)
return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
 }
 
+/*
+ * Ensure that the cycle counter progresses between back-to-back reads.
+ */
+static bool check_cycles_increase(void)
+{
+   bool success = true;
+
+   pmccntr_write(0);
+   pmcr_write(pmcr_read() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E);
+
+   for (int i = 0; i < NR_SAMPLES; i++) {
+   uint64_t a, b;
+
+   a = pmccntr_read();
+   b = pmccntr_read();
+
+   if (a >= b) {
+   printf("Read %"PRId64" then %"PRId64".\n", a, b);
+   

[kvm-unit-tests PATCH v9 1/3] arm: Add PMU test

2016-11-18 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Beginning with a simple sanity check of the control register, add
a unit test for the ARM Performance Monitors Unit (PMU).

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
---
 arm/Makefile.common |  3 ++-
 arm/pmu.c   | 74 +
 arm/unittests.cfg   |  5 
 3 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

diff --git a/arm/Makefile.common b/arm/Makefile.common
index ccb554d..f98f422 100644
--- a/arm/Makefile.common
+++ b/arm/Makefile.common
@@ -11,7 +11,8 @@ endif
 
 tests-common = \
$(TEST_DIR)/selftest.flat \
-   $(TEST_DIR)/spinlock-test.flat
+   $(TEST_DIR)/spinlock-test.flat \
+   $(TEST_DIR)/pmu.flat
 
 all: test_cases
 
diff --git a/arm/pmu.c b/arm/pmu.c
new file mode 100644
index 000..9d9c53b
--- /dev/null
+++ b/arm/pmu.c
@@ -0,0 +1,74 @@
+/*
+ * Test the ARM Performance Monitors Unit (PMU).
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License version 2.1 and
+ * only version 2.1 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ */
+#include "libcflat.h"
+#include "asm/barrier.h"
+
+#define PMU_PMCR_N_SHIFT   11
+#define PMU_PMCR_N_MASK0x1f
+#define PMU_PMCR_ID_SHIFT  16
+#define PMU_PMCR_ID_MASK   0xff
+#define PMU_PMCR_IMP_SHIFT 24
+#define PMU_PMCR_IMP_MASK  0xff
+
+#if defined(__arm__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
+   return ret;
+}
+#elif defined(__aarch64__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
+   return ret;
+}
+#endif
+
+/*
+ * As a simple sanity check on the PMCR_EL0, ensure the implementer field isn't
+ * null. Also print out a couple other interesting fields for diagnostic
+ * purposes. For example, as of fall 2016, QEMU TCG mode doesn't implement
+ * event counters and therefore reports zero event counters, but hopefully
+ * support for at least the instructions event will be added in the future and
+ * the reported number of event counters will become nonzero.
+ */
+static bool check_pmcr(void)
+{
+   uint32_t pmcr;
+
+   pmcr = pmcr_read();
+
+   printf("PMU implementer: %c\n",
+  (pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK);
+   printf("Identification code: 0x%x\n",
+  (pmcr >> PMU_PMCR_ID_SHIFT) & PMU_PMCR_ID_MASK);
+   printf("Event counters:  %d\n",
+  (pmcr >> PMU_PMCR_N_SHIFT) & PMU_PMCR_N_MASK);
+
+   return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
+}
+
+int main(void)
+{
+   report_prefix_push("pmu");
+
+   report("Control register", check_pmcr());
+
+   return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index 3f6fa45..7645180 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -54,3 +54,8 @@ file = selftest.flat
 smp = $MAX_SMP
 extra_params = -append 'smp'
 groups = selftest
+
+# Test PMU support
+[pmu]
+file = pmu.flat
+groups = pmu
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v9 0/3] ARM PMU tests

2016-11-18 Thread Wei Huang
Changes from v8:
* Probe PMU version based on ID_DFR0
* pmccntr_read() now returns 64bit and can handle both 32bit and 64bit
  PMCCNTR based on PMU version.
* Add pmccntr_write() support
* Use a common printf format PRId64 to support 64bit variable smoothly in
  test functions
* Add barriers to several PMU write functions
* Verified under different execution modes

Note:
1) Current KVM code has bugs in handling PMCCFILTR write. A fix (see
below) is required for this unit testing code to work correctly under
KVM mode.
https://lists.cs.columbia.edu/pipermail/kvmarm/2016-November/022134.html.

Thanks,
-Wei

Wei Huang (3):
  arm: Add PMU test
  arm: pmu: Check cycle count increases
  arm: pmu: Add CPI checking

 arm/Makefile.common |   3 +-
 arm/pmu.c   | 339 
 arm/unittests.cfg   |  19 +++
 3 files changed, 360 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH] arm/arm64: introduce is_aarch32

2016-11-16 Thread Wei Huang


On 11/16/2016 04:02 PM, Christopher Covington wrote:
> On 11/16/2016 12:46 PM, Marc Zyngier wrote:
>> On 16/11/16 14:38, Andrew Jones wrote:
>>> ARMv7-A isn't exactly the same as ARMv8-A32 (AArch32). This
>>> function allows unit tests to make the distinction.
>>
>> Hi Drew,
>>
>> Overall, having to find out about the architecture is a bad idea most of
>> the time. We have feature registers for most things, and it definitely
>> makes more sense to check for those than trying to cast a wider net.
>>
>>>
>>> Signed-off-by: Andrew Jones 
>>>
>>> ---
>>> I'm actually unsure if there's a feature bit or not that I could
>>> probe instead. It'd be nice if somebody can confirm. Thanks, drew
> 
> I'd be happy to settle with the hard-coded CPU list.
> 
> But if you're curious about alternatives, I've taken a look through some
> documentation. ID_ISAR0.coproc describes whether mrrc is available but
> I think it is generally available on v7 and above. I think ID_ISAR5 will
> be zero on v7 and nonzero on v8-A32. But PMCR.LC seems like the best bit
> to check.
> 
>>> diff --git a/lib/arm64/asm/processor.h b/lib/arm64/asm/processor.h
>>> index 84d5c7ce752b..b602e1fbbc2d 100644
>>> --- a/lib/arm64/asm/processor.h
>>> +++ b/lib/arm64/asm/processor.h
>>> @@ -81,5 +81,10 @@ DEFINE_GET_SYSREG32(mpidr)
>>>  extern void start_usr(void (*func)(void *arg), void *arg, unsigned long 
>>> sp_usr);
>>>  extern bool is_user(void);
>>>  
>>> +static inline bool is_aarch32(void)
>>> +{
>>> +   return false;
>>> +}
>>> +
>>>  #endif /* !__ASSEMBLY__ */
>>>  #endif /* _ASMARM64_PROCESSOR_H_ */
>>>
>>
>> So the real question is: what are you trying to check for?
> 
> The question is "how many bits wide is pmccntr?" I think we
> can test whether writing PMCR.LC = 1 sticks. Based on the
> documentation, it seems to me like it wouldn't for v7 and
> would for v8-A32.
> 
> uint8_t size_pmccntr(void) {
>   uint32_t pmcr = get_pmcr();
>   if (pmcr & PMU_PMCR_LC_MASK)
> return 64;
>   set_pmcr(pmcr | (1 << PMU_PMCR_LC_SHIFT));
>   if (get_pmcr() & PMU_PMCR_LC_MASK)
> return 64;
>   return 32;
> }

This might actually be the solution if we can't find a more reliable
detection approach. I briefly tested it and it seemed to work.
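
Roughly, with a PMU_PMCR_LC bit define added (LC is bit 6), a self-contained
version of Cov's probe could look like the sketch below. This is only a sketch
based on that quick test; the PMCR write-back at the end just keeps the probe
free of side effects:

#define PMU_PMCR_LC_SHIFT  6
#define PMU_PMCR_LC_MASK   0x1

/* Usable width of PMCCNTR: 64 if PMCR.LC reads or sticks as 1, else 32. */
static uint8_t pmccntr_width(void)
{
	uint32_t pmcr = pmcr_read();
	uint8_t width = 32;

	if ((pmcr >> PMU_PMCR_LC_SHIFT) & PMU_PMCR_LC_MASK) {
		width = 64;
	} else {
		/* Try to set LC; it should only stick on ARMv8 (AArch32). */
		pmcr_write(pmcr | (1 << PMU_PMCR_LC_SHIFT));
		if ((pmcr_read() >> PMU_PMCR_LC_SHIFT) & PMU_PMCR_LC_MASK)
			width = 64;
		pmcr_write(pmcr);	/* restore the original value */
	}

	return width;
}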

> 
> Thanks,
> Cov
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v3 1/1] KVM: ARM64: Fix the issues when guest PMCCFILTR is configured

2016-11-16 Thread Wei Huang
KVM calls kvm_pmu_set_counter_event_type() when PMCCFILTR is configured.
But this function can't deal with PMCCFILTR correctly because the evtCount
bits of PMCCFILTR, which are reserved as 0, conflict with the SW_INCR event
type of other PMXEVTYPER registers. To fix it, when eventsel == 0, this
function shouldn't return immediately; instead it needs to check further
if select_idx is ARMV8_PMU_CYCLE_IDX.

Another issue is that KVM shouldn't copy the eventsel bits of PMCCFILTR
blindly to attr.config. Instead it ought to convert the request to the
"cpu cycle" event type (i.e. 0x11).

To support this patch and to prevent duplicated definitions, a limited
set of ARMv8 perf event types were relocated from perf_event.c to
asm/perf_event.h.

Signed-off-by: Wei Huang <w...@redhat.com>
---
 arch/arm64/include/asm/perf_event.h | 10 +-
 arch/arm64/kernel/perf_event.c  | 10 +-
 virt/kvm/arm/pmu.c  |  8 +---
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/perf_event.h 
b/arch/arm64/include/asm/perf_event.h
index 2065f46..38b6a2b 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -46,7 +46,15 @@
 #define ARMV8_PMU_EVTYPE_MASK   0xc800ffff  /* Mask for writable bits */
 #define ARMV8_PMU_EVTYPE_EVENT  0xffff      /* Mask for EVENT bits */
 
-#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0   /* Software increment event */
+/*
+ * PMUv3 event types: required events
+ */
+#define ARMV8_PMUV3_PERFCTR_SW_INCR            0x00
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL   0x03
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE          0x04
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED        0x10
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES         0x11
+#define ARMV8_PMUV3_PERFCTR_BR_PRED            0x12
 
 /*
  * Event filters for PMUv3
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index a9310a6..57ae9d9 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -31,17 +31,9 @@
 
 /*
  * ARMv8 PMUv3 Performance Events handling code.
- * Common event types.
+ * Common event types (some are defined in asm/perf_event.h).
  */
 
-/* Required events. */
-#define ARMV8_PMUV3_PERFCTR_SW_INCR            0x00
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL   0x03
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE          0x04
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED        0x10
-#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES         0x11
-#define ARMV8_PMUV3_PERFCTR_BR_PRED            0x12
-
 /* At least one of the following is required. */
 #define ARMV8_PMUV3_PERFCTR_INST_RETIRED   0x08
 #define ARMV8_PMUV3_PERFCTR_INST_SPEC  0x1B
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 6e9c40e..69ccce3 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -305,7 +305,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 
val)
continue;
type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
   & ARMV8_PMU_EVTYPE_EVENT;
-   if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
+   if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
&& (enable & BIT(i))) {
reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
reg = lower_32_bits(reg);
@@ -379,7 +379,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, 
u64 data,
eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
 
/* Software increment event does't need to be backed by a perf event */
-   if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
+   if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
+   select_idx != ARMV8_PMU_CYCLE_IDX)
return;
 
 memset(&attr, 0, sizeof(struct perf_event_attr));
@@ -391,7 +392,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, 
u64 data,
attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
attr.exclude_hv = 1; /* Don't count EL2 events */
attr.exclude_host = 1; /* Don't count host events */
-   attr.config = eventsel;
+   attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
+   ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
 
counter = kvm_pmu_get_counter_value(vcpu, select_idx);
/* The initial sample period (overflow count) of an event. */
-- 
2.7.4

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [Qemu-devel] [kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-15 Thread Wei Huang


On 11/14/2016 09:12 AM, Christopher Covington wrote:
> Hi Drew, Wei,
> 
> On 11/14/2016 05:05 AM, Andrew Jones wrote:
>> On Fri, Nov 11, 2016 at 01:55:49PM -0600, Wei Huang wrote:
>>>
>>>
>>> On 11/11/2016 01:43 AM, Andrew Jones wrote:
>>>> On Tue, Nov 08, 2016 at 12:17:14PM -0600, Wei Huang wrote:
>>>>> From: Christopher Covington <c...@codeaurora.org>
>>>>>
>>>>> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
>>>>> even for the smallest delta of two subsequent reads.
>>>>>
>>>>> Signed-off-by: Christopher Covington <c...@codeaurora.org>
>>>>> Signed-off-by: Wei Huang <w...@redhat.com>
>>>>> ---
>>>>>  arm/pmu.c | 98 
>>>>> +++
>>>>>  1 file changed, 98 insertions(+)
>>>>>
>>>>> diff --git a/arm/pmu.c b/arm/pmu.c
>>>>> index 0b29088..d5e3ac3 100644
>>>>> --- a/arm/pmu.c
>>>>> +++ b/arm/pmu.c
>>>>> @@ -14,6 +14,7 @@
>>>>>   */
>>>>>  #include "libcflat.h"
>>>>>  
>>>>> +#define PMU_PMCR_E (1 << 0)
>>>>>  #define PMU_PMCR_N_SHIFT   11
>>>>>  #define PMU_PMCR_N_MASK0x1f
>>>>>  #define PMU_PMCR_ID_SHIFT  16
>>>>> @@ -21,6 +22,10 @@
>>>>>  #define PMU_PMCR_IMP_SHIFT 24
>>>>>  #define PMU_PMCR_IMP_MASK  0xff
>>>>>  
>>>>> +#define PMU_CYCLE_IDX  31
>>>>> +
>>>>> +#define NR_SAMPLES 10
>>>>> +
>>>>>  #if defined(__arm__)
>>>>>  static inline uint32_t pmcr_read(void)
>>>>>  {
>>>>> @@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
>>>>>   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
>>>>>   return ret;
>>>>>  }
>>>>> +
>>>>> +static inline void pmcr_write(uint32_t value)
>>>>> +{
>>>>> + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
>>>>> +}
>>>>> +
>>>>> +static inline void pmselr_write(uint32_t value)
>>>>> +{
>>>>> + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
>>>>> +}
>>>>> +
>>>>> +static inline void pmxevtyper_write(uint32_t value)
>>>>> +{
>>>>> + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
>>>>> +}
>>>>> +
>>>>> +/*
>>>>> + * While PMCCNTR can be accessed as a 64 bit coprocessor register, 
>>>>> returning 64
>>>>> + * bits doesn't seem worth the trouble when differential usage of the 
>>>>> result is
>>>>> + * expected (with differences that can easily fit in 32 bits). So just 
>>>>> return
>>>>> + * the lower 32 bits of the cycle count in AArch32.
>>>>
>>>> Like I said in the last review, I'd rather we not do this. We should
>>>> return the full value and then the test case should confirm the upper
>>>> 32 bits are zero.
>>>
>>> Unless I miss something in ARM documentation, ARMv7 PMCCNTR is a 32-bit
>>> register. We can force it to a more coarse-grained cycle counter with
>>> PMCR.D bit=1 (see below). But it is still not a 64-bit register.
> 
> AArch32 System Register Descriptions
> Performance Monitors registers
> PMCCNTR, Performance Monitors Cycle Count Register
> 
> To access the PMCCNTR when accessing as a 32-bit register:
> MRC p15,0,<Rt>,c9,c13,0 ; Read PMCCNTR[31:0] into Rt
> MCR p15,0,<Rt>,c9,c13,0 ; Write Rt to PMCCNTR[31:0]. PMCCNTR[63:32] are
> unchanged
> 
> To access the PMCCNTR when accessing as a 64-bit register:
> MRRC p15,0,<Rt>,<Rt2>,c9 ; Read PMCCNTR[31:0] into Rt and PMCCNTR[63:32] into
> Rt2
> MCRR p15,0,<Rt>,<Rt2>,c9 ; Write Rt to PMCCNTR[31:0] and Rt2 to PMCCNTR[63:32]
> 

Thanks. I did some research based on your info and came back with the
following proposals (Cov, correct me if I am wrong):

By comparing A57 TRM (page 394 in [1]) with A15 TRM (page 273 in [2]), I
think this 64-bit cycle register is only available when running under
aarch32 compatibility mode on ARMv8 because it is not specified in A15
TRM. To further verify it, I tested 32-bit pmu code on QEMU with TCG
mode. The result is: accessing 64-bit PMCCNTR using the following
assembly failed on A15:

   volatile("mrrc p15, 0, %0, %1, c9" : "=r" (lo), "=r" (hi));
or
   volatile("mrrc p15, 0, %Q0, %R0, c9" : "=r" (val));

Given this difference, I think there are two solutions for 64-bit
AArch32 pmccntr_read, as requested by Drew:

1) The PMU unit testing code detects whether it is running under ARMv7 or under
the AArch32 compatibility mode. When it is running on ARMv7, such as A15, use
"MRC p15,0,<Rt>,c9,c13,0" and clear the upper 32 bits to 0. Otherwise use
"MRRC p15,0,<Rt>,<Rt2>,c9".

2) Return 64-bit results from the ARM pmccntr_read(), but only use "MRC
p15,0,<Rt>,c9,c13,0" and always clear the upper 32 bits to 0. This would
be the same as the original code.
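
A sketch of what option 1 might end up looking like, assuming ID_DFR0.PerfMon
is probed once at init and cached in a pmu_version variable (0x3 meaning
PMUv3); the exact probe still needs to be confirmed on real A15 hardware:

static unsigned int pmu_version;	/* ID_DFR0.PerfMon, filled in at init */

static inline uint64_t pmccntr_read(void)
{
	uint32_t lo, hi = 0;

	if (pmu_version == 0x3)		/* PMUv3: 64-bit access is available */
		asm volatile("mrrc p15, 0, %0, %1, c9" : "=r" (lo), "=r" (hi));
	else				/* ARMv7 PMUv2: 32-bit counter only */
		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (lo));

	return ((uint64_t)hi << 32) | lo;
}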

Thoughts?

-Wei

[1] A57 TRM,
http://infocenter.arm.com/help/topic/com.arm.doc.ddi0488c/DDI0488C_cortex_a57_mpcore_r1p0_trm.pdf
[2] A15 TRM,
http://infocenter.arm.com/help/topic/com.arm.doc.ddi0438c/DDI0438C_cortex_a15_r2p0_trm.pdf

> Regards,
> Cov
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [Qemu-devel] [kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-11 Thread Wei Huang


On 11/11/2016 01:43 AM, Andrew Jones wrote:
> On Tue, Nov 08, 2016 at 12:17:14PM -0600, Wei Huang wrote:
>> From: Christopher Covington <c...@codeaurora.org>
>>
>> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
>> even for the smallest delta of two subsequent reads.
>>
>> Signed-off-by: Christopher Covington <c...@codeaurora.org>
>> Signed-off-by: Wei Huang <w...@redhat.com>
>> ---
>>  arm/pmu.c | 98 
>> +++
>>  1 file changed, 98 insertions(+)
>>
>> diff --git a/arm/pmu.c b/arm/pmu.c
>> index 0b29088..d5e3ac3 100644
>> --- a/arm/pmu.c
>> +++ b/arm/pmu.c
>> @@ -14,6 +14,7 @@
>>   */
>>  #include "libcflat.h"
>>  
>> +#define PMU_PMCR_E (1 << 0)
>>  #define PMU_PMCR_N_SHIFT   11
>>  #define PMU_PMCR_N_MASK0x1f
>>  #define PMU_PMCR_ID_SHIFT  16
>> @@ -21,6 +22,10 @@
>>  #define PMU_PMCR_IMP_SHIFT 24
>>  #define PMU_PMCR_IMP_MASK  0xff
>>  
>> +#define PMU_CYCLE_IDX  31
>> +
>> +#define NR_SAMPLES 10
>> +
>>  #if defined(__arm__)
>>  static inline uint32_t pmcr_read(void)
>>  {
>> @@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
>>  asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
>>  return ret;
>>  }
>> +
>> +static inline void pmcr_write(uint32_t value)
>> +{
>> +asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
>> +}
>> +
>> +static inline void pmselr_write(uint32_t value)
>> +{
>> +asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
>> +}
>> +
>> +static inline void pmxevtyper_write(uint32_t value)
>> +{
>> +asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
>> +}
>> +
>> +/*
>> + * While PMCCNTR can be accessed as a 64 bit coprocessor register, 
>> returning 64
>> + * bits doesn't seem worth the trouble when differential usage of the 
>> result is
>> + * expected (with differences that can easily fit in 32 bits). So just 
>> return
>> + * the lower 32 bits of the cycle count in AArch32.
> 
> Like I said in the last review, I'd rather we not do this. We should
> return the full value and then the test case should confirm the upper
> 32 bits are zero.
> 

Unless I'm missing something in the ARM documentation, ARMv7 PMCCNTR is a 32-bit
register. We can force it to a more coarse-grained cycle counter with
PMCR.D bit=1 (see below). But it is still not a 64-bit register. ARMv8
PMCCNTR_EL0 is a 64-bit register.

"The PMCR.D bit configures whether PMCCNTR increments once every clock
cycle, or once every 64 clock cycles. "

So I think the comment above in the code is an overstatement, which
should be deleted or moved down to ARMv8 pmccntr_read() below.
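
As an aside, if PMCR.D were ever left set by firmware, the test could clear it
explicitly before sampling -- a hypothetical one-liner, assuming D is bit 3 and
the pmcr accessors already in this file:

	#define PMU_PMCR_D	(1 << 3)

	pmcr_write(pmcr_read() & ~PMU_PMCR_D);	/* count every cycle, not 1/64 */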

>> + */
>> +static inline uint32_t pmccntr_read(void)
>> +{
>> +uint32_t cycles;
>> +
>> +asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles));
>> +return cycles;
>> +}
>> +
>> +static inline void pmcntenset_write(uint32_t value)
>> +{
>> +asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value));
>> +}
>> +
>> +/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
>> +static inline void pmccfiltr_write(uint32_t value)
>> +{
>> +pmselr_write(PMU_CYCLE_IDX);
>> +pmxevtyper_write(value);
>> +}
>>  #elif defined(__aarch64__)
>>  static inline uint32_t pmcr_read(void)
>>  {
>> @@ -37,6 +83,29 @@ static inline uint32_t pmcr_read(void)
>>  asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
>>  return ret;
>>  }
>> +
>> +static inline void pmcr_write(uint32_t value)
>> +{
>> +asm volatile("msr pmcr_el0, %0" : : "r" (value));
>> +}
>> +
>> +static inline uint32_t pmccntr_read(void)
>> +{
>> +uint32_t cycles;
>> +
>> +asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
>> +return cycles;
>> +}
>> +
>> +static inline void pmcntenset_write(uint32_t value)
>> +{
>> +asm volatile("msr pmcntenset_el0, %0" : : "r" (value));
>> +}
>> +
>> +static inline void pmccfiltr_write(uint32_t value)
>> +{
>> +asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
>> +}
>>  #endif
>>  
>>  /*
>> @@ -63,11 +

Re: [Qemu-devel] [kvm-unit-tests PATCH v8 3/3] arm: pmu: Add CPI checking

2016-11-11 Thread Wei Huang


On 11/11/2016 02:08 AM, Andrew Jones wrote:
> On Tue, Nov 08, 2016 at 12:17:15PM -0600, Wei Huang wrote:
>> From: Christopher Covington <c...@codeaurora.org>
>>
>> Calculate the numbers of cycles per instruction (CPI) implied by ARM
>> PMU cycle counter values. The code includes a strict checking facility
>> intended for the -icount option in TCG mode in the configuration file.
>>
>> Signed-off-by: Christopher Covington <c...@codeaurora.org>
>> Signed-off-by: Wei Huang <w...@redhat.com>
>> ---
>>  arm/pmu.c | 101 
>> +-
>>  arm/unittests.cfg |  14 
>>  2 files changed, 114 insertions(+), 1 deletion(-)
>>
>> diff --git a/arm/pmu.c b/arm/pmu.c
>> index d5e3ac3..09aff89 100644
>> --- a/arm/pmu.c
>> +++ b/arm/pmu.c
>> @@ -15,6 +15,7 @@
>>  #include "libcflat.h"
>>  
>>  #define PMU_PMCR_E (1 << 0)
>> +#define PMU_PMCR_C (1 << 2)
>>  #define PMU_PMCR_N_SHIFT   11
>>  #define PMU_PMCR_N_MASK0x1f
>>  #define PMU_PMCR_ID_SHIFT  16
>> @@ -75,6 +76,23 @@ static inline void pmccfiltr_write(uint32_t value)
>>  pmselr_write(PMU_CYCLE_IDX);
>>  pmxevtyper_write(value);
>>  }
>> +
>> +/*
>> + * Extra instructions inserted by the compiler would be difficult to 
>> compensate
>> + * for, so hand assemble everything between, and including, the PMCR 
>> accesses
>> + * to start and stop counting.
>> + */
>> +static inline void loop(int i, uint32_t pmcr)
> 
> We should probably pick a more descriptive name for this function, as
> we intend to add many more PMU tests to this file. While at it, let's
> change 'i' to 'n', as it's the number of times to loop.

I will rename it to fixed_num_loop(). When more tests are added to it,
we can standardize the name, e.g. *_test().

> 
>> +{
>> +asm volatile(
>> +"   mcr p15, 0, %[pmcr], c9, c12, 0\n"
>> +"1: subs%[i], %[i], #1\n"
>> +"   bgt 1b\n"
>> +"   mcr p15, 0, %[z], c9, c12, 0\n"
>> +: [i] "+r" (i)
>> +: [pmcr] "r" (pmcr), [z] "r" (0)
>> +: "cc");
>> +}
>>  #elif defined(__aarch64__)
>>  static inline uint32_t pmcr_read(void)
>>  {
>> @@ -106,6 +124,23 @@ static inline void pmccfiltr_write(uint32_t value)
>>  {
>>  asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
>>  }
>> +
>> +/*
>> + * Extra instructions inserted by the compiler would be difficult to 
>> compensate
>> + * for, so hand assemble everything between, and including, the PMCR 
>> accesses
>> + * to start and stop counting.
>> + */
>> +static inline void loop(int i, uint32_t pmcr)
>> +{
>> +asm volatile(
>> +"   msr pmcr_el0, %[pmcr]\n"
>> +"1: subs%[i], %[i], #1\n"
>> +"   b.gt1b\n"
>> +"   msr pmcr_el0, xzr\n"
>> +: [i] "+r" (i)
>> +: [pmcr] "r" (pmcr)
>> +: "cc");
>> +}
>>  #endif
>>  
>>  /*
>> @@ -156,8 +191,71 @@ static bool check_cycles_increase(void)
>>  return true;
>>  }
>>  
>> -int main(void)
>> +/*
>> + * Execute a known number of guest instructions. Only odd instruction counts
>> + * greater than or equal to 3 are supported by the in-line assembly code. 
>> The
>> + * control register (PMCR_EL0) is initialized with the provided value 
>> (allowing
>> + * for example for the cycle counter or event counters to be reset). At the 
>> end
>> + * of the exact instruction loop, zero is written to PMCR_EL0 to disable
>> + * counting, allowing the cycle counter or event counters to be read at the
>> + * leisure of the calling code.
>> + */
>> +static void measure_instrs(int num, uint32_t pmcr)
>> +{
>> +int i = (num - 1) / 2;
>> +
>> +assert(num >= 3 && ((num - 1) % 2 == 0));
>> +loop(i, pmcr);
>> +}
>> +
>> +/*
>> + * Measure cycle counts for various known instruction counts. Ensure that 
>> the
>> + * cycle counter progresses (similar to check_cycles_increase() but with 
>> more
>> + * instructions and using reset and stop controls). If supplied a positive,
>> + * nonzero CPI parameter, also strictly check that every measurement m

Re: [PATCH 1/2] arm64: perf: Move ARMv8 PMU perf event definitions to asm/perf_event.h

2016-11-10 Thread Wei Huang


On 11/10/2016 11:17 AM, Will Deacon wrote:
> On Thu, Nov 10, 2016 at 03:32:12PM +, Marc Zyngier wrote:
>> On 10/11/16 15:12, Wei Huang wrote:
>>>
>>>
>>> On 11/10/2016 03:10 AM, Marc Zyngier wrote:
>>>> Hi Wei,
>>>>
>>>> On 09/11/16 19:57, Wei Huang wrote:
>>>>> This patch moves ARMv8-related perf event definitions from perf_event.c
>>>>> to asm/perf_event.h; so KVM code can use them directly. This also help
>>>>> remove a duplicated definition of SW_INCR in perf_event.h.
>>>>>
>>>>> Signed-off-by: Wei Huang <w...@redhat.com>
>>>>> ---
>>>>>  arch/arm64/include/asm/perf_event.h | 161 
>>>>> +++-
>>>>>  arch/arm64/kernel/perf_event.c  | 161 
>>>>> 
>>>>>  2 files changed, 160 insertions(+), 162 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/include/asm/perf_event.h 
>>>>> b/arch/arm64/include/asm/perf_event.h
>>>>> index 2065f46..6c7b18b 100644
>>>>> --- a/arch/arm64/include/asm/perf_event.h
>>>>> +++ b/arch/arm64/include/asm/perf_event.h
>>>>> @@ -46,7 +46,166 @@
>>>>>  #define  ARMV8_PMU_EVTYPE_MASK   0xc800  /* Mask for writable 
>>>>> bits */
>>>>>  #define  ARMV8_PMU_EVTYPE_EVENT  0x  /* Mask for EVENT bits 
>>>>> */
>>>>>  
>>>>> -#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR   0   /* Software increment 
>>>>> event */
>>>>> +/*
>>>>> + * ARMv8 PMUv3 Performance Events handling code.
>>>>> + * Common event types.
>>>>> + */
>>>>> +
>>>>> +/* Required events. */
>>>>> +#define ARMV8_PMUV3_PERFCTR_SW_INCR  0x00
>>>>> +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
>>>>> +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE0x04
>>>>> +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED  0x10
>>>>> +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES   0x11
>>>>> +#define ARMV8_PMUV3_PERFCTR_BR_PRED  0x12
>>>>
>>>> In my initial review, I asked for the "required" events to be moved to a
>>>> shared location. What's the rational for moving absolutely everything?
>>>
>>> I did notice the phrase "required" in the original email. However I
>>> think it is weird to have two places for a same set of PMU definitions.
>>> Other developers might think these two are missing if they don't search
>>> kernel files carefully.
>>>
>>> If Will Deacon and you insist, I can move only two defs to perf_event.h,
>>> consolidated with the 2nd patch into a single one.
>>
>> My personal feeling is that only architected events should be in a
>> public header. The CPU-specific ones are probably better kept private,
>> as it is doubtful that other users would appear).
>>
>> I'll leave it up to Will to decide, as all I want to avoid is the
>> duplication of constants between the PMU and KVM code bases.
> 
> Yeah, just take the sets that you need (i.e. the architected events).

Hi Will,

Just to clarify what "architected" means:

We need two for KVM: SW_INCR (architectural) and CPU_CYCLES
(micro-architectural). Looking at perf_event.c file, I can either
relocate the  "Required events" (6 events) or the whole set of
ARMV8_PMUV3_PERFCTR_* (~50 events) to perf_event.h. Which way you prefer?

Thanks,
-Wei

> 
> Also, check that it builds.
> 
> Will
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 1/2] arm64: perf: Move ARMv8 PMU perf event definitions to asm/perf_event.h

2016-11-10 Thread Wei Huang


On 11/10/2016 03:10 AM, Marc Zyngier wrote:
> Hi Wei,
> 
> On 09/11/16 19:57, Wei Huang wrote:
>> This patch moves ARMv8-related perf event definitions from perf_event.c
>> to asm/perf_event.h; so KVM code can use them directly. This also help
>> remove a duplicated definition of SW_INCR in perf_event.h.
>>
>> Signed-off-by: Wei Huang <w...@redhat.com>
>> ---
>>  arch/arm64/include/asm/perf_event.h | 161 
>> +++-
>>  arch/arm64/kernel/perf_event.c  | 161 
>> 
>>  2 files changed, 160 insertions(+), 162 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/perf_event.h 
>> b/arch/arm64/include/asm/perf_event.h
>> index 2065f46..6c7b18b 100644
>> --- a/arch/arm64/include/asm/perf_event.h
>> +++ b/arch/arm64/include/asm/perf_event.h
>> @@ -46,7 +46,166 @@
>>  #define ARMV8_PMU_EVTYPE_MASK   0xc800  /* Mask for writable 
>> bits */
>>  #define ARMV8_PMU_EVTYPE_EVENT  0x  /* Mask for EVENT bits 
>> */
>>  
>> -#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR  0   /* Software increment 
>> event */
>> +/*
>> + * ARMv8 PMUv3 Performance Events handling code.
>> + * Common event types.
>> + */
>> +
>> +/* Required events. */
>> +#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
>> +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL0x03
>> +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE   0x04
>> +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
>> +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES  0x11
>> +#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
> 
> In my initial review, I asked for the "required" events to be moved to a
> shared location. What's the rational for moving absolutely everything?

I did notice the phrase "required" in the original email. However I
think it is odd to have two places for the same set of PMU definitions.
Other developers might think some of them are missing if they don't search
the kernel files carefully.

If you and Will Deacon insist, I can move only the two defs to perf_event.h,
consolidating this with the 2nd patch into a single one.

> KVM only needs to know about ARMV8_PMUV3_PERFCTR_SW_INCR and
> ARMV8_PMUV3_PERFCTR_CPU_CYCLES, so I thought that moving the above six
> events (and maybe the following two) would be enough.
> 
> Also, you've now broken the build by dropping
> ARMV8_PMU_EVTYPE_EVENT_SW_INCR without amending it use in the KVM PMU
> code (see the kbuild report).
> 

My bad. I only tested compilation with both patches applied. Will fix it.



>> +
>>  /* PMUv3 HW events mapping. */
>>  
>>  /*
>>
> 
> Thanks,
> 
>   M.
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH 1/2] arm64: perf: Move ARMv8 PMU perf event definitions to asm/perf_event.h

2016-11-09 Thread Wei Huang
This patch moves ARMv8-related perf event definitions from perf_event.c
to asm/perf_event.h so that KVM code can use them directly. This also helps
remove a duplicated definition of SW_INCR in perf_event.h.

Signed-off-by: Wei Huang <w...@redhat.com>
---
 arch/arm64/include/asm/perf_event.h | 161 +++-
 arch/arm64/kernel/perf_event.c  | 161 
 2 files changed, 160 insertions(+), 162 deletions(-)

diff --git a/arch/arm64/include/asm/perf_event.h 
b/arch/arm64/include/asm/perf_event.h
index 2065f46..6c7b18b 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -46,7 +46,166 @@
 #define ARMV8_PMU_EVTYPE_MASK   0xc800ffff  /* Mask for writable bits */
 #define ARMV8_PMU_EVTYPE_EVENT  0xffff      /* Mask for EVENT bits */
 
-#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0   /* Software increment event */
+/*
+ * ARMv8 PMUv3 Performance Events handling code.
+ * Common event types.
+ */
+
+/* Required events. */
+#define ARMV8_PMUV3_PERFCTR_SW_INCR            0x00
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL   0x03
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE          0x04
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED        0x10
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES         0x11
+#define ARMV8_PMUV3_PERFCTR_BR_PRED            0x12
+
+/* At least one of the following is required. */
+#define ARMV8_PMUV3_PERFCTR_INST_RETIRED   0x08
+#define ARMV8_PMUV3_PERFCTR_INST_SPEC  0x1B
+
+/* Common architectural events. */
+#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06
+#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07
+#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN  0x09
+#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A
+#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED  0x0B
+#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED   0x0C
+#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED   0x0D
+#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED  0x0E
+#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F
+#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C
+#define ARMV8_PMUV3_PERFCTR_CHAIN  0x1E
+#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21
+
+/* Common microarchitectural events. */
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL   0x01
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05
+#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE  0x14
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB   0x15
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE  0x16
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL   0x17
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB   0x18
+#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19
+#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR   0x1A
+#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED    0x22
+#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x23
+#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND  0x24
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB            0x25
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB            0x26
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE  0x27
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL   0x28
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x29
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL   0x2A
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE  0x2B
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB   0x2C
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB            0x2F
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB            0x30
+
+/* ARMv8 recommended implementation defined event types */
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD  0x40
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR  0x41
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD   0x42
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR   0x43
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER    0x44
+#

[PATCH 2/2] KVM: ARM64: Fix the issues when guest PMCCFILTR is configured

2016-11-09 Thread Wei Huang
KVM calls kvm_pmu_set_counter_event_type() when PMCCFILTR is configured.
But this function can't deal with PMCCFILTR correctly because the evtCount
bits of PMCCFILTR, which are reserved as 0, conflict with the SW_INCR event
type of other PMXEVTYPER registers. To fix it, when eventsel == 0, this
function shouldn't return immediately; instead it needs to check further
if select_idx is ARMV8_PMU_CYCLE_IDX.

Another issue is that KVM shouldn't copy the eventsel bits of PMCCFILTR
blindly to attr.config. Instead it ought to convert the request to the
"cpu cycle" event type (i.e. 0x11).

Signed-off-by: Wei Huang <w...@redhat.com>
---
 virt/kvm/arm/pmu.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 6e9c40e..69ccce3 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -305,7 +305,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 
val)
continue;
type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
   & ARMV8_PMU_EVTYPE_EVENT;
-   if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
+   if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
&& (enable & BIT(i))) {
reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
reg = lower_32_bits(reg);
@@ -379,7 +379,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, 
u64 data,
eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
 
/* Software increment event does't need to be backed by a perf event */
-   if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
+   if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
+   select_idx != ARMV8_PMU_CYCLE_IDX)
return;
 
 memset(&attr, 0, sizeof(struct perf_event_attr));
@@ -391,7 +392,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, 
u64 data,
attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
attr.exclude_hv = 1; /* Don't count EL2 events */
attr.exclude_host = 1; /* Don't count host events */
-   attr.config = eventsel;
+   attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
+   ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
 
counter = kvm_pmu_get_counter_value(vcpu, select_idx);
/* The initial sample period (overflow count) of an event. */
-- 
2.7.4

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-08 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
even for the smallest delta of two subsequent reads.

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
---
 arm/pmu.c | 98 +++
 1 file changed, 98 insertions(+)

diff --git a/arm/pmu.c b/arm/pmu.c
index 0b29088..d5e3ac3 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -14,6 +14,7 @@
  */
 #include "libcflat.h"
 
+#define PMU_PMCR_E (1 << 0)
 #define PMU_PMCR_N_SHIFT   11
 #define PMU_PMCR_N_MASK0x1f
 #define PMU_PMCR_ID_SHIFT  16
@@ -21,6 +22,10 @@
 #define PMU_PMCR_IMP_SHIFT 24
 #define PMU_PMCR_IMP_MASK  0xff
 
+#define PMU_CYCLE_IDX  31
+
+#define NR_SAMPLES 10
+
 #if defined(__arm__)
 static inline uint32_t pmcr_read(void)
 {
@@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
+}
+
+static inline void pmselr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
+}
+
+static inline void pmxevtyper_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
+}
+
+/*
+ * While PMCCNTR can be accessed as a 64 bit coprocessor register, returning 64
+ * bits doesn't seem worth the trouble when differential usage of the result is
+ * expected (with differences that can easily fit in 32 bits). So just return
+ * the lower 32 bits of the cycle count in AArch32.
+ */
+static inline uint32_t pmccntr_read(void)
+{
+   uint32_t cycles;
+
+   asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles));
+   return cycles;
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value));
+}
+
+/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
+static inline void pmccfiltr_write(uint32_t value)
+{
+   pmselr_write(PMU_CYCLE_IDX);
+   pmxevtyper_write(value);
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -37,6 +83,29 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("msr pmcr_el0, %0" : : "r" (value));
+}
+
+static inline uint32_t pmccntr_read(void)
+{
+   uint32_t cycles;
+
+   asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
+   return cycles;
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("msr pmcntenset_el0, %0" : : "r" (value));
+}
+
+static inline void pmccfiltr_write(uint32_t value)
+{
+   asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
+}
 #endif
 
 /*
@@ -63,11 +132,40 @@ static bool check_pmcr(void)
return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
 }
 
+/*
+ * Ensure that the cycle counter progresses between back-to-back reads.
+ */
+static bool check_cycles_increase(void)
+{
+   pmcr_write(pmcr_read() | PMU_PMCR_E);
+
+   for (int i = 0; i < NR_SAMPLES; i++) {
+   unsigned long a, b;
+
+   a = pmccntr_read();
+   b = pmccntr_read();
+
+   if (a >= b) {
+   printf("Read %ld then %ld.\n", a, b);
+   return false;
+   }
+   }
+
+   pmcr_write(pmcr_read() & ~PMU_PMCR_E);
+
+   return true;
+}
+
 int main(void)
 {
report_prefix_push("pmu");
 
+   /* init for PMU event access, right now only care about cycle count */
+   pmcntenset_write(1 << PMU_CYCLE_IDX);
+   pmccfiltr_write(0); /* count cycles in EL0, EL1, but not EL2 */
+
report("Control register", check_pmcr());
+   report("Monotonically increasing cycle count", check_cycles_increase());
 
return report_summary();
 }
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v8 0/3] ARM PMU tests

2016-11-08 Thread Wei Huang
Changes from v7:
* Standardize PMU register accessor names and remove unused ones
* Use bit defines instead of bit fields
* Change the test configuration for pmu.flat
* Commit comments were updated

Note:
1) Current KVM code has bugs in handling PMCCFILTR write. A fix (see
below) is required for this unit testing code to work correctly under
KVM mode.
https://lists.cs.columbia.edu/pipermail/kvmarm/2016-November/022134.html.
2) Because the code was changed, Drew's original reviewed-by needs to
be acknowledged by him again.

-Wei

Wei Huang (3):
  arm: Add PMU test
  arm: pmu: Check cycle count increases
  arm: pmu: Add CPI checking

 arm/Makefile.common |   3 +-
 arm/pmu.c   | 270 
 arm/unittests.cfg   |  19 
 3 files changed, 291 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v8 1/3] arm: Add PMU test

2016-11-08 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Beginning with a simple sanity check of the control register, add
a unit test for the ARM Performance Monitors Unit (PMU).

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
---
 arm/Makefile.common |  3 ++-
 arm/pmu.c   | 73 +
 arm/unittests.cfg   |  5 
 3 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

diff --git a/arm/Makefile.common b/arm/Makefile.common
index ccb554d..f98f422 100644
--- a/arm/Makefile.common
+++ b/arm/Makefile.common
@@ -11,7 +11,8 @@ endif
 
 tests-common = \
$(TEST_DIR)/selftest.flat \
-   $(TEST_DIR)/spinlock-test.flat
+   $(TEST_DIR)/spinlock-test.flat \
+   $(TEST_DIR)/pmu.flat
 
 all: test_cases
 
diff --git a/arm/pmu.c b/arm/pmu.c
new file mode 100644
index 000..0b29088
--- /dev/null
+++ b/arm/pmu.c
@@ -0,0 +1,73 @@
+/*
+ * Test the ARM Performance Monitors Unit (PMU).
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License version 2.1 and
+ * only version 2.1 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ */
+#include "libcflat.h"
+
+#define PMU_PMCR_N_SHIFT   11
+#define PMU_PMCR_N_MASK0x1f
+#define PMU_PMCR_ID_SHIFT  16
+#define PMU_PMCR_ID_MASK   0xff
+#define PMU_PMCR_IMP_SHIFT 24
+#define PMU_PMCR_IMP_MASK  0xff
+
+#if defined(__arm__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
+   return ret;
+}
+#elif defined(__aarch64__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
+   return ret;
+}
+#endif
+
+/*
+ * As a simple sanity check on the PMCR_EL0, ensure the implementer field isn't
+ * null. Also print out a couple other interesting fields for diagnostic
+ * purposes. For example, as of fall 2016, QEMU TCG mode doesn't implement
+ * event counters and therefore reports zero event counters, but hopefully
+ * support for at least the instructions event will be added in the future and
+ * the reported number of event counters will become nonzero.
+ */
+static bool check_pmcr(void)
+{
+   uint32_t pmcr;
+
+   pmcr = pmcr_read();
+
+   printf("PMU implementer: %c\n",
+  (pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK);
+   printf("Identification code: 0x%x\n",
+  (pmcr >> PMU_PMCR_ID_SHIFT) & PMU_PMCR_ID_MASK);
+   printf("Event counters:  %d\n",
+  (pmcr >> PMU_PMCR_N_SHIFT) & PMU_PMCR_N_MASK);
+
+   return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
+}
+
+int main(void)
+{
+   report_prefix_push("pmu");
+
+   report("Control register", check_pmcr());
+
+   return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index 3f6fa45..7645180 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -54,3 +54,8 @@ file = selftest.flat
 smp = $MAX_SMP
 extra_params = -append 'smp'
 groups = selftest
+
+# Test PMU support
+[pmu]
+file = pmu.flat
+groups = pmu
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v8 3/3] arm: pmu: Add CPI checking

2016-11-08 Thread Wei Huang
From: Christopher Covington <c...@codeaurora.org>

Calculate the numbers of cycles per instruction (CPI) implied by ARM
PMU cycle counter values. The code includes a strict checking facility
intended for the -icount option in TCG mode in the configuration file.

Signed-off-by: Christopher Covington <c...@codeaurora.org>
Signed-off-by: Wei Huang <w...@redhat.com>
---
 arm/pmu.c | 101 +-
 arm/unittests.cfg |  14 
 2 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/arm/pmu.c b/arm/pmu.c
index d5e3ac3..09aff89 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -15,6 +15,7 @@
 #include "libcflat.h"
 
 #define PMU_PMCR_E (1 << 0)
+#define PMU_PMCR_C (1 << 2)
 #define PMU_PMCR_N_SHIFT   11
 #define PMU_PMCR_N_MASK0x1f
 #define PMU_PMCR_ID_SHIFT  16
@@ -75,6 +76,23 @@ static inline void pmccfiltr_write(uint32_t value)
pmselr_write(PMU_CYCLE_IDX);
pmxevtyper_write(value);
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting.
+ */
+static inline void loop(int i, uint32_t pmcr)
+{
+   asm volatile(
+   "   mcr p15, 0, %[pmcr], c9, c12, 0\n"
+   "1: subs%[i], %[i], #1\n"
+   "   bgt 1b\n"
+   "   mcr p15, 0, %[z], c9, c12, 0\n"
+   : [i] "+r" (i)
+   : [pmcr] "r" (pmcr), [z] "r" (0)
+   : "cc");
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -106,6 +124,23 @@ static inline void pmccfiltr_write(uint32_t value)
 {
asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting.
+ */
+static inline void loop(int i, uint32_t pmcr)
+{
+   asm volatile(
+   "   msr pmcr_el0, %[pmcr]\n"
+   "1: subs%[i], %[i], #1\n"
+   "   b.gt1b\n"
+   "   msr pmcr_el0, xzr\n"
+   : [i] "+r" (i)
+   : [pmcr] "r" (pmcr)
+   : "cc");
+}
 #endif
 
 /*
@@ -156,8 +191,71 @@ static bool check_cycles_increase(void)
return true;
 }
 
-int main(void)
+/*
+ * Execute a known number of guest instructions. Only odd instruction counts
+ * greater than or equal to 3 are supported by the in-line assembly code. The
+ * control register (PMCR_EL0) is initialized with the provided value (allowing
+ * for example for the cycle counter or event counters to be reset). At the end
+ * of the exact instruction loop, zero is written to PMCR_EL0 to disable
+ * counting, allowing the cycle counter or event counters to be read at the
+ * leisure of the calling code.
+ */
+static void measure_instrs(int num, uint32_t pmcr)
+{
+   int i = (num - 1) / 2;
+
+   assert(num >= 3 && ((num - 1) % 2 == 0));
+   loop(i, pmcr);
+}
+
+/*
+ * Measure cycle counts for various known instruction counts. Ensure that the
+ * cycle counter progresses (similar to check_cycles_increase() but with more
+ * instructions and using reset and stop controls). If supplied a positive,
+ * nonzero CPI parameter, also strictly check that every measurement matches
+ * it. Strict CPI checking is used to test -icount mode.
+ */
+static bool check_cpi(int cpi)
+{
+   uint32_t pmcr = pmcr_read() | PMU_PMCR_C | PMU_PMCR_E;
+   
+   if (cpi > 0)
+   printf("Checking for CPI=%d.\n", cpi);
+   printf("instrs : cycles0 cycles1 ...\n");
+
+   for (int i = 3; i < 300; i += 32) {
+   int avg, sum = 0;
+
+   printf("%d :", i);
+   for (int j = 0; j < NR_SAMPLES; j++) {
+   int cycles;
+
+   measure_instrs(i, pmcr);
+   cycles = pmccntr_read();
+   printf(" %d", cycles);
+
+   if (!cycles || (cpi > 0 && cycles != i * cpi)) {
+   printf("\n");
+   return false;
+   }
+
+   sum += cycles;
+   }
+   avg = sum / NR_SAMPLES;
+   printf(" sum=%d avg=%d avg_ipc=%d avg_cpi=%d\n",
+   sum, avg, i / avg, avg / i);
+   }
+
+   return true;
+}
+
+int main(int argc, char *argv[])
 {
+   int cpi = 0;
+
+   if (argc >= 1)
+   cpi = atol(argv[0]);
+
report_prefix_push("pmu");
 
/* init for PMU event access, right now only care about cy

Re: [PATCH v2 1/6] KVM: arm/arm64: arch_timer: Gather KVM specific information in a structure

2016-02-18 Thread Wei Huang


On 02/11/2016 09:33 AM, Julien Grall wrote:
> Introduce a structure which are filled up by the arch timer driver and
> used by the virtual timer in KVM.
> 
> The first member of this structure will be the timecounter. More members
> will be added later.
> 
> This is also dropping arch_timer_get_timecounter as it was only used by
> the KVM code. Furthermore, a stub for the new helper hasn't been
> introduced because KVM is requiring the arch timer for both ARM64 and
> ARM32.
> 
> Signed-off-by: Julien Grall 
> 
> ---
> Cc: Daniel Lezcano 
> Cc: Thomas Gleixner 
> Cc: Christoffer Dall 
> Cc: Marc Zyngier 
> Cc: Gleb Natapov 
> Cc: Paolo Bonzini 
> ---
>  drivers/clocksource/arm_arch_timer.c |  9 +
>  include/clocksource/arm_arch_timer.h | 12 ++--
>  virt/kvm/arm/arch_timer.c|  6 +++---
>  3 files changed, 14 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/clocksource/arm_arch_timer.c 
> b/drivers/clocksource/arm_arch_timer.c
> index c64d543..6eb2c5d 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -447,11 +447,11 @@ static struct cyclecounter cyclecounter = {
>   .mask   = CLOCKSOURCE_MASK(56),
>  };
>  
> -static struct timecounter timecounter;
> +static struct arch_timer_kvm_info arch_timer_kvm_info;
>  
> -struct timecounter *arch_timer_get_timecounter(void)
> +struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
>  {
> - return 
> + return _timer_kvm_info;
>  }
>  
>  static void __init arch_counter_register(unsigned type)
> @@ -479,7 +479,8 @@ static void __init arch_counter_register(unsigned type)
>   clocksource_register_hz(&clocksource_counter, arch_timer_rate);
>   cyclecounter.mult = clocksource_counter.mult;
>   cyclecounter.shift = clocksource_counter.shift;
> - timecounter_init(&timecounter, &cyclecounter, start_count);
> + timecounter_init(&arch_timer_kvm_info.timecounter,
> +  &cyclecounter, start_count);
>  
>   /* 56 bits minimum, so we assume worst case rollover */
>   sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate);
> diff --git a/include/clocksource/arm_arch_timer.h 
> b/include/clocksource/arm_arch_timer.h
> index 25d0914..4d487f8 100644
> --- a/include/clocksource/arm_arch_timer.h
> +++ b/include/clocksource/arm_arch_timer.h
> @@ -49,11 +49,16 @@ enum arch_timer_reg {
>  
>  #define ARCH_TIMER_EVT_STREAM_FREQ   1   /* 100us */
>  
> +struct arch_timer_kvm_info {
> + struct timecounter timecounter;
> +};
> +
>  #ifdef CONFIG_ARM_ARCH_TIMER
>  
>  extern u32 arch_timer_get_rate(void);
>  extern u64 (*arch_timer_read_counter)(void);
> -extern struct timecounter *arch_timer_get_timecounter(void);
> +
> +extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void);
>  
>  #else
>  
> @@ -67,11 +72,6 @@ static inline u64 arch_timer_read_counter(void)
>   return 0;
>  }
>  
> -static inline struct timecounter *arch_timer_get_timecounter(void)
> -{
> - return NULL;
> -}
> -

Most parts are OK. Regarding removing this function from the #else area:
is it possible to have CONFIG_ARM_ARCH_TIMER=n and CONFIG_KVM=y?
If so, will the compilation fail here?
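
(If that combination is actually possible, I suppose a fallback stub along the
lines of the one being removed would be needed, with kvm_timer_hyp_init()
keeping its NULL check. Something like the sketch below -- just to illustrate
the question, not necessarily something the series needs:)

static inline struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
{
	return NULL;
}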

-Wei

>  #endif
>  
>  #endif
> diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
> index 69bca18..a669c6a 100644
> --- a/virt/kvm/arm/arch_timer.c
> +++ b/virt/kvm/arm/arch_timer.c
> @@ -385,11 +385,11 @@ int kvm_timer_hyp_init(void)
>  {
>   struct device_node *np;
>   unsigned int ppi;
> + struct arch_timer_kvm_info *info;
>   int err;
>  
> - timecounter = arch_timer_get_timecounter();
> - if (!timecounter)
> - return -ENODEV;
> + info = arch_timer_get_kvm_info();
> + timecounter = &info->timecounter;
>  
>   np = of_find_matching_node(NULL, arch_timer_of_match);
>   if (!np) {
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 0/6] arm64: Add support of KVM with ACPI

2016-02-18 Thread Wei Huang


On 02/11/2016 09:33 AM, Julien Grall wrote:
> Hello,
> 
> This small series allows an ARM64 ACPI based platform to use KVM.
> 
> Currently the KVM code has to parse the firmware table to get the necessary
> information to setup the virtual timer and virtual GIC.
> 
> However, the parsing of those tables is already done in the GIC and arch
> timer drivers.
> 
> This patch series introduces different helpers to retrieve the information
> from the different drivers, avoiding duplication of the parsing code.
> 
> Note there are patch series ([1] and [2]) adding support of KVM on ACPI,
> although the approach chosen is completely different. The code to parse
> the firmware tables is duplicated, which I think makes it more complex to
> support new firmware tables.

I backported these patches to my internal tree. It booted on an ARM64
machine. Even though I haven't had the chance to test it on a GICv3
machine yet (will update later), I think you can add my name as Tested-by if
needed.

-Wei

> 
> See the changes since v1 in the different patches.
> 
> Regards,
> 
> [1] https://lists.cs.columbia.edu/pipermail/kvmarm/2016-February/018482.html
> [2] https://lists.cs.columbia.edu/pipermail/kvmarm/2016-February/018355.html
> 
> Julien Grall (6):
>   KVM: arm/arm64: arch_timer: Gather KVM specific information in a
> structure
>   KVM: arm/arm64: arch_timer: Rely on the arch timer to parse the
> firmware tables
>   irqchip/gic-v2: Gather ACPI specific data in a single structure
>   irqchip/gic-v2: Parse and export virtual GIC information
>   irqchip/gic-v3: Parse and export virtual GIC information
>   KVM: arm/arm64: vgic: Rely on the GIC driver to parse the firmware
> tables
> 
>  drivers/clocksource/arm_arch_timer.c   | 11 ++--
>  drivers/irqchip/irq-gic-common.c   | 13 +
>  drivers/irqchip/irq-gic-common.h   |  3 ++
>  drivers/irqchip/irq-gic-v3.c   | 36 ++
>  drivers/irqchip/irq-gic.c  | 91 
> --
>  include/clocksource/arm_arch_timer.h   | 13 ++---
>  include/kvm/arm_vgic.h |  7 +--
>  include/linux/irqchip/arm-gic-common.h | 34 +
>  virt/kvm/arm/arch_timer.c  | 39 ---
>  virt/kvm/arm/vgic-v2.c | 67 +
>  virt/kvm/arm/vgic-v3.c | 45 +
>  virt/kvm/arm/vgic.c| 50 ++-
>  12 files changed, 264 insertions(+), 145 deletions(-)
>  create mode 100644 include/linux/irqchip/arm-gic-common.h
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 3/5] irqchip/gic-v2: Parse and export virtual GIC information

2016-02-09 Thread Wei Huang


On 02/09/2016 02:49 PM, Christoffer Dall wrote:
> On Mon, Feb 08, 2016 at 04:47:27PM +, Julien Grall wrote:
>> For now, the firmware tables are parsed twice: once in the GIC
>> drivers, and again when initializing the vGIC. It means code
>> duplication and makes it more tedious to add support for another
>> firmware table (like ACPI).
>>
>> Introduce a new structure and set of helpers to get/set the virtual GIC
>> information. Also fill up the structure for GICv2.
>>
>> Signed-off-by: Julien Grall 
>> ---
>>
>> Cc: Thomas Gleixner 
>> Cc: Jason Cooper 
>> Cc: Marc Zyngier 
>>
>>  drivers/irqchip/irq-gic-common.c   | 13 ++
>>  drivers/irqchip/irq-gic-common.h   |  3 ++
>>  drivers/irqchip/irq-gic.c  | 78 
>> +-
>>  include/linux/irqchip/arm-gic-common.h | 34 +++
>>  4 files changed, 127 insertions(+), 1 deletion(-)
>>  create mode 100644 include/linux/irqchip/arm-gic-common.h
>>
>> diff --git a/drivers/irqchip/irq-gic-common.c 
>> b/drivers/irqchip/irq-gic-common.c
>> index f174ce0..704caf4 100644
>> --- a/drivers/irqchip/irq-gic-common.c
>> +++ b/drivers/irqchip/irq-gic-common.c
>> @@ -21,6 +21,19 @@
>>  
>>  #include "irq-gic-common.h"
>>  
>> +static const struct gic_kvm_info *gic_kvm_info;
>> +
>> +const struct gic_kvm_info *gic_get_kvm_info(void)
>> +{
>> +return gic_kvm_info;
>> +}
>> +
>> +void gic_set_kvm_info(const struct gic_kvm_info *info)
>> +{
>> +WARN(gic_kvm_info != NULL, "gic_kvm_info already set\n");
>> +gic_kvm_info = info;
>> +}
>> +
>>  void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
>>  void *data)
>>  {
>> diff --git a/drivers/irqchip/irq-gic-common.h 
>> b/drivers/irqchip/irq-gic-common.h
>> index fff697d..205e5fd 100644
>> --- a/drivers/irqchip/irq-gic-common.h
>> +++ b/drivers/irqchip/irq-gic-common.h
>> @@ -19,6 +19,7 @@
>>  
>>  #include 
>>  #include 
>> +#include <linux/irqchip/arm-gic-common.h>
>>  
>>  struct gic_quirk {
>>  const char *desc;
>> @@ -35,4 +36,6 @@ void gic_cpu_config(void __iomem *base, void 
>> (*sync_access)(void));
>>  void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
>>  void *data);
>>  
>> +void gic_set_kvm_info(const struct gic_kvm_info *info);
>> +
>>  #endif /* _IRQ_GIC_COMMON_H */
>> diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
>> index 911758c..d3a09a4 100644
>> --- a/drivers/irqchip/irq-gic.c
>> +++ b/drivers/irqchip/irq-gic.c
>> @@ -102,6 +102,8 @@ static struct static_key supports_deactivate = 
>> STATIC_KEY_INIT_TRUE;
>>  
>>  static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly;
>>  
>> +static struct gic_kvm_info gic_v2_kvm_info;
>> +
>>  #ifdef CONFIG_GIC_NON_BANKED
>>  static void __iomem *gic_get_percpu_base(union gic_base *base)
>>  {
>> @@ -1190,6 +1192,44 @@ static bool gic_check_eoimode(struct device_node 
>> *node, void __iomem **base)
>>  return true;
>>  }
>>  
>> +static void __init gic_of_setup_kvm_info(struct device_node *node)
>> +{
>> +int ret;
>> +struct resource r;
>> +unsigned int irq;
>> +
>> +gic_v2_kvm_info.type = GIC_V2;
>> +
>> +irq = irq_of_parse_and_map(node, 0);
>> +if (!irq)
>> +gic_v2_kvm_info.maint_irq = -1;
>> +else
>> +gic_v2_kvm_info.maint_irq = irq;
>> +
>> +ret = of_address_to_resource(node, 2, &r);
>> +if (!ret) {
>> +gic_v2_kvm_info.vctrl_base = r.start;
>> +gic_v2_kvm_info.vctrl_size = resource_size(&r);
>> +}
>> +
>> +ret = of_address_to_resource(node, 3, &r);
>> +if (!ret) {
>> +if (!PAGE_ALIGNED(r.start))
>> +pr_warn("GICV physical address 0x%llx not page 
>> aligned\n",
>> +(unsigned long long)r.start);
>> +else if (!PAGE_ALIGNED(resource_size(&r)))
>> +pr_warn("GICV size 0x%llx not a multiple of page size 
>> 0x%lx\n",
>> +(unsigned long long)resource_size(&r),
>> +PAGE_SIZE);
>> +else {
>> +gic_v2_kvm_info.vcpu_base = r.start;
>> +gic_v2_kvm_info.vcpu_size = resource_size(&r);
>> +}
>> +}
>> +
>> +gic_set_kvm_info(&gic_v2_kvm_info);
>> +}
>> +
>>  int __init
>>  gic_of_init(struct device_node *node, struct device_node *parent)
>>  {
>> @@ -1219,8 +1259,10 @@ gic_of_init(struct device_node *node, struct 
>> device_node *parent)
>>  
>>  __gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset,
>>   &node->fwnode);
>> -if (!gic_cnt)
>> +if (!gic_cnt) {
>>  gic_init_physaddr(node);
>> +gic_of_setup_kvm_info(node);
>> +}
>>  
>>  if (parent) {
>>  irq = irq_of_parse_and_map(node, 0);
>> @@ -1247,6 +1289,32 @@ IRQCHIP_DECLARE(pl390, "arm,pl390", gic_of_init);
>>  
>>  #ifdef 

Re: [PATCH V1 0/7] Enable ACPI support for ARM KVM GIC

2016-02-08 Thread Wei Huang


On 2/8/16 10:39, Julien Grall wrote:
> Hi,
> 
> On 08/02/16 09:59, Marc Zyngier wrote:
>> On 05/02/16 17:07, Wei Huang wrote:
>>> Wei Huang (7):
>>>KVM: GIC: Move GIC DT probing code to GICv2 and GICv3 files
>>>KVM: GIC: Add extra fields to store GICH and GICV resource info
>>>KVM: GIC: Create a common probe function for GIC
>>>KVM: GICv2: Extract the common code from DT
>>>KVM: GICv2: Add ACPI probing function
>>>KVM: GICv3: Extract the common code from DT
>>>KVM: GICv3: Add ACPI probing function
>>>
>>>   include/kvm/arm_vgic.h  |  14 ++--
>>>   virt/kvm/arm/vgic-v2-emul.c |   4 +-
>>>   virt/kvm/arm/vgic-v2.c  | 186
>>> +---
>>>   virt/kvm/arm/vgic-v3.c  | 159
>>> -
>>>   virt/kvm/arm/vgic.c |  22 +-
>>>   5 files changed, 277 insertions(+), 108 deletions(-)
>>>
>>
>> So when I see this diffstat and the patches that follow, I cannot help
>> but think that we do have a disconnect here. You do add a bunch of ACPI
>> probing in KVM, to which I've already said no.
>>
>> I want to see the probing code in the GIC drivers, exported through a
>> common structure that KVM can then use. That's it. Nothing else.
> 
> I've been working on a patch series which relies on the GIC and arch timer
> drivers to get the necessary information to initialize the different KVM
> components (vGIC + vtimer).
> 
> I think this achieves the goal you have in mind. I will post the series
> in a few minutes.
> 

Julien,

Regarding arch_timer, there is an ongoing effort by Linaro. It has an
impact on the KVM arch_timer init. See https://lkml.org/lkml/2016/2/1/658.

-Wei

> Regards,
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH V1 0/7] Enable ACPI support for ARM KVM GIC

2016-02-08 Thread Wei Huang


On 2/8/16 03:59, Marc Zyngier wrote:
> Wei,
> 
> On 05/02/16 17:07, Wei Huang wrote:
>> This patch set enables ACPI support for KVM GIC. Note that the patches
>> are in fact the V3 of previously submitted patches (search "Enable ACPI
>> support for KVM ARM"). But because Fu Wei includes the arch_timer part
>> in his series [1] and I have substantially re-written the GIC code in this
>> revision, the version number is reset to v1. 
>>
>> By following Marc's prior comments, the main design idea is to let DT or
>> ACPI code fill out the "struct vgic_params", which is extended to
>> include all GIC-related info.
> 
> I think you misread the comments I gave back in June (!), where I said:
> 
> 
> [...]
> 
> Simply making available a global structure containing the base addresses
> and interrupt should be enough, and could be shared with both DT and
> ACPI. You could start your series by letting both GIC drivers expose
> that information obtained through DT, convert KVM to use this structure,
> and later on let ACPI fill in this structure too.
> 
>> Anyway the difficulty is to find a common place to store and share info
>> between other modules & KVM.
> 
> Indeed. As a rule of thumb, I want to minimize the amount of gratuitous
> divergence between DT and ACPI. So the sooner we extract the required
> information from whatever firmware we have, the better.
> 
> 
>>
>> [1] https://lkml.org/lkml/2016/2/1/658
>>
>> Thanks,
>> -Wei
>>
>> Wei Huang (7):
>>   KVM: GIC: Move GIC DT probing code to GICv2 and GICv3 files
>>   KVM: GIC: Add extra fields to store GICH and GICV resource info
>>   KVM: GIC: Create a common probe function for GIC
>>   KVM: GICv2: Extract the common code from DT
>>   KVM: GICv2: Add ACPI probing function
>>   KVM: GICv3: Extract the common code from DT
>>   KVM: GICv3: Add ACPI probing function
>>
>>  include/kvm/arm_vgic.h  |  14 ++--
>>  virt/kvm/arm/vgic-v2-emul.c |   4 +-
>>  virt/kvm/arm/vgic-v2.c  | 186 
>> +---
>>  virt/kvm/arm/vgic-v3.c  | 159 -
>>  virt/kvm/arm/vgic.c |  22 +-
>>  5 files changed, 277 insertions(+), 108 deletions(-)
>>
> 
> So when I see this diffstat and the patches that follow, I cannot help
> but think that we do have a disconnect here. You do add a bunch of ACPI
> probing in KVM, to which I've already said no.
> 
> I want to see the probing code in the GIC drivers, exported through a
> common structure that KVM can then use. That's it. Nothing else.

OK. I will create a V2 based on that idea.
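
Roughly, I read that as the GIC drivers filling in and exposing a shared
structure along these lines (only a sketch of my understanding; field names
are not final):

struct gic_kvm_info {
	enum gic_type	type;		/* GIC_V2 or GIC_V3 */
	unsigned int	maint_irq;	/* maintenance interrupt */
	phys_addr_t	vctrl_base;	/* GICH base (GICv2 only) */
	phys_addr_t	vctrl_size;
	phys_addr_t	vcpu_base;	/* GICV base */
	phys_addr_t	vcpu_size;
};

const struct gic_kvm_info *gic_get_kvm_info(void);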

-Wei

> 
> Thanks,
> 
>   M.
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V1 4/7] KVM: GICv2: Extract the common code from DT

2016-02-05 Thread Wei Huang
This patch extracts the common code from the DT probe function. With
this patch the DT function only fills out the following info in *vgic.
  - maint_irq (mapped)
  - GICH resource
  - GICV resource
Note that vgic->vctrl_base io-remapping is now moved to vgic_v2_probe().

Signed-off-by: Wei Huang <w...@redhat.com>
---
 virt/kvm/arm/vgic-v2.c | 92 --
 1 file changed, 44 insertions(+), 48 deletions(-)

diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 7dd5fb3..b60e73a 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -188,7 +188,7 @@ static const struct of_device_id vgic_v2_ids[] = {
  */
 static int vgic_v2_dt_probe(struct vgic_params *vgic)
 {
-   int ret;
+   int ret = 0;
struct resource vctrl_res;
struct resource vcpu_res;
struct device_node *vgic_node;
@@ -201,24 +201,55 @@ static int vgic_v2_dt_probe(struct vgic_params *vgic)
 
vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
if (!vgic->maint_irq) {
-   kvm_err("error getting vgic maintenance irq from DT\n");
+   kvm_err("Cannot get vgic maintenance irq from DT\n");
ret = -ENXIO;
goto out;
}
 
ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
if (ret) {
-   kvm_err("Cannot obtain GICH resource\n");
+   kvm_err("Cannot obtain GICH resource from DT\n");
goto out;
}
vgic->vctrl_phys_base = vctrl_res.start;
vgic->vctrl_size = resource_size(&vctrl_res);
 
-   vgic->vctrl_base = of_iomap(vgic_node, 2);
+   if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
+   kvm_err("Cannot obtain GICV resource\n");
+   ret = -ENXIO;
+   goto out;
+   }
+   vgic->vcpu_phys_base = vcpu_res.start;
+   vgic->vcpu_size = resource_size(&vcpu_res);
+out:
+   of_node_put(vgic_node);
+   return ret;
+}
+
+/**
+ * vgic_v2_probe - probe for a GICv2 compatible interrupt controller
+ * @ops:   address of a pointer to the GICv2 operations
+ * @params:address of a pointer to HW-specific parameters
+ *
+ * Returns 0 if a GICv2 has been found, with the low level operations
+ * in *ops and the HW parameters in *params. Returns an error code
+ * otherwise.
+ */
+int vgic_v2_probe(const struct vgic_ops **ops,
+ const struct vgic_params **params)
+{
+   int ret;
+   struct vgic_params *vgic = &vgic_v2_params;
+
+   ret = vgic_v2_dt_probe(vgic);
+   if (ret)
+   goto err_out;
+
+   vgic->vctrl_base = ioremap(vgic->vctrl_phys_base, vgic->vctrl_size);
if (!vgic->vctrl_base) {
kvm_err("Cannot ioremap GICH\n");
ret = -ENOMEM;
-   goto out;
+   goto err_out;
}
 
vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR);
@@ -229,71 +260,36 @@ static int vgic_v2_dt_probe(struct vgic_params *vgic)
 vgic->vctrl_phys_base);
if (ret) {
kvm_err("Cannot map VCTRL into hyp\n");
-   goto out_unmap;
-   }
-
-   if (of_address_to_resource(vgic_node, 3, _res)) {
-   kvm_err("Cannot obtain GICV resource\n");
-   ret = -ENXIO;
-   goto out_unmap;
+   goto err_out;
}
-   vgic->vcpu_phys_base = vcpu_res.start;
-   vgic->vcpu_size = resource_size(_res);
 
if (!PAGE_ALIGNED(vgic->vcpu_phys_base)) {
kvm_err("GICV physical address 0x%llx not page aligned\n",
-   (unsigned long long)vcpu_res.start);
+   (unsigned long long)vgic->vcpu_phys_base);
ret = -ENXIO;
-   goto out_unmap;
+   goto err_out;
}
 
if (!PAGE_ALIGNED(vgic->vcpu_size)) {
kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
(unsigned long long)vgic->vcpu_size, PAGE_SIZE);
ret = -ENXIO;
-   goto out_unmap;
+   goto err_out;
}
 
-   vgic->can_emulate_gicv2 = true;
kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
-
-   kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
-vgic->vctrl_phys_base, vgic->maint_irq);
-
+   vgic->can_emulate_gicv2 = true;
vgic->type = VGIC_V2;
vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;
-   goto out;
 
-out_unmap:
-   iounmap(vgic->vctrl_base);
-out:
-   of_node_put(vgic_node);
-   return ret;
-}
-
-/**
- * vgic_v2_probe - probe for a GICv2 compatible interrupt controller
- * @ops:   address of a pointer to the GICv2 operations
- 

[PATCH V1 6/7] KVM: GICv3: Extract the common code from DT

2016-02-05 Thread Wei Huang
In preparation for ACPI probing, this patch extracts the DT-neutral
code into vgic_v3_probe(). The DT function now only fills out the following
info in *vgic:
  - maint_irq (mapped)
  - GICv resources

Signed-off-by: Wei Huang <w...@redhat.com>
---
 virt/kvm/arm/vgic-v3.c | 75 --
 1 file changed, 42 insertions(+), 33 deletions(-)

diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index b036134..5eca58a 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -239,20 +239,11 @@ static int vgic_v3_dt_probe(struct vgic_params *vgic)
 
vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
if (!vgic->maint_irq) {
-   kvm_err("error getting vgic maintenance irq from DT\n");
+   kvm_err("Cannot get vgic maintenance irq from DT\n");
ret = -ENXIO;
goto out;
}
 
-   ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
-
-   /*
-* The ListRegs field is 5 bits, but there is a architectural
-* maximum of 16 list registers. Just ignore bit 4...
-*/
-   vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
-   vgic->can_emulate_gicv2 = false;
-
if (of_property_read_u32(vgic_node, "#redistributor-regions", 
&gicv_idx))
gicv_idx = 1;
 
@@ -260,35 +251,15 @@ static int vgic_v3_dt_probe(struct vgic_params *vgic)
if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
kvm_info("GICv3: no GICV resource entry\n");
vgic->vcpu_phys_base = 0;
-   } else if (!PAGE_ALIGNED(vcpu_res.start)) {
-   pr_warn("GICV physical address 0x%llx not page aligned\n",
-   (unsigned long long)vcpu_res.start);
-   vgic->vcpu_phys_base = 0;
-   } else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
-   pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
-   (unsigned long long)resource_size(&vcpu_res),
-   PAGE_SIZE);
-   vgic->vcpu_phys_base = 0;
} else {
vgic->vcpu_phys_base = vcpu_res.start;
-   vgic->can_emulate_gicv2 = true;
-   kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
-   KVM_DEV_TYPE_ARM_VGIC_V2);
+   vgic->vcpu_size = resource_size(&vcpu_res);
}
-   if (vgic->vcpu_phys_base == 0)
-   kvm_info("disabling GICv2 emulation\n");
-   kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3);
-
-   vgic->vctrl_base = NULL;
-   vgic->type = VGIC_V3;
-   vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS;
-
-   kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
-vcpu_res.start, vgic->maint_irq);
 out:
of_node_put(vgic_node);
return ret;
 }
+
 /**
  * vgic_v3_probe - probe for a GICv3 compatible interrupt controller
  * @ops:   address of a pointer to the GICv3 operations
@@ -304,10 +275,48 @@ int vgic_v3_probe(const struct vgic_ops **ops,
int ret = 0;
struct vgic_params *vgic = &vgic_v3_params;
 
+   /* DT probing first, then try ACPI probing */
ret = vgic_v3_dt_probe(vgic);
+   if (ret)
+   goto out;
+
+   ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
+
+   /*
+* The ListRegs field is 5 bits, but there is a architectural
+* maximum of 16 list registers. Just ignore bit 4...
+*/
+   vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
+
+   if (!PAGE_ALIGNED(vgic->vcpu_phys_base)) {
+   pr_warn("GICV physical address 0x%llx not page aligned\n",
+   (unsigned long long)vgic->vcpu_phys_base);
+   vgic->vcpu_phys_base = 0;
+   } else if (!PAGE_ALIGNED(vgic->vcpu_size)) {
+   pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+   (unsigned long long)vgic->vcpu_size, PAGE_SIZE);
+   vgic->vcpu_phys_base = 0;
+   };
+
+   if (vgic->vcpu_phys_base != 0) {
+   vgic->can_emulate_gicv2 = true;
+   kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
+   KVM_DEV_TYPE_ARM_VGIC_V2);
+   } else {
+   vgic->can_emulate_gicv2 = false;
+   kvm_info("disabling GICv2 emulation\n");
+   }
+
+   kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3);
+
+   vgic->vctrl_base = NULL;
+   vgic->type = VGIC_V3;
+   vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS;
+
+   kvm_info("GICv3@%llx IRQ%d\n", vgic->vcpu_phys_base, vgic->maint_irq);
 
*ops = &vgic_v3_ops;
*params = vgic;
-
+out:
return ret;
 }
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V1 7/7] KVM: GICv3: Add ACPI probing function

2016-02-05 Thread Wei Huang
This patch implements ACPI probing for GICv3.

Signed-off-by: Wei Huang <w...@redhat.com>
---
 virt/kvm/arm/vgic-v3.c | 64 --
 1 file changed, 62 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 5eca58a..5bfb9cb 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include <linux/acpi.h>
 
 #include 
 
@@ -260,6 +261,62 @@ out:
return ret;
 }
 
+#ifdef CONFIG_ACPI
+static struct acpi_madt_generic_interrupt *vgic_acpi;
+static void gic_v3_get_acpi_header(struct acpi_subtable_header *header)
+{
+   vgic_acpi = (struct acpi_madt_generic_interrupt *)header;
+}
+
+static struct acpi_madt_generic_distributor *dist_acpi;
+static void gic_v3_get_dist_header(struct acpi_subtable_header *header)
+{
+   dist_acpi = (struct acpi_madt_generic_distributor *)header;
+}
+
+static int vgic_v3_acpi_probe(struct vgic_params *vgic)
+{
+   int irq_mode;
+   int count = 0;
+   int ret = 0;
+
+   count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
+   (acpi_tbl_entry_handler)gic_v3_get_acpi_header, 0);
+   if (!count) {
+   ret = -ENODEV;
+   goto out;
+   }
+
+   count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
+   (acpi_tbl_entry_handler)gic_v3_get_dist_header, 0);
+   if (!count || (dist_acpi->version != ACPI_MADT_GIC_VERSION_V3)) {
+   ret = -ENODEV;
+   goto out;
+   }
+
+   /* IRQ trigger mode */
+   irq_mode = (vgic_acpi->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
+   ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE;
+   vgic->maint_irq = acpi_register_gsi(NULL, vgic_acpi->vgic_interrupt,
+   irq_mode, ACPI_ACTIVE_LOW);
+   if (!vgic->maint_irq) {
+   kvm_err("failed to get vgic maintenance irq from ACPI\n");
+   ret = -ENXIO;
+   goto out;
+   }
+
+   vgic->vcpu_phys_base = vgic_acpi->gicv_base_address;
+   vgic->vcpu_size = SZ_8K;
+out:
+   return ret;
+}
+#else
+static inline int vgic_v3_acpi_probe(struct vgic_params *vgic)
+{
+   return -ENODEV;
+}
+#endif /* CONFIG_ACPI */
+
 /**
  * vgic_v3_probe - probe for a GICv3 compatible interrupt controller
  * @ops:   address of a pointer to the GICv3 operations
@@ -277,8 +334,11 @@ int vgic_v3_probe(const struct vgic_ops **ops,
 
/* DT probing first, then try ACPI probing */
ret = vgic_v3_dt_probe(vgic);
-   if (ret)
-   goto out;
+   if (ret && !acpi_disabled) {
+   ret = vgic_v3_acpi_probe(vgic);
+   if (ret)
+   goto out;
+   }
 
ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
 
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V1 1/7] KVM: GIC: Move GIC DT probing code to GICv2 and GICv3 files

2016-02-05 Thread Wei Huang
This patch moves the GIC DT probing code from vgic.c to the GICv2 & GICv3
sub-files. The probing will start with GICv2. If that probing fails,
KVM will then try to probe GICv3.

Signed-off-by: Wei Huang <w...@redhat.com>
---
 include/kvm/arm_vgic.h |  6 ++
 virt/kvm/arm/vgic-v2.c | 17 ++---
 virt/kvm/arm/vgic-v3.c | 15 ---
 virt/kvm/arm/vgic.c| 22 ++
 4 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 13a3d53..59428d4 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -357,12 +357,10 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct 
irq_phys_map *map);
 #define vgic_initialized(k)(!!((k)->arch.vgic.nr_cpus))
 #define vgic_ready(k)  ((k)->arch.vgic.ready)
 
-int vgic_v2_probe(struct device_node *vgic_node,
- const struct vgic_ops **ops,
+int vgic_v2_probe(const struct vgic_ops **ops,
  const struct vgic_params **params);
 #ifdef CONFIG_KVM_ARM_VGIC_V3
-int vgic_v3_probe(struct device_node *vgic_node,
- const struct vgic_ops **ops,
+int vgic_v3_probe(const struct vgic_ops **ops,
  const struct vgic_params **params);
 #else
 static inline int vgic_v3_probe(struct device_node *vgic_node,
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index ff02f08..dc9ceab 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -175,10 +175,15 @@ static const struct vgic_ops vgic_v2_ops = {
 };
 
 static struct vgic_params vgic_v2_params;
+static const struct of_device_id vgic_v2_ids[] = {
+   { .compatible = "arm,cortex-a15-gic" },
+   { .compatible = "arm,cortex-a7-gic" },
+   { .compatible = "arm,gic-400" },
+   {},
+};
 
 /**
  * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
- * @node:  pointer to the DT node
  * @ops:   address of a pointer to the GICv2 operations
  * @params:address of a pointer to HW-specific parameters
  *
@@ -186,15 +191,21 @@ static struct vgic_params vgic_v2_params;
  * in *ops and the HW parameters in *params. Returns an error code
  * otherwise.
  */
-int vgic_v2_probe(struct device_node *vgic_node,
- const struct vgic_ops **ops,
+int vgic_v2_probe(const struct vgic_ops **ops,
  const struct vgic_params **params)
 {
int ret;
struct resource vctrl_res;
struct resource vcpu_res;
+   struct device_node *vgic_node;
struct vgic_params *vgic = &vgic_v2_params;
 
+   vgic_node = of_find_matching_node(NULL, vgic_v2_ids);
+   if (!vgic_node) {
+   ret = -ENODEV;
+   goto out;
+   }
+
vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
if (!vgic->maint_irq) {
kvm_err("error getting vgic maintenance irq from DT\n");
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 453eafd..5fa5fa7 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -215,10 +215,13 @@ static const struct vgic_ops vgic_v3_ops = {
 };
 
 static struct vgic_params vgic_v3_params;
+static const struct of_device_id vgic_v3_ids[] = {
+   { .compatible = "arm,gic-v3" },
+   {},
+};
 
 /**
  * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
- * @node:  pointer to the DT node
  * @ops:   address of a pointer to the GICv3 operations
  * @params:address of a pointer to HW-specific parameters
  *
@@ -226,15 +229,21 @@ static struct vgic_params vgic_v3_params;
  * in *ops and the HW parameters in *params. Returns an error code
  * otherwise.
  */
-int vgic_v3_probe(struct device_node *vgic_node,
- const struct vgic_ops **ops,
+int vgic_v3_probe(const struct vgic_ops **ops,
  const struct vgic_params **params)
 {
int ret = 0;
u32 gicv_idx;
struct resource vcpu_res;
+   struct device_node *vgic_node;
struct vgic_params *vgic = &vgic_v3_params;
 
+   vgic_node = of_find_matching_node(NULL, vgic_v3_ids);
+   if (!vgic_node) {
+   ret = -ENODEV;
+   goto out;
+   }
+
vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
if (!vgic->maint_irq) {
kvm_err("error getting vgic maintenance irq from DT\n");
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 043032c..0d3d6b7 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -2389,34 +2389,16 @@ static struct notifier_block vgic_cpu_nb = {
.notifier_call = vgic_cpu_notify,
 };
 
-static const struct of_device_id vgic_ids[] = {
-   { .compatible = "arm,cortex-a15-gic",   .data = vgic_v2_probe, },
-   { .compatible = "arm,cortex-a7-gic",.data = vgic_v2_probe, },
-   { .compatible = "arm,gic-400",  .data = vgic_v2_probe, },
-

[PATCH V1 2/7] KVM: GIC: Add extra fields to store GICH and GICV resource info

2016-02-05 Thread Wei Huang
This patch adds new fields in the struct vgic_params to store the
resource info (base and size) of GICH & GICV interfaces. These new
fields will be used by the DT and ACPI probing code later.

Signed-off-by: Wei Huang <w...@redhat.com>
---
 include/kvm/arm_vgic.h  |  8 +++-
 virt/kvm/arm/vgic-v2-emul.c |  4 ++--
 virt/kvm/arm/vgic-v2.c  | 19 ++-
 virt/kvm/arm/vgic-v3.c  | 10 +-
 4 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 59428d4..8003ca8 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -127,11 +127,17 @@ struct vgic_params {
/* vgic type */
enum vgic_type  type;
/* Physical address of vgic virtual cpu interface */
-   phys_addr_t vcpu_base;
+   phys_addr_t vcpu_phys_base;
+   /* Size of vgic virtual cpu interface */
+   phys_addr_t vcpu_size;
/* Number of list registers */
u32 nr_lr;
/* Interrupt number */
unsigned intmaint_irq;
+   /* Virtual control interface physical address */
+   phys_addr_t vctrl_phys_base;
+   /* Size of virtual control interface */
+   phys_addr_t vctrl_size;
/* Virtual control interface base address */
void __iomem*vctrl_base;
int max_gic_vcpus;
diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
index 1390797..e244afe 100644
--- a/virt/kvm/arm/vgic-v2-emul.c
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -521,8 +521,8 @@ static int vgic_v2_map_resources(struct kvm *kvm,
}
 
ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
-   params->vcpu_base, KVM_VGIC_V2_CPU_SIZE,
-   true);
+   params->vcpu_phys_base,
+   KVM_VGIC_V2_CPU_SIZE, true);
if (ret) {
kvm_err("Unable to remap VGIC CPU to VCPU\n");
goto out_unregister;
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index dc9ceab..6540a6d 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -218,6 +218,8 @@ int vgic_v2_probe(const struct vgic_ops **ops,
kvm_err("Cannot obtain GICH resource\n");
goto out;
}
+   vgic->vctrl_phys_base = vctrl_res.start;
+   vgic->vctrl_size = resource_size(&vctrl_res);
 
vgic->vctrl_base = of_iomap(vgic_node, 2);
if (!vgic->vctrl_base) {
@@ -230,8 +232,8 @@ int vgic_v2_probe(const struct vgic_ops **ops,
vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
 
ret = create_hyp_io_mappings(vgic->vctrl_base,
-vgic->vctrl_base + 
resource_size(&vctrl_res),
-vctrl_res.start);
+vgic->vctrl_base + vgic->vctrl_size,
+vgic->vctrl_phys_base);
if (ret) {
kvm_err("Cannot map VCTRL into hyp\n");
goto out_unmap;
@@ -242,18 +244,19 @@ int vgic_v2_probe(const struct vgic_ops **ops,
ret = -ENXIO;
goto out_unmap;
}
+   vgic->vcpu_phys_base = vcpu_res.start;
+   vgic->vcpu_size = resource_size(&vcpu_res);
 
-   if (!PAGE_ALIGNED(vcpu_res.start)) {
+   if (!PAGE_ALIGNED(vgic->vcpu_phys_base)) {
kvm_err("GICV physical address 0x%llx not page aligned\n",
(unsigned long long)vcpu_res.start);
ret = -ENXIO;
goto out_unmap;
}
 
-   if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+   if (!PAGE_ALIGNED(vgic->vcpu_size)) {
kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
-   (unsigned long long)resource_size(&vcpu_res),
-   PAGE_SIZE);
+   (unsigned long long)vgic->vcpu_size, PAGE_SIZE);
ret = -ENXIO;
goto out_unmap;
}
@@ -261,10 +264,8 @@ int vgic_v2_probe(const struct vgic_ops **ops,
vgic->can_emulate_gicv2 = true;
kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
 
-   vgic->vcpu_base = vcpu_res.start;
-
kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
-vctrl_res.start, vgic->maint_irq);
+vgic->vctrl_phys_base, vgic->maint_irq);
 
vgic->type = VGIC_V2;
vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 5fa5fa7..33a5fdf 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -266,23 +266,23 @@ int vgic_v3_probe(const struct vgic_ops **ops,
gicv_idx += 3; /* Also skip GICD, GICC, GICH */
  

[PATCH V1 5/7] KVM: GICv2: Add ACPI probing function

2016-02-05 Thread Wei Huang
This patch implements ACPI probing for GICv2.

Signed-off-by: Wei Huang <w...@redhat.com>
---
 virt/kvm/arm/vgic-v2.c | 68 ++
 1 file changed, 68 insertions(+)

diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index b60e73a..7de2a1f 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include <linux/acpi.h>
 
 #include 
 
@@ -226,6 +227,71 @@ out:
return ret;
 }
 
+#ifdef CONFIG_ACPI
+static struct acpi_madt_generic_interrupt *vgic_acpi;
+static void gic_get_acpi_header(struct acpi_subtable_header *header)
+{
+   vgic_acpi = (struct acpi_madt_generic_interrupt *)header;
+}
+
+static struct acpi_madt_generic_distributor *dist_acpi;
+static void gic_get_dist_header(struct acpi_subtable_header *header)
+{
+   dist_acpi = (struct acpi_madt_generic_distributor *)header;
+}
+
+static int vgic_v2_acpi_probe(struct vgic_params *vgic)
+{
+   int irq_mode, ret = 0;
+
+   ret = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
+   (acpi_tbl_entry_handler)gic_get_acpi_header, 0);
+   if (!ret) {
+   ret = -ENODEV;
+   goto out;
+   }
+
+   ret = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
+   (acpi_tbl_entry_handler)gic_get_dist_header, 0);
+   if (!ret) {
+   kvm_err("Cannot get distributor entry from ACPI\n");
+   ret = -ENODEV;
+   goto out;
+   }
+
+   if (dist_acpi->version >= ACPI_MADT_GIC_VERSION_V3) {
+   ret = -ENODEV;
+   goto out;
+   }
+
+   irq_mode = (vgic_acpi->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
+   ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE;
+   /* According to the GIC-400 manual, PPIs are active-LOW, level-sensitive.
+* We register the IRQ as active-low.
+*/
+   vgic->maint_irq = acpi_register_gsi(NULL, vgic_acpi->vgic_interrupt,
+   irq_mode, ACPI_ACTIVE_LOW);
+   if (!vgic->maint_irq) {
+   kvm_err("Cannot register vgic maintenance irq from ACPI\n");
+   ret = -ENXIO;
+   goto out;
+   }
+
+   vgic->vctrl_phys_base = vgic_acpi->gich_base_address;
+   vgic->vctrl_size = SZ_8K;
+
+   vgic->vcpu_phys_base = vgic_acpi->gicv_base_address;
+   vgic->vcpu_size = 0; /* unavailable in ACPI, set to 0 */
+out:
+   return ret;
+}
+#else
+static inline int vgic_v2_acpi_probe(struct vgic_params *vgic)
+{
+   return -ENODEV;
+}
+#endif /* CONFIG_ACPI */
+
 /**
  * vgic_v2_probe - probe for a GICv2 compatible interrupt controller
  * @ops:   address of a pointer to the GICv2 operations
@@ -242,6 +308,8 @@ int vgic_v2_probe(const struct vgic_ops **ops,
struct vgic_params *vgic = &vgic_v2_params;
 
ret = vgic_v2_dt_probe(vgic);
+   if (ret && !acpi_disabled)
+   ret = vgic_v2_acpi_probe(vgic);
if (ret)
goto err_out;
 
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V1 0/7] Enable ACPI support for ARM KVM GIC

2016-02-05 Thread Wei Huang
This patch set enables ACPI support for KVM GIC. Note that the patches
are in fact the V3 of previously submitted patches (search "Enable ACPI
support for KVM ARM"). But because Fu Wei includes the arch_timer part
in his series [1] and I have substantially re-written the GIC code in this
revision, the version number is reset to v1. 

By following Marc's prior comments, the main design idea is to let DT or
ACPI code fill out the "struct vgic_params", which is extended to
include all GIC-related info.
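
Concretely, the DT or ACPI probing code only has to fill in fields along
these lines (a rough, abridged view; see patch 2/7 for the actual
definition):

struct vgic_params {
	enum vgic_type	type;		/* VGIC_V2 or VGIC_V3 */
	unsigned int	maint_irq;	/* mapped maintenance interrupt */
	phys_addr_t	vctrl_phys_base;/* GICH physical base */
	phys_addr_t	vctrl_size;	/* GICH size */
	phys_addr_t	vcpu_phys_base;	/* GICV physical base */
	phys_addr_t	vcpu_size;	/* GICV size */
	/* ... remaining fields unchanged ... */
};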

[1] https://lkml.org/lkml/2016/2/1/658

Thanks,
-Wei

Wei Huang (7):
  KVM: GIC: Move GIC DT probing code to GICv2 and GICv3 files
  KVM: GIC: Add extra fields to store GICH and GICV resource info
  KVM: GIC: Create a common probe function for GIC
  KVM: GICv2: Extract the common code from DT
  KVM: GICv2: Add ACPI probing function
  KVM: GICv3: Extract the common code from DT
  KVM: GICv3: Add ACPI probing function

 include/kvm/arm_vgic.h  |  14 ++--
 virt/kvm/arm/vgic-v2-emul.c |   4 +-
 virt/kvm/arm/vgic-v2.c  | 186 +---
 virt/kvm/arm/vgic-v3.c  | 159 -
 virt/kvm/arm/vgic.c |  22 +-
 5 files changed, 277 insertions(+), 108 deletions(-)

-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] KVM: arm64: Count guest exit due to various reasons

2015-10-19 Thread Wei Huang


On 10/18/2015 02:53 PM, Christoffer Dall wrote:
> On Sun, Oct 18, 2015 at 01:34:42PM +0530, Amit wrote:
>> From: Amit Singh Tomar 
>>
>> This patch adds guest exit statistics to debugfs, This can be helpful
>> while measuring KVM performance.
>>
>> Signed-off-by: Amit Singh Tomar 
>> ---
>>  arch/arm/include/asm/kvm_host.h   | 2 ++
>>  arch/arm/kvm/arm.c| 1 +
>>  arch/arm/kvm/guest.c  | 2 ++
>>  arch/arm/kvm/mmio.c   | 1 +
>>  arch/arm64/include/asm/kvm_host.h | 5 +
>>  arch/arm64/kvm/guest.c| 8 
>>  arch/arm64/kvm/handle_exit.c  | 3 +++
>>  7 files changed, 22 insertions(+)
>>
>> diff --git a/arch/arm/include/asm/kvm_host.h 
>> b/arch/arm/include/asm/kvm_host.h
>> index c4072d9..d1f3159 100644
>> --- a/arch/arm/include/asm/kvm_host.h
>> +++ b/arch/arm/include/asm/kvm_host.h
>> @@ -147,6 +147,8 @@ struct kvm_vcpu_stat {
>>  u32 halt_successful_poll;
>>  u32 halt_attempted_poll;
>>  u32 halt_wakeup;
>> +u32 mmio_exit_stat;
>> +u32 exits;
>>  };
>>  
>>  int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index 78b2869..5d0a2c5 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>> @@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
>> struct kvm_run *run)
>>  ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
>>  
>>  vcpu->mode = OUTSIDE_GUEST_MODE;
>> +vcpu->stat.exits++;
>>  /*
>>   * Back from guest
>>   */
>> diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
>> index 96e935b..0eea5fc 100644
>> --- a/arch/arm/kvm/guest.c
>> +++ b/arch/arm/kvm/guest.c
>> @@ -33,6 +33,8 @@
>>  #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 
>> }
>>  
>>  struct kvm_stats_debugfs_item debugfs_entries[] = {
>> +VCPU_STAT(mmio_exit_stat),
>> +VCPU_STAT(exits),
>>  { NULL }
>>  };
>>  
>> diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
>> index 974b1c6..51d518e 100644
>> --- a/arch/arm/kvm/mmio.c
>> +++ b/arch/arm/kvm/mmio.c
>> @@ -184,6 +184,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run 
>> *run,
>>  }
>>  
>>  rt = vcpu->arch.mmio_decode.rt;
>> +vcpu->stat.mmio_exit_stat++;
>>  
>>  if (is_write) {
>>  data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len);
>> diff --git a/arch/arm64/include/asm/kvm_host.h 
>> b/arch/arm64/include/asm/kvm_host.h
>> index ed03968..b32c938 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -194,6 +194,11 @@ struct kvm_vcpu_stat {
>>  u32 halt_successful_poll;
>>  u32 halt_attempted_poll;
>>  u32 halt_wakeup;
>> +u32 hvc_exit_stat;
>> +u32 wfx_exit_stat;
>> +u32 wfi_exit_stat;
>> +u32 mmio_exit_stat;
>> +u32 exits;
>>  };
>>  
>>  int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
>> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
>> index d250160..b1943cd 100644
>> --- a/arch/arm64/kvm/guest.c
>> +++ b/arch/arm64/kvm/guest.c
>> @@ -34,7 +34,15 @@
>>  
>>  #include "trace.h"
>>  
>> +#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM }
>> +#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 
>> }
>> +
>>  struct kvm_stats_debugfs_item debugfs_entries[] = {
>> +VCPU_STAT(hvc_exit_stat),
>> +VCPU_STAT(wfx_exit_stat),
>> +VCPU_STAT(wfi_exit_stat),
>> +VCPU_STAT(mmio_exit_stat),
>> +VCPU_STAT(exits),
>>  { NULL }
>>  };
>>  
>> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
>> index 68a0759..6e38c50 100644
>> --- a/arch/arm64/kvm/handle_exit.c
>> +++ b/arch/arm64/kvm/handle_exit.c
>> @@ -39,6 +39,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct 
>> kvm_run *run)
>>  
>>  trace_kvm_hvc_arm64(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
>>  kvm_vcpu_hvc_get_imm(vcpu));
>> +vcpu->stat.hvc_exit_stat++;
>>  
>>  ret = kvm_psci_call(vcpu);
>>  if (ret < 0) {
>> @@ -71,9 +72,11 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct 
>> kvm_run *run)
>>  {
>>  if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
>>  trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
>> +vcpu->stat.wfx_exit_stat++;
>>  kvm_vcpu_on_spin(vcpu);
>>  } else {
>>  trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
>> +vcpu->stat.wfi_exit_stat++;
>>  kvm_vcpu_block(vcpu);
>>  }
>>  
>> -- 
>> 1.9.1
>>
> 
> I thought the idea was to not use this infrastructure anymore and
> instead use tracepoints to count exit reasons?
> 
> I'm cc'ing Wei here who sent some patches to QEMU for this purpose
> recently.

The patches I submitted were for fixing architecture-related issues
while 

Re: [PATCH v3 07/20] KVM: ARM64: PMU: Add perf event map and introduce perf event creating function

2015-10-16 Thread Wei Huang


On 09/24/2015 05:31 PM, Shannon Zhao wrote:
> When we use tools like perf on the host, perf passes the event type and the
> id of this event type category to the kernel, then the kernel maps them to
> a hardware event number and writes this number to the PMU PMEVTYPER_EL0
> register. When getting the event number in KVM, directly use the raw event
> type to create a perf_event for it.
> 
> Signed-off-by: Shannon Zhao 
> ---
>  arch/arm64/include/asm/pmu.h |   2 +
>  arch/arm64/kvm/Makefile  |   1 +
>  include/kvm/arm_pmu.h|  13 
>  virt/kvm/arm/pmu.c   | 154 
> +++
>  4 files changed, 170 insertions(+)
>  create mode 100644 virt/kvm/arm/pmu.c
> 
> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
> index b9f394a..2c025f2 100644
> --- a/arch/arm64/include/asm/pmu.h
> +++ b/arch/arm64/include/asm/pmu.h
> @@ -31,6 +31,8 @@
>  #define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
>  #define ARMV8_PMCR_X (1 << 4) /* Export to ETM */
>  #define ARMV8_PMCR_DP(1 << 5) /* Disable CCNT if 
> non-invasive debug*/
> +/* Determines which PMCCNTR_EL0 bit generates an overflow */
> +#define ARMV8_PMCR_LC(1 << 6)
>  #define  ARMV8_PMCR_N_SHIFT  11   /* Number of counters 
> supported */
>  #define  ARMV8_PMCR_N_MASK   0x1f
>  #define  ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index 1949fe5..18d56d8 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -27,3 +27,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
> +kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> index bb0cd21..b48cdc6 100644
> --- a/include/kvm/arm_pmu.h
> +++ b/include/kvm/arm_pmu.h
> @@ -37,4 +37,17 @@ struct kvm_pmu {
>  #endif
>  };
>  
> +#ifdef CONFIG_KVM_ARM_PMU
> +unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u32 
> select_idx);
> +void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u32 data,
> + u32 select_idx);
> +#else
> +unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u32 
> select_idx)
> +{
> + return 0;
> +}
> +void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u32 data,
> + u32 select_idx) {}
> +#endif
> +
>  #endif
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> new file mode 100644
> index 000..002ec79
> --- /dev/null
> +++ b/virt/kvm/arm/pmu.c
> @@ -0,0 +1,154 @@
> +/*
> + * Copyright (C) 2015 Linaro Ltd.
> + * Author: Shannon Zhao 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +static void kvm_pmu_set_evttyper(struct kvm_vcpu *vcpu, u32 idx, u32 val)
> +{
> + if (!vcpu_mode_is_32bit(vcpu))
> + vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + idx) = val;
> + else
> + vcpu_cp15(vcpu, c14_PMEVTYPER0 + idx) = val;
> +}
> +
> +static unsigned long kvm_pmu_get_evttyper(struct kvm_vcpu *vcpu, u32 idx)
> +{
> + if (!vcpu_mode_is_32bit(vcpu))
> + return vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + idx)
> +& ARMV8_EVTYPE_EVENT;
> + else
> + return vcpu_cp15(vcpu, c14_PMEVTYPER0 + idx)
> +& ARMV8_EVTYPE_EVENT;
> +}
> +
> +/**
> + * kvm_pmu_stop_counter - stop PMU counter for the selected counter
> + * @vcpu: The vcpu pointer
> + * @select_idx: The counter index
> + *
> + * If this counter has been configured to monitor some event, disable and
> + * release it.
> + */
> +static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, u32 select_idx)
> +{
> + struct kvm_pmu *pmu = &vcpu->arch.pmu;
> + struct kvm_pmc *pmc = &pmu->pmc[select_idx];

A small suggestion (optional): it might be cleaner to define a macro and
use it here. Something like this in arm_pmu.h:

#define VCPU_TO_PMU(vcpu)  (&(vcpu)->arch.pmu)
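
Then the code above could simply read (just a sketch of the idea):

	struct kvm_pmu *pmu = VCPU_TO_PMU(vcpu);
	struct kvm_pmc *pmc = &pmu->pmc[select_idx];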

> +
> + if (pmc->perf_event) {
> + perf_event_disable(pmc->perf_event);
> + perf_event_release_kernel(pmc->perf_event);
> + 

Re: [PATCH v3 18/20] KVM: ARM64: Reset PMU state when resetting vcpu

2015-10-16 Thread Wei Huang


On 09/24/2015 05:31 PM, Shannon Zhao wrote:
> Signed-off-by: Shannon Zhao 

Missing commit message here.

> ---
>  arch/arm64/kvm/reset.c |  3 +++
>  include/kvm/arm_pmu.h  |  2 ++
>  virt/kvm/arm/pmu.c | 18 ++
>  3 files changed, 23 insertions(+)
> 
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 91cf535..4da7f6c 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -120,6 +120,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
>   /* Reset system registers */
>   kvm_reset_sys_regs(vcpu);
>  
> + /* Reset PMU */
> + kvm_pmu_vcpu_reset(vcpu);
> +
>   /* Reset timer */
>   return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
>  }
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> index 953c400..8dacfd3 100644
> --- a/include/kvm/arm_pmu.h
> +++ b/include/kvm/arm_pmu.h
> @@ -38,6 +38,7 @@ struct kvm_pmu {
>  };
>  
>  #ifdef CONFIG_KVM_ARM_PMU
> +void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
>  void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu);
>  unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u32 
> select_idx);
>  void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u32 val);
> @@ -46,6 +47,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u32 
> val);
>  void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u32 data,
>   u32 select_idx);
>  #else
> +void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {}
>  void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {}
>  unsigned long kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u32 
> select_idx)
>  {
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> index ca7e849..faa2b76 100644
> --- a/virt/kvm/arm/pmu.c
> +++ b/virt/kvm/arm/pmu.c
> @@ -63,6 +63,24 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, 
> u32 select_idx)
>  }
>  
>  /**
> + * kvm_pmu_vcpu_reset - reset pmu state for cpu
> + * @vcpu: The vcpu pointer
> + *
> + */
> +void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
> +{
> + int i;
> + struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +
> + for (i = 0; i < ARMV8_MAX_COUNTERS; i++) {
> + kvm_pmu_stop_counter(vcpu, i);
> + pmu->pmc[i].idx = i;
> + pmu->pmc[i].vcpu = vcpu;
> + }
> + pmu->irq_pending = false;
> +}
> +
> +/**
>   * kvm_pmu_sync_hwstate - sync pmu state for cpu
>   * @vcpu: The vcpu pointer
>   *
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 00/20] KVM: ARM64: Add guest PMU support

2015-10-15 Thread Wei Huang


On 09/24/2015 05:31 PM, Shannon Zhao wrote:
> This patchset adds guest PMU support for KVM on ARM64. It takes
> trap-and-emulate approach. When guest wants to monitor one event, it
> will be trapped by KVM and KVM will call perf_event API to create a perf
> event and call relevant perf_event APIs to get the count value of event.
> 
> Use perf to test this patchset in guest. When using "perf list", it
> shows the list of the hardware events and hardware cache events perf
> supports. Then use "perf stat -e EVENT" to monitor some event. For
> example, use "perf stat -e cycles" to count cpu cycles and
> "perf stat -e cache-misses" to count cache misses.
> 
> Below are the outputs of "perf stat -r 5 sleep 5" when running in host
> and guest.
> 
> Host:
>  Performance counter stats for 'sleep 5' (5 runs):
> 
>   0.551428  task-clock (msec) #0.000 CPUs utilized
> ( +-  0.91% )
>  1  context-switches  #0.002 M/sec
>  0  cpu-migrations#0.000 K/sec
> 48  page-faults   #0.088 M/sec
> ( +-  1.05% )
>1150265  cycles#2.086 GHz  
> ( +-  0.92% )
>  stalled-cycles-frontend
>  stalled-cycles-backend
> 526398  instructions  #0.46  insns per cycle  
> ( +-  0.89% )
>  branches
>   9485  branch-misses #   17.201 M/sec
> ( +-  2.35% )
> 
>5.000831616 seconds time elapsed   
>( +-  0.00% )
> 
> Guest:
>  Performance counter stats for 'sleep 5' (5 runs):
> 
>   0.730868  task-clock (msec) #0.000 CPUs utilized
> ( +-  1.13% )
>  1  context-switches  #0.001 M/sec
>  0  cpu-migrations#0.000 K/sec
> 48  page-faults   #0.065 M/sec
> ( +-  0.42% )
>1642982  cycles#2.248 GHz  
> ( +-  1.04% )
>  stalled-cycles-frontend
>  stalled-cycles-backend
> 637964  instructions  #0.39  insns per cycle  
> ( +-  0.65% )
>  branches
>  10377  branch-misses #   14.198 M/sec
> ( +-  1.09% )
> 
>5.001289068 seconds time elapsed   
>( +-  0.00% )
> 

Thanks for V3. One suggestion is to run more perf stress tests, such as
"perf test", so we know the corner cases are covered as much as possible.

> This patchset can be fetched from [1] and the relevant QEMU version for
> test can be fetched from [2].
> 
> Thanks,
> Shannon
> 
> [1] https://git.linaro.org/people/shannon.zhao/linux-mainline.git  
> KVM_ARM64_PMU_v3
> [2] https://git.linaro.org/people/shannon.zhao/qemu.git  PMU_v2
> 
> Changes since v2->v3:
> * Directly use perf raw event type to create perf_event in KVM
> * Add a helper vcpu_sysreg_write
> * remove unrelated header file
> 
> Changes since v1->v2:
> * Use switch...case for registers access handler instead of adding
>   alone handler for each register
> * Try to use the sys_regs to store the register value instead of adding
>   new variables in struct kvm_pmc
> * Fix the handle of cp15 regs
> * Create a new kvm device vPMU, then userspace could choose whether to
>   create PMU
> * Fix the handle of PMU overflow interrupt
> 
> Shannon Zhao (20):
>   ARM64: Move PMU register related defines to asm/pmu.h
>   KVM: ARM64: Define PMU data structure for each vcpu
>   KVM: ARM64: Add offset defines for PMU registers
>   KVM: ARM64: Add reset and access handlers for PMCR_EL0 register
>   KVM: ARM64: Add reset and access handlers for PMSELR register
>   KVM: ARM64: Add reset and access handlers for PMCEID0 and PMCEID1
> register
>   KVM: ARM64: PMU: Add perf event map and introduce perf event creating
> function
>   KVM: ARM64: Add reset and access handlers for PMXEVTYPER register
>   KVM: ARM64: Add reset and access handlers for PMXEVCNTR register
>   KVM: ARM64: Add reset and access handlers for PMCCNTR register
>   KVM: ARM64: Add reset and access handlers for PMCNTENSET and
> PMCNTENCLR register
>   KVM: ARM64: Add reset and access handlers for PMINTENSET and
> PMINTENCLR register
>   KVM: ARM64: Add reset and access handlers for PMOVSSET and PMOVSCLR
> register
>   KVM: ARM64: Add reset and access handlers for PMUSERENR register
>   KVM: ARM64: Add reset and access handlers for PMSWINC register
>   KVM: ARM64: Add access handlers for PMEVCNTRn and PMEVTYPERn register
>   KVM: ARM64: Add PMU overflow interrupt routing
>   KVM: ARM64: Reset PMU state when resetting vcpu
>   KVM: ARM64: Free perf event of PMU when destroying vcpu
>   KVM: ARM64: Add a new kvm ARM PMU device
> 
>  

Re: [kvm-unit-tests PATCHv2] arm: Add PMU test

2015-10-05 Thread Wei Huang


On 10/02/2015 10:48 AM, Christopher Covington wrote:
> Add a test for the ARM Performance Monitors Unit (PMU). The informational
> fields from the control register are printed, but not checked, and
> the number of cycles it takes to run a known-instruction-count loop
> is printed, but not checked. Once QEMU is fixed, we can at least
> begin to check for IPC == 1 when using -icount.

Thanks for submitting it. I think this is a good starting point to add
PMU unit testing support for ARM. Some comments below.

> 
> Signed-off-by: Christopher Covington 
> ---
>  arm/pmu.c   | 89 
> +
>  arm/unittests.cfg   | 11 ++
>  config/config-arm64.mak |  4 ++-
>  3 files changed, 103 insertions(+), 1 deletion(-)
>  create mode 100644 arm/pmu.c
> 
> diff --git a/arm/pmu.c b/arm/pmu.c
> new file mode 100644
> index 000..f724c2c
> --- /dev/null
> +++ b/arm/pmu.c
> @@ -0,0 +1,89 @@
> +/*
> + * Test the ARM Performance Monitors Unit (PMU).
> + *
> + * Copyright 2015 The Linux Foundation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU Lesser General Public License version 2.1 and
> + * only version 2.1 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but 
> WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 
> License
> + * for more details.
> + */
> +#include "libcflat.h"
> +
> +struct pmu_data {
> + union {
> + uint32_t pmcr_el0;
> + struct {
> + unsigned int enable:1;
> + unsigned int event_counter_reset:1;
> + unsigned int cycle_counter_reset:1;
> + unsigned int cycle_counter_clock_divider:1;
> + unsigned int event_counter_export:1;
> + unsigned int cycle_counter_disable_when_prohibited:1;
> + unsigned int cycle_counter_long:1;
> + unsigned int zeros:4;
> + unsigned int num_counters:5;
> + unsigned int identification_code:8;
> + unsigned int implementor:8;

Not saying it is a problem, because "unsigned int" is 32-bit on a 64-bit
machine. But to make it consistent with pmcr_el0, I would prefer
"unsigned int" be replaced by "uint32_t".

> + };
> + };
> +};
> +
> +/* Execute a known number of guest instructions. Only odd instruction counts
> + * greater than or equal to 3 are supported. The control register (PMCR) is
> + * initialized with the provided value (allowing for example for the cycle
> + * counter or eventer count to be reset if needed). After the known 
> instruction
> + * count loop, zero is written to the PMCR to disable counting, allowing the
> + * cycle counter or event counters to be read as needed at a later time.
> + */
> +static void measure_instrs(int len, struct pmu_data pmcr)
> +{
> + int i = (len - 1) / 2;
> +
> + if (len < 3 || ((len - 1) % 2))
> + abort();
> +
> + asm volatile(
> + "msr pmcr_el0, %[pmcr]\n"
> + "1: subs %[i], %[i], #1\n"
> + "b.gt 1b\n"
> + "msr pmcr_el0, xzr"
> + : [i] "+r" (i) : [pmcr] "r" (pmcr) : "cc");
> +}
> +
> +int main()
> +{
> + struct pmu_data pmcr;
> + const int samples = 10;
> +
> + asm volatile("mrs %0, pmcr_el0" : "=r" (pmcr));
> +
> + printf("PMU implementor: %c\n", pmcr.implementor);
> + printf("Identification code: 0x%x\n", pmcr.identification_code);
> + printf("Event counters:  %d\n", pmcr.num_counters);
> +
> + pmcr.cycle_counter_reset = 1;
> + pmcr.enable = 1;
> +
> + printf("\ninstructions : cycles0 cycles1 ...\n");
> +
> + for (int i = 3; i < 300; i += 32) {
> + int avg, sum = 0;
> + printf("%d :", i);
> + for (int j = 0; j < samples; j++) {
> + int val;
> + measure_instrs(i, pmcr);
> + asm volatile("mrs %0, pmccntr_el0" : "=r" (val));
> + sum += val;
> + printf(" %d", val);
> + }
> + avg = sum / samples;
> + printf(" sum=%d avg=%d avg_ipc=%d avg_cpi=%d\n", sum, avg, i / 
> avg, avg / i);
> + }

I understand that, as stated in the commit message, it currently doesn't
check the correctness of the PMU counter values. But it would be better if
the testing code were abstracted into an independent function (e.g.
instr_cycle_check) used as report("Instruction Cycles",
instr_cycle_check()). You can return TRUE in the checking code for now.
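
Something along these lines (only a sketch, with the real checks to be added
once QEMU is fixed):

static bool instr_cycle_check(void)
{
	/* TODO: compare the measured cycle counts against the known
	 * instruction count; always pass for now. */
	return true;
}

	/* ... and in main(): */
	report("Instruction Cycles", instr_cycle_check());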


> +
> + return report_summary();
> +}
> diff --git a/arm/unittests.cfg b/arm/unittests.cfg
> index 

Re: [PATCH v2 00/22] KVM: ARM64: Add guest PMU support

2015-09-16 Thread Wei Huang


On 09/11/2015 03:54 AM, Shannon Zhao wrote:
> From: Shannon Zhao 
> 
> This patchset adds guest PMU support for KVM on ARM64. It takes
> trap-and-emulate approach. When guest wants to monitor one event, it
> will be trapped by KVM and KVM will call perf_event API to create a perf
> event and call relevant perf_event APIs to get the count value of event.
> 
> Use perf to test this patchset in guest. When using "perf list", it
> shows the list of the hardware events and hardware cache events perf
> supports. Then use "perf stat -e EVENT" to monitor some event. For
> example, use "perf stat -e cycles" to count cpu cycles and
> "perf stat -e cache-misses" to count cache misses.
> 
> Below are the outputs of "perf stat -r 5 sleep 5" when running in host
> and guest.
> 
> Host:
>  Performance counter stats for 'sleep 5' (5 runs):
> 
>   0.551428  task-clock (msec) #0.000 CPUs utilized
> ( +-  0.91% )
>  1  context-switches  #0.002 M/sec
>  0  cpu-migrations#0.000 K/sec
> 48  page-faults   #0.088 M/sec
> ( +-  1.05% )
>1150265  cycles#2.086 GHz  
> ( +-  0.92% )
>  stalled-cycles-frontend
>  stalled-cycles-backend
> 526398  instructions  #0.46  insns per cycle  
> ( +-  0.89% )
>  branches
>   9485  branch-misses #   17.201 M/sec
> ( +-  2.35% )
> 
>5.000831616 seconds time elapsed   
>( +-  0.00% )
> 
> Guest:
>  Performance counter stats for 'sleep 5' (5 runs):
> 
>   0.730868  task-clock (msec) #0.000 CPUs utilized
> ( +-  1.13% )
>  1  context-switches  #0.001 M/sec
>  0  cpu-migrations#0.000 K/sec
> 48  page-faults   #0.065 M/sec
> ( +-  0.42% )
>1642982  cycles#2.248 GHz  
> ( +-  1.04% )
>  stalled-cycles-frontend
>  stalled-cycles-backend
> 637964  instructions  #0.39  insns per cycle  
> ( +-  0.65% )
>  branches
>  10377  branch-misses #   14.198 M/sec
> ( +-  1.09% )
> 
>5.001289068 seconds time elapsed   
>( +-  0.00% )
> 
> This patchset can be fetched from [1] and the relevant QEMU version for
> test can be fetched from [2].
> 
> Thanks,
> Shannon
> 
> [1] https://git.linaro.org/people/shannon.zhao/linux-mainline.git  
> KVM_ARM64_PMU_v2
> [2] https://git.linaro.org/people/shannon.zhao/qemu.git  PMU_v2

I am testing this series. The first question is: do you plan to add ACPI
support in QEMU? My in-house kernel uses ACPI for device probing, so I had
to force "acpi=off" when testing this patch series.

> 
> Shannon Zhao (22):
>   ARM64: Move PMU register related defines to asm/pmu.h
>   KVM: ARM64: Define PMU data structure for each vcpu
>   KVM: ARM64: Add offset defines for PMU registers
>   KVM: ARM64: Add reset and access handlers for PMCR_EL0 register
>   KVM: ARM64: Add a helper for CP15 registers reset to UNKNOWN
>   KVM: ARM64: Add reset and access handlers for PMSELR register
>   KVM: ARM64: Add reset and access handlers for PMCEID0 and PMCEID1
> register
>   KVM: ARM64: PMU: Add perf event map and introduce perf event creating
> function
>   KVM: ARM64: Add reset and access handlers for PMXEVTYPER register
>   KVM: ARM64: Add reset and access handlers for PMXEVCNTR register
>   KVM: ARM64: Add reset and access handlers for PMCCNTR register
>   KVM: ARM64: Add reset and access handlers for PMCNTENSET and
> PMCNTENCLR register
>   KVM: ARM64: Add reset and access handlers for PMINTENSET and
> PMINTENCLR register
>   KVM: ARM64: Add reset and access handlers for PMOVSSET and PMOVSCLR
> register
>   KVM: ARM64: Add a helper for CP15 registers reset to specified value
>   KVM: ARM64: Add reset and access handlers for PMUSERENR register
>   KVM: ARM64: Add reset and access handlers for PMSWINC register
>   KVM: ARM64: Add access handlers for PMEVCNTRn and PMEVTYPERn register
>   KVM: ARM64: Add PMU overflow interrupt routing
>   KVM: ARM64: Reset PMU state when resetting vcpu
>   KVM: ARM64: Free perf event of PMU when destroying vcpu
>   KVM: ARM64: Add a new kvm ARM PMU device
> 
>  Documentation/virtual/kvm/devices/arm-pmu.txt |  15 +
>  arch/arm/kvm/arm.c|   4 +
>  arch/arm64/include/asm/kvm_asm.h  |  59 ++-
>  arch/arm64/include/asm/kvm_host.h |   2 +
>  arch/arm64/include/asm/pmu.h  |  49 +++
>  arch/arm64/include/uapi/asm/kvm.h 

[PATCH V2 5/5] kvm: arm64: Implement ACPI probing code for GICv3

2015-06-09 Thread Wei Huang
This patch enables ACPI support for the KVM virtual GICv3. KVM parses the
ACPI table for virtual GIC information and initializes resources.

Signed-off-by: Wei Huang w...@redhat.com
---
 virt/kvm/arm/vgic-v3.c | 40 +++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 99d0f9f..2e4df78 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -292,6 +292,44 @@ int vgic_v3_acpi_probe(struct acpi_madt_generic_interrupt *vgic_acpi,
   const struct vgic_ops **ops,
   const struct vgic_params **params)
 {
-   return -EINVAL;
+   int ret = 0;
+   struct vgic_params *vgic = &vgic_v3_params;
+   int irq_mode;
+
+   /* IRQ trigger mode */
+   irq_mode = (vgic_acpi->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
+   ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE;
+   vgic->maint_irq = acpi_register_gsi(NULL, vgic_acpi->vgic_interrupt,
+   irq_mode, ACPI_ACTIVE_HIGH);
+   if (!vgic->maint_irq) {
+   kvm_err("Cannot register VGIC ACPI maintenance irq\n");
+   ret = -ENXIO;
+   goto out;
+   }
+
+   ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
+   vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
+   vgic->can_emulate_gicv2 = false;
+
+   vgic->vcpu_base = vgic_acpi->gicv_base_address;
+
+   if (vgic->vcpu_base == 0)
+   kvm_info("disabling GICv2 emulation\n");
+   else {
+   vgic->can_emulate_gicv2 = true;
+   kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
+   KVM_DEV_TYPE_ARM_VGIC_V2);
+   }
+
+   kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3);
+
+   vgic->vctrl_base = NULL;
+   vgic->type = VGIC_V3;
+   vgic->max_gic_vcpus = KVM_MAX_VCPUS;
+
+   *ops = &vgic_v3_ops;
+   *params = vgic;
+out:
+   return ret;
 }
 #endif /* CONFIG_ACPI */
-- 
1.8.3.1



[PATCH V2 3/5] kvm: arm64: Detect GIC version for proper ACPI vGIC probing

2015-06-09 Thread Wei Huang
There are two GICs (GICv2 and GICv3) supported by KVM, so it is necessary
to find out the GIC version before calling the ACPI probing functions
defined in vgic-v2.c and vgic-v3.c.

This patch detects the GIC version by checking the gic_version field of the
GIC distributor entry, which has been defined since ACPI 6.0. In case of
ACPI 5.1, we fall back to manual hardware discovery to find out the GIC
version.

NOTE: This patch is based on a recent patch by Hanjun Guo.
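As an illustration of the ACPI 5.1 fallback idea (a sketch only; the helper
names are mine and this is not necessarily what the truncated part of the
patch does), the presence of GIC redistributor (GICR) entries in the MADT
can serve as a hint that the GIC is v3 rather than v2:

static int match_gicr(struct acpi_subtable_header *header,
		      const unsigned long end)
{
	/* we only care whether any redistributor entry exists */
	return 0;
}

static bool madt_has_gic_redistributor(void)
{
	/* acpi_table_parse_madt() returns the number of matching entries */
	return acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR,
				     match_gicr, 0) > 0;
}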

Signed-off-by: Hanjun Guo hanjun@linaro.org
Signed-off-by: Wei Huang w...@redhat.com
---
 include/kvm/arm_vgic.h |  18 +
 virt/kvm/arm/vgic-v2.c |  10 +
 virt/kvm/arm/vgic-v3.c |  10 +
 virt/kvm/arm/vgic.c| 100 -
 4 files changed, 137 insertions(+), 1 deletion(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 3ee732a..7a44b08 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -24,6 +24,7 @@
 #include <linux/irqreturn.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
+#include <linux/acpi.h>
 #include <kvm/iodev.h>
 
 #define VGIC_NR_IRQS_LEGACY	256
@@ -335,10 +336,18 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
 int vgic_v2_dt_probe(struct device_node *vgic_node,
 const struct vgic_ops **ops,
 const struct vgic_params **params);
+#ifdef CONFIG_ACPI
+int vgic_v2_acpi_probe(struct acpi_madt_generic_interrupt *,
+  const struct vgic_ops **ops,
+  const struct vgic_params **params);
+#endif /* CONFIG_ACPI */
 #ifdef CONFIG_ARM_GIC_V3
 int vgic_v3_dt_probe(struct device_node *vgic_node,
 const struct vgic_ops **ops,
 const struct vgic_params **params);
+int vgic_v3_acpi_probe(struct acpi_madt_generic_interrupt *,
+  const struct vgic_ops **ops,
+  const struct vgic_params **params);
 #else
 static inline int vgic_v3_dt_probe(struct device_node *vgic_node,
   const struct vgic_ops **ops,
@@ -346,6 +355,15 @@ static inline int vgic_v3_dt_probe(struct device_node *vgic_node,
 {
return -ENODEV;
 }
+
+#ifdef CONFIG_ACPI
+int vgic_v3_acpi_probe(struct acpi_madt_generic_interrupt *,
+  const struct vgic_ops **ops,
+  const struct vgic_params **params)
+{
+   return -ENODEV;
+}
+#endif /* CONFIG_ACPI */
 #endif
 
 #endif
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 295996f..711de82 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -23,6 +23,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/acpi.h>
 
 #include <linux/irqchip/arm-gic.h>
 
@@ -257,3 +258,12 @@ out:
of_node_put(vgic_node);
return ret;
 }
+
+#ifdef CONFIG_ACPI
+int vgic_v2_acpi_probe(struct acpi_madt_generic_interrupt *vgic_acpi,
+  const struct vgic_ops **ops,
+  const struct vgic_params **params)
+{
+   return -EINVAL;
+}
+#endif /* CONFIG_ACPI */
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 91814e2..99d0f9f 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -23,6 +23,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/acpi.h>
 
 #include <linux/irqchip/arm-gic-v3.h>
 
@@ -285,3 +286,12 @@ out:
of_node_put(vgic_node);
return ret;
 }
+
+#ifdef CONFIG_ACPI
+int vgic_v3_acpi_probe(struct acpi_madt_generic_interrupt *vgic_acpi,
+  const struct vgic_ops **ops,
+  const struct vgic_params **params)
+{
+   return -EINVAL;
+}
+#endif /* CONFIG_ACPI */
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index b4010f0..cd09877 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -28,6 +28,7 @@
 #include <linux/acpi.h>
 
 #include <linux/irqchip/arm-gic.h>
+#include <linux/irqchip/arm-gic-v3.h>
 
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_arm.h>
@@ -2114,9 +2115,106 @@ static int kvm_vgic_dt_probe(void)
 }
 
 #ifdef CONFIG_ACPI
+u8 gic_version = ACPI_MADT_GIC_VER_UNKNOWN;
+phys_addr_t dist_phy_base;
+static struct acpi_madt_generic_interrupt *vgic_acpi;
+
+static void gic_get_acpi_header(struct acpi_subtable_header *header)
+{
+   vgic_acpi = (struct acpi_madt_generic_interrupt *)header;
+}
+
+static int gic_parse_distributor(struct acpi_subtable_header *header,
+const unsigned long end)
+{
+   struct acpi_madt_generic_distributor *dist;
+
+   dist = (struct acpi_madt_generic_distributor *)header;
+
+   if (BAD_MADT_ENTRY(dist, end))
+   return -EINVAL;
+
+   gic_version = dist->gic_version;
+   dist_phy_base = dist->base_address;
+
+   return 0;
+}
+
+static int gic_match_redist(struct acpi_subtable_header *header,
+   const unsigned long end)
+{
+   return 0;
+}
+
+static bool gic_redist_is_present

[PATCH V2 4/5] kvm: arm64: Implement ACPI probing code for GICv2

2015-06-09 Thread Wei Huang
This patch enables ACPI support for the KVM virtual GICv2. KVM parses the
ACPI table for virtual GIC information and initializes resources.

Signed-off-by: Alexander Spyridaki a.spyrida...@virtualopensystems.com
Signed-off-by: Wei Huang w...@redhat.com
---
 virt/kvm/arm/vgic-v2.c | 50 +-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 711de82..f6e56e9 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -264,6 +264,54 @@ int vgic_v2_acpi_probe(struct acpi_madt_generic_interrupt *vgic_acpi,
   const struct vgic_ops **ops,
   const struct vgic_params **params)
 {
-   return -EINVAL;
+   struct vgic_params *vgic = &vgic_v2_params;
+   int irq_mode, ret;
+
+   /* IRQ trigger mode */
+   irq_mode = (vgic_acpi->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
+   ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE;
+   vgic->maint_irq = acpi_register_gsi(NULL, vgic_acpi->vgic_interrupt,
+   irq_mode, ACPI_ACTIVE_HIGH);
+   if (!vgic->maint_irq) {
+   kvm_err("Cannot register VGIC ACPI maintenance irq\n");
+   ret = -ENXIO;
+   goto out;
+   }
+
+   /* GICH resource */
+   vgic->vctrl_base = ioremap(vgic_acpi->gich_base_address, SZ_8K);
+   if (!vgic->vctrl_base) {
+   kvm_err("cannot ioremap GICH memory\n");
+   ret = -ENOMEM;
+   goto out;
+   }
+
+   vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR);
+   vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
+
+   ret = create_hyp_io_mappings(vgic->vctrl_base,
+vgic->vctrl_base + SZ_8K,
+vgic_acpi->gich_base_address);
+   if (ret) {
+   kvm_err("Cannot map GICH into hyp\n");
+   goto out;
+   }
+
+   vgic->vcpu_base = vgic_acpi->gicv_base_address;
+   vgic->can_emulate_gicv2 = true;
+   kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
+
+   kvm_info("GICH base=0x%llx, GICV base=0x%llx, IRQ=%d\n",
+(unsigned long long)vgic_acpi->gich_base_address,
+(unsigned long long)vgic_acpi->gicv_base_address,
+vgic->maint_irq);
+
+   vgic->type = VGIC_V2;
+   vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;
+   *ops = &vgic_v2_ops;
+   *params = vgic;
+
+out:
+   return ret;
 }
 #endif /* CONFIG_ACPI */
-- 
1.8.3.1



[PATCH V2 1/5] kvm: arm64: Enable ACPI support for virt arch timer

2015-06-09 Thread Wei Huang
This patch enables ACPI support for the KVM virtual arch timer. It allows
KVM to parse the ACPI table for the arch timer PPI when a DT table is not
present.

Signed-off-by: Alexander Spyridaki a.spyrida...@virtualopensystems.com
Signed-off-by: Wei Huang w...@redhat.com
---
 virt/kvm/arm/arch_timer.c | 75 +++
 1 file changed, 62 insertions(+), 13 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 98c95f2..a9da75a 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -21,6 +21,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>
+#include <linux/acpi.h>
 
 #include <clocksource/arm_arch_timer.h>
 #include <asm/arch_timer.h>
@@ -274,9 +275,57 @@ static const struct of_device_id arch_timer_of_match[] = {
{},
 };
 
-int kvm_timer_hyp_init(void)
+static int kvm_timer_ppi_dt_parse(unsigned int *ppi)
 {
struct device_node *np;
+
+   np = of_find_matching_node(NULL, arch_timer_of_match);
+   if (!np)
+   return -ENODEV;
+
+   *ppi = irq_of_parse_and_map(np, 2);
+   if (*ppi == 0) {
+   of_node_put(np);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+#ifdef CONFIG_ACPI
+struct acpi_table_gtdt *gtdt_acpi;
+static void arch_timer_acpi_parse(struct acpi_table_header *table)
+{
+   gtdt_acpi = container_of(table, struct acpi_table_gtdt, header);
+}
+
+static int kvm_timer_ppi_acpi_parse(unsigned int *ppi)
+{
+   u32 flags;
+   int trigger, polarity;
+
+   /* Get the interrupt number from the GTDT table */
+   acpi_table_parse(ACPI_SIG_GTDT,
+(acpi_tbl_table_handler)arch_timer_acpi_parse);
+
+   if (!gtdt_acpi->virtual_timer_interrupt)
+   return -EINVAL;
+
+   flags = gtdt_acpi->virtual_timer_flags;
+   trigger = (flags & ACPI_GTDT_INTERRUPT_MODE) ? ACPI_EDGE_SENSITIVE
+   : ACPI_LEVEL_SENSITIVE;
+   polarity = (flags & ACPI_GTDT_INTERRUPT_POLARITY) ? ACPI_ACTIVE_LOW
+   : ACPI_ACTIVE_HIGH;
+
+   *ppi = acpi_register_gsi(NULL, gtdt_acpi->virtual_timer_interrupt,
+trigger, polarity);
+
+   return 0;
+}
+#endif
+
+int kvm_timer_hyp_init(void)
+{
unsigned int ppi;
int err;
 
@@ -284,19 +333,20 @@ int kvm_timer_hyp_init(void)
if (!timecounter)
return -ENODEV;
 
-   np = of_find_matching_node(NULL, arch_timer_of_match);
-   if (!np) {
-   kvm_err("kvm_arch_timer: can't find DT node\n");
-   return -ENODEV;
-   }
+   /* PPI parsing: try DT first, then ACPI */
+   err = kvm_timer_ppi_dt_parse(ppi);
+#ifdef CONFIG_ACPI
+   if (err && !acpi_disabled)
+   err = kvm_timer_ppi_acpi_parse(ppi);
+#endif
 
-   ppi = irq_of_parse_and_map(np, 2);
-   if (!ppi) {
-   kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
-   err = -EINVAL;
-   goto out;
+   if (err) {
+   kvm_err("kvm_arch_timer: can't find virtual timer info or "
+   "config virtual timer interrupt\n");
+   return err;
}
 
+   /* configure IRQ handler */
err = request_percpu_irq(ppi, kvm_arch_timer_handler,
 "kvm guest timer", kvm_get_running_vcpus());
if (err) {
@@ -319,14 +369,13 @@ int kvm_timer_hyp_init(void)
goto out_free;
}
 
-   kvm_info("%s IRQ%d\n", np->name, ppi);
+   kvm_info("timer IRQ%d\n", ppi);
on_each_cpu(kvm_timer_init_interrupt, NULL, 1);
 
goto out;
 out_free:
free_percpu_irq(ppi, kvm_get_running_vcpus());
 out:
-   of_node_put(np);
return err;
 }
 
-- 
1.8.3.1



Re: [PATCH V1 4/5] kvm: arm64: Implement ACPI probing code for GICv2

2015-05-29 Thread Wei Huang


On 05/29/2015 09:06 AM, Andrew Jones wrote:
 On Thu, May 28, 2015 at 01:34:33AM -0400, Wei Huang wrote:
 This patches enables ACPI support for KVM virtual GICv2. KVM parses
 ACPI table for virt GIC related information and initializes resources.

 Signed-off-by: Alexander Spyridaki a.spyrida...@virtualopensystems.com
 Signed-off-by: Wei Huang w...@redhat.com
 ---
  virt/kvm/arm/vgic-v2.c | 49 
 -
  1 file changed, 48 insertions(+), 1 deletion(-)

 diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
 index 711de82..01ce8a3 100644
 --- a/virt/kvm/arm/vgic-v2.c
 +++ b/virt/kvm/arm/vgic-v2.c
 @@ -264,6 +264,53 @@ int vgic_v2_acpi_probe(struct acpi_madt_generic_interrupt *vgic_acpi,
 const struct vgic_ops **ops,
 const struct vgic_params **params)
  {
 -return -EINVAL;
 +struct vgic_params *vgic = &vgic_v2_params;
 +int irq_mode, ret;
 +
 +/* IRQ trigger mode */
 +irq_mode = (vgic_acpi->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
 +ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE;
 +vgic->maint_irq = acpi_register_gsi(NULL, vgic_acpi->vgic_interrupt,
 +irq_mode, ACPI_ACTIVE_HIGH);
 +if (!vgic->maint_irq) {
 +kvm_err("Cannot register VGIC ACPI maintenance irq\n");
 +ret = -ENXIO;
 +goto out;
 +}
 +
 +/* GICH resource */
 +vgic->vctrl_base = ioremap(vgic_acpi->gich_base_address, SZ_8K);
 +if (!vgic->vctrl_base) {
 +kvm_err("cannot ioremap GICH memory\n");
 +ret = -ENOMEM;
 +goto out;
 +}
 +
 +vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR);
 +vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
 +
 +ret = create_hyp_io_mappings(vgic->vctrl_base,
 + vgic->vctrl_base + SZ_8K,
 + vgic_acpi->gich_base_address);
 +if (ret) {
 +kvm_err("Cannot map GICH into hyp\n");
 +goto out;
 +}
 +
 +vgic->vcpu_base = vgic_acpi->gicv_base_address;
 +vgic->can_emulate_gicv2 = true;
 +kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
 +
 +kvm_info("GICH base=0x%llx, GICV base=0x%llx, IRQ=%d\n",
 + (unsigned long long)vgic_acpi->gich_base_address,
 + (unsigned long long)vgic_acpi->gicv_base_address,
 + vgic->maint_irq);
 +
 +vgic->type = VGIC_V2;
 
 we're missing max_gic_vcpus here
 
   vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;

Yes. Will fix in the next spin.

-Wei

 
 +*ops = &vgic_v2_ops;
 +*params = vgic;
 +
 +out:
 +return ret;
  }
  #endif /* CONFIG_ACPI */
 -- 
 1.8.3.1



[PATCH V1 2/5] kvm: arm64: Dispatch virt GIC probing to device tree and ACPI

2015-05-27 Thread Wei Huang
This patch creates a dispatch function to support virt GIC probing
in both device tree (DT) and ACPI environments. kvm_vgic_hyp_init()
will probe DT first; if that fails, it will try ACPI, as sketched below.
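To illustrate the idea (a minimal sketch only; the function names here are
approximate and not lifted verbatim from the diff below):

static int kvm_vgic_probe(void)
{
	int ret;

	/* Device tree first; fall back to ACPI only if DT probing fails. */
	ret = kvm_vgic_dt_probe();
#ifdef CONFIG_ACPI
	if (ret && !acpi_disabled)
		ret = kvm_vgic_acpi_probe();
#endif
	return ret;
}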

Signed-off-by: Wei Huang w...@redhat.com
---
 include/kvm/arm_vgic.h | 18 +-
 virt/kvm/arm/vgic-v2.c |  8 
 virt/kvm/arm/vgic-v3.c |  8 
 virt/kvm/arm/vgic.c| 42 +++---
 4 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 133ea00..3ee732a 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -332,17 +332,17 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
 #define vgic_initialized(k)(!!((k)->arch.vgic.nr_cpus))
 #define vgic_ready(k)  ((k)->arch.vgic.ready)
 
-int vgic_v2_probe(struct device_node *vgic_node,
- const struct vgic_ops **ops,
- const struct vgic_params **params);
+int vgic_v2_dt_probe(struct device_node *vgic_node,
+const struct vgic_ops **ops,
+const struct vgic_params **params);
 #ifdef CONFIG_ARM_GIC_V3
-int vgic_v3_probe(struct device_node *vgic_node,
- const struct vgic_ops **ops,
- const struct vgic_params **params);
+int vgic_v3_dt_probe(struct device_node *vgic_node,
+const struct vgic_ops **ops,
+const struct vgic_params **params);
 #else
-static inline int vgic_v3_probe(struct device_node *vgic_node,
-   const struct vgic_ops **ops,
-   const struct vgic_params **params)
+static inline int vgic_v3_dt_probe(struct device_node *vgic_node,
+  const struct vgic_ops **ops,
+  const struct vgic_params **params)
 {
return -ENODEV;
 }
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index f9b9c7c..295996f 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -167,7 +167,7 @@ static const struct vgic_ops vgic_v2_ops = {
 static struct vgic_params vgic_v2_params;
 
 /**
- * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
+ * vgic_v2_dt_probe - probe for a GICv2 compatible interrupt controller in DT
  * @node:  pointer to the DT node
  * @ops:   address of a pointer to the GICv2 operations
  * @params:address of a pointer to HW-specific parameters
@@ -176,9 +176,9 @@ static struct vgic_params vgic_v2_params;
  * in *ops and the HW parameters in *params. Returns an error code
  * otherwise.
  */
-int vgic_v2_probe(struct device_node *vgic_node,
- const struct vgic_ops **ops,
- const struct vgic_params **params)
+int vgic_v2_dt_probe(struct device_node *vgic_node,
+const struct vgic_ops **ops,
+const struct vgic_params **params)
 {
int ret;
struct resource vctrl_res;
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index dff0602..91814e2 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -211,7 +211,7 @@ static const struct vgic_ops vgic_v3_ops = {
 static struct vgic_params vgic_v3_params;
 
 /**
- * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
+ * vgic_v3_dt_probe - probe for a GICv3 compatible interrupt controller in DT
  * @node:  pointer to the DT node
  * @ops:   address of a pointer to the GICv3 operations
  * @params:address of a pointer to HW-specific parameters
@@ -220,9 +220,9 @@ static struct vgic_params vgic_v3_params;
  * in *ops and the HW parameters in *params. Returns an error code
  * otherwise.
  */
-int vgic_v3_probe(struct device_node *vgic_node,
- const struct vgic_ops **ops,
- const struct vgic_params **params)
+int vgic_v3_dt_probe(struct device_node *vgic_node,
+const struct vgic_ops **ops,
+const struct vgic_params **params)
 {
int ret = 0;
u32 gicv_idx;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 78fb820..b4010f0 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -25,6 +25,7 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/uaccess.h>
+#include <linux/acpi.h>
 
 #include <linux/irqchip/arm-gic.h>
 
@@ -2088,32 +2089,51 @@ static struct notifier_block vgic_cpu_nb = {
 };
 
 static const struct of_device_id vgic_ids[] = {
-   { .compatible = "arm,cortex-a15-gic",   .data = vgic_v2_probe, },
-   { .compatible = "arm,cortex-a7-gic",.data = vgic_v2_probe, },
-   { .compatible = "arm,gic-400",  .data = vgic_v2_probe, },
-   { .compatible = "arm,gic-v3",   .data = vgic_v3_probe, },
+   { .compatible = "arm,cortex-a15-gic",   .data = vgic_v2_dt_probe, },
+   { .compatible = "arm,cortex-a7-gic",.data = vgic_v2_dt_probe, },
+   { .compatible = "arm,gic-400",  .data

[PATCH V1 0/5] Enable ACPI support for KVM ARM

2015-05-27 Thread Wei Huang
Initial ACPI support for ARM64 was accepted into the Linux kernel recently,
so now is a good time to revisit ACPI support for KVM. This patchset
enables ACPI for both the arch_timer and the vGIC by probing the related
ACPI tables and doing the necessary initialization.

Note that Alexander Spyridaki submitted similar patches before. Some of
his ideas were borrowed in this patchset, but with substantial changes.
In addition we extend support to both GICv2 and GICv3.

This patchset works better on top of the recent GIC/IRQCHIP patches by
Hanjun Guo, who added support for gic_version in the ACPI struct of the
GIC distributor (search for "ACPICA: Introduce GIC version for arm based
system").

This patchset can be applied cleanly on top of Linux 4.1-rc1.

Wei Huang (5):
  kvm: arm64: Enable ACPI support for virt arch timer
  kvm: arm64: Dispatch virt GIC probing to device tree and ACPI
  kvm: arm64: Detect GIC version for proper ACPI vGIC probing
  kvm: arm64: Implement ACPI probing code for GICv2
  kvm: arm64: Implement ACPI probing code for GICv3

 include/kvm/arm_vgic.h|  36 +---
 virt/kvm/arm/arch_timer.c |  64 -
 virt/kvm/arm/vgic-v2.c|  65 +++--
 virt/kvm/arm/vgic-v3.c|  56 +--
 virt/kvm/arm/vgic.c   | 140 ++
 5 files changed, 320 insertions(+), 41 deletions(-)

-- 
1.8.3.1



[PATCH V1 4/5] kvm: arm64: Implement ACPI probing code for GICv2

2015-05-27 Thread Wei Huang
This patch enables ACPI support for the KVM virtual GICv2. KVM parses the
ACPI table for virtual GIC information and initializes resources.

Signed-off-by: Alexander Spyridaki a.spyrida...@virtualopensystems.com
Signed-off-by: Wei Huang w...@redhat.com
---
 virt/kvm/arm/vgic-v2.c | 49 -
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 711de82..01ce8a3 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -264,6 +264,53 @@ int vgic_v2_acpi_probe(struct acpi_madt_generic_interrupt *vgic_acpi,
   const struct vgic_ops **ops,
   const struct vgic_params **params)
 {
-   return -EINVAL;
+   struct vgic_params *vgic = &vgic_v2_params;
+   int irq_mode, ret;
+
+   /* IRQ trigger mode */
+   irq_mode = (vgic_acpi->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
+   ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE;
+   vgic->maint_irq = acpi_register_gsi(NULL, vgic_acpi->vgic_interrupt,
+   irq_mode, ACPI_ACTIVE_HIGH);
+   if (!vgic->maint_irq) {
+   kvm_err("Cannot register VGIC ACPI maintenance irq\n");
+   ret = -ENXIO;
+   goto out;
+   }
+
+   /* GICH resource */
+   vgic->vctrl_base = ioremap(vgic_acpi->gich_base_address, SZ_8K);
+   if (!vgic->vctrl_base) {
+   kvm_err("cannot ioremap GICH memory\n");
+   ret = -ENOMEM;
+   goto out;
+   }
+
+   vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR);
+   vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
+
+   ret = create_hyp_io_mappings(vgic->vctrl_base,
+vgic->vctrl_base + SZ_8K,
+vgic_acpi->gich_base_address);
+   if (ret) {
+   kvm_err("Cannot map GICH into hyp\n");
+   goto out;
+   }
+
+   vgic->vcpu_base = vgic_acpi->gicv_base_address;
+   vgic->can_emulate_gicv2 = true;
+   kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
+
+   kvm_info("GICH base=0x%llx, GICV base=0x%llx, IRQ=%d\n",
+(unsigned long long)vgic_acpi->gich_base_address,
+(unsigned long long)vgic_acpi->gicv_base_address,
+vgic->maint_irq);
+
+   vgic->type = VGIC_V2;
+   *ops = &vgic_v2_ops;
+   *params = vgic;
+
+out:
+   return ret;
 }
 #endif /* CONFIG_ACPI */
-- 
1.8.3.1



[PATCH V1 3/5] kvm: arm64: Detect GIC version for proper ACPI vGIC probing

2015-05-27 Thread Wei Huang
There are two GICs (GICv2 and GICv3) supported by KVM, so it is necessary
to find out the GIC version before calling the ACPI probing functions
defined in vgic-v2.c and vgic-v3.c.

This patch detects the GIC version by checking the gic_version field of the
GIC distributor entry, which has been defined since ACPI 6.0. In case of
ACPI 5.1, we fall back to manual hardware discovery to find out the GIC
version.

NOTE: This patch is based on a recent patch by Hanjun Guo.

Signed-off-by: Hanjun Guo hanjun@linaro.org
Signed-off-by: Wei Huang w...@redhat.com
---
 include/kvm/arm_vgic.h |  18 +
 virt/kvm/arm/vgic-v2.c |  10 +
 virt/kvm/arm/vgic-v3.c |  10 +
 virt/kvm/arm/vgic.c| 100 -
 4 files changed, 137 insertions(+), 1 deletion(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 3ee732a..7a44b08 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -24,6 +24,7 @@
 #include <linux/irqreturn.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
+#include <linux/acpi.h>
 #include <kvm/iodev.h>
 
 #define VGIC_NR_IRQS_LEGACY	256
@@ -335,10 +336,18 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
 int vgic_v2_dt_probe(struct device_node *vgic_node,
 const struct vgic_ops **ops,
 const struct vgic_params **params);
+#ifdef CONFIG_ACPI
+int vgic_v2_acpi_probe(struct acpi_madt_generic_interrupt *,
+  const struct vgic_ops **ops,
+  const struct vgic_params **params);
+#endif /* CONFIG_ACPI */
 #ifdef CONFIG_ARM_GIC_V3
 int vgic_v3_dt_probe(struct device_node *vgic_node,
 const struct vgic_ops **ops,
 const struct vgic_params **params);
+int vgic_v3_acpi_probe(struct acpi_madt_generic_interrupt *,
+  const struct vgic_ops **ops,
+  const struct vgic_params **params);
 #else
 static inline int vgic_v3_dt_probe(struct device_node *vgic_node,
   const struct vgic_ops **ops,
@@ -346,6 +355,15 @@ static inline int vgic_v3_dt_probe(struct device_node *vgic_node,
 {
return -ENODEV;
 }
+
+#ifdef CONFIG_ACPI
+int vgic_v3_acpi_probe(struct acpi_madt_generic_interrupt *,
+  const struct vgic_ops **ops,
+  const struct vgic_params **params)
+{
+   return -ENODEV;
+}
+#endif /* CONFIG_ACPI */
 #endif
 
 #endif
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 295996f..711de82 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -23,6 +23,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/acpi.h>
 
 #include <linux/irqchip/arm-gic.h>
 
@@ -257,3 +258,12 @@ out:
of_node_put(vgic_node);
return ret;
 }
+
+#ifdef CONFIG_ACPI
+int vgic_v2_acpi_probe(struct acpi_madt_generic_interrupt *vgic_acpi,
+  const struct vgic_ops **ops,
+  const struct vgic_params **params)
+{
+   return -EINVAL;
+}
+#endif /* CONFIG_ACPI */
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 91814e2..99d0f9f 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -23,6 +23,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/acpi.h>
 
 #include <linux/irqchip/arm-gic-v3.h>
 
@@ -285,3 +286,12 @@ out:
of_node_put(vgic_node);
return ret;
 }
+
+#ifdef CONFIG_ACPI
+int vgic_v3_acpi_probe(struct acpi_madt_generic_interrupt *vgic_acpi,
+  const struct vgic_ops **ops,
+  const struct vgic_params **params)
+{
+   return -EINVAL;
+}
+#endif /* CONFIG_ACPI */
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index b4010f0..cd09877 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -28,6 +28,7 @@
 #include <linux/acpi.h>
 
 #include <linux/irqchip/arm-gic.h>
+#include <linux/irqchip/arm-gic-v3.h>
 
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_arm.h>
@@ -2114,9 +2115,106 @@ static int kvm_vgic_dt_probe(void)
 }
 
 #ifdef CONFIG_ACPI
+u8 gic_version = ACPI_MADT_GIC_VER_UNKNOWN;
+phys_addr_t dist_phy_base;
+static struct acpi_madt_generic_interrupt *vgic_acpi;
+
+static void gic_get_acpi_header(struct acpi_subtable_header *header)
+{
+   vgic_acpi = (struct acpi_madt_generic_interrupt *)header;
+}
+
+static int gic_parse_distributor(struct acpi_subtable_header *header,
+const unsigned long end)
+{
+   struct acpi_madt_generic_distributor *dist;
+
+   dist = (struct acpi_madt_generic_distributor *)header;
+
+   if (BAD_MADT_ENTRY(dist, end))
+   return -EINVAL;
+
+   gic_version = dist->gic_version;
+   dist_phy_base = dist->base_address;
+
+   return 0;
+}
+
+static int gic_match_redist(struct acpi_subtable_header *header,
+   const unsigned long end)
+{
+   return 0;
+}
+
+static bool gic_redist_is_present

[PATCH V1 1/5] kvm: arm64: Enable ACPI support for virt arch timer

2015-05-27 Thread Wei Huang
This patch enables ACPI support for the KVM virtual arch timer. It allows
KVM to parse the ACPI table for the arch timer PPI when a DT table is not
present.

Signed-off-by: Alexander Spyridaki a.spyrida...@virtualopensystems.com
Signed-off-by: Wei Huang w...@redhat.com
---
 virt/kvm/arm/arch_timer.c | 64 +--
 1 file changed, 51 insertions(+), 13 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 98c95f2..7da9eb3 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -21,6 +21,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>
+#include <linux/acpi.h>
 
 #include <clocksource/arm_arch_timer.h>
 #include <asm/arch_timer.h>
@@ -274,9 +275,46 @@ static const struct of_device_id arch_timer_of_match[] = {
{},
 };
 
-int kvm_timer_hyp_init(void)
+static int kvm_timer_ppi_dt_parse(unsigned int *ppi)
 {
struct device_node *np;
+
+   np = of_find_matching_node(NULL, arch_timer_of_match);
+   if (!np)
+   return -ENODEV;
+
+   *ppi = irq_of_parse_and_map(np, 2);
+   if (*ppi == 0) {
+   of_node_put(np);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+#ifdef CONFIG_ACPI
+struct acpi_table_gtdt *gtdt_acpi;
+static void arch_timer_acpi_parse(struct acpi_table_header *table)
+{
+   gtdt_acpi = container_of(table, struct acpi_table_gtdt, header);
+}
+
+static int kvm_timer_ppi_acpi_parse(unsigned int *ppi)
+{
+   /* Get the interrupt number from the GTDT table */
+   acpi_table_parse(ACPI_SIG_GTDT,
+(acpi_tbl_table_handler)arch_timer_acpi_parse);
+
+   if (!gtdt_acpi->virtual_timer_interrupt)
+   return -EINVAL;
+
+   *ppi = gtdt_acpi->virtual_timer_interrupt;
+   return 0;
+}
+#endif
+
+int kvm_timer_hyp_init(void)
+{
unsigned int ppi;
int err;
 
@@ -284,19 +322,20 @@ int kvm_timer_hyp_init(void)
if (!timecounter)
return -ENODEV;
 
-   np = of_find_matching_node(NULL, arch_timer_of_match);
-   if (!np) {
-   kvm_err("kvm_arch_timer: can't find DT node\n");
-   return -ENODEV;
-   }
+   /* PPI parsing: try DT first, then ACPI */
+   err = kvm_timer_ppi_dt_parse(ppi);
+#ifdef CONFIG_ACPI
+   if (err && !acpi_disabled)
+   err = kvm_timer_ppi_acpi_parse(ppi);
+#endif
 
-   ppi = irq_of_parse_and_map(np, 2);
-   if (!ppi) {
-   kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
-   err = -EINVAL;
-   goto out;
+   if (err) {
+   kvm_err("kvm_arch_timer: can't find virtual timer info or "
+   "config virtual timer interrupt\n");
+   return err;
}
 
+   /* configure IRQ handler */
err = request_percpu_irq(ppi, kvm_arch_timer_handler,
 "kvm guest timer", kvm_get_running_vcpus());
if (err) {
@@ -319,14 +358,13 @@ int kvm_timer_hyp_init(void)
goto out_free;
}
 
-   kvm_info("%s IRQ%d\n", np->name, ppi);
+   kvm_info("timer IRQ%d\n", ppi);
on_each_cpu(kvm_timer_init_interrupt, NULL, 1);
 
goto out;
 out_free:
free_percpu_irq(ppi, kvm_get_running_vcpus());
 out:
-   of_node_put(np);
return err;
 }
 
-- 
1.8.3.1
