Re: [Qemu-devel] [PATCH v6 11/14] target/arm: PMU: Add instruction and cycle events

2018-10-18 Thread Aaron Lindsay
On Oct 17 14:12, Richard Henderson wrote:
> On 10/17/18 12:47 PM, Aaron Lindsay wrote:
> > On Oct 16 17:04, Richard Henderson wrote:
> >> On 10/10/18 1:37 PM, Aaron Lindsay wrote:
> >>> + * Return the underlying cycle count for the PMU cycle counters. If we're in
> >>> + * usermode, simply return 0.
> >>> + */
> >>> +static uint64_t cycles_get_count(CPUARMState *env)
> >>> +{
> >>> +#ifndef CONFIG_USER_ONLY
> >>> +return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
> >>> +   ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
> >>> +#else
> >>> +return 0;
> >>> +#endif
> >>> +}
> >>
> >> Usually we pass through the host cycle counter.
> >> See cpu_get_host_ticks().
> > 
> > Why do you prefer cpu_get_host_ticks()? And are you suggesting this for
> > just user-mode, or both system and user?
> 
> Just user-mode.  Providing a clock with unknown scaling is more useful than a
> constant 0.

Okay, that makes sense I think. I slightly dislike the fact that the
behavior is silently different between user and system mode, but perhaps
some documentation could alleviate my concern - is there an appropriate
place to document this difference?

It occurs to me that one argument for always returning 0 is that it's
immediately obvious that the behavior is different from system mode,
eliminating the possibility of a user assuming results from user and
system mode are comparable.

-Aaron



Re: [Qemu-devel] [PATCH v6 11/14] target/arm: PMU: Add instruction and cycle events

2018-10-17 Thread Richard Henderson
On 10/17/18 12:47 PM, Aaron Lindsay wrote:
> On Oct 16 17:04, Richard Henderson wrote:
>> On 10/10/18 1:37 PM, Aaron Lindsay wrote:
>>> + * Return the underlying cycle count for the PMU cycle counters. If we're in
>>> + * usermode, simply return 0.
>>> + */
>>> +static uint64_t cycles_get_count(CPUARMState *env)
>>> +{
>>> +#ifndef CONFIG_USER_ONLY
>>> +return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
>>> +   ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
>>> +#else
>>> +return 0;
>>> +#endif
>>> +}
>>
>> Usually we pass through the host cycle counter.
>> See cpu_get_host_ticks().
> 
> Why do you prefer cpu_get_host_ticks()? And are you suggesting this for
> just user-mode, or both system and user?

Just user-mode.  Providing a clock with unknown scaling is more useful than a
constant 0.


r~



Re: [Qemu-devel] [PATCH v6 11/14] target/arm: PMU: Add instruction and cycle events

2018-10-17 Thread Aaron Lindsay
On Oct 16 17:04, Richard Henderson wrote:
> On 10/10/18 1:37 PM, Aaron Lindsay wrote:
> > + * Return the underlying cycle count for the PMU cycle counters. If we're in
> > + * usermode, simply return 0.
> > + */
> > +static uint64_t cycles_get_count(CPUARMState *env)
> > +{
> > +#ifndef CONFIG_USER_ONLY
> > +return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
> > +   ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
> > +#else
> > +return 0;
> > +#endif
> > +}
> 
> Usually we pass through the host cycle counter.
> See cpu_get_host_ticks().

Why do you prefer cpu_get_host_ticks()? And are you suggesting this for
just user-mode, or both system and user?

PMCCNTR used this same qemu_clock_get_ns() call prior to my patch
(see where this patch replaces that call with one to cycles_get_count()
in pmccntr_op_start()). Of course, we could keep the preexisting PMCCNTR
behavior while making the new cycle counter use cpu_get_host_ticks(),
but having two paths through the same interface that count cycles
differently feels wrong.

-Aaron



Re: [Qemu-devel] [PATCH v6 11/14] target/arm: PMU: Add instruction and cycle events

2018-10-16 Thread Richard Henderson
On 10/10/18 1:37 PM, Aaron Lindsay wrote:
> + * Return the underlying cycle count for the PMU cycle counters. If we're in
> + * usermode, simply return 0.
> + */
> +static uint64_t cycles_get_count(CPUARMState *env)
> +{
> +#ifndef CONFIG_USER_ONLY
> +return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
> +   ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
> +#else
> +return 0;
> +#endif
> +}

Usually we pass through the host cycle counter.
See cpu_get_host_ticks().


r~



[Qemu-devel] [PATCH v6 11/14] target/arm: PMU: Add instruction and cycle events

2018-10-10 Thread Aaron Lindsay
The instruction event is only enabled when icount is used; cycles are
always supported. Always defining cycles_get_count (but altering its
behavior depending on CONFIG_USER_ONLY) allows us to remove some
CONFIG_USER_ONLY #ifdefs throughout the rest of the code.

Signed-off-by: Aaron Lindsay 
Reviewed-by: Peter Maydell 
---
 target/arm/helper.c | 90 ++---
 1 file changed, 44 insertions(+), 46 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index f0798f7a8c..d6501de1ba 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -15,6 +15,7 @@
 #include "arm_ldst.h"
 #include <zlib.h> /* For crc32 */
 #include "exec/semihost.h"
+#include "sysemu/cpus.h"
 #include "sysemu/kvm.h"
 #include "fpu/softfloat.h"
 #include "qemu/range.h"
@@ -988,9 +989,50 @@ typedef struct pm_event {
 uint64_t (*get_count)(CPUARMState *);
 } pm_event;
 
+static bool event_always_supported(CPUARMState *env)
+{
+return true;
+}
+
+/*
+ * Return the underlying cycle count for the PMU cycle counters. If we're in
+ * usermode, simply return 0.
+ */
+static uint64_t cycles_get_count(CPUARMState *env)
+{
+#ifndef CONFIG_USER_ONLY
+return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+   ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
+#else
+return 0;
+#endif
+}
+
+#ifndef CONFIG_USER_ONLY
+static bool instructions_supported(CPUARMState *env)
+{
+return use_icount == 1 /* Precise instruction counting */;
+}
+
+static uint64_t instructions_get_count(CPUARMState *env)
+{
+return (uint64_t)cpu_get_icount_raw();
+}
+#endif
+
 static const pm_event pm_events[] = {
+#ifndef CONFIG_USER_ONLY
+{ .number = 0x008, /* INST_RETIRED, Instruction architecturally executed */
+  .supported = instructions_supported,
+  .get_count = instructions_get_count,
+},
+{ .number = 0x011, /* CPU_CYCLES, Cycle */
+  .supported = event_always_supported,
+  .get_count = cycles_get_count,
+}
+#endif
 };
-#define MAX_EVENT_ID 0x0
+#define MAX_EVENT_ID 0x11
 #define UNSUPPORTED_EVENT UINT16_MAX
 static uint16_t supported_event_map[MAX_EVENT_ID + 1];
 
@@ -1083,8 +1125,6 @@ static CPAccessResult pmreg_access_swinc(CPUARMState *env,
 return pmreg_access(env, ri, isread);
 }
 
-#ifndef CONFIG_USER_ONLY
-
 static CPAccessResult pmreg_access_selr(CPUARMState *env,
 const ARMCPRegInfo *ri,
 bool isread)
@@ -1195,9 +1235,7 @@ static inline bool pmu_counter_enabled(CPUARMState *env, uint8_t counter)
  */
 void pmccntr_op_start(CPUARMState *env)
 {
-uint64_t cycles = 0;
-cycles = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
-  ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
+uint64_t cycles = cycles_get_count(env);
 
 if (pmu_counter_enabled(env, 31)) {
 uint64_t eff_cycles = cycles;
@@ -1343,42 +1381,6 @@ static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri,
 pmccntr_write(env, ri, deposit64(cur_val, 0, 32, value));
 }
 
-#else /* CONFIG_USER_ONLY */
-
-void pmccntr_op_start(CPUARMState *env)
-{
-}
-
-void pmccntr_op_finish(CPUARMState *env)
-{
-}
-
-void pmevcntr_op_start(CPUARMState *env, uint8_t i)
-{
-}
-
-void pmevcntr_op_finish(CPUARMState *env, uint8_t i)
-{
-}
-
-void pmu_op_start(CPUARMState *env)
-{
-}
-
-void pmu_op_finish(CPUARMState *env)
-{
-}
-
-void pmu_pre_el_change(ARMCPU *cpu, void *ignored)
-{
-}
-
-void pmu_post_el_change(ARMCPU *cpu, void *ignored)
-{
-}
-
-#endif
-
 static void pmccfiltr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 uint64_t value)
 {
@@ -1752,7 +1754,6 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
 /* Unimplemented so WI. */
 { .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4,
   .access = PL0_W, .accessfn = pmreg_access_swinc, .type = ARM_CP_NOP },
-#ifndef CONFIG_USER_ONLY
 { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5,
   .access = PL0_RW, .type = ARM_CP_ALIAS,
   .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmselr),
@@ -1774,7 +1775,6 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
   .fieldoffset = offsetof(CPUARMState, cp15.c15_ccnt),
   .readfn = pmccntr_read, .writefn = pmccntr_write,
   .raw_readfn = raw_read, .raw_writefn = raw_write, },
-#endif
 { .name = "PMCCFILTR", .cp = 15, .opc1 = 0, .crn = 14, .crm = 15, .opc2 = 7,
   .writefn = pmccfiltr_write_a32, .readfn = pmccfiltr_read_a32,
   .access = PL0_RW, .accessfn = pmreg_access,
@@ -5416,7 +5416,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)
  * count register.
  */
 unsigned int i, pmcrn = 4;
-#ifndef CONFIG_USER_ONLY
 ARMCPRegInfo pmcr = {
 .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
 .access = PL0_RW,
@@ -5473,7 +5472,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)