Re: [Qemu-devel] [kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-16 Thread Christopher Covington
On 11/16/2016 11:25 AM, Andrew Jones wrote:
> On Wed, Nov 16, 2016 at 11:08:42AM -0500, Christopher Covington wrote:
>> On 11/16/2016 08:01 AM, Andrew Jones wrote:
>>> On Tue, Nov 15, 2016 at 04:50:53PM -0600, Wei Huang wrote:


 On 11/14/2016 09:12 AM, Christopher Covington wrote:
> Hi Drew, Wei,
>
> On 11/14/2016 05:05 AM, Andrew Jones wrote:
>> On Fri, Nov 11, 2016 at 01:55:49PM -0600, Wei Huang wrote:
>>>
>>>
>>> On 11/11/2016 01:43 AM, Andrew Jones wrote:
 On Tue, Nov 08, 2016 at 12:17:14PM -0600, Wei Huang wrote:
> From: Christopher Covington 
>
> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
> even for the smallest delta of two subsequent reads.
>
> Signed-off-by: Christopher Covington 
> Signed-off-by: Wei Huang 
> ---
>  arm/pmu.c | 98 
> +++
>  1 file changed, 98 insertions(+)
>
> diff --git a/arm/pmu.c b/arm/pmu.c
> index 0b29088..d5e3ac3 100644
> --- a/arm/pmu.c
> +++ b/arm/pmu.c
> @@ -14,6 +14,7 @@
>   */
>  #include "libcflat.h"
>  
> +#define PMU_PMCR_E (1 << 0)
>  #define PMU_PMCR_N_SHIFT   11
>  #define PMU_PMCR_N_MASK0x1f
>  #define PMU_PMCR_ID_SHIFT  16
> @@ -21,6 +22,10 @@
>  #define PMU_PMCR_IMP_SHIFT 24
>  #define PMU_PMCR_IMP_MASK  0xff
>  
> +#define PMU_CYCLE_IDX  31
> +
> +#define NR_SAMPLES 10
> +
>  #if defined(__arm__)
>  static inline uint32_t pmcr_read(void)
>  {
> @@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
>   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
>   return ret;
>  }
> +
> +static inline void pmcr_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
> +}
> +
> +static inline void pmselr_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
> +}
> +
> +static inline void pmxevtyper_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
> +}
> +
> +/*
> + * While PMCCNTR can be accessed as a 64 bit coprocessor register, 
> returning 64
> + * bits doesn't seem worth the trouble when differential usage of 
> the result is
> + * expected (with differences that can easily fit in 32 bits). So 
> just return
> + * the lower 32 bits of the cycle count in AArch32.

 Like I said in the last review, I'd rather we not do this. We should
 return the full value and then the test case should confirm the upper
 32 bits are zero.
>>>
>>> Unless I miss something in ARM documentation, ARMv7 PMCCNTR is a 32-bit
>>> register. We can force it to a more coarse-grained cycle counter with
>>> PMCR.D bit=1 (see below). But it is still not a 64-bit register.
>
> AArch32 System Register Descriptions
> Performance Monitors registers
> PMCCNTR, Performance Monitors Cycle Count Register
>
> To access the PMCCNTR when accessing as a 32-bit register:
> MRC p15,0,,c9,c13,0 ; Read PMCCNTR[31:0] into Rt
> MCR p15,0,,c9,c13,0 ; Write Rt to PMCCNTR[31:0]. PMCCNTR[63:32] are 
> unchanged
>
> To access the PMCCNTR when accessing as a 64-bit register:
> MRRC p15,0,,,c9 ; Read PMCCNTR[31:0] into Rt and PMCCNTR[63:32] 
> into Rt2
> MCRR p15,0,,,c9 ; Write Rt to PMCCNTR[31:0] and Rt2 to 
> PMCCNTR[63:32]
>

 Thanks. I did some research based on your info and came back with the
 following proposals (Cov, correct me if I am wrong):

 By comparing A57 TRM (page 394 in [1]) with A15 TRM (page 273 in [2]), I
 think this 64-bit cycle register is only available when running under
 aarch32 compatibility mode on ARMv8 because it is not specified in A15
 TRM.
>>
>> That interpretation sounds really strange to me. My recollection is that the
>> cycle counter was available as a 64 bit register in ARMv7 as well. I would
>> expect the Cortex TRMs to omit such details. The ARMv7 Architecture Reference
>> Manual is the complete and authoritative source.
> 
> Yes, the v7 ARM ARM is the authoritative source, and it says 32-bit.
> Whereas the v8 ARM ARM wrt to AArch32 mode says it's both 32 and 64.

Just looked it up as well in the good old ARM DDI 0406C.c and you're absolutely
right. Sorry for the bad recollection.

Cov

-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc. Qualcomm Technologies, Inc. 

Re: [Qemu-devel] [kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-16 Thread Andrew Jones
On Wed, Nov 16, 2016 at 11:08:42AM -0500, Christopher Covington wrote:
> On 11/16/2016 08:01 AM, Andrew Jones wrote:
> > On Tue, Nov 15, 2016 at 04:50:53PM -0600, Wei Huang wrote:
> >>
> >>
> >> On 11/14/2016 09:12 AM, Christopher Covington wrote:
> >>> Hi Drew, Wei,
> >>>
> >>> On 11/14/2016 05:05 AM, Andrew Jones wrote:
>  On Fri, Nov 11, 2016 at 01:55:49PM -0600, Wei Huang wrote:
> >
> >
> > On 11/11/2016 01:43 AM, Andrew Jones wrote:
> >> On Tue, Nov 08, 2016 at 12:17:14PM -0600, Wei Huang wrote:
> >>> From: Christopher Covington 
> >>>
> >>> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
> >>> even for the smallest delta of two subsequent reads.
> >>>
> >>> Signed-off-by: Christopher Covington 
> >>> Signed-off-by: Wei Huang 
> >>> ---
> >>>  arm/pmu.c | 98 
> >>> +++
> >>>  1 file changed, 98 insertions(+)
> >>>
> >>> diff --git a/arm/pmu.c b/arm/pmu.c
> >>> index 0b29088..d5e3ac3 100644
> >>> --- a/arm/pmu.c
> >>> +++ b/arm/pmu.c
> >>> @@ -14,6 +14,7 @@
> >>>   */
> >>>  #include "libcflat.h"
> >>>  
> >>> +#define PMU_PMCR_E (1 << 0)
> >>>  #define PMU_PMCR_N_SHIFT   11
> >>>  #define PMU_PMCR_N_MASK0x1f
> >>>  #define PMU_PMCR_ID_SHIFT  16
> >>> @@ -21,6 +22,10 @@
> >>>  #define PMU_PMCR_IMP_SHIFT 24
> >>>  #define PMU_PMCR_IMP_MASK  0xff
> >>>  
> >>> +#define PMU_CYCLE_IDX  31
> >>> +
> >>> +#define NR_SAMPLES 10
> >>> +
> >>>  #if defined(__arm__)
> >>>  static inline uint32_t pmcr_read(void)
> >>>  {
> >>> @@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
> >>>   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
> >>>   return ret;
> >>>  }
> >>> +
> >>> +static inline void pmcr_write(uint32_t value)
> >>> +{
> >>> + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
> >>> +}
> >>> +
> >>> +static inline void pmselr_write(uint32_t value)
> >>> +{
> >>> + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
> >>> +}
> >>> +
> >>> +static inline void pmxevtyper_write(uint32_t value)
> >>> +{
> >>> + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
> >>> +}
> >>> +
> >>> +/*
> >>> + * While PMCCNTR can be accessed as a 64 bit coprocessor register, 
> >>> returning 64
> >>> + * bits doesn't seem worth the trouble when differential usage of 
> >>> the result is
> >>> + * expected (with differences that can easily fit in 32 bits). So 
> >>> just return
> >>> + * the lower 32 bits of the cycle count in AArch32.
> >>
> >> Like I said in the last review, I'd rather we not do this. We should
> >> return the full value and then the test case should confirm the upper
> >> 32 bits are zero.
> >
> > Unless I miss something in ARM documentation, ARMv7 PMCCNTR is a 32-bit
> > register. We can force it to a more coarse-grained cycle counter with
> > PMCR.D bit=1 (see below). But it is still not a 64-bit register.
> >>>
> >>> AArch32 System Register Descriptions
> >>> Performance Monitors registers
> >>> PMCCNTR, Performance Monitors Cycle Count Register
> >>>
> >>> To access the PMCCNTR when accessing as a 32-bit register:
> >>> MRC p15,0,,c9,c13,0 ; Read PMCCNTR[31:0] into Rt
> >>> MCR p15,0,,c9,c13,0 ; Write Rt to PMCCNTR[31:0]. PMCCNTR[63:32] are 
> >>> unchanged
> >>>
> >>> To access the PMCCNTR when accessing as a 64-bit register:
> >>> MRRC p15,0,,,c9 ; Read PMCCNTR[31:0] into Rt and PMCCNTR[63:32] 
> >>> into Rt2
> >>> MCRR p15,0,,,c9 ; Write Rt to PMCCNTR[31:0] and Rt2 to 
> >>> PMCCNTR[63:32]
> >>>
> >>
> >> Thanks. I did some research based on your info and came back with the
> >> following proposals (Cov, correct me if I am wrong):
> >>
> >> By comparing A57 TRM (page 394 in [1]) with A15 TRM (page 273 in [2]), I
> >> think this 64-bit cycle register is only available when running under
> >> aarch32 compatibility mode on ARMv8 because it is not specified in A15
> >> TRM.
> 
> That interpretation sounds really strange to me. My recollection is that the
> cycle counter was available as a 64 bit register in ARMv7 as well. I would
> expect the Cortex TRMs to omit such details. The ARMv7 Architecture Reference
> Manual is the complete and authoritative source.

Yes, the v7 ARM ARM is the authoritative source, and it says 32-bit.
Whereas the v8 ARM ARM wrt to AArch32 mode says it's both 32 and 64.

> 
> >> To further verify it, I tested 32-bit pmu code on QEMU with TCG
> >> mode. The result is: accessing 64-bit PMCCNTR using the following
> >> assembly failed on A15:
> >>
> >>volatile("mrrc p15, 0, %0, %1, c9" : "=r" (lo), "=r" (hi));
> >> or
> >>volatile("mrrc p15, 0, %Q0, %R0, c9" : 

Re: [Qemu-devel] [kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-16 Thread Christopher Covington
On 11/16/2016 08:01 AM, Andrew Jones wrote:
> On Tue, Nov 15, 2016 at 04:50:53PM -0600, Wei Huang wrote:
>>
>>
>> On 11/14/2016 09:12 AM, Christopher Covington wrote:
>>> Hi Drew, Wei,
>>>
>>> On 11/14/2016 05:05 AM, Andrew Jones wrote:
 On Fri, Nov 11, 2016 at 01:55:49PM -0600, Wei Huang wrote:
>
>
> On 11/11/2016 01:43 AM, Andrew Jones wrote:
>> On Tue, Nov 08, 2016 at 12:17:14PM -0600, Wei Huang wrote:
>>> From: Christopher Covington 
>>>
>>> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
>>> even for the smallest delta of two subsequent reads.
>>>
>>> Signed-off-by: Christopher Covington 
>>> Signed-off-by: Wei Huang 
>>> ---
>>>  arm/pmu.c | 98 
>>> +++
>>>  1 file changed, 98 insertions(+)
>>>
>>> diff --git a/arm/pmu.c b/arm/pmu.c
>>> index 0b29088..d5e3ac3 100644
>>> --- a/arm/pmu.c
>>> +++ b/arm/pmu.c
>>> @@ -14,6 +14,7 @@
>>>   */
>>>  #include "libcflat.h"
>>>  
>>> +#define PMU_PMCR_E (1 << 0)
>>>  #define PMU_PMCR_N_SHIFT   11
>>>  #define PMU_PMCR_N_MASK0x1f
>>>  #define PMU_PMCR_ID_SHIFT  16
>>> @@ -21,6 +22,10 @@
>>>  #define PMU_PMCR_IMP_SHIFT 24
>>>  #define PMU_PMCR_IMP_MASK  0xff
>>>  
>>> +#define PMU_CYCLE_IDX  31
>>> +
>>> +#define NR_SAMPLES 10
>>> +
>>>  #if defined(__arm__)
>>>  static inline uint32_t pmcr_read(void)
>>>  {
>>> @@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
>>> asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
>>> return ret;
>>>  }
>>> +
>>> +static inline void pmcr_write(uint32_t value)
>>> +{
>>> +   asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
>>> +}
>>> +
>>> +static inline void pmselr_write(uint32_t value)
>>> +{
>>> +   asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
>>> +}
>>> +
>>> +static inline void pmxevtyper_write(uint32_t value)
>>> +{
>>> +   asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
>>> +}
>>> +
>>> +/*
>>> + * While PMCCNTR can be accessed as a 64 bit coprocessor register, 
>>> returning 64
>>> + * bits doesn't seem worth the trouble when differential usage of the 
>>> result is
>>> + * expected (with differences that can easily fit in 32 bits). So just 
>>> return
>>> + * the lower 32 bits of the cycle count in AArch32.
>>
>> Like I said in the last review, I'd rather we not do this. We should
>> return the full value and then the test case should confirm the upper
>> 32 bits are zero.
>
> Unless I miss something in ARM documentation, ARMv7 PMCCNTR is a 32-bit
> register. We can force it to a more coarse-grained cycle counter with
> PMCR.D bit=1 (see below). But it is still not a 64-bit register.
>>>
>>> AArch32 System Register Descriptions
>>> Performance Monitors registers
>>> PMCCNTR, Performance Monitors Cycle Count Register
>>>
>>> To access the PMCCNTR when accessing as a 32-bit register:
>>> MRC p15,0,,c9,c13,0 ; Read PMCCNTR[31:0] into Rt
>>> MCR p15,0,,c9,c13,0 ; Write Rt to PMCCNTR[31:0]. PMCCNTR[63:32] are 
>>> unchanged
>>>
>>> To access the PMCCNTR when accessing as a 64-bit register:
>>> MRRC p15,0,,,c9 ; Read PMCCNTR[31:0] into Rt and PMCCNTR[63:32] 
>>> into Rt2
>>> MCRR p15,0,,,c9 ; Write Rt to PMCCNTR[31:0] and Rt2 to 
>>> PMCCNTR[63:32]
>>>
>>
>> Thanks. I did some research based on your info and came back with the
>> following proposals (Cov, correct me if I am wrong):
>>
>> By comparing A57 TRM (page 394 in [1]) with A15 TRM (page 273 in [2]), I
>> think this 64-bit cycle register is only available when running under
>> aarch32 compatibility mode on ARMv8 because it is not specified in A15
>> TRM.

That interpretation sounds really strange to me. My recollection is that the
cycle counter was available as a 64 bit register in ARMv7 as well. I would
expect the Cortex TRMs to omit such details. The ARMv7 Architecture Reference
Manual is the complete and authoritative source.

>> To further verify it, I tested 32-bit pmu code on QEMU with TCG
>> mode. The result is: accessing 64-bit PMCCNTR using the following
>> assembly failed on A15:
>>
>>volatile("mrrc p15, 0, %0, %1, c9" : "=r" (lo), "=r" (hi));
>> or
>>volatile("mrrc p15, 0, %Q0, %R0, c9" : "=r" (val));

The PMU implementation on QEMU TCG mode is infantile. (I was trying to
write these tests to help guide fixes and enhancements in a
test-driven-development manner.) I would not trust QEMU TCG to behave
properly here. If you want to execute those instructions, is there anything
preventing you from doing it on hardware, or at least the Foundation Model?

>> Given this difference, I think there are two solutions for 64-bit
>> 

Re: [Qemu-devel] [kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-16 Thread Andrew Jones

Just crossed my mind that we're missing isb's.

On Tue, Nov 08, 2016 at 12:17:14PM -0600, Wei Huang wrote:
> From: Christopher Covington 
> 
> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
> even for the smallest delta of two subsequent reads.
> 
> Signed-off-by: Christopher Covington 
> Signed-off-by: Wei Huang 
> ---
>  arm/pmu.c | 98 
> +++
>  1 file changed, 98 insertions(+)
> 
> diff --git a/arm/pmu.c b/arm/pmu.c
> index 0b29088..d5e3ac3 100644
> --- a/arm/pmu.c
> +++ b/arm/pmu.c
> @@ -14,6 +14,7 @@
>   */
>  #include "libcflat.h"
>  
> +#define PMU_PMCR_E (1 << 0)
>  #define PMU_PMCR_N_SHIFT   11
>  #define PMU_PMCR_N_MASK0x1f
>  #define PMU_PMCR_ID_SHIFT  16
> @@ -21,6 +22,10 @@
>  #define PMU_PMCR_IMP_SHIFT 24
>  #define PMU_PMCR_IMP_MASK  0xff
>  
> +#define PMU_CYCLE_IDX  31
> +
> +#define NR_SAMPLES 10
> +
>  #if defined(__arm__)
>  static inline uint32_t pmcr_read(void)
>  {
> @@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
>   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
>   return ret;
>  }
> +
> +static inline void pmcr_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
> +}
> +
> +static inline void pmselr_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));

Probably want an isb here, users will call this and then immediately
another PMU reg write, like is done below

> +}
> +
> +static inline void pmxevtyper_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
> +}
> +
> +/*
> + * While PMCCNTR can be accessed as a 64 bit coprocessor register, returning 
> 64
> + * bits doesn't seem worth the trouble when differential usage of the result 
> is
> + * expected (with differences that can easily fit in 32 bits). So just return
> + * the lower 32 bits of the cycle count in AArch32.

Also, while we're discussing confirming upper bits are as expected, I
guess we should confirm no overflow too. We should clear the overflow
bit PMOVSCLR_EL0.C before we use the counter, and then check it at some
point to confirm it's as expected. I guess that could be separate test
cases though.

> + */
> +static inline uint32_t pmccntr_read(void)
> +{
> + uint32_t cycles;
> +
> + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles));
> + return cycles;
> +}
> +
> +static inline void pmcntenset_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value));
> +}
> +
> +/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
> +static inline void pmccfiltr_write(uint32_t value)
> +{
> + pmselr_write(PMU_CYCLE_IDX);
> + pmxevtyper_write(value);
> +}
>  #elif defined(__aarch64__)
>  static inline uint32_t pmcr_read(void)
>  {
> @@ -37,6 +83,29 @@ static inline uint32_t pmcr_read(void)
>   asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
>   return ret;
>  }
> +
> +static inline void pmcr_write(uint32_t value)
> +{
> + asm volatile("msr pmcr_el0, %0" : : "r" (value));
> +}
> +
> +static inline uint32_t pmccntr_read(void)
> +{
> + uint32_t cycles;
> +
> + asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
> + return cycles;
> +}
> +
> +static inline void pmcntenset_write(uint32_t value)
> +{
> + asm volatile("msr pmcntenset_el0, %0" : : "r" (value));
> +}
> +
> +static inline void pmccfiltr_write(uint32_t value)
> +{
> + asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
> +}
>  #endif
>  
>  /*
> @@ -63,11 +132,40 @@ static bool check_pmcr(void)
>   return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
>  }
>  
> +/*
> + * Ensure that the cycle counter progresses between back-to-back reads.
> + */
> +static bool check_cycles_increase(void)
> +{
> + pmcr_write(pmcr_read() | PMU_PMCR_E);

Need isb() here

> +
> + for (int i = 0; i < NR_SAMPLES; i++) {
> + unsigned long a, b;
> +
> + a = pmccntr_read();
> + b = pmccntr_read();
> +
> + if (a >= b) {
> + printf("Read %ld then %ld.\n", a, b);
> + return false;
> + }
> + }
> +
> + pmcr_write(pmcr_read() & ~PMU_PMCR_E);
> +

Need isb() here

> + return true;
> +}
> +
>  int main(void)
>  {
>   report_prefix_push("pmu");
>  
> + /* init for PMU event access, right now only care about cycle count */
> + pmcntenset_write(1 << PMU_CYCLE_IDX);
> + pmccfiltr_write(0); /* count cycles in EL0, EL1, but not EL2 */

Need isb() here

> +
>   report("Control register", check_pmcr());
> + report("Monotonically increasing cycle count", check_cycles_increase());
>  
>   return report_summary();
>  }
> -- 
> 1.8.3.1
> 
>

Thanks,
drew
___
kvmarm mailing list

Re: [Qemu-devel] [kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-16 Thread Andrew Jones
On Tue, Nov 15, 2016 at 04:50:53PM -0600, Wei Huang wrote:
> 
> 
> On 11/14/2016 09:12 AM, Christopher Covington wrote:
> > Hi Drew, Wei,
> > 
> > On 11/14/2016 05:05 AM, Andrew Jones wrote:
> >> On Fri, Nov 11, 2016 at 01:55:49PM -0600, Wei Huang wrote:
> >>>
> >>>
> >>> On 11/11/2016 01:43 AM, Andrew Jones wrote:
>  On Tue, Nov 08, 2016 at 12:17:14PM -0600, Wei Huang wrote:
> > From: Christopher Covington 
> >
> > Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
> > even for the smallest delta of two subsequent reads.
> >
> > Signed-off-by: Christopher Covington 
> > Signed-off-by: Wei Huang 
> > ---
> >  arm/pmu.c | 98 
> > +++
> >  1 file changed, 98 insertions(+)
> >
> > diff --git a/arm/pmu.c b/arm/pmu.c
> > index 0b29088..d5e3ac3 100644
> > --- a/arm/pmu.c
> > +++ b/arm/pmu.c
> > @@ -14,6 +14,7 @@
> >   */
> >  #include "libcflat.h"
> >  
> > +#define PMU_PMCR_E (1 << 0)
> >  #define PMU_PMCR_N_SHIFT   11
> >  #define PMU_PMCR_N_MASK0x1f
> >  #define PMU_PMCR_ID_SHIFT  16
> > @@ -21,6 +22,10 @@
> >  #define PMU_PMCR_IMP_SHIFT 24
> >  #define PMU_PMCR_IMP_MASK  0xff
> >  
> > +#define PMU_CYCLE_IDX  31
> > +
> > +#define NR_SAMPLES 10
> > +
> >  #if defined(__arm__)
> >  static inline uint32_t pmcr_read(void)
> >  {
> > @@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
> > asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
> > return ret;
> >  }
> > +
> > +static inline void pmcr_write(uint32_t value)
> > +{
> > +   asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
> > +}
> > +
> > +static inline void pmselr_write(uint32_t value)
> > +{
> > +   asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
> > +}
> > +
> > +static inline void pmxevtyper_write(uint32_t value)
> > +{
> > +   asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
> > +}
> > +
> > +/*
> > + * While PMCCNTR can be accessed as a 64 bit coprocessor register, 
> > returning 64
> > + * bits doesn't seem worth the trouble when differential usage of the 
> > result is
> > + * expected (with differences that can easily fit in 32 bits). So just 
> > return
> > + * the lower 32 bits of the cycle count in AArch32.
> 
>  Like I said in the last review, I'd rather we not do this. We should
>  return the full value and then the test case should confirm the upper
>  32 bits are zero.
> >>>
> >>> Unless I miss something in ARM documentation, ARMv7 PMCCNTR is a 32-bit
> >>> register. We can force it to a more coarse-grained cycle counter with
> >>> PMCR.D bit=1 (see below). But it is still not a 64-bit register.
> > 
> > AArch32 System Register Descriptions
> > Performance Monitors registers
> > PMCCNTR, Performance Monitors Cycle Count Register
> > 
> > To access the PMCCNTR when accessing as a 32-bit register:
> > MRC p15,0,,c9,c13,0 ; Read PMCCNTR[31:0] into Rt
> > MCR p15,0,,c9,c13,0 ; Write Rt to PMCCNTR[31:0]. PMCCNTR[63:32] are 
> > unchanged
> > 
> > To access the PMCCNTR when accessing as a 64-bit register:
> > MRRC p15,0,,,c9 ; Read PMCCNTR[31:0] into Rt and PMCCNTR[63:32] 
> > into Rt2
> > MCRR p15,0,,,c9 ; Write Rt to PMCCNTR[31:0] and Rt2 to 
> > PMCCNTR[63:32]
> > 
> 
> Thanks. I did some research based on your info and came back with the
> following proposals (Cov, correct me if I am wrong):
> 
> By comparing A57 TRM (page 394 in [1]) with A15 TRM (page 273 in [2]), I
> think this 64-bit cycle register is only available when running under
> aarch32 compatibility mode on ARMv8 because it is not specified in A15
> TRM.

OK, I hadn't realized that there would be differences between v7 and
AArch32. It looks like we need to add a function to the kvm-unit-tests
framework that enables unit tests to make that distinction, because we'll
want to explicitly test those differences in order to flush out emulation
bugs. I see now that Appendix K5 of the v8 ARM ARM lists some differences,
but this PMCCNTR difference isn't there...

As v8-A32 is an update/extension of v7-A, I'd expect there to be a RES0
bit in some v7 ID register that, on v8, is no longer reserved and a 1.
Unfortunately I just did some ARM doc skimming but can't find anything
like that. As we currently only use the cortex-a15 for our v7 processor,
then I guess we can just check MIDR, but yuck. Anyway, I'll send a
patch for that.

> To further verify it, I tested 32-bit pmu code on QEMU with TCG
> mode. The result is: accessing 64-bit PMCCNTR using the following
> assembly failed on A15:
> 
>volatile("mrrc p15, 0, %0, %1, c9" : "=r" (lo), "=r" (hi));
> or
>volatile("mrrc p15, 0, %Q0, %R0, c9" : 

Re: [Qemu-devel] [kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-15 Thread Wei Huang


On 11/14/2016 09:12 AM, Christopher Covington wrote:
> Hi Drew, Wei,
> 
> On 11/14/2016 05:05 AM, Andrew Jones wrote:
>> On Fri, Nov 11, 2016 at 01:55:49PM -0600, Wei Huang wrote:
>>>
>>>
>>> On 11/11/2016 01:43 AM, Andrew Jones wrote:
 On Tue, Nov 08, 2016 at 12:17:14PM -0600, Wei Huang wrote:
> From: Christopher Covington 
>
> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
> even for the smallest delta of two subsequent reads.
>
> Signed-off-by: Christopher Covington 
> Signed-off-by: Wei Huang 
> ---
>  arm/pmu.c | 98 
> +++
>  1 file changed, 98 insertions(+)
>
> diff --git a/arm/pmu.c b/arm/pmu.c
> index 0b29088..d5e3ac3 100644
> --- a/arm/pmu.c
> +++ b/arm/pmu.c
> @@ -14,6 +14,7 @@
>   */
>  #include "libcflat.h"
>  
> +#define PMU_PMCR_E (1 << 0)
>  #define PMU_PMCR_N_SHIFT   11
>  #define PMU_PMCR_N_MASK0x1f
>  #define PMU_PMCR_ID_SHIFT  16
> @@ -21,6 +22,10 @@
>  #define PMU_PMCR_IMP_SHIFT 24
>  #define PMU_PMCR_IMP_MASK  0xff
>  
> +#define PMU_CYCLE_IDX  31
> +
> +#define NR_SAMPLES 10
> +
>  #if defined(__arm__)
>  static inline uint32_t pmcr_read(void)
>  {
> @@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
>   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
>   return ret;
>  }
> +
> +static inline void pmcr_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
> +}
> +
> +static inline void pmselr_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
> +}
> +
> +static inline void pmxevtyper_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
> +}
> +
> +/*
> + * While PMCCNTR can be accessed as a 64 bit coprocessor register, 
> returning 64
> + * bits doesn't seem worth the trouble when differential usage of the 
> result is
> + * expected (with differences that can easily fit in 32 bits). So just 
> return
> + * the lower 32 bits of the cycle count in AArch32.

 Like I said in the last review, I'd rather we not do this. We should
 return the full value and then the test case should confirm the upper
 32 bits are zero.
>>>
>>> Unless I miss something in ARM documentation, ARMv7 PMCCNTR is a 32-bit
>>> register. We can force it to a more coarse-grained cycle counter with
>>> PMCR.D bit=1 (see below). But it is still not a 64-bit register.
> 
> AArch32 System Register Descriptions
> Performance Monitors registers
> PMCCNTR, Performance Monitors Cycle Count Register
> 
> To access the PMCCNTR when accessing as a 32-bit register:
> MRC p15,0,,c9,c13,0 ; Read PMCCNTR[31:0] into Rt
> MCR p15,0,,c9,c13,0 ; Write Rt to PMCCNTR[31:0]. PMCCNTR[63:32] are 
> unchanged
> 
> To access the PMCCNTR when accessing as a 64-bit register:
> MRRC p15,0,,,c9 ; Read PMCCNTR[31:0] into Rt and PMCCNTR[63:32] into 
> Rt2
> MCRR p15,0,,,c9 ; Write Rt to PMCCNTR[31:0] and Rt2 to PMCCNTR[63:32]
> 

Thanks. I did some research based on your info and came back with the
following proposals (Cov, correct me if I am wrong):

By comparing A57 TRM (page 394 in [1]) with A15 TRM (page 273 in [2]), I
think this 64-bit cycle register is only available when running under
aarch32 compatibility mode on ARMv8 because it is not specified in A15
TRM. To further verify it, I tested 32-bit pmu code on QEMU with TCG
mode. The result is: accessing 64-bit PMCCNTR using the following
assembly failed on A15:

   volatile("mrrc p15, 0, %0, %1, c9" : "=r" (lo), "=r" (hi));
or
   volatile("mrrc p15, 0, %Q0, %R0, c9" : "=r" (val));

Given this difference, I think there are two solutions for 64-bit
AArch32 pmccntr_read, as requested by Drew:

1) The PMU unit testing code tells if it is running under ARMv7 or under
AArch32-compatibility mode. When it is running ARMv7, such as A15, let us
use "MRC p15,0,,c9,c13,0" and clear the upper 32-bit as 0. Otherwise
use "MRRC p15,0,,,c9".

2) Return 64-bit results for ARM pmccntr_read(). But we only use "MRC
p15,0,<Rt>,c9,c13,0" and always clear the upper 32 bits to 0. This will
be the same as the original code.

Thoughts?

-Wei

[1] A57 TRM,
http://infocenter.arm.com/help/topic/com.arm.doc.ddi0488c/DDI0488C_cortex_a57_mpcore_r1p0_trm.pdf
[2] A15 TRM,
http://infocenter.arm.com/help/topic/com.arm.doc.ddi0438c/DDI0438C_cortex_a15_r2p0_trm.pdf

> Regards,
> Cov
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [Qemu-devel] [kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-14 Thread Christopher Covington
Hi Drew, Wei,

On 11/14/2016 05:05 AM, Andrew Jones wrote:
> On Fri, Nov 11, 2016 at 01:55:49PM -0600, Wei Huang wrote:
>>
>>
>> On 11/11/2016 01:43 AM, Andrew Jones wrote:
>>> On Tue, Nov 08, 2016 at 12:17:14PM -0600, Wei Huang wrote:
 From: Christopher Covington 

 Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
 even for the smallest delta of two subsequent reads.

 Signed-off-by: Christopher Covington 
 Signed-off-by: Wei Huang 
 ---
  arm/pmu.c | 98 
 +++
  1 file changed, 98 insertions(+)

 diff --git a/arm/pmu.c b/arm/pmu.c
 index 0b29088..d5e3ac3 100644
 --- a/arm/pmu.c
 +++ b/arm/pmu.c
 @@ -14,6 +14,7 @@
   */
  #include "libcflat.h"
  
 +#define PMU_PMCR_E (1 << 0)
  #define PMU_PMCR_N_SHIFT   11
  #define PMU_PMCR_N_MASK0x1f
  #define PMU_PMCR_ID_SHIFT  16
 @@ -21,6 +22,10 @@
  #define PMU_PMCR_IMP_SHIFT 24
  #define PMU_PMCR_IMP_MASK  0xff
  
 +#define PMU_CYCLE_IDX  31
 +
 +#define NR_SAMPLES 10
 +
  #if defined(__arm__)
  static inline uint32_t pmcr_read(void)
  {
 @@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
return ret;
  }
 +
 +static inline void pmcr_write(uint32_t value)
 +{
 +  asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
 +}
 +
 +static inline void pmselr_write(uint32_t value)
 +{
 +  asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
 +}
 +
 +static inline void pmxevtyper_write(uint32_t value)
 +{
 +  asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
 +}
 +
 +/*
 + * While PMCCNTR can be accessed as a 64 bit coprocessor register, 
 returning 64
 + * bits doesn't seem worth the trouble when differential usage of the 
 result is
 + * expected (with differences that can easily fit in 32 bits). So just 
 return
 + * the lower 32 bits of the cycle count in AArch32.
>>>
>>> Like I said in the last review, I'd rather we not do this. We should
>>> return the full value and then the test case should confirm the upper
>>> 32 bits are zero.
>>
>> Unless I miss something in ARM documentation, ARMv7 PMCCNTR is a 32-bit
>> register. We can force it to a more coarse-grained cycle counter with
>> PMCR.D bit=1 (see below). But it is still not a 64-bit register.

AArch32 System Register Descriptions
Performance Monitors registers
PMCCNTR, Performance Monitors Cycle Count Register

To access the PMCCNTR when accessing as a 32-bit register:
MRC p15,0,<Rt>,c9,c13,0 ; Read PMCCNTR[31:0] into Rt
MCR p15,0,<Rt>,c9,c13,0 ; Write Rt to PMCCNTR[31:0]. PMCCNTR[63:32] are 
unchanged

To access the PMCCNTR when accessing as a 64-bit register:
MRRC p15,0,<Rt>,<Rt2>,c9 ; Read PMCCNTR[31:0] into Rt and PMCCNTR[63:32] into 
Rt2
MCRR p15,0,<Rt>,<Rt2>,c9 ; Write Rt to PMCCNTR[31:0] and Rt2 to PMCCNTR[63:32]

Regards,
Cov

-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code
Aurora Forum, a Linux Foundation Collaborative Project.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [Qemu-devel] [kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-14 Thread Andrew Jones
On Fri, Nov 11, 2016 at 01:55:49PM -0600, Wei Huang wrote:
> 
> 
> On 11/11/2016 01:43 AM, Andrew Jones wrote:
> > On Tue, Nov 08, 2016 at 12:17:14PM -0600, Wei Huang wrote:
> >> From: Christopher Covington 
> >>
> >> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
> >> even for the smallest delta of two subsequent reads.
> >>
> >> Signed-off-by: Christopher Covington 
> >> Signed-off-by: Wei Huang 
> >> ---
> >>  arm/pmu.c | 98 
> >> +++
> >>  1 file changed, 98 insertions(+)
> >>
> >> diff --git a/arm/pmu.c b/arm/pmu.c
> >> index 0b29088..d5e3ac3 100644
> >> --- a/arm/pmu.c
> >> +++ b/arm/pmu.c
> >> @@ -14,6 +14,7 @@
> >>   */
> >>  #include "libcflat.h"
> >>  
> >> +#define PMU_PMCR_E (1 << 0)
> >>  #define PMU_PMCR_N_SHIFT   11
> >>  #define PMU_PMCR_N_MASK0x1f
> >>  #define PMU_PMCR_ID_SHIFT  16
> >> @@ -21,6 +22,10 @@
> >>  #define PMU_PMCR_IMP_SHIFT 24
> >>  #define PMU_PMCR_IMP_MASK  0xff
> >>  
> >> +#define PMU_CYCLE_IDX  31
> >> +
> >> +#define NR_SAMPLES 10
> >> +
> >>  #if defined(__arm__)
> >>  static inline uint32_t pmcr_read(void)
> >>  {
> >> @@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
> >>asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
> >>return ret;
> >>  }
> >> +
> >> +static inline void pmcr_write(uint32_t value)
> >> +{
> >> +  asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
> >> +}
> >> +
> >> +static inline void pmselr_write(uint32_t value)
> >> +{
> >> +  asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
> >> +}
> >> +
> >> +static inline void pmxevtyper_write(uint32_t value)
> >> +{
> >> +  asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
> >> +}
> >> +
> >> +/*
> >> + * While PMCCNTR can be accessed as a 64 bit coprocessor register, 
> >> returning 64
> >> + * bits doesn't seem worth the trouble when differential usage of the 
> >> result is
> >> + * expected (with differences that can easily fit in 32 bits). So just 
> >> return
> >> + * the lower 32 bits of the cycle count in AArch32.
> > 
> > Like I said in the last review, I'd rather we not do this. We should
> > return the full value and then the test case should confirm the upper
> > 32 bits are zero.
> > 
> 
> Unless I miss something in ARM documentation, ARMv7 PMCCNTR is a 32-bit
> register. We can force it to a more coarse-grained cycle counter with
> PMCR.D bit=1 (see below). But it is still not a 64-bit register. ARMv8
> PMCCNTR_EL0 is a 64-bit register.
> 
> "The PMCR.D bit configures whether PMCCNTR increments once every clock
> cycle, or once every 64 clock cycles. "
> 
> So I think the comment above in the code is an overstatement, which
> should be deleted or moved down to ARMv8 pmccntr_read() below.

OK, please fix as appropriate, but for the v8 64-bit register, please
don't drop the upper bits until after a unit test has a chance to check
them.

Thanks,
drew

> 
> >> + */
> >> +static inline uint32_t pmccntr_read(void)
> >> +{
> >> +  uint32_t cycles;
> >> +
> >> +  asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles));
> >> +  return cycles;
> >> +}
> >> +
> >> +static inline void pmcntenset_write(uint32_t value)
> >> +{
> >> +  asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value));
> >> +}
> >> +
> >> +/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
> >> +static inline void pmccfiltr_write(uint32_t value)
> >> +{
> >> +  pmselr_write(PMU_CYCLE_IDX);
> >> +  pmxevtyper_write(value);
> >> +}
> >>  #elif defined(__aarch64__)
> >>  static inline uint32_t pmcr_read(void)
> >>  {
> >> @@ -37,6 +83,29 @@ static inline uint32_t pmcr_read(void)
> >>asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
> >>return ret;
> >>  }
> >> +
> >> +static inline void pmcr_write(uint32_t value)
> >> +{
> >> +  asm volatile("msr pmcr_el0, %0" : : "r" (value));
> >> +}
> >> +
> >> +static inline uint32_t pmccntr_read(void)
> >> +{
> >> +  uint32_t cycles;
> >> +
> >> +  asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
> >> +  return cycles;
> >> +}
> >> +
> >> +static inline void pmcntenset_write(uint32_t value)
> >> +{
> >> +  asm volatile("msr pmcntenset_el0, %0" : : "r" (value));
> >> +}
> >> +
> >> +static inline void pmccfiltr_write(uint32_t value)
> >> +{
> >> +  asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
> >> +}
> >>  #endif
> >>  
> >>  /*
> >> @@ -63,11 +132,40 @@ static bool check_pmcr(void)
> >>return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
> >>  }
> >>  
> >> +/*
> >> + * Ensure that the cycle counter progresses between back-to-back reads.
> >> + */
> >> +static bool check_cycles_increase(void)
> >> +{
> >> +  pmcr_write(pmcr_read() | PMU_PMCR_E);
> >> +
> >> +  for (int i = 0; i < NR_SAMPLES; i++) {
> >> +  unsigned long a, b;
> >> +
> >> +  a = pmccntr_read();
> >> +  b = pmccntr_read();
> >> +
> >> +  if (a >= 

Re: [Qemu-devel] [kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-11 Thread Wei Huang


On 11/11/2016 01:43 AM, Andrew Jones wrote:
> On Tue, Nov 08, 2016 at 12:17:14PM -0600, Wei Huang wrote:
>> From: Christopher Covington 
>>
>> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
>> even for the smallest delta of two subsequent reads.
>>
>> Signed-off-by: Christopher Covington 
>> Signed-off-by: Wei Huang 
>> ---
>>  arm/pmu.c | 98 
>> +++
>>  1 file changed, 98 insertions(+)
>>
>> diff --git a/arm/pmu.c b/arm/pmu.c
>> index 0b29088..d5e3ac3 100644
>> --- a/arm/pmu.c
>> +++ b/arm/pmu.c
>> @@ -14,6 +14,7 @@
>>   */
>>  #include "libcflat.h"
>>  
>> +#define PMU_PMCR_E (1 << 0)
>>  #define PMU_PMCR_N_SHIFT   11
>>  #define PMU_PMCR_N_MASK0x1f
>>  #define PMU_PMCR_ID_SHIFT  16
>> @@ -21,6 +22,10 @@
>>  #define PMU_PMCR_IMP_SHIFT 24
>>  #define PMU_PMCR_IMP_MASK  0xff
>>  
>> +#define PMU_CYCLE_IDX  31
>> +
>> +#define NR_SAMPLES 10
>> +
>>  #if defined(__arm__)
>>  static inline uint32_t pmcr_read(void)
>>  {
>> @@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
>>  asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
>>  return ret;
>>  }
>> +
>> +static inline void pmcr_write(uint32_t value)
>> +{
>> +asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
>> +}
>> +
>> +static inline void pmselr_write(uint32_t value)
>> +{
>> +asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
>> +}
>> +
>> +static inline void pmxevtyper_write(uint32_t value)
>> +{
>> +asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
>> +}
>> +
>> +/*
>> + * While PMCCNTR can be accessed as a 64 bit coprocessor register, 
>> returning 64
>> + * bits doesn't seem worth the trouble when differential usage of the 
>> result is
>> + * expected (with differences that can easily fit in 32 bits). So just 
>> return
>> + * the lower 32 bits of the cycle count in AArch32.
> 
> Like I said in the last review, I'd rather we not do this. We should
> return the full value and then the test case should confirm the upper
> 32 bits are zero.
> 

Unless I miss something in ARM documentation, ARMv7 PMCCNTR is a 32-bit
register. We can force it to a more coarse-grained cycle counter with
PMCR.D bit=1 (see below). But it is still not a 64-bit register. ARMv8
PMCCNTR_EL0 is a 64-bit register.

"The PMCR.D bit configures whether PMCCNTR increments once every clock
cycle, or once every 64 clock cycles. "

So I think the comment above in the code is an overstatement, which
should be deleted or moved down to ARMv8 pmccntr_read() below.

>> + */
>> +static inline uint32_t pmccntr_read(void)
>> +{
>> +uint32_t cycles;
>> +
>> +asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles));
>> +return cycles;
>> +}
>> +
>> +static inline void pmcntenset_write(uint32_t value)
>> +{
>> +asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value));
>> +}
>> +
>> +/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
>> +static inline void pmccfiltr_write(uint32_t value)
>> +{
>> +pmselr_write(PMU_CYCLE_IDX);
>> +pmxevtyper_write(value);
>> +}
>>  #elif defined(__aarch64__)
>>  static inline uint32_t pmcr_read(void)
>>  {
>> @@ -37,6 +83,29 @@ static inline uint32_t pmcr_read(void)
>>  asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
>>  return ret;
>>  }
>> +
>> +static inline void pmcr_write(uint32_t value)
>> +{
>> +asm volatile("msr pmcr_el0, %0" : : "r" (value));
>> +}
>> +
>> +static inline uint32_t pmccntr_read(void)
>> +{
>> +uint32_t cycles;
>> +
>> +asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
>> +return cycles;
>> +}
>> +
>> +static inline void pmcntenset_write(uint32_t value)
>> +{
>> +asm volatile("msr pmcntenset_el0, %0" : : "r" (value));
>> +}
>> +
>> +static inline void pmccfiltr_write(uint32_t value)
>> +{
>> +asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
>> +}
>>  #endif
>>  
>>  /*
>> @@ -63,11 +132,40 @@ static bool check_pmcr(void)
>>  return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
>>  }
>>  
>> +/*
>> + * Ensure that the cycle counter progresses between back-to-back reads.
>> + */
>> +static bool check_cycles_increase(void)
>> +{
>> +pmcr_write(pmcr_read() | PMU_PMCR_E);
>> +
>> +for (int i = 0; i < NR_SAMPLES; i++) {
>> +unsigned long a, b;
>> +
>> +a = pmccntr_read();
>> +b = pmccntr_read();
>> +
>> +if (a >= b) {
>> +printf("Read %ld then %ld.\n", a, b);
>> +return false;
>> +}
>> +}
>> +
>> +pmcr_write(pmcr_read() & ~PMU_PMCR_E);
>> +
>> +return true;
>> +}
>> +
>>  int main(void)
>>  {
>>  report_prefix_push("pmu");
>>  
>> +/* init for PMU event access, right now only care about cycle count */
>> +pmcntenset_write(1 << PMU_CYCLE_IDX);
>> +pmccfiltr_write(0); /* count cycles in EL0, EL1, but not EL2 

Re: [Qemu-devel] [kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-10 Thread Andrew Jones
On Tue, Nov 08, 2016 at 12:17:14PM -0600, Wei Huang wrote:
> From: Christopher Covington 
> 
> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
> even for the smallest delta of two subsequent reads.
> 
> Signed-off-by: Christopher Covington 
> Signed-off-by: Wei Huang 
> ---
>  arm/pmu.c | 98 
> +++
>  1 file changed, 98 insertions(+)
> 
> diff --git a/arm/pmu.c b/arm/pmu.c
> index 0b29088..d5e3ac3 100644
> --- a/arm/pmu.c
> +++ b/arm/pmu.c
> @@ -14,6 +14,7 @@
>   */
>  #include "libcflat.h"
>  
> +#define PMU_PMCR_E (1 << 0)
>  #define PMU_PMCR_N_SHIFT   11
>  #define PMU_PMCR_N_MASK0x1f
>  #define PMU_PMCR_ID_SHIFT  16
> @@ -21,6 +22,10 @@
>  #define PMU_PMCR_IMP_SHIFT 24
>  #define PMU_PMCR_IMP_MASK  0xff
>  
> +#define PMU_CYCLE_IDX  31
> +
> +#define NR_SAMPLES 10
> +
>  #if defined(__arm__)
>  static inline uint32_t pmcr_read(void)
>  {
> @@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
>   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
>   return ret;
>  }
> +
> +static inline void pmcr_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
> +}
> +
> +static inline void pmselr_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
> +}
> +
> +static inline void pmxevtyper_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
> +}
> +
> +/*
> + * While PMCCNTR can be accessed as a 64 bit coprocessor register, returning 
> 64
> + * bits doesn't seem worth the trouble when differential usage of the result 
> is
> + * expected (with differences that can easily fit in 32 bits). So just return
> + * the lower 32 bits of the cycle count in AArch32.

Like I said in the last review, I'd rather we not do this. We should
return the full value and then the test case should confirm the upper
32 bits are zero.

> + */
> +static inline uint32_t pmccntr_read(void)
> +{
> + uint32_t cycles;
> +
> + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles));
> + return cycles;
> +}
> +
> +static inline void pmcntenset_write(uint32_t value)
> +{
> + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value));
> +}
> +
> +/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
> +static inline void pmccfiltr_write(uint32_t value)
> +{
> + pmselr_write(PMU_CYCLE_IDX);
> + pmxevtyper_write(value);
> +}
>  #elif defined(__aarch64__)
>  static inline uint32_t pmcr_read(void)
>  {
> @@ -37,6 +83,29 @@ static inline uint32_t pmcr_read(void)
>   asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
>   return ret;
>  }
> +
> +static inline void pmcr_write(uint32_t value)
> +{
> + asm volatile("msr pmcr_el0, %0" : : "r" (value));
> +}
> +
> +static inline uint32_t pmccntr_read(void)
> +{
> + uint32_t cycles;
> +
> + asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
> + return cycles;
> +}
> +
> +static inline void pmcntenset_write(uint32_t value)
> +{
> + asm volatile("msr pmcntenset_el0, %0" : : "r" (value));
> +}
> +
> +static inline void pmccfiltr_write(uint32_t value)
> +{
> + asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
> +}
>  #endif
>  
>  /*
> @@ -63,11 +132,40 @@ static bool check_pmcr(void)
>   return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
>  }
>  
> +/*
> + * Ensure that the cycle counter progresses between back-to-back reads.
> + */
> +static bool check_cycles_increase(void)
> +{
> + pmcr_write(pmcr_read() | PMU_PMCR_E);
> +
> + for (int i = 0; i < NR_SAMPLES; i++) {
> + unsigned long a, b;
> +
> + a = pmccntr_read();
> + b = pmccntr_read();
> +
> + if (a >= b) {
> + printf("Read %ld then %ld.\n", a, b);
> + return false;
> + }
> + }
> +
> + pmcr_write(pmcr_read() & ~PMU_PMCR_E);
> +
> + return true;
> +}
> +
>  int main(void)
>  {
>   report_prefix_push("pmu");
>  
> + /* init for PMU event access, right now only care about cycle count */
> + pmcntenset_write(1 << PMU_CYCLE_IDX);
> + pmccfiltr_write(0); /* count cycles in EL0, EL1, but not EL2 */
> +
>   report("Control register", check_pmcr());
> + report("Monotonically increasing cycle count", check_cycles_increase());
>  
>   return report_summary();
>  }
> -- 
> 1.8.3.1

Besides needing to use u64's for registers that return u64's, it
looks good to me.

drew
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v8 2/3] arm: pmu: Check cycle count increases

2016-11-08 Thread Wei Huang
From: Christopher Covington 

Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
even for the smallest delta of two subsequent reads.

Signed-off-by: Christopher Covington 
Signed-off-by: Wei Huang 
---
 arm/pmu.c | 98 +++
 1 file changed, 98 insertions(+)

diff --git a/arm/pmu.c b/arm/pmu.c
index 0b29088..d5e3ac3 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -14,6 +14,7 @@
  */
 #include "libcflat.h"
 
+#define PMU_PMCR_E (1 << 0)
 #define PMU_PMCR_N_SHIFT   11
 #define PMU_PMCR_N_MASK0x1f
 #define PMU_PMCR_ID_SHIFT  16
@@ -21,6 +22,10 @@
 #define PMU_PMCR_IMP_SHIFT 24
 #define PMU_PMCR_IMP_MASK  0xff
 
+#define PMU_CYCLE_IDX  31
+
+#define NR_SAMPLES 10
+
 #if defined(__arm__)
 static inline uint32_t pmcr_read(void)
 {
@@ -29,6 +34,47 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
+}
+
+static inline void pmselr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
+}
+
+static inline void pmxevtyper_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
+}
+
+/*
+ * While PMCCNTR can be accessed as a 64 bit coprocessor register, returning 64
+ * bits doesn't seem worth the trouble when differential usage of the result is
+ * expected (with differences that can easily fit in 32 bits). So just return
+ * the lower 32 bits of the cycle count in AArch32.
+ */
+static inline uint32_t pmccntr_read(void)
+{
+   uint32_t cycles;
+
+   asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles));
+   return cycles;
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value));
+}
+
+/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
+static inline void pmccfiltr_write(uint32_t value)
+{
+   pmselr_write(PMU_CYCLE_IDX);
+   pmxevtyper_write(value);
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -37,6 +83,29 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("msr pmcr_el0, %0" : : "r" (value));
+}
+
+static inline uint32_t pmccntr_read(void)
+{
+   uint32_t cycles;
+
+   asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
+   return cycles;
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("msr pmcntenset_el0, %0" : : "r" (value));
+}
+
+static inline void pmccfiltr_write(uint32_t value)
+{
+   asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
+}
 #endif
 
 /*
@@ -63,11 +132,40 @@ static bool check_pmcr(void)
return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
 }
 
+/*
+ * Ensure that the cycle counter progresses between back-to-back reads.
+ */
+static bool check_cycles_increase(void)
+{
+   pmcr_write(pmcr_read() | PMU_PMCR_E);
+
+   for (int i = 0; i < NR_SAMPLES; i++) {
+   unsigned long a, b;
+
+   a = pmccntr_read();
+   b = pmccntr_read();
+
+   if (a >= b) {
+   printf("Read %ld then %ld.\n", a, b);
+   return false;
+   }
+   }
+
+   pmcr_write(pmcr_read() & ~PMU_PMCR_E);
+
+   return true;
+}
+
 int main(void)
 {
report_prefix_push("pmu");
 
+   /* init for PMU event access, right now only care about cycle count */
+   pmcntenset_write(1 << PMU_CYCLE_IDX);
+   pmccfiltr_write(0); /* count cycles in EL0, EL1, but not EL2 */
+
report("Control register", check_pmcr());
+   report("Monotonically increasing cycle count", check_cycles_increase());
 
return report_summary();
 }
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm