Re: [Qemu-devel] [kvm-unit-tests PATCH v8 3/3] arm: pmu: Add CPI checking

2016-11-11 Thread Wei Huang


On 11/11/2016 02:08 AM, Andrew Jones wrote:
> On Tue, Nov 08, 2016 at 12:17:15PM -0600, Wei Huang wrote:
>> From: Christopher Covington 
>>
>> Calculate the numbers of cycles per instruction (CPI) implied by ARM
>> PMU cycle counter values. The code includes a strict checking facility
>> intended for the -icount option in TCG mode in the configuration file.
>>
>> Signed-off-by: Christopher Covington 
>> Signed-off-by: Wei Huang 
>> ---
>>  arm/pmu.c | 101 
>> +-
>>  arm/unittests.cfg |  14 
>>  2 files changed, 114 insertions(+), 1 deletion(-)
>>
>> diff --git a/arm/pmu.c b/arm/pmu.c
>> index d5e3ac3..09aff89 100644
>> --- a/arm/pmu.c
>> +++ b/arm/pmu.c
>> @@ -15,6 +15,7 @@
>>  #include "libcflat.h"
>>  
>>  #define PMU_PMCR_E (1 << 0)
>> +#define PMU_PMCR_C (1 << 2)
>>  #define PMU_PMCR_N_SHIFT   11
>>  #define PMU_PMCR_N_MASK0x1f
>>  #define PMU_PMCR_ID_SHIFT  16
>> @@ -75,6 +76,23 @@ static inline void pmccfiltr_write(uint32_t value)
>>  pmselr_write(PMU_CYCLE_IDX);
>>  pmxevtyper_write(value);
>>  }
>> +
>> +/*
>> + * Extra instructions inserted by the compiler would be difficult to 
>> compensate
>> + * for, so hand assemble everything between, and including, the PMCR 
>> accesses
>> + * to start and stop counting.
>> + */
>> +static inline void loop(int i, uint32_t pmcr)
> 
> We should probably pick a more descriptive name for this function, as
> we intend to add many more PMU tests to this file. While at it, let's
> change 'i' to 'n', as it's the number of times to loop.

I will rename it to fixed_num_loop(). When more tests are added to it,
we can standardize the name, e.g. *_test().

> 
>> +{
>> +asm volatile(
>> +"   mcr p15, 0, %[pmcr], c9, c12, 0\n"
>> +"1: subs%[i], %[i], #1\n"
>> +"   bgt 1b\n"
>> +"   mcr p15, 0, %[z], c9, c12, 0\n"
>> +: [i] "+r" (i)
>> +: [pmcr] "r" (pmcr), [z] "r" (0)
>> +: "cc");
>> +}
>>  #elif defined(__aarch64__)
>>  static inline uint32_t pmcr_read(void)
>>  {
>> @@ -106,6 +124,23 @@ static inline void pmccfiltr_write(uint32_t value)
>>  {
>>  asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
>>  }
>> +
>> +/*
>> + * Extra instructions inserted by the compiler would be difficult to 
>> compensate
>> + * for, so hand assemble everything between, and including, the PMCR 
>> accesses
>> + * to start and stop counting.
>> + */
>> +static inline void loop(int i, uint32_t pmcr)
>> +{
>> +asm volatile(
>> +"   msr pmcr_el0, %[pmcr]\n"
>> +"1: subs%[i], %[i], #1\n"
>> +"   b.gt1b\n"
>> +"   msr pmcr_el0, xzr\n"
>> +: [i] "+r" (i)
>> +: [pmcr] "r" (pmcr)
>> +: "cc");
>> +}
>>  #endif
>>  
>>  /*
>> @@ -156,8 +191,71 @@ static bool check_cycles_increase(void)
>>  return true;
>>  }
>>  
>> -int main(void)
>> +/*
>> + * Execute a known number of guest instructions. Only odd instruction counts
>> + * greater than or equal to 3 are supported by the in-line assembly code. 
>> The
>> + * control register (PMCR_EL0) is initialized with the provided value 
>> (allowing
>> + * for example for the cycle counter or event counters to be reset). At the 
>> end
>> + * of the exact instruction loop, zero is written to PMCR_EL0 to disable
>> + * counting, allowing the cycle counter or event counters to be read at the
>> + * leisure of the calling code.
>> + */
>> +static void measure_instrs(int num, uint32_t pmcr)
>> +{
>> +int i = (num - 1) / 2;
>> +
>> +assert(num >= 3 && ((num - 1) % 2 == 0));
>> +loop(i, pmcr);
>> +}
>> +
>> +/*
>> + * Measure cycle counts for various known instruction counts. Ensure that 
>> the
>> + * cycle counter progresses (similar to check_cycles_increase() but with 
>> more
>> + * instructions and using reset and stop controls). If supplied a positive,
>> + * nonzero CPI parameter, also strictly check that every measurement matches
>> + * it. Strict CPI checking is used to test -icount mode.
>> + */
>> +static bool check_cpi(int cpi)
>> +{
>> +uint32_t pmcr = pmcr_read() | PMU_PMCR_C | PMU_PMCR_E;
>> +
>> +if (cpi > 0)
>> +printf("Checking for CPI=%d.\n", cpi);
>> +printf("instrs : cycles0 cycles1 ...\n");
>> +
>> +for (int i = 3; i < 300; i += 32) {
>> +int avg, sum = 0;
>> +
>> +printf("%d :", i);
>> +for (int j = 0; j < NR_SAMPLES; j++) {
>> +int cycles;
>> +
>> +measure_instrs(i, pmcr);
>> +cycles =pmccntr_read();
> ^ missing space
> 
>> +printf(" %d", cycles);
>> +
>> +if (!cycles || (cpi > 0 && cycles != i * cpi)) {
>> +printf("\n");
>> +return false;
>> +}
>> +
>> + 

Re: [Qemu-devel] [kvm-unit-tests PATCH v8 3/3] arm: pmu: Add CPI checking

2016-11-11 Thread Andrew Jones
On Tue, Nov 08, 2016 at 12:17:15PM -0600, Wei Huang wrote:
> From: Christopher Covington 
> 
> Calculate the numbers of cycles per instruction (CPI) implied by ARM
> PMU cycle counter values. The code includes a strict checking facility
> intended for the -icount option in TCG mode in the configuration file.
> 
> Signed-off-by: Christopher Covington 
> Signed-off-by: Wei Huang 
> ---
>  arm/pmu.c | 101 
> +-
>  arm/unittests.cfg |  14 
>  2 files changed, 114 insertions(+), 1 deletion(-)
> 
> diff --git a/arm/pmu.c b/arm/pmu.c
> index d5e3ac3..09aff89 100644
> --- a/arm/pmu.c
> +++ b/arm/pmu.c
> @@ -15,6 +15,7 @@
>  #include "libcflat.h"
>  
>  #define PMU_PMCR_E (1 << 0)
> +#define PMU_PMCR_C (1 << 2)
>  #define PMU_PMCR_N_SHIFT   11
>  #define PMU_PMCR_N_MASK0x1f
>  #define PMU_PMCR_ID_SHIFT  16
> @@ -75,6 +76,23 @@ static inline void pmccfiltr_write(uint32_t value)
>   pmselr_write(PMU_CYCLE_IDX);
>   pmxevtyper_write(value);
>  }
> +
> +/*
> + * Extra instructions inserted by the compiler would be difficult to 
> compensate
> + * for, so hand assemble everything between, and including, the PMCR accesses
> + * to start and stop counting.
> + */
> +static inline void loop(int i, uint32_t pmcr)

We should probably pick a more descriptive name for this function, as
we intend to add many more PMU tests to this file. While at it, let's
change 'i' to 'n', as it's the number of times to loop.

> +{
> + asm volatile(
> + "   mcr p15, 0, %[pmcr], c9, c12, 0\n"
> + "1: subs%[i], %[i], #1\n"
> + "   bgt 1b\n"
> + "   mcr p15, 0, %[z], c9, c12, 0\n"
> + : [i] "+r" (i)
> + : [pmcr] "r" (pmcr), [z] "r" (0)
> + : "cc");
> +}
>  #elif defined(__aarch64__)
>  static inline uint32_t pmcr_read(void)
>  {
> @@ -106,6 +124,23 @@ static inline void pmccfiltr_write(uint32_t value)
>  {
>   asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
>  }
> +
> +/*
> + * Extra instructions inserted by the compiler would be difficult to 
> compensate
> + * for, so hand assemble everything between, and including, the PMCR accesses
> + * to start and stop counting.
> + */
> +static inline void loop(int i, uint32_t pmcr)
> +{
> + asm volatile(
> + "   msr pmcr_el0, %[pmcr]\n"
> + "1: subs%[i], %[i], #1\n"
> + "   b.gt1b\n"
> + "   msr pmcr_el0, xzr\n"
> + : [i] "+r" (i)
> + : [pmcr] "r" (pmcr)
> + : "cc");
> +}
>  #endif
>  
>  /*
> @@ -156,8 +191,71 @@ static bool check_cycles_increase(void)
>   return true;
>  }
>  
> -int main(void)
> +/*
> + * Execute a known number of guest instructions. Only odd instruction counts
> + * greater than or equal to 3 are supported by the in-line assembly code. The
> + * control register (PMCR_EL0) is initialized with the provided value 
> (allowing
> + * for example for the cycle counter or event counters to be reset). At the 
> end
> + * of the exact instruction loop, zero is written to PMCR_EL0 to disable
> + * counting, allowing the cycle counter or event counters to be read at the
> + * leisure of the calling code.
> + */
> +static void measure_instrs(int num, uint32_t pmcr)
> +{
> + int i = (num - 1) / 2;
> +
> + assert(num >= 3 && ((num - 1) % 2 == 0));
> + loop(i, pmcr);
> +}
> +
> +/*
> + * Measure cycle counts for various known instruction counts. Ensure that the
> + * cycle counter progresses (similar to check_cycles_increase() but with more
> + * instructions and using reset and stop controls). If supplied a positive,
> + * nonzero CPI parameter, also strictly check that every measurement matches
> + * it. Strict CPI checking is used to test -icount mode.
> + */
> +static bool check_cpi(int cpi)
> +{
> + uint32_t pmcr = pmcr_read() | PMU_PMCR_C | PMU_PMCR_E;
> + 
> + if (cpi > 0)
> + printf("Checking for CPI=%d.\n", cpi);
> + printf("instrs : cycles0 cycles1 ...\n");
> +
> + for (int i = 3; i < 300; i += 32) {
> + int avg, sum = 0;
> +
> + printf("%d :", i);
> + for (int j = 0; j < NR_SAMPLES; j++) {
> + int cycles;
> +
> + measure_instrs(i, pmcr);
> + cycles =pmccntr_read();
^ missing space

> + printf(" %d", cycles);
> +
> + if (!cycles || (cpi > 0 && cycles != i * cpi)) {
> + printf("\n");
> + return false;
> + }
> +
> + sum += cycles;
> + }
> + avg = sum / NR_SAMPLES;
> + printf(" sum=%d avg=%d avg_ipc=%d avg_cpi=%d\n",
> + sum, avg, i / avg, avg / i);
> + }
> +
> + return true;
> +}
> +
> +int main(int argc, char *argv[])
>  {
> + 

[kvm-unit-tests PATCH v8 3/3] arm: pmu: Add CPI checking

2016-11-08 Thread Wei Huang
From: Christopher Covington 

Calculate the numbers of cycles per instruction (CPI) implied by ARM
PMU cycle counter values. The code includes a strict checking facility
intended for the -icount option in TCG mode in the configuration file.

Signed-off-by: Christopher Covington 
Signed-off-by: Wei Huang 
---
 arm/pmu.c | 101 +-
 arm/unittests.cfg |  14 
 2 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/arm/pmu.c b/arm/pmu.c
index d5e3ac3..09aff89 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -15,6 +15,7 @@
 #include "libcflat.h"
 
 #define PMU_PMCR_E (1 << 0)
+#define PMU_PMCR_C (1 << 2)
 #define PMU_PMCR_N_SHIFT   11
 #define PMU_PMCR_N_MASK0x1f
 #define PMU_PMCR_ID_SHIFT  16
@@ -75,6 +76,23 @@ static inline void pmccfiltr_write(uint32_t value)
pmselr_write(PMU_CYCLE_IDX);
pmxevtyper_write(value);
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting.
+ */
+static inline void loop(int i, uint32_t pmcr)
+{
+   asm volatile(
+   "   mcr p15, 0, %[pmcr], c9, c12, 0\n"
+   "1: subs%[i], %[i], #1\n"
+   "   bgt 1b\n"
+   "   mcr p15, 0, %[z], c9, c12, 0\n"
+   : [i] "+r" (i)
+   : [pmcr] "r" (pmcr), [z] "r" (0)
+   : "cc");
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -106,6 +124,23 @@ static inline void pmccfiltr_write(uint32_t value)
 {
asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting.
+ */
+static inline void loop(int i, uint32_t pmcr)
+{
+   asm volatile(
+   "   msr pmcr_el0, %[pmcr]\n"
+   "1: subs%[i], %[i], #1\n"
+   "   b.gt1b\n"
+   "   msr pmcr_el0, xzr\n"
+   : [i] "+r" (i)
+   : [pmcr] "r" (pmcr)
+   : "cc");
+}
 #endif
 
 /*
@@ -156,8 +191,71 @@ static bool check_cycles_increase(void)
return true;
 }
 
-int main(void)
+/*
+ * Execute a known number of guest instructions. Only odd instruction counts
+ * greater than or equal to 3 are supported by the in-line assembly code. The
+ * control register (PMCR_EL0) is initialized with the provided value (allowing
+ * for example for the cycle counter or event counters to be reset). At the end
+ * of the exact instruction loop, zero is written to PMCR_EL0 to disable
+ * counting, allowing the cycle counter or event counters to be read at the
+ * leisure of the calling code.
+ */
+static void measure_instrs(int num, uint32_t pmcr)
+{
+   int i = (num - 1) / 2;
+
+   assert(num >= 3 && ((num - 1) % 2 == 0));
+   loop(i, pmcr);
+}
+
+/*
+ * Measure cycle counts for various known instruction counts. Ensure that the
+ * cycle counter progresses (similar to check_cycles_increase() but with more
+ * instructions and using reset and stop controls). If supplied a positive,
+ * nonzero CPI parameter, also strictly check that every measurement matches
+ * it. Strict CPI checking is used to test -icount mode.
+ */
+static bool check_cpi(int cpi)
+{
+   uint32_t pmcr = pmcr_read() | PMU_PMCR_C | PMU_PMCR_E;
+   
+   if (cpi > 0)
+   printf("Checking for CPI=%d.\n", cpi);
+   printf("instrs : cycles0 cycles1 ...\n");
+
+   for (int i = 3; i < 300; i += 32) {
+   int avg, sum = 0;
+
+   printf("%d :", i);
+   for (int j = 0; j < NR_SAMPLES; j++) {
+   int cycles;
+
+   measure_instrs(i, pmcr);
+   cycles =pmccntr_read();
+   printf(" %d", cycles);
+
+   if (!cycles || (cpi > 0 && cycles != i * cpi)) {
+   printf("\n");
+   return false;
+   }
+
+   sum += cycles;
+   }
+   avg = sum / NR_SAMPLES;
+   printf(" sum=%d avg=%d avg_ipc=%d avg_cpi=%d\n",
+   sum, avg, i / avg, avg / i);
+   }
+
+   return true;
+}
+
+int main(int argc, char *argv[])
 {
+   int cpi = 0;
+
+   if (argc >= 1)
+   cpi = atol(argv[0]);
+
report_prefix_push("pmu");
 
/* init for PMU event access, right now only care about cycle count */
@@ -166,6 +264,7 @@ int main(void)
 
report("Control register", check_pmcr());
report("Monotonically increasing cycle count", check_cycles_increase());
+   report("Cycle/instruction ratio", check_cpi(cpi));
 
return report_summary();
 }
diff --git a/arm/unittests.cfg