Hi Jerin, Thanks for the comments. > -----Original Message----- > From: Jerin Jacob <jerinjac...@gmail.com> > Sent: Wednesday, June 24, 2020 7:51 AM > To: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> > Cc: dpdk-dev <dev@dpdk.org>; jer...@marvell.com; > hemant.agra...@nxp.com; akhil.go...@nxp.com; ogerl...@mellanox.com; > Ajit Khaparde (ajit.khapa...@broadcom.com) > <ajit.khapa...@broadcom.com>; ruigeng.w...@arm.com; Dharmik Thakkar > <dharmik.thak...@arm.com>; Phil Yang <phil.y...@arm.com>; dpdk stable > <sta...@dpdk.org> > Subject: Re: [dpdk-dev] [PATCH] eal: generic counter based loop for CPU freq > calculation > > On Tue, Jun 9, 2020 at 3:04 AM Honnappa Nagarahalli > <honnappa.nagaraha...@arm.com> wrote: > > > > get_tsc_freq uses 'nanosleep' system call to calculate the CPU > > frequency. However, 'nanosleep' results in the process getting > > un-scheduled. The kernel saves and restores the PMU state. This > > ensures that the PMU cycles are not counted towards a sleeping > > process. When RTE_ARM_EAL_RDTSC_USE_PMU is defined, this results in > > incorrect CPU frequency calculation. This logic is replaced with > > generic counter based loop. > > > > Bugzilla ID: 450 > > Fixes: af75078fece3 ("first public release") > > The Fix looks good to me. > > The Fixes is not correct. It should be the patch where > RTE_ARM_EAL_RDTSC_USE_PMU got introduced. Ok, will dig that out.
> > > > Cc: sta...@dpdk.org > > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> > > Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com> > > Reviewed-by: Dharmik Thakkar <dharmik.thak...@arm.com> > > Reviewed-by: Phil Yang <phil.y...@arm.com> > > > > --- > > lib/librte_eal/arm/include/rte_cycles_64.h | 45 +++++++++++++++++++--- > > lib/librte_eal/arm/rte_cycles.c | 24 ++++++++++-- > > 2 files changed, 61 insertions(+), 8 deletions(-) > > > > diff --git a/lib/librte_eal/arm/include/rte_cycles_64.h > > b/lib/librte_eal/arm/include/rte_cycles_64.h > > index da557b6a1..6fc352036 100644 > > --- a/lib/librte_eal/arm/include/rte_cycles_64.h > > +++ b/lib/librte_eal/arm/include/rte_cycles_64.h > > @@ -11,6 +11,36 @@ extern "C" { > > > > #include "generic/rte_cycles.h" > > > > +/** Read generic counter frequency */ static inline uint64_t > > I prefer to have __rte_always_inline > > > +__rte_rd_generic_cntr_freq(void) > > I think the name "generic counter" is confusing. Since the symbol is > exposed > by being placed in a header file, it is better to rename it to __rte_arm64_cntfrq(). Ok, makes sense. > > > +{ > > + uint64_t freq; > > + > > + asm volatile("mrs %0, cntfrq_el0" : "=r" (freq)); > > + return freq; > > +} > > + > > +/** Read generic counter */ > > +static inline uint64_t > > Likewise, __rte_arm64_cntvct() > > > > +__rte_rd_generic_cntr(void) > > +{ > > + uint64_t tsc; > > + > > + asm volatile("mrs %0, cntvct_el0" : "=r" (tsc)); > > + return tsc; > > +} > > + > > +static inline uint64_t > > +__rte_rd_generic_cntr_precise(void) > > __rte_arm64_cntfrq_precise() > > > +{ > > + uint64_t tsc; > > + > > + asm volatile("isb" : : : "memory"); > > + asm volatile("mrs %0, cntvct_el0" : "=r" (tsc)); > > + return tsc; > > +} > > + > > /** > > * Read the time base register. 
> > * > > @@ -25,10 +55,7 @@ extern "C" { > > static inline uint64_t > > rte_rdtsc(void) > > { > > - uint64_t tsc; > > - > > - asm volatile("mrs %0, cntvct_el0" : "=r" (tsc)); > > - return tsc; > > + return __rte_rd_generic_cntr(); > > } > > #else > > /** > > @@ -49,14 +76,22 @@ rte_rdtsc(void) > > * asm volatile("msr pmcr_el0, %0" : : "r" (val)); > > * > > */ > > + > > +/** Read PMU cycle counter */ > > static inline uint64_t > > -rte_rdtsc(void) > > +__rte_rd_pmu_cycle_cntr(void) I will change this to __rte_arm64_pmccntr > > { > > uint64_t tsc; > > > > asm volatile("mrs %0, pmccntr_el0" : "=r"(tsc)); > > return tsc; > > } > > + > > +static inline uint64_t > > +rte_rdtsc(void) > > +{ > > + return __rte_rd_pmu_cycle_cntr(); } > > #endif > > > > static inline uint64_t > > diff --git a/lib/librte_eal/arm/rte_cycles.c > > b/lib/librte_eal/arm/rte_cycles.c index 3500d523e..92c87a8a4 100644 > > --- a/lib/librte_eal/arm/rte_cycles.c > > +++ b/lib/librte_eal/arm/rte_cycles.c > > @@ -3,14 +3,32 @@ > > */ > > > > #include "eal_private.h" > > +#include "rte_cycles.h" > > > > uint64_t > > get_tsc_freq_arch(void) > > { > > #if defined RTE_ARCH_ARM64 && !defined > RTE_ARM_EAL_RDTSC_USE_PMU > > - uint64_t freq; > > - asm volatile("mrs %0, cntfrq_el0" : "=r" (freq)); > > - return freq; > > + return __rte_rd_generic_cntr_freq(); #elif defined > > +RTE_ARCH_ARM64 && defined RTE_ARM_EAL_RDTSC_USE_PMU > > + /* Use the generic counter ticks to calculate the PMU > > + * cycle frequency. 
> > + */ > > + uint64_t gcnt_ticks; > > + uint64_t start_ticks, cur_ticks; > > + uint64_t start_pmu_cycles, end_pmu_cycles; > > + > > + /* Number of ticks for 1/10 second */ > > + gcnt_ticks = __rte_rd_generic_cntr_freq() / 10; > > + > > + start_ticks = __rte_rd_generic_cntr_precise(); > > + start_pmu_cycles = rte_rdtsc_precise(); > > + do { > > + cur_ticks = __rte_rd_generic_cntr(); > > + } while ((cur_ticks - start_ticks) < gcnt_ticks); > > + end_pmu_cycles = rte_rdtsc_precise(); > > + > > + return ((end_pmu_cycles - start_pmu_cycles) * 10); > > Good thought. On the plus side, it will reduce the boot time by .9 sec. > > > #else > > return 0; > > With above changes: > > Acked-by: Jerin Jacob <jer...@marvell.com> > > > > > #endif > > -- > > 2.17.1 > >