Re: [RFC 2/3] KVM: arm64: pmu: Fix chained SW_INCR counters

2020-01-20 Thread Auger Eric
Hi Marc,
On 1/19/20 6:58 PM, Marc Zyngier wrote:
> On Thu, 5 Dec 2019 20:01:42 +0100
> Auger Eric  wrote:
> 
> Hi Eric,
> 
>> Hi Marc,
>>
>> On 12/5/19 3:52 PM, Marc Zyngier wrote:
>>> On 2019-12-05 14:06, Auger Eric wrote:  
 Hi Marc,

 On 12/5/19 10:43 AM, Marc Zyngier wrote:  
> Hi Eric,
>
> On 2019-12-04 20:44, Eric Auger wrote:  
>> At the moment a SW_INCR counter always overflows on a 32-bit
>> boundary, independently of whether the n+1th counter is
>> programmed as CHAIN.
>>
>> Check whether the SW_INCR counter is a 64b counter and if so,
>> implement the 64b logic.
>>
>> Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters")
>> Signed-off-by: Eric Auger 
>> ---
>>  virt/kvm/arm/pmu.c | 16 +++-
>>  1 file changed, 15 insertions(+), 1 deletion(-)
>>
>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>> index c3f8b059881e..7ab477db2f75 100644
>> --- a/virt/kvm/arm/pmu.c
>> +++ b/virt/kvm/arm/pmu.c
>> @@ -491,6 +491,8 @@ void kvm_pmu_software_increment(struct kvm_vcpu
>> *vcpu, u64 val)
>>
>>  enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
>>  for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
>> +    bool chained = test_bit(i >> 1, vcpu->arch.pmu.chained);
>> +  
>
> I'd rather you use kvm_pmu_pmc_is_chained() rather than open-coding
> this. But see below:
>  
>>  if (!(val & BIT(i)))
>>  continue;
>>  type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
>> @@ -500,8 +502,20 @@ void kvm_pmu_software_increment(struct kvm_vcpu
>> *vcpu, u64 val)
>>  reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
>>  reg = lower_32_bits(reg);
>>  __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
>> -    if (!reg)
>> +    if (reg) /* no overflow */
>> +    continue;
>> +    if (chained) {
>> +    reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
>> +    reg = lower_32_bits(reg);
>> +    __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
>> +    if (reg)
>> +    continue;
>> +    /* mark an overflow on high counter */
>> +    __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
>> +    } else {
>> +    /* mark an overflow */
>>  __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
>> +    }
>>  }
>>  }
>>  }  
>
> I think the whole function is a bit of a mess, and could be better
> structured to treat 64bit counters as a first class citizen.
>
> I'm suggesting something along those lines, which tries to
> streamline things a bit and keep the flow uniform between the
> two word sizes. IMHO, it helps reasoning about it and gives
> scope to the ARMv8.5 full 64bit counters... It is of course
> completely untested.  

 Looks OK to me as well. One remark though, don't we need to test if the
 n+1th reg is enabled before incrementing it?  
>>>
>>> Hmmm. I'm not sure. I think we should make sure that we don't flag
>>> a counter as being chained if the odd counter is disabled, rather
>>> than checking it here. As long as the odd counter is not chained
>>> *and* enabled, we shouldn't touch it.
>>> Again, untested:
>>>
>>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>>> index cf371f643ade..47366817cd2a 100644
>>> --- a/virt/kvm/arm/pmu.c
>>> +++ b/virt/kvm/arm/pmu.c
>>> @@ -15,6 +15,7 @@
>>>  #include 
>>>
>>>  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64
>>> select_idx);
>>> +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64
>>> select_idx);
>>>
>>>  #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
>>>
>>> @@ -298,6 +299,7 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu
>>> *vcpu, u64 val)
>>>   * For high counters of chained events we must recreate the
>>>   * perf event with the long (64bit) attribute set.
>>>   */
>>> +    kvm_pmu_update_pmc_chained(vcpu, i);
>>>  if (kvm_pmu_pmc_is_chained(pmc) &&
>>>  kvm_pmu_idx_is_high_counter(i)) {
>>>  kvm_pmu_create_perf_event(vcpu, i);
>>> @@ -645,7 +647,8 @@ static void kvm_pmu_update_pmc_chained(struct
>>> kvm_vcpu *vcpu, u64 select_idx)
>>>  struct kvm_pmu *pmu = &vcpu->arch.pmu;
>>>  struct kvm_pmc *pmc = &pmu->pmc[select_idx];
>>>
>>> -    if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
>>> +    if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
>>> +    kvm_pmu_counter_is_enabled(vcpu, pmc->idx)) {  
>>
>> In create_perf_event(), has_chain_evtype() is used and a 64b sample
>> period would be chosen even if the counters are disjoined (since the odd
>> is disabled). We would need to use pmc_is_chained() instead.
>>
>> With 

Re: [RFC 2/3] KVM: arm64: pmu: Fix chained SW_INCR counters

2020-01-19 Thread Marc Zyngier
On Thu, 5 Dec 2019 20:01:42 +0100
Auger Eric  wrote:

Hi Eric,

> Hi Marc,
> 
> On 12/5/19 3:52 PM, Marc Zyngier wrote:
> > On 2019-12-05 14:06, Auger Eric wrote:  
> >> Hi Marc,
> >>
> >> On 12/5/19 10:43 AM, Marc Zyngier wrote:  
> >>> Hi Eric,
> >>>
> >>> On 2019-12-04 20:44, Eric Auger wrote:  
>  At the moment a SW_INCR counter always overflows on a 32-bit
>  boundary, independently of whether the n+1th counter is
>  programmed as CHAIN.
> 
>  Check whether the SW_INCR counter is a 64b counter and if so,
>  implement the 64b logic.
> 
>  Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters")
>  Signed-off-by: Eric Auger 
>  ---
>   virt/kvm/arm/pmu.c | 16 +++-
>   1 file changed, 15 insertions(+), 1 deletion(-)
> 
>  diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>  index c3f8b059881e..7ab477db2f75 100644
>  --- a/virt/kvm/arm/pmu.c
>  +++ b/virt/kvm/arm/pmu.c
>  @@ -491,6 +491,8 @@ void kvm_pmu_software_increment(struct kvm_vcpu
>  *vcpu, u64 val)
> 
>   enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
>   for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
>  +    bool chained = test_bit(i >> 1, vcpu->arch.pmu.chained);
>  +  
> >>>
> >>> I'd rather you use kvm_pmu_pmc_is_chained() rather than open-coding
> >>> this. But see below:
> >>>  
>   if (!(val & BIT(i)))
>   continue;
>   type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
>  @@ -500,8 +502,20 @@ void kvm_pmu_software_increment(struct kvm_vcpu
>  *vcpu, u64 val)
>   reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
>   reg = lower_32_bits(reg);
>   __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
>  -    if (!reg)
>  +    if (reg) /* no overflow */
>  +    continue;
>  +    if (chained) {
>  +    reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
>  +    reg = lower_32_bits(reg);
>  +    __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
>  +    if (reg)
>  +    continue;
>  +    /* mark an overflow on high counter */
>  +    __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
>  +    } else {
>  +    /* mark an overflow */
>   __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
>  +    }
>   }
>   }
>   }  
> >>>
> >>> I think the whole function is a bit of a mess, and could be better
> >>> structured to treat 64bit counters as a first class citizen.
> >>>
> >>> I'm suggesting something along those lines, which tries to
> >>> streamline things a bit and keep the flow uniform between the
> >>> two word sizes. IMHO, it helps reasoning about it and gives
> >>> scope to the ARMv8.5 full 64bit counters... It is of course
> >>> completely untested.  
> >>
> >> Looks OK to me as well. One remark though, don't we need to test if the
> >> n+1th reg is enabled before incrementing it?  
> > 
> > Hmmm. I'm not sure. I think we should make sure that we don't flag
> > a counter as being chained if the odd counter is disabled, rather
> > than checking it here. As long as the odd counter is not chained
> > *and* enabled, we shouldn't touch it.
> > Again, untested:
> > 
> > diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> > index cf371f643ade..47366817cd2a 100644
> > --- a/virt/kvm/arm/pmu.c
> > +++ b/virt/kvm/arm/pmu.c
> > @@ -15,6 +15,7 @@
> >  #include 
> > 
> >  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64
> > select_idx);
> > +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64
> > select_idx);
> > 
> >  #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
> > 
> > @@ -298,6 +299,7 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu
> > *vcpu, u64 val)
> >   * For high counters of chained events we must recreate the
> >   * perf event with the long (64bit) attribute set.
> >   */
> > +    kvm_pmu_update_pmc_chained(vcpu, i);
> >  if (kvm_pmu_pmc_is_chained(pmc) &&
> >  kvm_pmu_idx_is_high_counter(i)) {
> >  kvm_pmu_create_perf_event(vcpu, i);
> > @@ -645,7 +647,8 @@ static void kvm_pmu_update_pmc_chained(struct
> > kvm_vcpu *vcpu, u64 select_idx)
> >  struct kvm_pmu *pmu = &vcpu->arch.pmu;
> >  struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> > 
> > -    if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
> > +    if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
> > +    kvm_pmu_counter_is_enabled(vcpu, pmc->idx)) {  
> 
> In create_perf_event(), has_chain_evtype() is used and a 64b sample
> period would be chosen even if the counters are disjoined (since the odd
> is disabled). We would need to use pmc_is_chained() instead.
> 
> With perf_events, the check of whether the odd register is enabled is

Re: [RFC 2/3] KVM: arm64: pmu: Fix chained SW_INCR counters

2019-12-06 Thread Andrew Murray
On Fri, Dec 06, 2019 at 03:35:03PM +, Marc Zyngier wrote:
> On 2019-12-06 15:21, Andrew Murray wrote:
> > On Thu, Dec 05, 2019 at 02:52:26PM +, Marc Zyngier wrote:
> > > On 2019-12-05 14:06, Auger Eric wrote:
> > > > Hi Marc,


> > > > >
> > > > > I think the whole function is a bit of a mess, and could be better
> > > > > structured to treat 64bit counters as a first class citizen.
> > > > >
> > > > > I'm suggesting something along those lines, which tries to
> > > > > streamline things a bit and keep the flow uniform between the
> > > > > two word sizes. IMHO, it helps reasoning about it and gives
> > > > > scope to the ARMv8.5 full 64bit counters... It is of course
> > > > > completely untested.
> > > >
> > > > Looks OK to me as well. One remark though, don't we need to test if the
> > > > n+1th reg is enabled before incrementing it?
> > 
> > Indeed - we don't want to indicate an overflow on a disabled counter.
> > 
> > 
> > > 
> > > Hmmm. I'm not sure. I think we should make sure that we don't flag
> > > a counter as being chained if the odd counter is disabled, rather
> > > than checking it here. As long as the odd counter is not chained
> > > *and* enabled, we shouldn't touch it.
> > 
> > Does this mean that we don't care if the low counter is enabled or not
> > when deciding if the pair is chained?
> > 
> > I would find the code easier to follow if we had an explicit 'is the
> > high counter enabled here' check (at the point of deciding where to
> > put the overflow).
> 
> Sure. But the point is that we're spreading that kind of check all over
> the map, and that we don't have a way to even reason about the state of
> a 64bit counter. Doesn't it strike you as being mildly broken?
> 

Yup! To the point where I can't trust the function names and have to look
at what the code does...


> > > @@ -645,7 +647,8 @@ static void kvm_pmu_update_pmc_chained(struct
> > > kvm_vcpu
> > > *vcpu, u64 select_idx)
> > >   struct kvm_pmu *pmu = >arch.pmu;
> > >   struct kvm_pmc *pmc = >pmc[select_idx];
> > > 
> > > - if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
> > > + if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
> > > + kvm_pmu_counter_is_enabled(vcpu, pmc->idx)) {
> > 
> > I.e. here we don't care what the state of enablement is for the low
> > counter.
> > 
> > Also at present, this may break the following use-case
> > 
> >  - User creates and uses a pair of chained counters
> >  - User disables odd/high counter
> >  - User reads values of both counters
> >  - User rewrites CHAIN event to odd/high counter OR user re-enables
> > just the even/low counter
> >  - User reads value of both counters <- this may now differ from the
> > last read
> 
> Hey, I didn't say it was perfect ;-). But for sure we can't let the
> PMU bitrot more than it already has, and I'm not sure this is heading
> the right way.

I think we're aligned here. To me this code is becoming very fragile, difficult
for me to make sense of and is stretching the abstractions we've made. This is
why it is difficult to enhance it without breaking something. It's why I felt it
was safer to add 'an extra check' in the SWINCR than to risk touching something
I couldn't be confident was correct.


> 
> I'm certainly going to push back on new PMU features until we can properly
> reason about 64bit counters as a top-level entity (as opposed to a bunch
> of discrete counters).

Thanks,

Andrew Murray

> 
> Thanks,
> 
> M.
> -- 
> Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
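
The "reason about 64bit counters as a top-level entity" point above can be
pictured with a small self-contained model (plain C, hypothetical names and a
simplified userspace structure, not the KVM code): the pair is only read and
written as one 64-bit value when the odd (high) counter is both chained and
enabled, which is the gating Marc suggests and the check Andrew would like to
see made explicit.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the vCPU PMU state: 32-bit counter words plus
 * enable/chained masks. All names here are hypothetical, not the kernel's. */
struct pmu_model {
	uint32_t cnt[32];	/* per-counter low words (PMEVCNTR<n>) */
	uint32_t enable;	/* PMCNTENSET bits */
	uint32_t chained;	/* bit n/2 set => counters (n, n+1) are paired */
};

static bool pair_is_chained(const struct pmu_model *p, int i)
{
	/* only treat the pair as 64-bit if the odd (high) counter is enabled */
	return (p->chained & (1u << (i / 2))) && (p->enable & (1u << (i | 1)));
}

/* Read the pair starting at even index i as a single 64-bit value. */
static uint64_t pair_read(const struct pmu_model *p, int i)
{
	uint64_t v = p->cnt[i];

	if (pair_is_chained(p, i))
		v |= (uint64_t)p->cnt[i + 1] << 32;
	return v;
}

/* Write the pair back, splitting the value into low/high words. */
static void pair_write(struct pmu_model *p, int i, uint64_t v)
{
	p->cnt[i] = (uint32_t)v;
	if (pair_is_chained(p, i))
		p->cnt[i + 1] = (uint32_t)(v >> 32);
}

int main(void)
{
	struct pmu_model p = { .enable = 0x3, .chained = 0x1 };

	pair_write(&p, 0, 0xffffffffULL);
	pair_write(&p, 0, pair_read(&p, 0) + 1);	/* cross the 32-bit boundary */
	printf("low=%" PRIu32 " high=%" PRIu32 "\n", p.cnt[0], p.cnt[1]);	/* low=0 high=1 */
	return 0;
}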


Re: [RFC 2/3] KVM: arm64: pmu: Fix chained SW_INCR counters

2019-12-06 Thread Andrew Murray
On Thu, Dec 05, 2019 at 08:01:42PM +0100, Auger Eric wrote:
> Hi Marc,
> 
> On 12/5/19 3:52 PM, Marc Zyngier wrote:
> > On 2019-12-05 14:06, Auger Eric wrote:
> >> Hi Marc,
> >>
> >> On 12/5/19 10:43 AM, Marc Zyngier wrote:
> >>> Hi Eric,
> >>>
> >>> On 2019-12-04 20:44, Eric Auger wrote:
>  At the moment a SW_INCR counter always overflows on a 32-bit
>  boundary, independently of whether the n+1th counter is
>  programmed as CHAIN.
> 
>  Check whether the SW_INCR counter is a 64b counter and if so,
>  implement the 64b logic.
> 
>  Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters")
>  Signed-off-by: Eric Auger 
>  ---
>   virt/kvm/arm/pmu.c | 16 +++-
>   1 file changed, 15 insertions(+), 1 deletion(-)
> 
>  diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>  index c3f8b059881e..7ab477db2f75 100644
>  --- a/virt/kvm/arm/pmu.c
>  +++ b/virt/kvm/arm/pmu.c
>  @@ -491,6 +491,8 @@ void kvm_pmu_software_increment(struct kvm_vcpu
>  *vcpu, u64 val)
> 
>   enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
>   for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
>  +    bool chained = test_bit(i >> 1, vcpu->arch.pmu.chained);
>  +
> >>>
> >>> I'd rather you use kvm_pmu_pmc_is_chained() rather than open-coding
> >>> this. But see below:
> >>>
>   if (!(val & BIT(i)))
>   continue;
>   type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
>  @@ -500,8 +502,20 @@ void kvm_pmu_software_increment(struct kvm_vcpu
>  *vcpu, u64 val)
>   reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
>   reg = lower_32_bits(reg);
>   __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
>  -    if (!reg)
>  +    if (reg) /* no overflow */
>  +    continue;
>  +    if (chained) {
>  +    reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
>  +    reg = lower_32_bits(reg);
>  +    __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
>  +    if (reg)
>  +    continue;
>  +    /* mark an overflow on high counter */
>  +    __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
>  +    } else {
>  +    /* mark an overflow */
>   __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
>  +    }
>   }
>   }
>   }
> >>>
> >>> I think the whole function is a bit of a mess, and could be better
> >>> structured to treat 64bit counters as a first class citizen.
> >>>
> >>> I'm suggesting something along those lines, which tries to
> >>> streamline things a bit and keep the flow uniform between the
> >>> two word sizes. IMHO, it helps reasoning about it and gives
> >>> scope to the ARMv8.5 full 64bit counters... It is of course
> >>> completely untested.
> >>
> >> Looks OK to me as well. One remark though, don't we need to test if the
> >> n+1th reg is enabled before incrementing it?
> > 
> > Hmmm. I'm not sure. I think we should make sure that we don't flag
> > a counter as being chained if the odd counter is disabled, rather
> > than checking it here. As long as the odd counter is not chained
> > *and* enabled, we shouldn't touch it.
> > Again, untested:
> > 
> > diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> > index cf371f643ade..47366817cd2a 100644
> > --- a/virt/kvm/arm/pmu.c
> > +++ b/virt/kvm/arm/pmu.c
> > @@ -15,6 +15,7 @@
> >  #include 
> > 
> >  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64
> > select_idx);
> > +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64
> > select_idx);
> > 
> >  #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
> > 
> > @@ -298,6 +299,7 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu
> > *vcpu, u64 val)
> >   * For high counters of chained events we must recreate the
> >   * perf event with the long (64bit) attribute set.
> >   */
> > +    kvm_pmu_update_pmc_chained(vcpu, i);
> >  if (kvm_pmu_pmc_is_chained(pmc) &&
> >  kvm_pmu_idx_is_high_counter(i)) {
> >  kvm_pmu_create_perf_event(vcpu, i);
> > @@ -645,7 +647,8 @@ static void kvm_pmu_update_pmc_chained(struct
> > kvm_vcpu *vcpu, u64 select_idx)
> >  struct kvm_pmu *pmu = &vcpu->arch.pmu;
> >  struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> > 
> > -    if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
> > +    if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
> > +    kvm_pmu_counter_is_enabled(vcpu, pmc->idx)) {
> 
> In create_perf_event(), has_chain_evtype() is used and a 64b sample
> period would be chosen even if the counters are disjoined (since the odd
> is disabled). We would need to use pmc_is_chained() instead.

So in this use-case, someone has configured a pair of chained counters
but only enabled the 

Re: [RFC 2/3] KVM: arm64: pmu: Fix chained SW_INCR counters

2019-12-06 Thread Marc Zyngier

On 2019-12-06 15:21, Andrew Murray wrote:

On Thu, Dec 05, 2019 at 02:52:26PM +, Marc Zyngier wrote:

On 2019-12-05 14:06, Auger Eric wrote:
> Hi Marc,
>
> On 12/5/19 10:43 AM, Marc Zyngier wrote:
> > Hi Eric,
> >
> > On 2019-12-04 20:44, Eric Auger wrote:
> > > At the moment a SW_INCR counter always overflows on a 32-bit
> > > boundary, independently of whether the n+1th counter is
> > > programmed as CHAIN.
> > >
> > > Check whether the SW_INCR counter is a 64b counter and if so,
> > > implement the 64b logic.
> > >
> > > Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU
> > > counters")
> > > Signed-off-by: Eric Auger 
> > > ---
> > >  virt/kvm/arm/pmu.c | 16 +++-
> > >  1 file changed, 15 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> > > index c3f8b059881e..7ab477db2f75 100644
> > > --- a/virt/kvm/arm/pmu.c
> > > +++ b/virt/kvm/arm/pmu.c
> > > @@ -491,6 +491,8 @@ void kvm_pmu_software_increment(struct kvm_vcpu
> > > *vcpu, u64 val)
> > >
> > >  enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
> > >  for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
> > > +    bool chained = test_bit(i >> 1, vcpu->arch.pmu.chained);
> > > +
> >
> > I'd rather you use kvm_pmu_pmc_is_chained() rather than open-coding
> > this. But see below:
> >
> > >  if (!(val & BIT(i)))
> > >  continue;
> > >  type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
> > > @@ -500,8 +502,20 @@ void kvm_pmu_software_increment(struct
> > > kvm_vcpu
> > > *vcpu, u64 val)
> > >  reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
> > >  reg = lower_32_bits(reg);
> > >  __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
> > > -    if (!reg)
> > > +    if (reg) /* no overflow */
> > > +    continue;
> > > +    if (chained) {
> > > +    reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
> > > +    reg = lower_32_bits(reg);
> > > +    __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
> > > +    if (reg)
> > > +    continue;
> > > +    /* mark an overflow on high counter */
> > > +    __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
> > > +    } else {
> > > +    /* mark an overflow */
> > >  __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
> > > +    }
> > >  }
> > >  }
> > >  }
> >
> > I think the whole function is a bit of a mess, and could be better
> > structured to treat 64bit counters as a first class citizen.
> >
> > I'm suggesting something along those lines, which tries to
> > streamline things a bit and keep the flow uniform between the
> > two word sizes. IMHO, it helps reasoning about it and gives
> > scope to the ARMv8.5 full 64bit counters... It is of course
> > completely untested.
>
> Looks OK to me as well. One remark though, don't we need to test if the
> n+1th reg is enabled before incrementing it?


Indeed - we don't want to indicate an overflow on a disabled counter.




Hmmm. I'm not sure. I think we should make sure that we don't flag
a counter as being chained if the odd counter is disabled, rather
than checking it here. As long as the odd counter is not chained
*and* enabled, we shouldn't touch it.


Does this mean that we don't care if the low counter is enabled or not
when deciding if the pair is chained?

I would find the code easier to follow if we had an explicit 'is the
high counter enabled here' check (at the point of deciding where to
put the overflow).


Sure. But the point is that we're spreading that kind of check all over
the map, and that we don't have a way to even reason about the state of
a 64bit counter. Doesn't it strike you as being mildly broken?






Again, untested:

diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index cf371f643ade..47366817cd2a 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -15,6 +15,7 @@
 #include 

 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64
select_idx);
+static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64
select_idx);

 #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

@@ -298,6 +299,7 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu,
u64 val)
 * For high counters of chained events we must recreate the
 * perf event with the long (64bit) attribute set.
 */
+   kvm_pmu_update_pmc_chained(vcpu, i);
if (kvm_pmu_pmc_is_chained(pmc) &&
kvm_pmu_idx_is_high_counter(i)) {
kvm_pmu_create_perf_event(vcpu, i);
@@ -645,7 +647,8 @@ static void kvm_pmu_update_pmc_chained(struct kvm_vcpu
*vcpu, u64 select_idx)
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc = &pmu->pmc[select_idx];

-   if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
+   if 

Re: [RFC 2/3] KVM: arm64: pmu: Fix chained SW_INCR counters

2019-12-06 Thread Andrew Murray
On Thu, Dec 05, 2019 at 02:52:26PM +, Marc Zyngier wrote:
> On 2019-12-05 14:06, Auger Eric wrote:
> > Hi Marc,
> > 
> > On 12/5/19 10:43 AM, Marc Zyngier wrote:
> > > Hi Eric,
> > > 
> > > On 2019-12-04 20:44, Eric Auger wrote:
> > > > At the moment a SW_INCR counter always overflows on a 32-bit
> > > > boundary, independently of whether the n+1th counter is
> > > > programmed as CHAIN.
> > > > 
> > > > Check whether the SW_INCR counter is a 64b counter and if so,
> > > > implement the 64b logic.
> > > > 
> > > > Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU
> > > > counters")
> > > > Signed-off-by: Eric Auger 
> > > > ---
> > > >  virt/kvm/arm/pmu.c | 16 +++-
> > > >  1 file changed, 15 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> > > > index c3f8b059881e..7ab477db2f75 100644
> > > > --- a/virt/kvm/arm/pmu.c
> > > > +++ b/virt/kvm/arm/pmu.c
> > > > @@ -491,6 +491,8 @@ void kvm_pmu_software_increment(struct kvm_vcpu
> > > > *vcpu, u64 val)
> > > > 
> > > >  enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
> > > >  for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
> > > > +    bool chained = test_bit(i >> 1, vcpu->arch.pmu.chained);
> > > > +
> > > 
> > > I'd rather you use kvm_pmu_pmc_is_chained() rather than open-coding
> > > this. But see below:
> > > 
> > > >  if (!(val & BIT(i)))
> > > >  continue;
> > > >  type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
> > > > @@ -500,8 +502,20 @@ void kvm_pmu_software_increment(struct
> > > > kvm_vcpu
> > > > *vcpu, u64 val)
> > > >  reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
> > > >  reg = lower_32_bits(reg);
> > > >  __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
> > > > -    if (!reg)
> > > > +    if (reg) /* no overflow */
> > > > +    continue;
> > > > +    if (chained) {
> > > > +    reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i +
> > > > 1) + 1;
> > > > +    reg = lower_32_bits(reg);
> > > > +    __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
> > > > +    if (reg)
> > > > +    continue;
> > > > +    /* mark an overflow on high counter */
> > > > +    __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
> > > > +    } else {
> > > > +    /* mark an overflow */
> > > >  __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
> > > > +    }
> > > >  }
> > > >  }
> > > >  }
> > > 
> > > I think the whole function is a bit of a mess, and could be better
> > > structured to treat 64bit counters as a first class citizen.
> > > 
> > > I'm suggesting something along those lines, which tries to
> > > streamline things a bit and keep the flow uniform between the
> > > two word sizes. IMHO, it helps reasoning about it and gives
> > > scope to the ARMv8.5 full 64bit counters... It is of course
> > > completely untested.
> > 
> > Looks OK to me as well. One remark though, don't we need to test if the
> > n+1th reg is enabled before incrementing it?

Indeed - we don't want to indicate an overflow on a disabled counter.


> 
> Hmmm. I'm not sure. I think we should make sure that we don't flag
> a counter as being chained if the odd counter is disabled, rather
> than checking it here. As long as the odd counter is not chained
> *and* enabled, we shouldn't touch it.

Does this mean that we don't care if the low counter is enabled or not
when deciding if the pair is chained?

I would find the code easier to follow if we had an explicit 'is the
high counter enabled here' check (at the point of deciding where to
put the overflow).


> 
> Again, untested:
> 
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> index cf371f643ade..47366817cd2a 100644
> --- a/virt/kvm/arm/pmu.c
> +++ b/virt/kvm/arm/pmu.c
> @@ -15,6 +15,7 @@
>  #include 
> 
>  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64
> select_idx);
> +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64
> select_idx);
> 
>  #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
> 
> @@ -298,6 +299,7 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu,
> u64 val)
>* For high counters of chained events we must recreate the
>* perf event with the long (64bit) attribute set.
>*/
> + kvm_pmu_update_pmc_chained(vcpu, i);
>   if (kvm_pmu_pmc_is_chained(pmc) &&
>   kvm_pmu_idx_is_high_counter(i)) {
>   kvm_pmu_create_perf_event(vcpu, i);
> @@ -645,7 +647,8 @@ static void kvm_pmu_update_pmc_chained(struct kvm_vcpu
> *vcpu, u64 select_idx)
>   struct kvm_pmu *pmu = &vcpu->arch.pmu;
>   struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> 
> - if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
> + if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
> + 

Re: [RFC 2/3] KVM: arm64: pmu: Fix chained SW_INCR counters

2019-12-06 Thread Auger Eric
Hi,

On 12/5/19 8:01 PM, Auger Eric wrote:
> Hi Marc,
> 
> On 12/5/19 3:52 PM, Marc Zyngier wrote:
>> On 2019-12-05 14:06, Auger Eric wrote:
>>> Hi Marc,
>>>
>>> On 12/5/19 10:43 AM, Marc Zyngier wrote:
 Hi Eric,

 On 2019-12-04 20:44, Eric Auger wrote:
> At the moment a SW_INCR counter always overflows on a 32-bit
> boundary, independently of whether the n+1th counter is
> programmed as CHAIN.
>
> Check whether the SW_INCR counter is a 64b counter and if so,
> implement the 64b logic.
>
> Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters")
> Signed-off-by: Eric Auger 
> ---
>  virt/kvm/arm/pmu.c | 16 +++-
>  1 file changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> index c3f8b059881e..7ab477db2f75 100644
> --- a/virt/kvm/arm/pmu.c
> +++ b/virt/kvm/arm/pmu.c
> @@ -491,6 +491,8 @@ void kvm_pmu_software_increment(struct kvm_vcpu
> *vcpu, u64 val)
>
>  enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
>  for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
> +    bool chained = test_bit(i >> 1, vcpu->arch.pmu.chained);
> +

 I'd rather you use kvm_pmu_pmc_is_chained() rather than open-coding
 this. But see below:

>  if (!(val & BIT(i)))
>  continue;
>  type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
> @@ -500,8 +502,20 @@ void kvm_pmu_software_increment(struct kvm_vcpu
> *vcpu, u64 val)
>  reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
>  reg = lower_32_bits(reg);
>  __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
> -    if (!reg)
> +    if (reg) /* no overflow */
> +    continue;
> +    if (chained) {
> +    reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
> +    reg = lower_32_bits(reg);
> +    __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
> +    if (reg)
> +    continue;
> +    /* mark an overflow on high counter */
> +    __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
> +    } else {
> +    /* mark an overflow */
>  __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
> +    }
>  }
>  }
>  }

 I think the whole function is a bit of a mess, and could be better
 structured to treat 64bit counters as a first class citizen.

 I'm suggesting something along those lines, which tries to
 streamline things a bit and keep the flow uniform between the
 two word sizes. IMHO, it helps reasoning about it and gives
 scope to the ARMv8.5 full 64bit counters... It is of course
 completely untested.
>>>
>>> Looks OK to me as well. One remark though, don't we need to test if the
>>> n+1th reg is enabled before incrementing it?
>>
>> Hmmm. I'm not sure. I think we should make sure that we don't flag
>> a counter as being chained if the odd counter is disabled, rather
>> than checking it here. As long as the odd counter is not chained
>> *and* enabled, we shouldn't touch it.
>> Again, untested:
>>
>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>> index cf371f643ade..47366817cd2a 100644
>> --- a/virt/kvm/arm/pmu.c
>> +++ b/virt/kvm/arm/pmu.c
>> @@ -15,6 +15,7 @@
>>  #include 
>>
>>  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64
>> select_idx);
>> +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64
>> select_idx);
>>
>>  #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
>>
>> @@ -298,6 +299,7 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu
>> *vcpu, u64 val)
>>   * For high counters of chained events we must recreate the
>>   * perf event with the long (64bit) attribute set.
>>   */
>> +    kvm_pmu_update_pmc_chained(vcpu, i);
>>  if (kvm_pmu_pmc_is_chained(pmc) &&
>>  kvm_pmu_idx_is_high_counter(i)) {
>>  kvm_pmu_create_perf_event(vcpu, i);
>> @@ -645,7 +647,8 @@ static void kvm_pmu_update_pmc_chained(struct
>> kvm_vcpu *vcpu, u64 select_idx)
>>  struct kvm_pmu *pmu = &vcpu->arch.pmu;
>>  struct kvm_pmc *pmc = &pmu->pmc[select_idx];
>>
>> -    if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
>> +    if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
>> +    kvm_pmu_counter_is_enabled(vcpu, pmc->idx)) {
> 
> In create_perf_event(), has_chain_evtype() is used and a 64b sample
> period would be chosen even if the counters are disjoined (since the odd
> is disabled). We would need to use pmc_is_chained() instead.
> 
> With perf_events, the check of whether the odd register is enabled is
> properly done (create_perf_event).

Hum that's not fully true. If we do not enable the CHAIN odd one but
only the event one, the correct 32b perf counter 

Re: [RFC 2/3] KVM: arm64: pmu: Fix chained SW_INCR counters

2019-12-05 Thread Auger Eric
Hi Marc,

On 12/5/19 3:52 PM, Marc Zyngier wrote:
> On 2019-12-05 14:06, Auger Eric wrote:
>> Hi Marc,
>>
>> On 12/5/19 10:43 AM, Marc Zyngier wrote:
>>> Hi Eric,
>>>
>>> On 2019-12-04 20:44, Eric Auger wrote:
 At the moment a SW_INCR counter always overflows on a 32-bit
 boundary, independently of whether the n+1th counter is
 programmed as CHAIN.

 Check whether the SW_INCR counter is a 64b counter and if so,
 implement the 64b logic.

 Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters")
 Signed-off-by: Eric Auger 
 ---
  virt/kvm/arm/pmu.c | 16 +++-
  1 file changed, 15 insertions(+), 1 deletion(-)

 diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
 index c3f8b059881e..7ab477db2f75 100644
 --- a/virt/kvm/arm/pmu.c
 +++ b/virt/kvm/arm/pmu.c
 @@ -491,6 +491,8 @@ void kvm_pmu_software_increment(struct kvm_vcpu
 *vcpu, u64 val)

  enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
  for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
 +    bool chained = test_bit(i >> 1, vcpu->arch.pmu.chained);
 +
>>>
>>> I'd rather you use kvm_pmu_pmc_is_chained() rather than open-coding
>>> this. But see below:
>>>
  if (!(val & BIT(i)))
  continue;
  type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
 @@ -500,8 +502,20 @@ void kvm_pmu_software_increment(struct kvm_vcpu
 *vcpu, u64 val)
  reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
  reg = lower_32_bits(reg);
  __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
 -    if (!reg)
 +    if (reg) /* no overflow */
 +    continue;
 +    if (chained) {
 +    reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
 +    reg = lower_32_bits(reg);
 +    __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
 +    if (reg)
 +    continue;
 +    /* mark an overflow on high counter */
 +    __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
 +    } else {
 +    /* mark an overflow */
  __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
 +    }
  }
  }
  }
>>>
>>> I think the whole function is a bit of a mess, and could be better
>>> structured to treat 64bit counters as a first class citizen.
>>>
>>> I'm suggesting something along those lines, which tries to
>>> streamline things a bit and keep the flow uniform between the
>>> two word sizes. IMHO, it helps reasoning about it and gives
>>> scope to the ARMv8.5 full 64bit counters... It is of course
>>> completely untested.
>>
>> Looks OK to me as well. One remark though, don't we need to test if the
>> n+1th reg is enabled before incrementing it?
> 
> Hmmm. I'm not sure. I think we should make sure that we don't flag
> a counter as being chained if the odd counter is disabled, rather
> than checking it here. As long as the odd counter is not chained
> *and* enabled, we shouldn't touch it.
> Again, untested:
> 
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> index cf371f643ade..47366817cd2a 100644
> --- a/virt/kvm/arm/pmu.c
> +++ b/virt/kvm/arm/pmu.c
> @@ -15,6 +15,7 @@
>  #include 
> 
>  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64
> select_idx);
> +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64
> select_idx);
> 
>  #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
> 
> @@ -298,6 +299,7 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu
> *vcpu, u64 val)
>   * For high counters of chained events we must recreate the
>   * perf event with the long (64bit) attribute set.
>   */
> +    kvm_pmu_update_pmc_chained(vcpu, i);
>  if (kvm_pmu_pmc_is_chained(pmc) &&
>  kvm_pmu_idx_is_high_counter(i)) {
>  kvm_pmu_create_perf_event(vcpu, i);
> @@ -645,7 +647,8 @@ static void kvm_pmu_update_pmc_chained(struct
> kvm_vcpu *vcpu, u64 select_idx)
>  struct kvm_pmu *pmu = &vcpu->arch.pmu;
>  struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> 
> -    if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
> +    if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
> +    kvm_pmu_counter_is_enabled(vcpu, pmc->idx)) {

In create_perf_event(), has_chain_evtype() is used and a 64b sample
period would be chosen even if the counters are disjoined (since the odd
is disabled). We would need to use pmc_is_chained() instead.

With perf_events, the check of whether the odd register is enabled is
properly done (create_perf_event). Then, as I understand it, whenever there is
a change in enable state or type we delete the previous perf event and
re-create a new one. The enable state check is just missing for SW_INCR.
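
As a self-contained illustration of that last point, here is a minimal
userspace model (simplified stand-ins, not the kernel code) of the filtering
the proposed rewrite performs before touching any counter on a PMSWINC write:
only counters that are selected by the write, enabled in PMCNTENSET and
programmed with the SW_INCR event get incremented.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define NR_COUNTERS	31
#define EVT_SW_INCR	0x0000	/* SW_INCR is architected as event 0x0000 */
#define EVT_MASK	0xffff	/* assumed 16-bit event field, as in EVTYPER */

/* Hypothetical model of the SW_INCR filtering, not kvm_pmu_software_increment() itself. */
static void swinc(uint32_t *cnt, const uint32_t *evtype,
		  uint32_t val, uint32_t enable)
{
	int i;

	val &= enable;				/* weed out disabled counters */
	for (i = 0; i < NR_COUNTERS; i++) {
		if (!(val & (1u << i)))
			continue;
		if ((evtype[i] & EVT_MASK) != EVT_SW_INCR)
			continue;		/* PMSWINC only applies to SW_INCR */
		cnt[i]++;
	}
}

int main(void)
{
	uint32_t cnt[NR_COUNTERS] = { 0 };
	uint32_t evtype[NR_COUNTERS] = { 0 };	/* every counter counts SW_INCR here */

	/* the write selects counters 0 and 1, but only counter 0 is enabled */
	swinc(cnt, evtype, 0x3, 0x1);
	printf("cnt0=%" PRIu32 " cnt1=%" PRIu32 "\n", cnt[0], cnt[1]);	/* cnt0=1 cnt1=0 */
	return 0;
}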

Some other questions:
- do we need a perf event to be created even if the counter is not
enabled? For instance 

Re: [RFC 2/3] KVM: arm64: pmu: Fix chained SW_INCR counters

2019-12-05 Thread Marc Zyngier

On 2019-12-05 14:06, Auger Eric wrote:

Hi Marc,

On 12/5/19 10:43 AM, Marc Zyngier wrote:

Hi Eric,

On 2019-12-04 20:44, Eric Auger wrote:

At the moment a SW_INCR counter always overflows on a 32-bit
boundary, independently of whether the n+1th counter is
programmed as CHAIN.

Check whether the SW_INCR counter is a 64b counter and if so,
implement the 64b logic.

Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters")
Signed-off-by: Eric Auger 
---
 virt/kvm/arm/pmu.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index c3f8b059881e..7ab477db2f75 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -491,6 +491,8 @@ void kvm_pmu_software_increment(struct kvm_vcpu
*vcpu, u64 val)

 enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
 for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
+    bool chained = test_bit(i >> 1, vcpu->arch.pmu.chained);
+


I'd rather you use kvm_pmu_pmc_is_chained() rather than open-coding
this. But see below:


 if (!(val & BIT(i)))
 continue;
 type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
@@ -500,8 +502,20 @@ void kvm_pmu_software_increment(struct kvm_vcpu
*vcpu, u64 val)
 reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
 reg = lower_32_bits(reg);
 __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
-    if (!reg)
+    if (reg) /* no overflow */
+    continue;
+    if (chained) {
+    reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
+    reg = lower_32_bits(reg);
+    __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
+    if (reg)
+    continue;
+    /* mark an overflow on high counter */
+    __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
+    } else {
+    /* mark an overflow */
 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
+    }
 }
 }
 }


I think the whole function is a bit of a mess, and could be better
structured to treat 64bit counters as a first class citizen.

I'm suggesting something along those lines, which tries to
streamline things a bit and keep the flow uniform between the
two word sizes. IMHO, it helps reasoning about it and gives
scope to the ARMv8.5 full 64bit counters... It is of course
completely untested.


Looks OK to me as well. One remark though, don't we need to test if the
n+1th reg is enabled before incrementing it?


Hmmm. I'm not sure. I think we should make sure that we don't flag
a counter as being chained if the odd counter is disabled, rather
than checking it here. As long as the odd counter is not chained
*and* enabled, we shouldn't touch it.

Again, untested:

diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index cf371f643ade..47366817cd2a 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -15,6 +15,7 @@
 #include 

 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 
select_idx);
+static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 
select_idx);


 #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

@@ -298,6 +299,7 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu 
*vcpu, u64 val)

 * For high counters of chained events we must recreate the
 * perf event with the long (64bit) attribute set.
 */
+   kvm_pmu_update_pmc_chained(vcpu, i);
if (kvm_pmu_pmc_is_chained(pmc) &&
kvm_pmu_idx_is_high_counter(i)) {
kvm_pmu_create_perf_event(vcpu, i);
@@ -645,7 +647,8 @@ static void kvm_pmu_update_pmc_chained(struct 
kvm_vcpu *vcpu, u64 select_idx)

struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc = &pmu->pmc[select_idx];

-   if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
+   if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
+   kvm_pmu_counter_is_enabled(vcpu, pmc->idx)) {
/*
 * During promotion from !chained to chained we must ensure
 * the adjacent counter is stopped and its event destroyed

What do you think?

M.
--
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
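
For reference, the lower_32_bits()/upper_32_bits() split used in the rewrite
above is plain word arithmetic. A tiny standalone illustration (local stand-ins
mirroring the kernel helpers of the same name, not kernel code) of how one
SW_INCR past the 32-bit boundary lands in the two words of a chained pair:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-ins mirroring the kernel helpers' behaviour. */
static uint32_t lower_32_bits(uint64_t v) { return (uint32_t)v; }
static uint32_t upper_32_bits(uint64_t v) { return (uint32_t)(v >> 32); }

int main(void)
{
	/* Chained pair currently holding 0x00000000ffffffff, plus one SW_INCR. */
	uint64_t reg = 0xffffffffULL + 1;

	printf("low word  = %" PRIu32 "\n", lower_32_bits(reg));	/* 0 */
	printf("high word = %" PRIu32 "\n", upper_32_bits(reg));	/* 1 */
	/* In the rewrite, a zero low word is what sets the overflow bit, and
	 * for a chained pair that bit is targeted at the next (odd) counter. */
	return 0;
}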


Re: [RFC 2/3] KVM: arm64: pmu: Fix chained SW_INCR counters

2019-12-05 Thread Auger Eric
Hi Marc,

On 12/5/19 10:43 AM, Marc Zyngier wrote:
> Hi Eric,
> 
> On 2019-12-04 20:44, Eric Auger wrote:
>> At the moment a SW_INCR counter always overflows on a 32-bit
>> boundary, independently of whether the n+1th counter is
>> programmed as CHAIN.
>>
>> Check whether the SW_INCR counter is a 64b counter and if so,
>> implement the 64b logic.
>>
>> Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters")
>> Signed-off-by: Eric Auger 
>> ---
>>  virt/kvm/arm/pmu.c | 16 +++-
>>  1 file changed, 15 insertions(+), 1 deletion(-)
>>
>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>> index c3f8b059881e..7ab477db2f75 100644
>> --- a/virt/kvm/arm/pmu.c
>> +++ b/virt/kvm/arm/pmu.c
>> @@ -491,6 +491,8 @@ void kvm_pmu_software_increment(struct kvm_vcpu
>> *vcpu, u64 val)
>>
>>  enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
>>  for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
>> +    bool chained = test_bit(i >> 1, vcpu->arch.pmu.chained);
>> +
> 
> I'd rather you use kvm_pmu_pmc_is_chained() rather than open-coding
> this. But see below:
> 
>>  if (!(val & BIT(i)))
>>  continue;
>>  type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
>> @@ -500,8 +502,20 @@ void kvm_pmu_software_increment(struct kvm_vcpu
>> *vcpu, u64 val)
>>  reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
>>  reg = lower_32_bits(reg);
>>  __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
>> -    if (!reg)
>> +    if (reg) /* no overflow */
>> +    continue;
>> +    if (chained) {
>> +    reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
>> +    reg = lower_32_bits(reg);
>> +    __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
>> +    if (reg)
>> +    continue;
>> +    /* mark an overflow on high counter */
>> +    __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
>> +    } else {
>> +    /* mark an overflow */
>>  __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
>> +    }
>>  }
>>  }
>>  }
> 
> I think the whole function is a bit of a mess, and could be better
> structured to treat 64bit counters as a first class citizen.
> 
> I'm suggesting something along those lines, which tries to
> streamline things a bit and keep the flow uniform between the
> two word sizes. IMHO, it helps reasoning about it and gives
> scope to the ARMv8.5 full 64bit counters... It is of course
> completely untested.

Looks OK to me as well. One remark though, don't we need to test if the
n+1th reg is enabled before incrementing it?

Thanks

Eric
> 
> Thoughts?
> 
>     M.
> 
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> index 8731dfeced8b..cf371f643ade 100644
> --- a/virt/kvm/arm/pmu.c
> +++ b/virt/kvm/arm/pmu.c
> @@ -480,26 +480,43 @@ static void kvm_pmu_perf_overflow(struct
> perf_event *perf_event,
>   */
>  void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
>  {
> +    struct kvm_pmu *pmu = &vcpu->arch.pmu;
>  int i;
> -    u64 type, enable, reg;
> 
> -    if (val == 0)
> -    return;
> +    /* Weed out disabled counters */
> +    val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
> 
> -    enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
>  for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
> +    u64 type, reg;
> +    int ovs = i;
> +
>  if (!(val & BIT(i)))
>  continue;
> -    type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
> -   & ARMV8_PMU_EVTYPE_EVENT;
> -    if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
> -    && (enable & BIT(i))) {
> -    reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
> -    reg = lower_32_bits(reg);
> -    __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
> -    if (!reg)
> -    __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
> +
> +    /* PMSWINC only applies to ... SW_INC! */
> +    type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
> +    type &= ARMV8_PMU_EVTYPE_EVENT;
> +    if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
> +    continue;
> +
> +    /* Potential 64bit value */
> +    reg = kvm_pmu_get_counter_value(vcpu, i) + 1;
> +
> +    /* Start by writing back the low 32bits */
> +    __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = lower_32_bits(reg);
> +
> +    /*
> + * 64bit counter? Write back the upper bits and target
> + * the overflow bit at the next counter
> + */
> +    if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
> +    reg = upper_32_bits(reg);
> +    __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
> +    ovs++;
>  }
> +
> +    if (!lower_32_bits(reg))
> +    __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(ovs);
>  }
>  }
> 
> 

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu

Re: [RFC 2/3] KVM: arm64: pmu: Fix chained SW_INCR counters

2019-12-05 Thread Marc Zyngier

Hi Eric,

On 2019-12-04 20:44, Eric Auger wrote:

At the moment a SW_INCR counter always overflows on a 32-bit
boundary, independently of whether the n+1th counter is
programmed as CHAIN.

Check whether the SW_INCR counter is a 64b counter and if so,
implement the 64b logic.

Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters")
Signed-off-by: Eric Auger 
---
 virt/kvm/arm/pmu.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index c3f8b059881e..7ab477db2f75 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -491,6 +491,8 @@ void kvm_pmu_software_increment(struct kvm_vcpu
*vcpu, u64 val)

enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
+   bool chained = test_bit(i >> 1, vcpu->arch.pmu.chained);
+


I'd rather you use kvm_pmu_pmc_is_chained() rather than open-coding
this. But see below:


if (!(val & BIT(i)))
continue;
type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
@@ -500,8 +502,20 @@ void kvm_pmu_software_increment(struct kvm_vcpu
*vcpu, u64 val)
reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
reg = lower_32_bits(reg);
__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
-   if (!reg)
+   if (reg) /* no overflow */
+   continue;
+   if (chained) {
+   reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
+   reg = lower_32_bits(reg);
+   __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
+   if (reg)
+   continue;
+   /* mark an overflow on high counter */
+   __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
+   } else {
+   /* mark an overflow */
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
+   }
}
}
 }


I think the whole function is a bit of a mess, and could be better
structured to treat 64bit counters as a first class citizen.

I'm suggesting something along those lines, which tries to
streamline things a bit and keep the flow uniform between the
two word sizes. IMHO, it helps reasoning about it and gives
scope to the ARMv8.5 full 64bit counters... It is of course
completely untested.

Thoughts?

M.

diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 8731dfeced8b..cf371f643ade 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -480,26 +480,43 @@ static void kvm_pmu_perf_overflow(struct 
perf_event *perf_event,

  */
 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
 {
+   struct kvm_pmu *pmu = &vcpu->arch.pmu;
int i;
-   u64 type, enable, reg;

-   if (val == 0)
-   return;
+   /* Weed out disabled counters */
+   val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

-   enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
+   u64 type, reg;
+   int ovs = i;
+
if (!(val & BIT(i)))
continue;
-   type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
-  & ARMV8_PMU_EVTYPE_EVENT;
-   if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
-   && (enable & BIT(i))) {
-   reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
-   reg = lower_32_bits(reg);
-   __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
-   if (!reg)
-   __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
+
+   /* PMSWINC only applies to ... SW_INC! */
+   type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
+   type &= ARMV8_PMU_EVTYPE_EVENT;
+   if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
+   continue;
+
+   /* Potential 64bit value */
+   reg = kvm_pmu_get_counter_value(vcpu, i) + 1;
+
+   /* Start by writing back the low 32bits */
+   __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = lower_32_bits(reg);
+
+   /*
+* 64bit counter? Write back the upper bits and target
+* the overflow bit at the next counter
+*/
+   if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
+   reg = upper_32_bits(reg);
+   __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
+   ovs++;
}
+
+   if (!lower_32_bits(reg))
+   __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(ovs);
}