Re: [PATCH 8/9] arm/arm64: KVM: Rework the arch timer to use level-triggered semantics

2015-09-03 Thread Christoffer Dall
On Thu, Sep 03, 2015 at 06:29:14PM +0100, Marc Zyngier wrote:
> On 03/09/15 18:23, Christoffer Dall wrote:
> > On Thu, Sep 03, 2015 at 06:06:39PM +0100, Marc Zyngier wrote:
> >> On 30/08/15 14:54, Christoffer Dall wrote:
> >>> The arch timer currently uses edge-triggered semantics in the sense that
> >>> the line is never sampled by the vgic and lowering the line from the
> >>> timer to the vgic doesn't have any affect on the pending state of
> >>> virtual interrupts in the vgic.  This means that we do not support a
> >>> guest with the otherwise valid behavior of (1) disable interrupts (2)
> >>> enable the timer (3) disable the timer (4) enable interrupts.  Such a
> >>> guest would validly not expect to see any interrupts on real hardware,
> >>> but will see interrupts on KVM.
> >>>
> >>> This patches fixes this shortcoming through the following series of
> >>> changes.
> >>>
> >>> First, we change the flow of the timer/vgic sync/flush operations.  Now
> >>> the timer is always flushed/synced before the vgic, because the vgic
> >>> samples the state of the timer output.  This has the implication that we
> >>> move the timer operations in to non-preempible sections, but that is
> >>> fine after the previous commit getting rid of hrtimer schedules on every
> >>> entry/exit.
> >>>
> >>> Second, we change the internal behavior of the timer, letting the timer
> >>> keep track of its previous output state, and only lower/raise the line
> >>> to the vgic when the state changes.  Note that in theory this could have
> >>> been accomplished more simply by signalling the vgic every time the
> >>> state *potentially* changed, but we don't want to be hitting the vgic
> >>> more often than necessary.
> >>>
> >>> Third, we get rid of the use of the map->active field in the vgic and
> >>> instead simply set the interrupt as active on the physical distributor
> >>> whenever we signal a mapped interrupt to the guest, and we reset the
> >>> active state when we sync back the HW state from the vgic.
> >>>
> >>> Fourth, and finally, we now initialize the timer PPIs (and all the other
> >>> unused PPIs for now), to be level-triggered, and modify the sync code to
> >>> sample the line state on HW sync and re-inject a new interrupt if it is
> >>> still pending at that time.
> >>>
> >>> Signed-off-by: Christoffer Dall 
> >>> ---
> >>>  arch/arm/kvm/arm.c   | 11 +--
> >>>  include/kvm/arm_arch_timer.h |  2 +-
> >>>  include/kvm/arm_vgic.h   |  3 --
> >>>  virt/kvm/arm/arch_timer.c| 68 
> >>> +++-
> >>>  virt/kvm/arm/vgic.c  | 67 
> >>> +++
> >>>  5 files changed, 81 insertions(+), 70 deletions(-)
> >>>
> >>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> >>> index bdf8871..102a4aa 100644
> >>> --- a/arch/arm/kvm/arm.c
> >>> +++ b/arch/arm/kvm/arm.c
> >>> @@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
> >>> struct kvm_run *run)
> >>>  
> >>>   if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
> >>>   local_irq_enable();
> >>> + kvm_timer_sync_hwstate(vcpu);
> >>>   kvm_vgic_sync_hwstate(vcpu);
> >>>   preempt_enable();
> >>> - kvm_timer_sync_hwstate(vcpu);
> >>>   continue;
> >>>   }
> >>>  
> >>> @@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
> >>> struct kvm_run *run)
> >>>   kvm_guest_exit();
> >>>   trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
> >>>  
> >>> + /*
> >>> +  * We must sync the timer state before the vgic state so that
> >>> +  * the vgic can properly sample the updated state of the
> >>> +  * interrupt line.
> >>> +  */
> >>> + kvm_timer_sync_hwstate(vcpu);
> >>> +
> >>>   kvm_vgic_sync_hwstate(vcpu);
> >>>  
> >>>   preempt_enable();
> >>>  
> >>> - kvm_timer_sync_hwstate(vcpu);
> >>> -
> >>>   ret = handle_exit(vcpu, run, ret);
> >>>   }
> >>>  
> >>> diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
> >>> index ef14cc1..1800227 100644
> >>> --- a/include/kvm/arm_arch_timer.h
> >>> +++ b/include/kvm/arm_arch_timer.h
> >>> @@ -51,7 +51,7 @@ struct arch_timer_cpu {
> >>>   boolarmed;
> >>>  
> >>>   /* Timer IRQ */
> >>> - const struct kvm_irq_level  *irq;
> >>> + struct kvm_irq_levelirq;
> >>>  
> >>>   /* VGIC mapping */
> >>>   struct irq_phys_map *map;
> >>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> >>> index d901f1a..99011a0 100644
> >>> --- a/include/kvm/arm_vgic.h
> >>> +++ b/include/kvm/arm_vgic.h
> >>> @@ -163,7 +163,6 @@ struct irq_phys_map {
> >>>   u32 virt_irq;
> >>>   u32 phys_irq;
> >>>   u32 irq;
> >>> - boolactive;
> >>>  };
> >>>  
> >>

Re: [PATCH 8/9] arm/arm64: KVM: Rework the arch timer to use level-triggered semantics

2015-09-03 Thread Marc Zyngier
On 03/09/15 18:23, Christoffer Dall wrote:
> On Thu, Sep 03, 2015 at 06:06:39PM +0100, Marc Zyngier wrote:
>> On 30/08/15 14:54, Christoffer Dall wrote:
>>> The arch timer currently uses edge-triggered semantics in the sense that
>>> the line is never sampled by the vgic and lowering the line from the
>>> timer to the vgic doesn't have any affect on the pending state of
>>> virtual interrupts in the vgic.  This means that we do not support a
>>> guest with the otherwise valid behavior of (1) disable interrupts (2)
>>> enable the timer (3) disable the timer (4) enable interrupts.  Such a
>>> guest would validly not expect to see any interrupts on real hardware,
>>> but will see interrupts on KVM.
>>>
>>> This patches fixes this shortcoming through the following series of
>>> changes.
>>>
>>> First, we change the flow of the timer/vgic sync/flush operations.  Now
>>> the timer is always flushed/synced before the vgic, because the vgic
>>> samples the state of the timer output.  This has the implication that we
>>> move the timer operations in to non-preempible sections, but that is
>>> fine after the previous commit getting rid of hrtimer schedules on every
>>> entry/exit.
>>>
>>> Second, we change the internal behavior of the timer, letting the timer
>>> keep track of its previous output state, and only lower/raise the line
>>> to the vgic when the state changes.  Note that in theory this could have
>>> been accomplished more simply by signalling the vgic every time the
>>> state *potentially* changed, but we don't want to be hitting the vgic
>>> more often than necessary.
>>>
>>> Third, we get rid of the use of the map->active field in the vgic and
>>> instead simply set the interrupt as active on the physical distributor
>>> whenever we signal a mapped interrupt to the guest, and we reset the
>>> active state when we sync back the HW state from the vgic.
>>>
>>> Fourth, and finally, we now initialize the timer PPIs (and all the other
>>> unused PPIs for now), to be level-triggered, and modify the sync code to
>>> sample the line state on HW sync and re-inject a new interrupt if it is
>>> still pending at that time.
>>>
>>> Signed-off-by: Christoffer Dall 
>>> ---
>>>  arch/arm/kvm/arm.c   | 11 +--
>>>  include/kvm/arm_arch_timer.h |  2 +-
>>>  include/kvm/arm_vgic.h   |  3 --
>>>  virt/kvm/arm/arch_timer.c| 68 
>>> +++-
>>>  virt/kvm/arm/vgic.c  | 67 
>>> +++
>>>  5 files changed, 81 insertions(+), 70 deletions(-)
>>>
>>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>>> index bdf8871..102a4aa 100644
>>> --- a/arch/arm/kvm/arm.c
>>> +++ b/arch/arm/kvm/arm.c
>>> @@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
>>> struct kvm_run *run)
>>>  
>>> if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
>>> local_irq_enable();
>>> +   kvm_timer_sync_hwstate(vcpu);
>>> kvm_vgic_sync_hwstate(vcpu);
>>> preempt_enable();
>>> -   kvm_timer_sync_hwstate(vcpu);
>>> continue;
>>> }
>>>  
>>> @@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
>>> struct kvm_run *run)
>>> kvm_guest_exit();
>>> trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
>>>  
>>> +   /*
>>> +* We must sync the timer state before the vgic state so that
>>> +* the vgic can properly sample the updated state of the
>>> +* interrupt line.
>>> +*/
>>> +   kvm_timer_sync_hwstate(vcpu);
>>> +
>>> kvm_vgic_sync_hwstate(vcpu);
>>>  
>>> preempt_enable();
>>>  
>>> -   kvm_timer_sync_hwstate(vcpu);
>>> -
>>> ret = handle_exit(vcpu, run, ret);
>>> }
>>>  
>>> diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
>>> index ef14cc1..1800227 100644
>>> --- a/include/kvm/arm_arch_timer.h
>>> +++ b/include/kvm/arm_arch_timer.h
>>> @@ -51,7 +51,7 @@ struct arch_timer_cpu {
>>> boolarmed;
>>>  
>>> /* Timer IRQ */
>>> -   const struct kvm_irq_level  *irq;
>>> +   struct kvm_irq_levelirq;
>>>  
>>> /* VGIC mapping */
>>> struct irq_phys_map *map;
>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>>> index d901f1a..99011a0 100644
>>> --- a/include/kvm/arm_vgic.h
>>> +++ b/include/kvm/arm_vgic.h
>>> @@ -163,7 +163,6 @@ struct irq_phys_map {
>>> u32 virt_irq;
>>> u32 phys_irq;
>>> u32 irq;
>>> -   boolactive;
>>>  };
>>>  
>>>  struct irq_phys_map_entry {
>>> @@ -358,8 +357,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
>>>  struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
>>>in

Re: [PATCH 8/9] arm/arm64: KVM: Rework the arch timer to use level-triggered semantics

2015-09-03 Thread Christoffer Dall
On Thu, Sep 03, 2015 at 06:06:39PM +0100, Marc Zyngier wrote:
> On 30/08/15 14:54, Christoffer Dall wrote:
> > The arch timer currently uses edge-triggered semantics in the sense that
> > the line is never sampled by the vgic and lowering the line from the
> > timer to the vgic doesn't have any affect on the pending state of
> > virtual interrupts in the vgic.  This means that we do not support a
> > guest with the otherwise valid behavior of (1) disable interrupts (2)
> > enable the timer (3) disable the timer (4) enable interrupts.  Such a
> > guest would validly not expect to see any interrupts on real hardware,
> > but will see interrupts on KVM.
> > 
> > This patches fixes this shortcoming through the following series of
> > changes.
> > 
> > First, we change the flow of the timer/vgic sync/flush operations.  Now
> > the timer is always flushed/synced before the vgic, because the vgic
> > samples the state of the timer output.  This has the implication that we
> > move the timer operations in to non-preempible sections, but that is
> > fine after the previous commit getting rid of hrtimer schedules on every
> > entry/exit.
> > 
> > Second, we change the internal behavior of the timer, letting the timer
> > keep track of its previous output state, and only lower/raise the line
> > to the vgic when the state changes.  Note that in theory this could have
> > been accomplished more simply by signalling the vgic every time the
> > state *potentially* changed, but we don't want to be hitting the vgic
> > more often than necessary.
> > 
> > Third, we get rid of the use of the map->active field in the vgic and
> > instead simply set the interrupt as active on the physical distributor
> > whenever we signal a mapped interrupt to the guest, and we reset the
> > active state when we sync back the HW state from the vgic.
> > 
> > Fourth, and finally, we now initialize the timer PPIs (and all the other
> > unused PPIs for now), to be level-triggered, and modify the sync code to
> > sample the line state on HW sync and re-inject a new interrupt if it is
> > still pending at that time.
> > 
> > Signed-off-by: Christoffer Dall 
> > ---
> >  arch/arm/kvm/arm.c   | 11 +--
> >  include/kvm/arm_arch_timer.h |  2 +-
> >  include/kvm/arm_vgic.h   |  3 --
> >  virt/kvm/arm/arch_timer.c| 68 
> > +++-
> >  virt/kvm/arm/vgic.c  | 67 
> > +++
> >  5 files changed, 81 insertions(+), 70 deletions(-)
> > 
> > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> > index bdf8871..102a4aa 100644
> > --- a/arch/arm/kvm/arm.c
> > +++ b/arch/arm/kvm/arm.c
> > @@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
> > struct kvm_run *run)
> >  
> > if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
> > local_irq_enable();
> > +   kvm_timer_sync_hwstate(vcpu);
> > kvm_vgic_sync_hwstate(vcpu);
> > preempt_enable();
> > -   kvm_timer_sync_hwstate(vcpu);
> > continue;
> > }
> >  
> > @@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
> > struct kvm_run *run)
> > kvm_guest_exit();
> > trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
> >  
> > +   /*
> > +* We must sync the timer state before the vgic state so that
> > +* the vgic can properly sample the updated state of the
> > +* interrupt line.
> > +*/
> > +   kvm_timer_sync_hwstate(vcpu);
> > +
> > kvm_vgic_sync_hwstate(vcpu);
> >  
> > preempt_enable();
> >  
> > -   kvm_timer_sync_hwstate(vcpu);
> > -
> > ret = handle_exit(vcpu, run, ret);
> > }
> >  
> > diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
> > index ef14cc1..1800227 100644
> > --- a/include/kvm/arm_arch_timer.h
> > +++ b/include/kvm/arm_arch_timer.h
> > @@ -51,7 +51,7 @@ struct arch_timer_cpu {
> > boolarmed;
> >  
> > /* Timer IRQ */
> > -   const struct kvm_irq_level  *irq;
> > +   struct kvm_irq_levelirq;
> >  
> > /* VGIC mapping */
> > struct irq_phys_map *map;
> > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> > index d901f1a..99011a0 100644
> > --- a/include/kvm/arm_vgic.h
> > +++ b/include/kvm/arm_vgic.h
> > @@ -163,7 +163,6 @@ struct irq_phys_map {
> > u32 virt_irq;
> > u32 phys_irq;
> > u32 irq;
> > -   boolactive;
> >  };
> >  
> >  struct irq_phys_map_entry {
> > @@ -358,8 +357,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
> >  struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
> >int virt_irq, int irq);
> >  int kvm_vgic

Re: [PATCH 8/9] arm/arm64: KVM: Rework the arch timer to use level-triggered semantics

2015-09-03 Thread Marc Zyngier
On 30/08/15 14:54, Christoffer Dall wrote:
> The arch timer currently uses edge-triggered semantics in the sense that
> the line is never sampled by the vgic and lowering the line from the
> timer to the vgic doesn't have any affect on the pending state of
> virtual interrupts in the vgic.  This means that we do not support a
> guest with the otherwise valid behavior of (1) disable interrupts (2)
> enable the timer (3) disable the timer (4) enable interrupts.  Such a
> guest would validly not expect to see any interrupts on real hardware,
> but will see interrupts on KVM.
> 
> This patches fixes this shortcoming through the following series of
> changes.
> 
> First, we change the flow of the timer/vgic sync/flush operations.  Now
> the timer is always flushed/synced before the vgic, because the vgic
> samples the state of the timer output.  This has the implication that we
> move the timer operations in to non-preempible sections, but that is
> fine after the previous commit getting rid of hrtimer schedules on every
> entry/exit.
> 
> Second, we change the internal behavior of the timer, letting the timer
> keep track of its previous output state, and only lower/raise the line
> to the vgic when the state changes.  Note that in theory this could have
> been accomplished more simply by signalling the vgic every time the
> state *potentially* changed, but we don't want to be hitting the vgic
> more often than necessary.
> 
> Third, we get rid of the use of the map->active field in the vgic and
> instead simply set the interrupt as active on the physical distributor
> whenever we signal a mapped interrupt to the guest, and we reset the
> active state when we sync back the HW state from the vgic.
> 
> Fourth, and finally, we now initialize the timer PPIs (and all the other
> unused PPIs for now), to be level-triggered, and modify the sync code to
> sample the line state on HW sync and re-inject a new interrupt if it is
> still pending at that time.
> 
> Signed-off-by: Christoffer Dall 
> ---
>  arch/arm/kvm/arm.c   | 11 +--
>  include/kvm/arm_arch_timer.h |  2 +-
>  include/kvm/arm_vgic.h   |  3 --
>  virt/kvm/arm/arch_timer.c| 68 
> +++-
>  virt/kvm/arm/vgic.c  | 67 +++
>  5 files changed, 81 insertions(+), 70 deletions(-)
> 
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index bdf8871..102a4aa 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
> kvm_run *run)
>  
>   if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
>   local_irq_enable();
> + kvm_timer_sync_hwstate(vcpu);
>   kvm_vgic_sync_hwstate(vcpu);
>   preempt_enable();
> - kvm_timer_sync_hwstate(vcpu);
>   continue;
>   }
>  
> @@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
> struct kvm_run *run)
>   kvm_guest_exit();
>   trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
>  
> + /*
> +  * We must sync the timer state before the vgic state so that
> +  * the vgic can properly sample the updated state of the
> +  * interrupt line.
> +  */
> + kvm_timer_sync_hwstate(vcpu);
> +
>   kvm_vgic_sync_hwstate(vcpu);
>  
>   preempt_enable();
>  
> - kvm_timer_sync_hwstate(vcpu);
> -
>   ret = handle_exit(vcpu, run, ret);
>   }
>  
> diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
> index ef14cc1..1800227 100644
> --- a/include/kvm/arm_arch_timer.h
> +++ b/include/kvm/arm_arch_timer.h
> @@ -51,7 +51,7 @@ struct arch_timer_cpu {
>   boolarmed;
>  
>   /* Timer IRQ */
> - const struct kvm_irq_level  *irq;
> + struct kvm_irq_levelirq;
>  
>   /* VGIC mapping */
>   struct irq_phys_map *map;
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index d901f1a..99011a0 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -163,7 +163,6 @@ struct irq_phys_map {
>   u32 virt_irq;
>   u32 phys_irq;
>   u32 irq;
> - boolactive;
>  };
>  
>  struct irq_phys_map_entry {
> @@ -358,8 +357,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
>  struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
>  int virt_irq, int irq);
>  int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
> -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map);
> -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active);
>  
>  #define irqchip