Re: [PATCH 8/9] arm/arm64: KVM: Rework the arch timer to use level-triggered semantics
On Thu, Sep 03, 2015 at 06:29:14PM +0100, Marc Zyngier wrote: > On 03/09/15 18:23, Christoffer Dall wrote: > > On Thu, Sep 03, 2015 at 06:06:39PM +0100, Marc Zyngier wrote: > >> On 30/08/15 14:54, Christoffer Dall wrote: > >>> The arch timer currently uses edge-triggered semantics in the sense that > >>> the line is never sampled by the vgic and lowering the line from the > >>> timer to the vgic doesn't have any affect on the pending state of > >>> virtual interrupts in the vgic. This means that we do not support a > >>> guest with the otherwise valid behavior of (1) disable interrupts (2) > >>> enable the timer (3) disable the timer (4) enable interrupts. Such a > >>> guest would validly not expect to see any interrupts on real hardware, > >>> but will see interrupts on KVM. > >>> > >>> This patches fixes this shortcoming through the following series of > >>> changes. > >>> > >>> First, we change the flow of the timer/vgic sync/flush operations. Now > >>> the timer is always flushed/synced before the vgic, because the vgic > >>> samples the state of the timer output. This has the implication that we > >>> move the timer operations in to non-preempible sections, but that is > >>> fine after the previous commit getting rid of hrtimer schedules on every > >>> entry/exit. > >>> > >>> Second, we change the internal behavior of the timer, letting the timer > >>> keep track of its previous output state, and only lower/raise the line > >>> to the vgic when the state changes. Note that in theory this could have > >>> been accomplished more simply by signalling the vgic every time the > >>> state *potentially* changed, but we don't want to be hitting the vgic > >>> more often than necessary. > >>> > >>> Third, we get rid of the use of the map->active field in the vgic and > >>> instead simply set the interrupt as active on the physical distributor > >>> whenever we signal a mapped interrupt to the guest, and we reset the > >>> active state when we sync back the HW state from the vgic. > >>> > >>> Fourth, and finally, we now initialize the timer PPIs (and all the other > >>> unused PPIs for now), to be level-triggered, and modify the sync code to > >>> sample the line state on HW sync and re-inject a new interrupt if it is > >>> still pending at that time. > >>> > >>> Signed-off-by: Christoffer Dall > >>> --- > >>> arch/arm/kvm/arm.c | 11 +-- > >>> include/kvm/arm_arch_timer.h | 2 +- > >>> include/kvm/arm_vgic.h | 3 -- > >>> virt/kvm/arm/arch_timer.c| 68 > >>> +++- > >>> virt/kvm/arm/vgic.c | 67 > >>> +++ > >>> 5 files changed, 81 insertions(+), 70 deletions(-) > >>> > >>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > >>> index bdf8871..102a4aa 100644 > >>> --- a/arch/arm/kvm/arm.c > >>> +++ b/arch/arm/kvm/arm.c > >>> @@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, > >>> struct kvm_run *run) > >>> > >>> if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { > >>> local_irq_enable(); > >>> + kvm_timer_sync_hwstate(vcpu); > >>> kvm_vgic_sync_hwstate(vcpu); > >>> preempt_enable(); > >>> - kvm_timer_sync_hwstate(vcpu); > >>> continue; > >>> } > >>> > >>> @@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, > >>> struct kvm_run *run) > >>> kvm_guest_exit(); > >>> trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); > >>> > >>> + /* > >>> + * We must sync the timer state before the vgic state so that > >>> + * the vgic can properly sample the updated state of the > >>> + * interrupt line. > >>> + */ > >>> + kvm_timer_sync_hwstate(vcpu); > >>> + > >>> kvm_vgic_sync_hwstate(vcpu); > >>> > >>> preempt_enable(); > >>> > >>> - kvm_timer_sync_hwstate(vcpu); > >>> - > >>> ret = handle_exit(vcpu, run, ret); > >>> } > >>> > >>> diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h > >>> index ef14cc1..1800227 100644 > >>> --- a/include/kvm/arm_arch_timer.h > >>> +++ b/include/kvm/arm_arch_timer.h > >>> @@ -51,7 +51,7 @@ struct arch_timer_cpu { > >>> boolarmed; > >>> > >>> /* Timer IRQ */ > >>> - const struct kvm_irq_level *irq; > >>> + struct kvm_irq_levelirq; > >>> > >>> /* VGIC mapping */ > >>> struct irq_phys_map *map; > >>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h > >>> index d901f1a..99011a0 100644 > >>> --- a/include/kvm/arm_vgic.h > >>> +++ b/include/kvm/arm_vgic.h > >>> @@ -163,7 +163,6 @@ struct irq_phys_map { > >>> u32 virt_irq; > >>> u32 phys_irq; > >>> u32 irq; > >>> - boolactive; > >>> }; > >>> > >>
Re: [PATCH 8/9] arm/arm64: KVM: Rework the arch timer to use level-triggered semantics
On 03/09/15 18:23, Christoffer Dall wrote: > On Thu, Sep 03, 2015 at 06:06:39PM +0100, Marc Zyngier wrote: >> On 30/08/15 14:54, Christoffer Dall wrote: >>> The arch timer currently uses edge-triggered semantics in the sense that >>> the line is never sampled by the vgic and lowering the line from the >>> timer to the vgic doesn't have any affect on the pending state of >>> virtual interrupts in the vgic. This means that we do not support a >>> guest with the otherwise valid behavior of (1) disable interrupts (2) >>> enable the timer (3) disable the timer (4) enable interrupts. Such a >>> guest would validly not expect to see any interrupts on real hardware, >>> but will see interrupts on KVM. >>> >>> This patches fixes this shortcoming through the following series of >>> changes. >>> >>> First, we change the flow of the timer/vgic sync/flush operations. Now >>> the timer is always flushed/synced before the vgic, because the vgic >>> samples the state of the timer output. This has the implication that we >>> move the timer operations in to non-preempible sections, but that is >>> fine after the previous commit getting rid of hrtimer schedules on every >>> entry/exit. >>> >>> Second, we change the internal behavior of the timer, letting the timer >>> keep track of its previous output state, and only lower/raise the line >>> to the vgic when the state changes. Note that in theory this could have >>> been accomplished more simply by signalling the vgic every time the >>> state *potentially* changed, but we don't want to be hitting the vgic >>> more often than necessary. >>> >>> Third, we get rid of the use of the map->active field in the vgic and >>> instead simply set the interrupt as active on the physical distributor >>> whenever we signal a mapped interrupt to the guest, and we reset the >>> active state when we sync back the HW state from the vgic. >>> >>> Fourth, and finally, we now initialize the timer PPIs (and all the other >>> unused PPIs for now), to be level-triggered, and modify the sync code to >>> sample the line state on HW sync and re-inject a new interrupt if it is >>> still pending at that time. >>> >>> Signed-off-by: Christoffer Dall >>> --- >>> arch/arm/kvm/arm.c | 11 +-- >>> include/kvm/arm_arch_timer.h | 2 +- >>> include/kvm/arm_vgic.h | 3 -- >>> virt/kvm/arm/arch_timer.c| 68 >>> +++- >>> virt/kvm/arm/vgic.c | 67 >>> +++ >>> 5 files changed, 81 insertions(+), 70 deletions(-) >>> >>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c >>> index bdf8871..102a4aa 100644 >>> --- a/arch/arm/kvm/arm.c >>> +++ b/arch/arm/kvm/arm.c >>> @@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, >>> struct kvm_run *run) >>> >>> if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { >>> local_irq_enable(); >>> + kvm_timer_sync_hwstate(vcpu); >>> kvm_vgic_sync_hwstate(vcpu); >>> preempt_enable(); >>> - kvm_timer_sync_hwstate(vcpu); >>> continue; >>> } >>> >>> @@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, >>> struct kvm_run *run) >>> kvm_guest_exit(); >>> trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); >>> >>> + /* >>> +* We must sync the timer state before the vgic state so that >>> +* the vgic can properly sample the updated state of the >>> +* interrupt line. >>> +*/ >>> + kvm_timer_sync_hwstate(vcpu); >>> + >>> kvm_vgic_sync_hwstate(vcpu); >>> >>> preempt_enable(); >>> >>> - kvm_timer_sync_hwstate(vcpu); >>> - >>> ret = handle_exit(vcpu, run, ret); >>> } >>> >>> diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h >>> index ef14cc1..1800227 100644 >>> --- a/include/kvm/arm_arch_timer.h >>> +++ b/include/kvm/arm_arch_timer.h >>> @@ -51,7 +51,7 @@ struct arch_timer_cpu { >>> boolarmed; >>> >>> /* Timer IRQ */ >>> - const struct kvm_irq_level *irq; >>> + struct kvm_irq_levelirq; >>> >>> /* VGIC mapping */ >>> struct irq_phys_map *map; >>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h >>> index d901f1a..99011a0 100644 >>> --- a/include/kvm/arm_vgic.h >>> +++ b/include/kvm/arm_vgic.h >>> @@ -163,7 +163,6 @@ struct irq_phys_map { >>> u32 virt_irq; >>> u32 phys_irq; >>> u32 irq; >>> - boolactive; >>> }; >>> >>> struct irq_phys_map_entry { >>> @@ -358,8 +357,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); >>> struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, >>>in
Re: [PATCH 8/9] arm/arm64: KVM: Rework the arch timer to use level-triggered semantics
On Thu, Sep 03, 2015 at 06:06:39PM +0100, Marc Zyngier wrote: > On 30/08/15 14:54, Christoffer Dall wrote: > > The arch timer currently uses edge-triggered semantics in the sense that > > the line is never sampled by the vgic and lowering the line from the > > timer to the vgic doesn't have any affect on the pending state of > > virtual interrupts in the vgic. This means that we do not support a > > guest with the otherwise valid behavior of (1) disable interrupts (2) > > enable the timer (3) disable the timer (4) enable interrupts. Such a > > guest would validly not expect to see any interrupts on real hardware, > > but will see interrupts on KVM. > > > > This patches fixes this shortcoming through the following series of > > changes. > > > > First, we change the flow of the timer/vgic sync/flush operations. Now > > the timer is always flushed/synced before the vgic, because the vgic > > samples the state of the timer output. This has the implication that we > > move the timer operations in to non-preempible sections, but that is > > fine after the previous commit getting rid of hrtimer schedules on every > > entry/exit. > > > > Second, we change the internal behavior of the timer, letting the timer > > keep track of its previous output state, and only lower/raise the line > > to the vgic when the state changes. Note that in theory this could have > > been accomplished more simply by signalling the vgic every time the > > state *potentially* changed, but we don't want to be hitting the vgic > > more often than necessary. > > > > Third, we get rid of the use of the map->active field in the vgic and > > instead simply set the interrupt as active on the physical distributor > > whenever we signal a mapped interrupt to the guest, and we reset the > > active state when we sync back the HW state from the vgic. > > > > Fourth, and finally, we now initialize the timer PPIs (and all the other > > unused PPIs for now), to be level-triggered, and modify the sync code to > > sample the line state on HW sync and re-inject a new interrupt if it is > > still pending at that time. > > > > Signed-off-by: Christoffer Dall > > --- > > arch/arm/kvm/arm.c | 11 +-- > > include/kvm/arm_arch_timer.h | 2 +- > > include/kvm/arm_vgic.h | 3 -- > > virt/kvm/arm/arch_timer.c| 68 > > +++- > > virt/kvm/arm/vgic.c | 67 > > +++ > > 5 files changed, 81 insertions(+), 70 deletions(-) > > > > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > > index bdf8871..102a4aa 100644 > > --- a/arch/arm/kvm/arm.c > > +++ b/arch/arm/kvm/arm.c > > @@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, > > struct kvm_run *run) > > > > if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { > > local_irq_enable(); > > + kvm_timer_sync_hwstate(vcpu); > > kvm_vgic_sync_hwstate(vcpu); > > preempt_enable(); > > - kvm_timer_sync_hwstate(vcpu); > > continue; > > } > > > > @@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, > > struct kvm_run *run) > > kvm_guest_exit(); > > trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); > > > > + /* > > +* We must sync the timer state before the vgic state so that > > +* the vgic can properly sample the updated state of the > > +* interrupt line. > > +*/ > > + kvm_timer_sync_hwstate(vcpu); > > + > > kvm_vgic_sync_hwstate(vcpu); > > > > preempt_enable(); > > > > - kvm_timer_sync_hwstate(vcpu); > > - > > ret = handle_exit(vcpu, run, ret); > > } > > > > diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h > > index ef14cc1..1800227 100644 > > --- a/include/kvm/arm_arch_timer.h > > +++ b/include/kvm/arm_arch_timer.h > > @@ -51,7 +51,7 @@ struct arch_timer_cpu { > > boolarmed; > > > > /* Timer IRQ */ > > - const struct kvm_irq_level *irq; > > + struct kvm_irq_levelirq; > > > > /* VGIC mapping */ > > struct irq_phys_map *map; > > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h > > index d901f1a..99011a0 100644 > > --- a/include/kvm/arm_vgic.h > > +++ b/include/kvm/arm_vgic.h > > @@ -163,7 +163,6 @@ struct irq_phys_map { > > u32 virt_irq; > > u32 phys_irq; > > u32 irq; > > - boolactive; > > }; > > > > struct irq_phys_map_entry { > > @@ -358,8 +357,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); > > struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, > >int virt_irq, int irq); > > int kvm_vgic
Re: [PATCH 8/9] arm/arm64: KVM: Rework the arch timer to use level-triggered semantics
On 30/08/15 14:54, Christoffer Dall wrote: > The arch timer currently uses edge-triggered semantics in the sense that > the line is never sampled by the vgic and lowering the line from the > timer to the vgic doesn't have any affect on the pending state of > virtual interrupts in the vgic. This means that we do not support a > guest with the otherwise valid behavior of (1) disable interrupts (2) > enable the timer (3) disable the timer (4) enable interrupts. Such a > guest would validly not expect to see any interrupts on real hardware, > but will see interrupts on KVM. > > This patches fixes this shortcoming through the following series of > changes. > > First, we change the flow of the timer/vgic sync/flush operations. Now > the timer is always flushed/synced before the vgic, because the vgic > samples the state of the timer output. This has the implication that we > move the timer operations in to non-preempible sections, but that is > fine after the previous commit getting rid of hrtimer schedules on every > entry/exit. > > Second, we change the internal behavior of the timer, letting the timer > keep track of its previous output state, and only lower/raise the line > to the vgic when the state changes. Note that in theory this could have > been accomplished more simply by signalling the vgic every time the > state *potentially* changed, but we don't want to be hitting the vgic > more often than necessary. > > Third, we get rid of the use of the map->active field in the vgic and > instead simply set the interrupt as active on the physical distributor > whenever we signal a mapped interrupt to the guest, and we reset the > active state when we sync back the HW state from the vgic. > > Fourth, and finally, we now initialize the timer PPIs (and all the other > unused PPIs for now), to be level-triggered, and modify the sync code to > sample the line state on HW sync and re-inject a new interrupt if it is > still pending at that time. > > Signed-off-by: Christoffer Dall > --- > arch/arm/kvm/arm.c | 11 +-- > include/kvm/arm_arch_timer.h | 2 +- > include/kvm/arm_vgic.h | 3 -- > virt/kvm/arm/arch_timer.c| 68 > +++- > virt/kvm/arm/vgic.c | 67 +++ > 5 files changed, 81 insertions(+), 70 deletions(-) > > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > index bdf8871..102a4aa 100644 > --- a/arch/arm/kvm/arm.c > +++ b/arch/arm/kvm/arm.c > @@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct > kvm_run *run) > > if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { > local_irq_enable(); > + kvm_timer_sync_hwstate(vcpu); > kvm_vgic_sync_hwstate(vcpu); > preempt_enable(); > - kvm_timer_sync_hwstate(vcpu); > continue; > } > > @@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, > struct kvm_run *run) > kvm_guest_exit(); > trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); > > + /* > + * We must sync the timer state before the vgic state so that > + * the vgic can properly sample the updated state of the > + * interrupt line. > + */ > + kvm_timer_sync_hwstate(vcpu); > + > kvm_vgic_sync_hwstate(vcpu); > > preempt_enable(); > > - kvm_timer_sync_hwstate(vcpu); > - > ret = handle_exit(vcpu, run, ret); > } > > diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h > index ef14cc1..1800227 100644 > --- a/include/kvm/arm_arch_timer.h > +++ b/include/kvm/arm_arch_timer.h > @@ -51,7 +51,7 @@ struct arch_timer_cpu { > boolarmed; > > /* Timer IRQ */ > - const struct kvm_irq_level *irq; > + struct kvm_irq_levelirq; > > /* VGIC mapping */ > struct irq_phys_map *map; > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h > index d901f1a..99011a0 100644 > --- a/include/kvm/arm_vgic.h > +++ b/include/kvm/arm_vgic.h > @@ -163,7 +163,6 @@ struct irq_phys_map { > u32 virt_irq; > u32 phys_irq; > u32 irq; > - boolactive; > }; > > struct irq_phys_map_entry { > @@ -358,8 +357,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); > struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, > int virt_irq, int irq); > int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); > -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map); > -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active); > > #define irqchip