KVM Arm64 and Linux-RT issues
Hi all, I have been playing with the latest branch of Linux RT (5.2-rt1) and notice the following splat when starting a KVM guest. [ 122.336254] 003: BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:968 [ 122.336263] 003: in_atomic(): 1, irqs_disabled(): 0, pid: 1430, name: kvm-vcpu-1 [ 122.336267] 003: 2 locks held by kvm-vcpu-1/1430: [ 122.336271] 003: #0: 8007c1518100 (&vcpu->mutex){+.+.}, at: kvm_vcpu_ioctl+0x70/0xae0 [ 122.336287] 003: #1: 8007fb08b478 (&cpu_base->softirq_expiry_lock){+.+.}, at: hrtimer_grab_expiry_lock+0x24/0x40 [ 122.336299] 003: Preemption disabled at: [ 122.336300] 003: [] schedule+0x30/0xd8 [ 122.336308] 003: CPU: 3 PID: 1430 Comm: kvm-vcpu-1 Tainted: GW 5.2.0-rt1-8-g5bc0332820fd #88 [ 122.336311] 003: Hardware name: AMD Seattle (Rev.B0) Development Board (Overdrive) (DT) [ 122.336314] 003: Call trace: [ 122.336315] 003: dump_backtrace+0x0/0x130 [ 122.336319] 003: show_stack+0x14/0x20 [ 122.336321] 003: dump_stack+0xbc/0x104 [ 122.336324] 003: ___might_sleep+0x198/0x238 [ 122.336327] 003: rt_spin_lock+0x5c/0x70 [ 122.336330] 003: hrtimer_grab_expiry_lock+0x24/0x40 [ 122.336332] 003: hrtimer_cancel+0x1c/0x38 [ 122.336334] 003: kvm_timer_vcpu_load+0x78/0x3e0 [ 122.336338] 003: kvm_arch_vcpu_load+0x130/0x298 [ 122.336340] 003: kvm_sched_in+0x38/0x68 [ 122.336342] 003: finish_task_switch+0x14c/0x300 [ 122.336344] 003: __schedule+0x2b8/0x8d0 [ 122.336346] 003: schedule+0x38/0xd8 [ 122.336347] 003: kvm_vcpu_block+0xac/0x790 [ 122.336349] 003: kvm_handle_wfx+0x210/0x520 [ 122.336352] 003: handle_exit+0x134/0x1d0 [ 122.336355] 003: kvm_arch_vcpu_ioctl_run+0x658/0xbc0 [ 122.336357] 003: kvm_vcpu_ioctl+0x3a0/0xae0 [ 122.336359] 003: do_vfs_ioctl+0xbc/0x910 [ 122.336363] 003: ksys_ioctl+0x78/0xa8 [ 122.336365] 003: __arm64_sys_ioctl+0x1c/0x28 [ 122.336367] 003: el0_svc_common.constprop.0+0x90/0x188 [ 122.336370] 003: el0_svc_handler+0x28/0x78 [ 122.336373] 003: el0_svc+0x8/0xc [ 122.564216] 000: BUG: scheduling while 
atomic: kvm-vcpu-1/1430/0x0002 [ 122.564221] 000: 2 locks held by kvm-vcpu-1/1430: [ 122.564224] 000: #0: 8007c1518100 (&vcpu->mutex){+.+.}, at: kvm_vcpu_ioctl+0x70/0xae0 [ 122.564236] 000: #1: 8007fb08b478 (&cpu_base->softirq_expiry_lock){+.+.}, at: hrtimer_grab_expiry_lock+0x24/0x40 [ 122.564245] 000: Modules linked in: [ 122.564248] 000: Preemption disabled at: [ 122.564249] 000: [] schedule+0x30/0xd8 [ 122.564255] 000: CPU: 0 PID: 1430 Comm: kvm-vcpu-1 Tainted: GW 5.2.0-rt1-8-g5bc0332820fd #88 [ 122.564259] 000: Hardware name: AMD Seattle (Rev.B0) Development Board (Overdrive) (DT) [ 122.564261] 000: Call trace: [ 122.564262] 000: dump_backtrace+0x0/0x130 [ 122.564264] 000: show_stack+0x14/0x20 [ 122.564267] 000: dump_stack+0xbc/0x104 [ 122.564269] 000: __schedule_bug+0xbc/0xe8 [ 122.564272] 000: __schedule+0x6e4/0x8d0 [ 122.564274] 000: schedule+0x38/0xd8 [ 122.564275] 000: rt_spin_lock_slowlock_locked+0xf8/0x390 [ 122.564277] 000: rt_spin_lock_slowlock+0x68/0xa0 [ 122.564279] 000: rt_spin_lock+0x64/0x70 [ 122.564281] 000: hrtimer_grab_expiry_lock+0x24/0x40 [ 122.564283] 000: hrtimer_cancel+0x1c/0x38 [ 122.564285] 000: kvm_timer_vcpu_load+0x78/0x3e0 [ 122.564287] 000: kvm_arch_vcpu_load+0x130/0x298 [ 122.564290] 000: kvm_sched_in+0x38/0x68 [ 122.564291] 000: finish_task_switch+0x14c/0x300 [ 122.564293] 000: __schedule+0x2b8/0x8d0 [ 122.564295] 000: schedule+0x38/0xd8 [ 122.564297] 000: kvm_vcpu_block+0xac/0x790 [ 122.564298] 000: kvm_handle_wfx+0x210/0x520 [ 122.564301] 000: handle_exit+0x134/0x1d0 [ 122.564303] 000: kvm_arch_vcpu_ioctl_run+0x658/0xbc0 [ 122.564305] 000: kvm_vcpu_ioctl+0x3a0/0xae0 [ 122.564307] 000: do_vfs_ioctl+0xbc/0x910 [ 122.564309] 000: ksys_ioctl+0x78/0xa8 [ 122.564311] 000: __arm64_sys_ioctl+0x1c/0x28 [ 122.564313] 000: el0_svc_common.constprop.0+0x90/0x188 [ 122.564316] 000: el0_svc_handler+0x28/0x78 [ 122.564318] 000: el0_svc+0x8/0xc [ 122.564341] 000: WARNING: CPU: 0 PID: 1430 at kernel/sched/core.c:7366 migrate_enable+0x21c/0x428 [ 
122.564346] 000: Modules linked in: [ 122.564348] 000: CPU: 0 PID: 1430 Comm: kvm-vcpu-1 Tainted: GW 5.2.0-rt1-8-g5bc0332820fd #88 [ 122.564351] 000: Hardware name: AMD Seattle (Rev.B0) Development Board (Overdrive) (DT) [ 122.564352] 000: pstate: 6005 (nZCv daif -PAN -UAO) [ 122.564355] 000: pc : migrate_enable+0x21c/0x428 [ 122.564357] 000: lr : rt_spin_unlock+0x2c/0x48 [ 122.564359] 000: sp : 2648b680 [ 122.564362] 000: x29: 2648b680 x28: 11af9b00 [ 122.564365] 000: x27: 8007fb03d918 x26: 8007e9579000 [ 122.564368] 000: x25: x24: fffefdf7d6910c00 [ 122.564371] 000: x23: x22: 0008 [ 122.564374] 000: x21: 0001 x20: 8007c284cd80 [ 122.564377] 000: x19: 11af0d88 x18:
Re: [PATCH v2 8/9] KVM: arm/arm64: vgic-its: Check the LPI translation cache on MSI injection
Hi Eric, On 23/07/2019 16:10, Auger Eric wrote: > Hi Marc, > > On 6/11/19 7:03 PM, Marc Zyngier wrote: >> When performing an MSI injection, let's first check if the translation >> is already in the cache. If so, let's inject it quickly without >> going through the whole translation process. >> >> Signed-off-by: Marc Zyngier >> --- >> virt/kvm/arm/vgic/vgic-its.c | 36 >> virt/kvm/arm/vgic/vgic.h | 1 + >> 2 files changed, 37 insertions(+) >> >> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c >> index 62932458476a..83d80ec33473 100644 >> --- a/virt/kvm/arm/vgic/vgic-its.c >> +++ b/virt/kvm/arm/vgic/vgic-its.c >> @@ -577,6 +577,20 @@ static struct vgic_irq *__vgic_its_check_cache(struct >> vgic_dist *dist, >> return irq; >> } >> >> +static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t >> db, >> + u32 devid, u32 eventid) >> +{ >> +struct vgic_dist *dist = &kvm->arch.vgic; >> +struct vgic_irq *irq; >> +unsigned long flags; >> + >> +raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); >> +irq = __vgic_its_check_cache(dist, db, devid, eventid); >> +raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); >> + >> +return irq; >> +} >> + >> static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its >> *its, >> u32 devid, u32 eventid, >> struct vgic_irq *irq) >> @@ -736,6 +750,25 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct >> vgic_its *its, >> return 0; >> } >> >> +int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi) >> +{ >> +struct vgic_irq *irq; >> +unsigned long flags; >> +phys_addr_t db; >> + >> +db = (u64)msi->address_hi << 32 | msi->address_lo; >> +irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data); > > I think we miss a check of its->enabled. This is currently done in > vgic_its_resolve_lpi() but now likely to be bypassed. But why would a translation be cached if the ITS is disabled? 
It should never have been there in the first place (vgic_its_resolve_lpi does check for the ITS being enabled, as you pointed out). Which makes me think that we miss an invalidate on an ITS being disabled: diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 2633b0e88981..5f2ad74ad834 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1719,6 +1719,8 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, goto out; its->enabled = !!(val & GITS_CTLR_ENABLE); + if (!its->enabled) + vgic_its_invalidate_cache(kvm); /* * Try to process any pending commands. This function bails out early What do you think? M. -- Jazz is not dead. It just smells funny... ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 7/9] KVM: arm/arm64: vgic-its: Cache successful MSI->LPI translation
Hi Marc, On 6/11/19 7:03 PM, Marc Zyngier wrote: > On a successful translation, preserve the parameters in the LPI > translation cache. Each translation is reusing the last slot > in the list, naturally evincting the least recently used entry. evicting > > Signed-off-by: Marc Zyngier > --- > virt/kvm/arm/vgic/vgic-its.c | 86 > 1 file changed, 86 insertions(+) > > diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c > index 0aa0cbbc3af6..62932458476a 100644 > --- a/virt/kvm/arm/vgic/vgic-its.c > +++ b/virt/kvm/arm/vgic/vgic-its.c > @@ -546,6 +546,90 @@ static unsigned long vgic_mmio_read_its_idregs(struct > kvm *kvm, > return 0; > } > > +static struct vgic_irq *__vgic_its_check_cache(struct vgic_dist *dist, > +phys_addr_t db, > +u32 devid, u32 eventid) > +{ > + struct vgic_translation_cache_entry *cte; > + struct vgic_irq *irq = NULL; > + > + list_for_each_entry(cte, &dist->lpi_translation_cache, entry) { > + /* > + * If we hit a NULL entry, there is nothing after this > + * point. > + */ > + if (!cte->irq) > + break; > + > + if (cte->db == db && > + cte->devid == devid && > + cte->eventid == eventid) { > + /* > + * Move this entry to the head, as it is the > + * most recently used. 
> + */ > + list_move(&cte->entry, &dist->lpi_translation_cache); > + irq = cte->irq; > + break; > + } > + } > + > + return irq; > +} > + > +static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, > +u32 devid, u32 eventid, > +struct vgic_irq *irq) > +{ > + struct vgic_dist *dist = &kvm->arch.vgic; > + struct vgic_translation_cache_entry *cte; > + unsigned long flags; > + phys_addr_t db; > + > + /* Do not cache a directly injected interrupt */ > + if (irq->hw) > + return; > + > + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); > + > + if (unlikely(list_empty(&dist->lpi_translation_cache))) > + goto out; > + > + /* > + * We could have raced with another CPU caching the same > + * translation behind our back, so let's check it is not in > + * already > + */ > + db = its->vgic_its_base + GITS_TRANSLATER; > + if (__vgic_its_check_cache(dist, db, devid, eventid)) > + goto out; > + > + /* Always reuse the last entry (LRU policy) */ > + cte = list_last_entry(&dist->lpi_translation_cache, > + typeof(*cte), entry); > + > + /* > + * Caching the translation implies having an extra reference > + * to the interrupt, so drop the potential reference on what > + * was in the cache, and increment it on the new interrupt. 
> + */ > + if (cte->irq) > + __vgic_put_lpi_locked(kvm, cte->irq); > + > + vgic_get_irq_kref(irq); > + > + cte->db = db; > + cte->devid = devid; > + cte->eventid= eventid; > + cte->irq= irq; > + > + /* Move the new translation to the head of the list */ > + list_move(&cte->entry, &dist->lpi_translation_cache); > + > +out: > + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); > +} > + > void vgic_its_invalidate_cache(struct kvm *kvm) > { > struct vgic_dist *dist = &kvm->arch.vgic; > @@ -589,6 +673,8 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its > *its, > if (!vcpu->arch.vgic_cpu.lpis_enabled) > return -EBUSY; > > + vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq); > + > *irq = ite->irq; > return 0; > } > Reviewed-by: Eric Auger Thanks Eric ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 9/9] KVM: arm/arm64: vgic-irqfd: Implement kvm_arch_set_irq_inatomic
Hi Marc, On 6/11/19 7:03 PM, Marc Zyngier wrote: > Now that we have a cache of MSI->LPI translations, it is pretty > easy to implement kvm_arch_set_irq_inatomic (this cache can be > parsed without sleeping). > > Hopefully, this will improve some LPI-heavy workloads. > > Signed-off-by: Marc Zyngier > --- > virt/kvm/arm/vgic/vgic-irqfd.c | 36 -- > 1 file changed, 30 insertions(+), 6 deletions(-) > > diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c > index 99e026d2dade..9f203ed8c8f3 100644 > --- a/virt/kvm/arm/vgic/vgic-irqfd.c > +++ b/virt/kvm/arm/vgic/vgic-irqfd.c > @@ -77,6 +77,15 @@ int kvm_set_routing_entry(struct kvm *kvm, > return r; > } > > +static void kvm_populate_msi(struct kvm_kernel_irq_routing_entry *e, > + struct kvm_msi *msi) > +{ > + msi->address_lo = e->msi.address_lo; > + msi->address_hi = e->msi.address_hi; > + msi->data = e->msi.data; > + msi->flags = e->msi.flags; > + msi->devid = e->msi.devid; > +} > /** > * kvm_set_msi: inject the MSI corresponding to the s/:/ - > * MSI routing entry > @@ -90,21 +99,36 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, > { > struct kvm_msi msi; > > - msi.address_lo = e->msi.address_lo; > - msi.address_hi = e->msi.address_hi; > - msi.data = e->msi.data; > - msi.flags = e->msi.flags; > - msi.devid = e->msi.devid; > - > if (!vgic_has_its(kvm)) > return -ENODEV; > > if (!level) > return -1; > > + kvm_populate_msi(e, &msi); > return vgic_its_inject_msi(kvm, &msi); > } > > +/** > + * kvm_arch_set_irq_inatomic: fast-path for irqfd injection s/:/ - > + * > + * Currently only direct MSI injecton is supported. 
injection > + */ > +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, > + struct kvm *kvm, int irq_source_id, int level, > + bool line_status) > +{ > + if (e->type == KVM_IRQ_ROUTING_MSI && vgic_has_its(kvm) && level) { > + struct kvm_msi msi; > + > + kvm_populate_msi(e, &msi); > + if (!vgic_its_inject_cached_translation(kvm, &msi)) > + return 0; if this fails since its->enabled is false we will re-attempt the injection through the normal injection path but that's not a big deal. > + } > + > + return -EWOULDBLOCK; > +} > + > int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) > { > struct kvm_irq_routing_entry *entries; > Reviewed-by: Eric Auger Thanks Eric ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 8/9] KVM: arm/arm64: vgic-its: Check the LPI translation cache on MSI injection
Hi Marc, On 6/11/19 7:03 PM, Marc Zyngier wrote: > When performing an MSI injection, let's first check if the translation > is already in the cache. If so, let's inject it quickly without > going through the whole translation process. > > Signed-off-by: Marc Zyngier > --- > virt/kvm/arm/vgic/vgic-its.c | 36 > virt/kvm/arm/vgic/vgic.h | 1 + > 2 files changed, 37 insertions(+) > > diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c > index 62932458476a..83d80ec33473 100644 > --- a/virt/kvm/arm/vgic/vgic-its.c > +++ b/virt/kvm/arm/vgic/vgic-its.c > @@ -577,6 +577,20 @@ static struct vgic_irq *__vgic_its_check_cache(struct > vgic_dist *dist, > return irq; > } > > +static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db, > + u32 devid, u32 eventid) > +{ > + struct vgic_dist *dist = &kvm->arch.vgic; > + struct vgic_irq *irq; > + unsigned long flags; > + > + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); > + irq = __vgic_its_check_cache(dist, db, devid, eventid); > + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); > + > + return irq; > +} > + > static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, > u32 devid, u32 eventid, > struct vgic_irq *irq) > @@ -736,6 +750,25 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct > vgic_its *its, > return 0; > } > > +int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi) > +{ > + struct vgic_irq *irq; > + unsigned long flags; > + phys_addr_t db; > + > + db = (u64)msi->address_hi << 32 | msi->address_lo; > + irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data); I think we miss a check of its->enabled. This is currently done in vgic_its_resolve_lpi() but now likely to be bypassed. Doing that in this function is needed for next patch I think. 
Thanks Eric > + > + if (!irq) > + return -1; > + > + raw_spin_lock_irqsave(&irq->irq_lock, flags); > + irq->pending_latch = true; > + vgic_queue_irq_unlock(kvm, irq, flags); > + > + return 0; > +} > + > /* > * Queries the KVM IO bus framework to get the ITS pointer from the given > * doorbell address. > @@ -747,6 +780,9 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi > *msi) > struct vgic_its *its; > int ret; > > + if (!vgic_its_inject_cached_translation(kvm, msi)) > + return 1; > + > its = vgic_msi_to_its(kvm, msi); > if (IS_ERR(its)) > return PTR_ERR(its); > diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h > index 072f810dc441..ad6eba1e2beb 100644 > --- a/virt/kvm/arm/vgic/vgic.h > +++ b/virt/kvm/arm/vgic/vgic.h > @@ -317,6 +317,7 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu > *vcpu, u32 **intid_ptr); > int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, >u32 devid, u32 eventid, struct vgic_irq **irq); > struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); > +int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi); > void vgic_lpi_translation_cache_init(struct kvm *kvm); > void vgic_lpi_translation_cache_destroy(struct kvm *kvm); > void vgic_its_invalidate_cache(struct kvm *kvm); > ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 5/9] KVM: arm/arm64: vgic-its: Invalidate MSI-LPI translation cache on disabling LPIs
Hi Marc, On 6/11/19 7:03 PM, Marc Zyngier wrote: > If a vcpu disables LPIs at its redistributor level, we need to make sure > we won't pend more interrupts. For this, we need to invalidate the LPI > translation cache. > > Signed-off-by: Marc Zyngier Reviewed-by: Eric Auger Thanks Eric > --- > virt/kvm/arm/vgic/vgic-mmio-v3.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c > b/virt/kvm/arm/vgic/vgic-mmio-v3.c > index 936962abc38d..cb60da48810d 100644 > --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c > +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c > @@ -192,8 +192,10 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu > *vcpu, > > vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS; > > - if (was_enabled && !vgic_cpu->lpis_enabled) > + if (was_enabled && !vgic_cpu->lpis_enabled) { > vgic_flush_pending_lpis(vcpu); > + vgic_its_invalidate_cache(vcpu->kvm); > + } > > if (!was_enabled && vgic_cpu->lpis_enabled) > vgic_enable_lpis(vcpu); > ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 6/9] KVM: arm/arm64: vgic-its: Invalidate MSI-LPI translation cache on vgic teardown
Hi Marc, On 6/11/19 7:03 PM, Marc Zyngier wrote: > In order to avoid leaking vgic_irq structures on teardown, we need to > drop all references to LPIs before deallocating the cache itself. > > This is done by invalidating the cache on vgic teardown. > > Signed-off-by: Marc Zyngier > --- > virt/kvm/arm/vgic/vgic-its.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c > index 5254bb762e1b..0aa0cbbc3af6 100644 > --- a/virt/kvm/arm/vgic/vgic-its.c > +++ b/virt/kvm/arm/vgic/vgic-its.c > @@ -1739,6 +1739,8 @@ void vgic_lpi_translation_cache_destroy(struct kvm *kvm) > struct vgic_dist *dist = &kvm->arch.vgic; > struct vgic_translation_cache_entry *cte, *tmp; > > + vgic_its_invalidate_cache(kvm); > + > list_for_each_entry_safe(cte, tmp, >&dist->lpi_translation_cache, entry) { > list_del(&cte->entry); > Reviewed-by: Eric Auger Thanks Eric ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 4/9] KVM: arm/arm64: vgic-its: Invalidate MSI-LPI translation cache on specific commands
On 23/07/2019 13:47, Auger Eric wrote: > Hi Marc, > > On 7/23/19 2:43 PM, Marc Zyngier wrote: >> On 23/07/2019 13:25, Auger Eric wrote: >>> Hi Marc, >>> >>> On 7/22/19 12:54 PM, Marc Zyngier wrote: Hi Eric, On 01/07/2019 13:38, Auger Eric wrote: > Hi Marc, > > On 6/11/19 7:03 PM, Marc Zyngier wrote: >> The LPI translation cache needs to be discarded when an ITS command >> may affect the translation of an LPI (DISCARD and MAPD with V=0) or >> the routing of an LPI to a redistributor with disabled LPIs (MOVI, >> MOVALL). >> >> We decide to perform a full invalidation of the cache, irrespective >> of the LPI that is affected. Commands are supposed to be rare enough >> that it doesn't matter. >> >> Signed-off-by: Marc Zyngier >> --- >> virt/kvm/arm/vgic/vgic-its.c | 8 >> 1 file changed, 8 insertions(+) >> >> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c >> index 9b6b66204b97..5254bb762e1b 100644 >> --- a/virt/kvm/arm/vgic/vgic-its.c >> +++ b/virt/kvm/arm/vgic/vgic-its.c >> @@ -733,6 +733,8 @@ static int vgic_its_cmd_handle_discard(struct kvm >> *kvm, struct vgic_its *its, >> * don't bother here since we clear the ITTE anyway and >> the >> * pending state is a property of the ITTE struct. 
>> */ >> +vgic_its_invalidate_cache(kvm); >> + >> its_free_ite(kvm, ite); >> return 0; >> } >> @@ -768,6 +770,8 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, >> struct vgic_its *its, >> ite->collection = collection; >> vcpu = kvm_get_vcpu(kvm, collection->target_addr); >> >> +vgic_its_invalidate_cache(kvm); >> + >> return update_affinity(ite->irq, vcpu); >> } >> >> @@ -996,6 +1000,8 @@ static void vgic_its_free_device(struct kvm *kvm, >> struct its_device *device) >> list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list) >> its_free_ite(kvm, ite); >> >> +vgic_its_invalidate_cache(kvm); >> + >> list_del(&device->dev_list); >> kfree(device); >> } >> @@ -1249,6 +1255,8 @@ static int vgic_its_cmd_handle_movall(struct kvm >> *kvm, struct vgic_its *its, >> vgic_put_irq(kvm, irq); >> } >> >> +vgic_its_invalidate_cache(kvm); > All the commands are executed with the its_lock held. Now we don't take > it anymore on the fast cache injection path. Don't we have a window > where the move has been applied at table level and the cache is not yet > invalidated? Same question for vgic_its_free_device(). There is definitely a race, but that race is invisible from the guest's perspective. The guest can only assume that the command has taken effect once a SYNC command has been executed, and it cannot observe that the SYNC command has been executed before we have invalidated the cache. Does this answer your question? >>> >>> OK make sense. Thank you for the clarification >>> >>> Another question, don't we need to invalidate the cache on MAPC V=0 as >>> well? Removing the mapping of the collection results in interrupts >>> belonging to that collection being ignored. If we don't flush the >>> pending bit will be set? >> >> Yup, that's a good point. I think i had that at some point, and ended up >> dropping it, probably missing the point that the interrupt would be made >> pending. 
>> >> I'll add this: >> >> @@ -1218,6 +1218,7 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, >> struct vgic_its *its, >> >> if (!valid) { >> vgic_its_free_collection(its, coll_id); >> +vgic_its_invalidate_cache(kvm); >> } else { >> collection = find_collection(its, coll_id); >> > Yep, with that change feel free to add my R-b > > Reviewed-by: Eric Auger Thanks! M. -- Jazz is not dead. It just smells funny... ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 4/9] KVM: arm/arm64: vgic-its: Invalidate MSI-LPI translation cache on specific commands
Hi Marc, On 7/23/19 2:43 PM, Marc Zyngier wrote: > On 23/07/2019 13:25, Auger Eric wrote: >> Hi Marc, >> >> On 7/22/19 12:54 PM, Marc Zyngier wrote: >>> Hi Eric, >>> >>> On 01/07/2019 13:38, Auger Eric wrote: Hi Marc, On 6/11/19 7:03 PM, Marc Zyngier wrote: > The LPI translation cache needs to be discarded when an ITS command > may affect the translation of an LPI (DISCARD and MAPD with V=0) or > the routing of an LPI to a redistributor with disabled LPIs (MOVI, > MOVALL). > > We decide to perform a full invalidation of the cache, irrespective > of the LPI that is affected. Commands are supposed to be rare enough > that it doesn't matter. > > Signed-off-by: Marc Zyngier > --- > virt/kvm/arm/vgic/vgic-its.c | 8 > 1 file changed, 8 insertions(+) > > diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c > index 9b6b66204b97..5254bb762e1b 100644 > --- a/virt/kvm/arm/vgic/vgic-its.c > +++ b/virt/kvm/arm/vgic/vgic-its.c > @@ -733,6 +733,8 @@ static int vgic_its_cmd_handle_discard(struct kvm > *kvm, struct vgic_its *its, >* don't bother here since we clear the ITTE anyway and the >* pending state is a property of the ITTE struct. 
>*/ > + vgic_its_invalidate_cache(kvm); > + > its_free_ite(kvm, ite); > return 0; > } > @@ -768,6 +770,8 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, > struct vgic_its *its, > ite->collection = collection; > vcpu = kvm_get_vcpu(kvm, collection->target_addr); > > + vgic_its_invalidate_cache(kvm); > + > return update_affinity(ite->irq, vcpu); > } > > @@ -996,6 +1000,8 @@ static void vgic_its_free_device(struct kvm *kvm, > struct its_device *device) > list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list) > its_free_ite(kvm, ite); > > + vgic_its_invalidate_cache(kvm); > + > list_del(&device->dev_list); > kfree(device); > } > @@ -1249,6 +1255,8 @@ static int vgic_its_cmd_handle_movall(struct kvm > *kvm, struct vgic_its *its, > vgic_put_irq(kvm, irq); > } > > + vgic_its_invalidate_cache(kvm); All the commands are executed with the its_lock held. Now we don't take it anymore on the fast cache injection path. Don't we have a window where the move has been applied at table level and the cache is not yet invalidated? Same question for vgic_its_free_device(). >>> >>> There is definitely a race, but that race is invisible from the guest's >>> perspective. The guest can only assume that the command has taken effect >>> once a SYNC command has been executed, and it cannot observe that the >>> SYNC command has been executed before we have invalidated the cache. >>> >>> Does this answer your question? >> >> OK make sense. Thank you for the clarification >> >> Another question, don't we need to invalidate the cache on MAPC V=0 as >> well? Removing the mapping of the collection results in interrupts >> belonging to that collection being ignored. If we don't flush the >> pending bit will be set? > > Yup, that's a good point. I think i had that at some point, and ended up > dropping it, probably missing the point that the interrupt would be made > pending. 
> > I'll add this: > > @@ -1218,6 +1218,7 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, > struct vgic_its *its, > > if (!valid) { > vgic_its_free_collection(its, coll_id); > + vgic_its_invalidate_cache(kvm); > } else { > collection = find_collection(its, coll_id); > Yep, with that change feel free to add my R-b Reviewed-by: Eric Auger Thanks Eric > > Thanks, > > M. > ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 1/9] KVM: arm/arm64: vgic: Add LPI translation cache definition
Hi Marc, On 6/11/19 7:03 PM, Marc Zyngier wrote: > Add the basic data structure that expresses an MSI to LPI > translation as well as the allocation/release hooks. > > THe size of the cache is arbitrarily defined as 4*nr_vcpus. The > > Signed-off-by: Marc Zyngier > --- > include/kvm/arm_vgic.h| 3 +++ > virt/kvm/arm/vgic/vgic-init.c | 5 > virt/kvm/arm/vgic/vgic-its.c | 49 +++ > virt/kvm/arm/vgic/vgic.h | 2 ++ > 4 files changed, 59 insertions(+) > > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h > index c36c86f1ec9a..ca7bcf52dc85 100644 > --- a/include/kvm/arm_vgic.h > +++ b/include/kvm/arm_vgic.h > @@ -260,6 +260,9 @@ struct vgic_dist { > struct list_headlpi_list_head; > int lpi_list_count; > > + /* LPI translation cache */ > + struct list_headlpi_translation_cache; > + > /* used by vgic-debug */ > struct vgic_state_iter *iter; > > diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c > index 3bdb31eaed64..c7c4c77dd430 100644 > --- a/virt/kvm/arm/vgic/vgic-init.c > +++ b/virt/kvm/arm/vgic/vgic-init.c > @@ -64,6 +64,7 @@ void kvm_vgic_early_init(struct kvm *kvm) > struct vgic_dist *dist = &kvm->arch.vgic; > > INIT_LIST_HEAD(&dist->lpi_list_head); > + INIT_LIST_HEAD(&dist->lpi_translation_cache); > raw_spin_lock_init(&dist->lpi_list_lock); > } > > @@ -305,6 +306,7 @@ int vgic_init(struct kvm *kvm) > } > > if (vgic_has_its(kvm)) { > + vgic_lpi_translation_cache_init(kvm); > ret = vgic_v4_init(kvm); > if (ret) > goto out; > @@ -346,6 +348,9 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) > INIT_LIST_HEAD(&dist->rd_regions); > } > > + if (vgic_has_its(kvm)) > + vgic_lpi_translation_cache_destroy(kvm); > + > if (vgic_supports_direct_msis(kvm)) > vgic_v4_teardown(kvm); > } > diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c > index 44ceaccb18cf..ce9bcddeb7f1 100644 > --- a/virt/kvm/arm/vgic/vgic-its.c > +++ b/virt/kvm/arm/vgic/vgic-its.c > @@ -149,6 +149,14 @@ struct its_ite { > u32 event_id; > }; > > +struct 
vgic_translation_cache_entry { > + struct list_headentry; > + phys_addr_t db; > + u32 devid; > + u32 eventid; > + struct vgic_irq *irq; > +}; > + > /** > * struct vgic_its_abi - ITS abi ops and settings > * @cte_esz: collection table entry size > @@ -1668,6 +1676,45 @@ static int vgic_register_its_iodev(struct kvm *kvm, > struct vgic_its *its, > return ret; > } > > +/* Default is 16 cached LPIs per vcpu */ > +#define LPI_DEFAULT_PCPU_CACHE_SIZE 16 > + > +void vgic_lpi_translation_cache_init(struct kvm *kvm) > +{ > + struct vgic_dist *dist = &kvm->arch.vgic; > + unsigned int sz; > + int i; > + > + if (!list_empty(&dist->lpi_translation_cache)) > + return; > + > + sz = atomic_read(&kvm->online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE; > + > + for (i = 0; i < sz; i++) { > + struct vgic_translation_cache_entry *cte; > + > + /* An allocation failure is not fatal */ > + cte = kzalloc(sizeof(*cte), GFP_KERNEL); > + if (WARN_ON(!cte)) > + break; > + > + INIT_LIST_HEAD(&cte->entry); > + list_add(&cte->entry, &dist->lpi_translation_cache); > + } > +} > + > +void vgic_lpi_translation_cache_destroy(struct kvm *kvm) > +{ > + struct vgic_dist *dist = &kvm->arch.vgic; > + struct vgic_translation_cache_entry *cte, *tmp; > + > + list_for_each_entry_safe(cte, tmp, > + &dist->lpi_translation_cache, entry) { > + list_del(&cte->entry); > + kfree(cte); > + } > +} > + > #define INITIAL_BASER_VALUE\ > (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)| \ >GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \ > @@ -1696,6 +1743,8 @@ static int vgic_its_create(struct kvm_device *dev, u32 > type) > kfree(its); > return ret; > } > + > + vgic_lpi_translation_cache_init(dev->kvm); > } > > mutex_init(&its->its_lock); > diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h > index abeeffabc456..50aad705c4a9 100644 > --- a/virt/kvm/arm/vgic/vgic.h > +++ b/virt/kvm/arm/vgic/vgic.h > @@ -316,6 +316,8 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu > *vcpu, u32 **intid_ptr); > 
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, >u32 devid, u32 eventid, struct vgic_irq **irq); > struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); > +void vg
Re: [PATCH v2 4/9] KVM: arm/arm64: vgic-its: Invalidate MSI-LPI translation cache on specific commands
On 23/07/2019 13:25, Auger Eric wrote: > Hi Marc, > > On 7/22/19 12:54 PM, Marc Zyngier wrote: >> Hi Eric, >> >> On 01/07/2019 13:38, Auger Eric wrote: >>> Hi Marc, >>> >>> On 6/11/19 7:03 PM, Marc Zyngier wrote: The LPI translation cache needs to be discarded when an ITS command may affect the translation of an LPI (DISCARD and MAPD with V=0) or the routing of an LPI to a redistributor with disabled LPIs (MOVI, MOVALL). We decide to perform a full invalidation of the cache, irrespective of the LPI that is affected. Commands are supposed to be rare enough that it doesn't matter. Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-its.c | 8 1 file changed, 8 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 9b6b66204b97..5254bb762e1b 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -733,6 +733,8 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, * don't bother here since we clear the ITTE anyway and the * pending state is a property of the ITTE struct. */ + vgic_its_invalidate_cache(kvm); + its_free_ite(kvm, ite); return 0; } @@ -768,6 +770,8 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, ite->collection = collection; vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vgic_its_invalidate_cache(kvm); + return update_affinity(ite->irq, vcpu); } @@ -996,6 +1000,8 @@ static void vgic_its_free_device(struct kvm *kvm, struct its_device *device) list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list) its_free_ite(kvm, ite); + vgic_its_invalidate_cache(kvm); + list_del(&device->dev_list); kfree(device); } @@ -1249,6 +1255,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, vgic_put_irq(kvm, irq); } + vgic_its_invalidate_cache(kvm); >>> All the commands are executed with the its_lock held. Now we don't take >>> it anymore on the fast cache injection path. 
Don't we have a window >>> where the move has been applied at table level and the cache is not yet >>> invalidated? Same question for vgic_its_free_device(). >> >> There is definitely a race, but that race is invisible from the guest's >> perspective. The guest can only assume that the command has taken effect >> once a SYNC command has been executed, and it cannot observe that the >> SYNC command has been executed before we have invalidated the cache. >> >> Does this answer your question? > > OK, makes sense. Thank you for the clarification > > Another question, don't we need to invalidate the cache on MAPC V=0 as > well? Removing the mapping of the collection results in interrupts > belonging to that collection being ignored. If we don't flush the cache, the > pending bit will be set? Yup, that's a good point. I think I had that at some point, and ended up dropping it, probably missing the point that the interrupt would be made pending. I'll add this: @@ -1218,6 +1218,7 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, if (!valid) { vgic_its_free_collection(its, coll_id); + vgic_its_invalidate_cache(kvm); } else { collection = find_collection(its, coll_id); Thanks, M. -- Jazz is not dead. It just smells funny... ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 3/9] KVM: arm/arm64: vgic-its: Add MSI-LPI translation cache invalidation
Hi Marc, On 6/11/19 7:03 PM, Marc Zyngier wrote: > There's a number of cases where we need to invalidate the caching > of translations, so let's add basic support for that. > > Signed-off-by: Marc Zyngier Reviewed-by: Eric Auger Thanks Eric > --- > virt/kvm/arm/vgic/vgic-its.c | 23 +++ > virt/kvm/arm/vgic/vgic.h | 1 + > 2 files changed, 24 insertions(+) > > diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c > index ce9bcddeb7f1..9b6b66204b97 100644 > --- a/virt/kvm/arm/vgic/vgic-its.c > +++ b/virt/kvm/arm/vgic/vgic-its.c > @@ -546,6 +546,29 @@ static unsigned long vgic_mmio_read_its_idregs(struct > kvm *kvm, > return 0; > } > > +void vgic_its_invalidate_cache(struct kvm *kvm) > +{ > + struct vgic_dist *dist = &kvm->arch.vgic; > + struct vgic_translation_cache_entry *cte; > + unsigned long flags; > + > + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); > + > + list_for_each_entry(cte, &dist->lpi_translation_cache, entry) { > + /* > + * If we hit a NULL entry, there is nothing after this > + * point. 
> + */ > + if (!cte->irq) > + break; > + > + __vgic_put_lpi_locked(kvm, cte->irq); > + cte->irq = NULL; > + } > + > + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); > +} > + > int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, >u32 devid, u32 eventid, struct vgic_irq **irq) > { > diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h > index afac2fed7df4..072f810dc441 100644 > --- a/virt/kvm/arm/vgic/vgic.h > +++ b/virt/kvm/arm/vgic/vgic.h > @@ -319,6 +319,7 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its > *its, > struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); > void vgic_lpi_translation_cache_init(struct kvm *kvm); > void vgic_lpi_translation_cache_destroy(struct kvm *kvm); > +void vgic_its_invalidate_cache(struct kvm *kvm); > > bool vgic_supports_direct_msis(struct kvm *kvm); > int vgic_v4_init(struct kvm *kvm); > ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 4/9] KVM: arm/arm64: vgic-its: Invalidate MSI-LPI translation cache on specific commands
Hi Marc, On 7/22/19 12:54 PM, Marc Zyngier wrote: > Hi Eric, > > On 01/07/2019 13:38, Auger Eric wrote: >> Hi Marc, >> >> On 6/11/19 7:03 PM, Marc Zyngier wrote: >>> The LPI translation cache needs to be discarded when an ITS command >>> may affect the translation of an LPI (DISCARD and MAPD with V=0) or >>> the routing of an LPI to a redistributor with disabled LPIs (MOVI, >>> MOVALL). >>> >>> We decide to perform a full invalidation of the cache, irrespective >>> of the LPI that is affected. Commands are supposed to be rare enough >>> that it doesn't matter. >>> >>> Signed-off-by: Marc Zyngier >>> --- >>> virt/kvm/arm/vgic/vgic-its.c | 8 >>> 1 file changed, 8 insertions(+) >>> >>> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c >>> index 9b6b66204b97..5254bb762e1b 100644 >>> --- a/virt/kvm/arm/vgic/vgic-its.c >>> +++ b/virt/kvm/arm/vgic/vgic-its.c >>> @@ -733,6 +733,8 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, >>> struct vgic_its *its, >>> * don't bother here since we clear the ITTE anyway and the >>> * pending state is a property of the ITTE struct. 
>>> */ >>> + vgic_its_invalidate_cache(kvm); >>> + >>> its_free_ite(kvm, ite); >>> return 0; >>> } >>> @@ -768,6 +770,8 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, >>> struct vgic_its *its, >>> ite->collection = collection; >>> vcpu = kvm_get_vcpu(kvm, collection->target_addr); >>> >>> + vgic_its_invalidate_cache(kvm); >>> + >>> return update_affinity(ite->irq, vcpu); >>> } >>> >>> @@ -996,6 +1000,8 @@ static void vgic_its_free_device(struct kvm *kvm, >>> struct its_device *device) >>> list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list) >>> its_free_ite(kvm, ite); >>> >>> + vgic_its_invalidate_cache(kvm); >>> + >>> list_del(&device->dev_list); >>> kfree(device); >>> } >>> @@ -1249,6 +1255,8 @@ static int vgic_its_cmd_handle_movall(struct kvm >>> *kvm, struct vgic_its *its, >>> vgic_put_irq(kvm, irq); >>> } >>> >>> + vgic_its_invalidate_cache(kvm); >> All the commands are executed with the its_lock held. Now we don't take >> it anymore on the fast cache injection path. Don't we have a window >> where the move has been applied at table level and the cache is not yet >> invalidated? Same question for vgic_its_free_device(). > > There is definitely a race, but that race is invisible from the guest's > perspective. The guest can only assume that the command has taken effect > once a SYNC command has been executed, and it cannot observe that the > SYNC command has been executed before we have invalidated the cache. > > Does this answer your question? OK make sense. Thank you for the clarification Another question, don't we need to invalidate the cache on MAPC V=0 as well? Removing the mapping of the collection results in interrupts belonging to that collection being ignored. If we don't flush the pending bit will be set? Thanks Eric > > Thanks, > > M. > ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2 0/9] KVM: arm/arm64: vgic: ITS translation cache
On Tue, 11 Jun 2019 18:03:27 +0100 Marc Zyngier wrote: Hi, > It recently became apparent[1] that our LPI injection path is not as > efficient as it could be when injecting interrupts coming from a VFIO > assigned device. > > Although the proposed patch wasn't 100% correct, it outlined at least > two issues: > > (1) Injecting an LPI from VFIO always results in a context switch to a > worker thread: no good > > (2) We have no way of amortising the cost of translating a DID+EID pair > to an LPI number > > The reason for (1) is that we may sleep when translating an LPI, so we > do need a process context. A way to fix that is to implement a small > LPI translation cache that could be looked up from an atomic > context. It would also solve (2). > > This is what this small series proposes. It implements a very basic > LRU cache of pre-translated LPIs, which gets used to implement > kvm_arch_set_irq_inatomic. The size of the cache is currently > hard-coded at 16 times the number of vcpus, a number I have picked > under the influence of Ali Saidi. If that's not enough for you, blame > me, though. > > Does it work? well, it doesn't crash, and is thus perfect. More > seriously, I don't really have a way to benchmark it directly, so my > observations are only indirect: > > On a TX2 system, I run a 4 vcpu VM with an Ethernet interface passed > to it directly. From the host, I inject interrupts using debugfs. In > parallel, I look at the number of context switches, and the number of > interrupts on the host. Without this series, I get the same number for > both IRQ and CS (about half a million of each per second is pretty > easy to reach). With this series, the number of context switches drops > to something pretty small (in the low 2k), while the number of > interrupts stays the same. > > Yes, this is a pretty rubbish benchmark, what did you expect? ;-) > > So I'm putting this out for people with real workloads to try out and > report what they see. 
So I gave that a shot with some benchmarks. As expected, it is quite hard to show an improvement with just one guest running, although we could show a 103%(!) improvement of the memcached QPS score in one experiment when running it in a guest with an external load generator. Throwing more users into the game showed a significant improvement: Benchmark 1: kernel compile/FIO: Compiling a kernel on the host, while letting a guest run FIO with 4K randreads from a passed-through NVMe SSD: The IOPS with this series improved by 27% compared to pure mainline, reaching 80% of the host value. Kernel compilation time improved by 8.5% compared to mainline. Benchmark 2: FIO/FIO: Running FIO on a passed through SATA SSD in one guest, and FIO on a passed through NVMe SSD in another guest, at the same time: The IOPS with this series improved by 23% for the NVMe and 34% for the SATA disk, compared to pure mainline. So judging from these results, I think this series is a significant improvement, which justifies it to be merged, to receive wider testing. It would be good if others could also do performance experiments and post their results. Cheers, Andre. 
> [1] > https://lore.kernel.org/lkml/1552833373-19828-1-git-send-email-yuzeng...@huawei.com/ > > * From v1: > > - Fixed race on allocation, where the same LPI could be cached multiple > times > - Now invalidate the cache on vgic teardown, avoiding memory leaks > - Change patch split slightly, general reshuffling > - Small cleanups here and there > - Rebased on 5.2-rc4 > > Marc Zyngier (9): > KVM: arm/arm64: vgic: Add LPI translation cache definition > KVM: arm/arm64: vgic: Add __vgic_put_lpi_locked primitive > KVM: arm/arm64: vgic-its: Add MSI-LPI translation cache invalidation > KVM: arm/arm64: vgic-its: Invalidate MSI-LPI translation cache on > specific commands > KVM: arm/arm64: vgic-its: Invalidate MSI-LPI translation cache on > disabling LPIs > KVM: arm/arm64: vgic-its: Invalidate MSI-LPI translation cache on vgic > teardown > KVM: arm/arm64: vgic-its: Cache successful MSI->LPI translation > KVM: arm/arm64: vgic-its: Check the LPI translation cache on MSI > injection > KVM: arm/arm64: vgic-irqfd: Implement kvm_arch_set_irq_inatomic > > include/kvm/arm_vgic.h | 3 + > virt/kvm/arm/vgic/vgic-init.c| 5 + > virt/kvm/arm/vgic/vgic-irqfd.c | 36 +- > virt/kvm/arm/vgic/vgic-its.c | 204 +++ > virt/kvm/arm/vgic/vgic-mmio-v3.c | 4 +- > virt/kvm/arm/vgic/vgic.c | 26 ++-- > virt/kvm/arm/vgic/vgic.h | 5 + > 7 files changed, 267 insertions(+), 16 deletions(-) > ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2] KVM: arm/arm64: Introduce kvm_pmu_vcpu_init() to setup PMU counter idx
On 2019-07-23 09:17, Julien Thierry wrote: Hi Zenghui, On 18/07/2019 09:15, Zenghui Yu wrote: We use "pmc->idx" and the "chained" bitmap to determine if the pmc is chained, in kvm_pmu_pmc_is_chained(). But idx might be uninitialized (and random) when we doing this decision, through a KVM_ARM_VCPU_INIT ioctl -> kvm_pmu_vcpu_reset(). And the test_bit() against this random idx will potentially hit a KASAN BUG [1]. In general, idx is the static property of a PMU counter that is not expected to be modified across resets, as suggested by Julien. It looks more reasonable if we can setup the PMU counter idx for a vcpu in its creation time. Introduce a new function - kvm_pmu_vcpu_init() for this basic setup. Oh, and the KASAN BUG will get fixed this way. [1] https://www.spinics.net/lists/kvm-arm/msg36700.html Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters") Suggested-by: Andrew Murray Suggested-by: Julien Thierry Cc: Marc Zyngier Signed-off-by: Zenghui Yu --- Changes since v1: - Introduce kvm_pmu_vcpu_init() in vcpu's creation time, move the assignment of pmc->idx into it. - Thus change the subject. The old one is "KVM: arm/arm64: Assign pmc->idx before kvm_pmu_stop_counter()". Julien, I haven't collected your Acked-by into this version. If you're still happy with the change, please Ack again. Thanks! Thanks for making the change. This looks good to me: Acked-by: Julien Thierry Applied, thanks both. M. -- Jazz is not dead. It just smells funny... ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v2] KVM: arm/arm64: Introduce kvm_pmu_vcpu_init() to setup PMU counter idx
Hi Zenghui, On 18/07/2019 09:15, Zenghui Yu wrote: > We use "pmc->idx" and the "chained" bitmap to determine if the pmc is > chained, in kvm_pmu_pmc_is_chained(). But idx might be uninitialized > (and random) when we are making this decision, through a KVM_ARM_VCPU_INIT > ioctl -> kvm_pmu_vcpu_reset(). And the test_bit() against this random > idx will potentially hit a KASAN BUG [1]. > > In general, idx is the static property of a PMU counter that is not > expected to be modified across resets, as suggested by Julien. It > looks more reasonable if we can set up the PMU counter idx for a vcpu > at its creation time. Introduce a new function - kvm_pmu_vcpu_init() > for this basic setup. Oh, and the KASAN BUG will get fixed this way. > > [1] https://www.spinics.net/lists/kvm-arm/msg36700.html > > Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters") > Suggested-by: Andrew Murray > Suggested-by: Julien Thierry > Cc: Marc Zyngier > Signed-off-by: Zenghui Yu > --- > > Changes since v1: > - Introduce kvm_pmu_vcpu_init() in vcpu's creation time, move the >assignment of pmc->idx into it. > - Thus change the subject. The old one is "KVM: arm/arm64: Assign >pmc->idx before kvm_pmu_stop_counter()". > > Julien, I haven't collected your Acked-by into this version. If you're > still happy with the change, please Ack again. Thanks! > Thanks for making the change. 
This looks good to me: Acked-by: Julien Thierry Thanks, Julien > include/kvm/arm_pmu.h | 2 ++ > virt/kvm/arm/arm.c| 2 ++ > virt/kvm/arm/pmu.c| 18 +++--- > 3 files changed, 19 insertions(+), 3 deletions(-) > > diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h > index 16c769a..6db0304 100644 > --- a/include/kvm/arm_pmu.h > +++ b/include/kvm/arm_pmu.h > @@ -34,6 +34,7 @@ struct kvm_pmu { > u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); > void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 > val); > u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu); > +void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu); > void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu); > void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu); > void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val); > @@ -71,6 +72,7 @@ static inline u64 kvm_pmu_valid_counter_mask(struct > kvm_vcpu *vcpu) > { > return 0; > } > +static inline void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) {} > static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {} > static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {} > static inline void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 > val) {} > diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c > index f645c0f..c704fa6 100644 > --- a/virt/kvm/arm/arm.c > +++ b/virt/kvm/arm/arm.c > @@ -340,6 +340,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > /* Set up the timer */ > kvm_timer_vcpu_init(vcpu); > > + kvm_pmu_vcpu_init(vcpu); > + > kvm_arm_reset_debug_ptr(vcpu); > > return kvm_vgic_vcpu_init(vcpu); > diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c > index 3dd8238..362a018 100644 > --- a/virt/kvm/arm/pmu.c > +++ b/virt/kvm/arm/pmu.c > @@ -215,6 +215,20 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, > struct kvm_pmc *pmc) > } > > /** > + * kvm_pmu_vcpu_init - assign pmu counter idx for cpu > + * @vcpu: The vcpu pointer > + * > + */ > +void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) 
> +{ > + int i; > + struct kvm_pmu *pmu = &vcpu->arch.pmu; > + > + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) > + pmu->pmc[i].idx = i; > +} > + > +/** > * kvm_pmu_vcpu_reset - reset pmu state for cpu > * @vcpu: The vcpu pointer > * > @@ -224,10 +238,8 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) > int i; > struct kvm_pmu *pmu = &vcpu->arch.pmu; > > - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { > + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) > kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); > - pmu->pmc[i].idx = i; > - } > > bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); > } > -- Julien Thierry