On Mon, 16 Nov 2020 14:43:12 +0100 Cédric Le Goater <c...@kaod.org> wrote:
> On 11/13/20 4:14 PM, Greg Kurz wrote: > > Recent commit acbdb9956fe9 introduced a dedicated path to create > > IPIs in KVM. This is done from under kvmppc_xive_cpu_connect() with > > the assumption that the IPI number is equal to the vCPU id. The > > latter is wrong: the guest chooses an arbitrary LISN from the > > "ibm,xive-lisn-ranges" and assigns it to a target vCPU with the > > H_INT_SET_SOURCE_CONFIG hcall. This went unnoticed so far because > > IPI numbers and vCPU ids happen to match by default. This is no > > longer the case though when setting the VSMT machine property to > > a value that is different from (ie. bigger than) the number of > > vCPUs per core (ie. -smp threads). Wrong IPIs end up being created > > in KVM but the guest still uses the ones it has allocated and gets > > very confused (see BugLink below). > > > > Fix this by creating the IPI at the only place where we have > > its appropriate number : when the guest allocates it with the > > H_INT_SET_SOURCE_CONFIG hcall. We detect this is an IPI because > > it is < SPAPR_XIRQ_BASE and we get the vCPU id from the hcall > > arguments. The EAS of the IPI is tracked in the kvm_enabled_cpus > > list. It is now used instead of vcpu_id to filter unallocated IPIs > > out in xive_source_is_valid(). It also allows to only reset the > > IPI on the first call to H_INT_SET_SOURCE_CONFIG. > > > > Restore unmasked IPIs from the vCPU contexts in kvmppc_xive_post_load(). > > Masked ones will be created when the guests eventually unmask them > > with H_INT_SET_SOURCE_CONFIG. > > > > Reported-by: Satheesh Rajendran <sathn...@linux.vnet.ibm.com> > > Fixes: acbdb9956fe9 ("spapr/xive: Allocate IPIs independently from the > > other sources") > > BugLink: https://bugs.launchpad.net/qemu/+bug/1900241 > > Cc: c...@kaod.org > > Signed-off-by: Greg Kurz <gr...@kaod.org> > > > I am quite certain this is correct but the complexity looks like a > potential source of bugs. 
So I think it is simpler to revert the > optimization introduced by acbdb9956fe9 and find a better solution > covering SMT also. > I tend to agree. Even if I could successfully test various scenarios around hotplug and migration, I'm really not super confident to merge this in an RC. I'll post a series that reverts acbdb9956fe9 ASAP. > Thanks, > > C. > > > > > --- > > hw/intc/spapr_xive_kvm.c | 141 > > +++++++++++++++++++++++++++++++++++++++++----- > > 1 file changed, 127 insertions(+), 14 deletions(-) > > > > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c > > index 66bf4c06fe55..4e5871c3aac2 100644 > > --- a/hw/intc/spapr_xive_kvm.c > > +++ b/hw/intc/spapr_xive_kvm.c > > @@ -30,6 +30,7 @@ > > */ > > typedef struct KVMEnabledCPU { > > unsigned long vcpu_id; > > + XiveEAS *ipi_eas; > > QLIST_ENTRY(KVMEnabledCPU) node; > > } KVMEnabledCPU; > > > > @@ -55,6 +56,7 @@ static void kvm_cpu_enable(CPUState *cs) > > > > enabled_cpu = g_malloc(sizeof(*enabled_cpu)); > > enabled_cpu->vcpu_id = vcpu_id; > > + enabled_cpu->ipi_eas = NULL; > > QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node); > > } > > > > @@ -156,26 +158,29 @@ int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, > > Error **errp) > > */ > > typedef struct { > > SpaprXive *xive; > > + uint32_t lisn; > > Error *err; > > int rc; > > } XiveInitIPI; > > > > static void kvmppc_xive_reset_ipi_on_cpu(CPUState *cs, run_on_cpu_data arg) > > { > > - unsigned long ipi = kvm_arch_vcpu_id(cs); > > XiveInitIPI *s = arg.host_ptr; > > + unsigned long ipi = s->lisn; > > uint64_t state = 0; > > > > s->rc = kvm_device_access(s->xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi, > > &state, true, &s->err); > > } > > > > -static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error > > **errp) > > +static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, uint32_t > > lisn, > > + Error **errp) > > { > > XiveInitIPI s = { > > .xive = xive, > > .err = NULL, > > .rc = 0, > > + .lisn = lisn, > > }; > > > > 
run_on_cpu(cs, kvmppc_xive_reset_ipi_on_cpu, RUN_ON_CPU_HOST_PTR(&s)); > > @@ -214,12 +219,6 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error > > **errp) > > return ret; > > } > > > > - /* Create/reset the vCPU IPI */ > > - ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp); > > - if (ret < 0) { > > - return ret; > > - } > > - > > kvm_cpu_enable(tctx->cs); > > return 0; > > } > > @@ -228,6 +227,62 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error > > **errp) > > * XIVE Interrupt Source (KVM) > > */ > > > > +static bool spapr_xive_is_ipi(uint32_t lisn) > > +{ > > + return lisn < SPAPR_XIRQ_BASE; > > +} > > + > > +static bool kvm_ipi_is_enabled(SpaprXive *xive, uint32_t lisn) > > +{ > > + KVMEnabledCPU *enabled_cpu; > > + > > + g_assert(spapr_xive_is_ipi(lisn)); > > + > > + QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) { > > + if (enabled_cpu->ipi_eas == &xive->eat[lisn]) { > > + return true; > > + } > > + } > > + return false; > > +} > > + > > +static int kvm_ipi_enable(SpaprXive *xive, uint32_t lisn, uint32_t vcpu_id, > > + Error **errp) > > +{ > > + KVMEnabledCPU *enabled_cpu; > > + > > + g_assert(spapr_xive_is_ipi(lisn)); > > + > > + QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) { > > + if (enabled_cpu->vcpu_id == vcpu_id) { > > + CPUState *cs = CPU(spapr_find_cpu(vcpu_id)); > > + XiveEAS *eas = &xive->eat[lisn]; > > + > > + /* No change ? */ > > + if (enabled_cpu->ipi_eas && enabled_cpu->ipi_eas == eas) { > > + return 0; > > + } > > + > > + /* XXX: abort ? 
*/ > > + if (!cs) { > > + break; > > + } > > + > > + /* Create/reset the vCPU IPI */ > > + int ret = kvmppc_xive_reset_ipi(xive, cs, lisn, errp); > > + if (ret < 0) { > > + return ret; > > + } > > + > > + enabled_cpu->ipi_eas = eas; > > + return 0; > > + } > > + } > > + > > + error_setg(errp, "vCPU #%d not found", vcpu_id); > > + return -ESRCH; > > +} > > + > > int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS > > *eas, > > Error **errp) > > { > > @@ -248,6 +303,14 @@ int kvmppc_xive_set_source_config(SpaprXive *xive, > > uint32_t lisn, XiveEAS *eas, > > > > spapr_xive_end_to_target(end_blk, end_idx, &server, &priority); > > > > + if (spapr_xive_is_ipi(lisn)) { > > + /* Create the vCPU IPI */ > > + int ret = kvm_ipi_enable(xive, lisn, server, errp); > > + if (ret < 0) { > > + return ret; > > + } > > + } > > + > > kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT & > > KVM_XIVE_SOURCE_PRIORITY_MASK; > > kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT & > > @@ -280,7 +343,7 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int > > srcno, Error **errp) > > assert(xive->fd != -1); > > > > /* > > - * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect() > > + * The vCPU IPIs are now allocated in kvmppc_xive_set_source_config() > > * and not with all sources in kvmppc_xive_source_reset() > > */ > > assert(srcno >= SPAPR_XIRQ_BASE); > > @@ -300,12 +363,12 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, > > int srcno, Error **errp) > > * To be valid, a source must have been claimed by the machine (valid > > * entry in the EAS table) and if it is a vCPU IPI, the vCPU should > > * have been enabled, which means the IPI has been allocated in > > - * kvmppc_xive_cpu_connect(). > > + * kvmppc_xive_set_source_config(). 
> > */ > > static bool xive_source_is_valid(SpaprXive *xive, int i) > > { > > return xive_eas_is_valid(&xive->eat[i]) && > > - (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i)); > > + (!spapr_xive_is_ipi(i) || kvm_ipi_is_enabled(xive, i)); > > } > > > > static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp) > > @@ -314,8 +377,8 @@ static int kvmppc_xive_source_reset(XiveSource *xsrc, > > Error **errp) > > int i; > > > > /* > > - * Skip the vCPU IPIs. These are created/reset when the vCPUs are > > - * connected in kvmppc_xive_cpu_connect() > > + * Skip the vCPU IPIs. These are created/reset on-demand in > > + * kvmppc_xive_set_source_config(). > > */ > > for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) { > > int ret; > > @@ -724,7 +787,57 @@ int kvmppc_xive_post_load(SpaprXive *xive, int > > version_id) > > } > > > > /* Restore the EAT */ > > - for (i = 0; i < xive->nr_irqs; i++) { > > + > > + /* IPIs are restored from the appropriate vCPU context */ > > + CPU_FOREACH(cs) { > > + /* > > + * The EAT has valid entries to accommodate all possible vCPUs, > > + * but we only want to allocate in KVM the IPIs that were > > + * actually allocated before migration. Let's consider the full > > + * list of IPIs to find an EAS that matches the vCPU id. > > + * > > + * If an IPI appears unmasked in the EAT, it is a proof that the > > + * guest did successfully call H_INT_SET_SOURCE_CONFIG and we > > + * should thus create the IPI at the KVM level if the END index > > + * matches the vCPU id. > > + * > > + * If an IPI appears masked in the EAT, then we don't know exactly > > + * what happened before migration but we don't care. The IPI will > > + * be created when the guest eventually unmasks it with a > > subsequent > > + * call to H_INT_SET_SOURCE_CONFIG. 
> > + */ > > + for (i = 0; i < SPAPR_XIRQ_BASE; i++) { > > + XiveEAS *eas = &xive->eat[i]; > > + uint32_t end_idx; > > + uint32_t end_blk; > > + uint8_t priority; > > + uint32_t server; > > + > > + if (!xive_eas_is_valid(eas)) { > > + continue; > > + } > > + > > + if (xive_eas_is_masked(eas)) { > > + continue; > > + } > > + > > + end_idx = xive_get_field64(EAS_END_INDEX, eas->w); > > + end_blk = xive_get_field64(EAS_END_BLOCK, eas->w); > > + spapr_xive_end_to_target(end_blk, end_idx, &server, &priority); > > + if (server != kvm_arch_vcpu_id(cs)) { > > + continue; > > + } > > + > > + ret = kvmppc_xive_set_source_config(xive, i, eas, &local_err); > > + if (ret < 0) { > > + goto fail; > > + } > > + break; > > + } > > + } > > + > > + /* Now restore non-IPIs */ > > + for (i = SPAPR_XIRQ_BASE; i < xive->nr_irqs; i++) { > > if (!xive_source_is_valid(xive, i)) { > > continue; > > } > > > > >