On 11/13/20 4:14 PM, Greg Kurz wrote: > Recent commit acbdb9956fe9 introduced a dedicated path to create > IPIs in KVM. This is done from under kvmppc_xive_cpu_connect() with > the assumption that the IPI number is equal to the vCPU id. The > latter is wrong: the guest chooses an arbitrary LISN from the > "ibm,xive-lisn-ranges" and assigns it to a target vCPU with the > H_INT_SET_SOURCE_CONFIG hcall. This went unnoticed so far because > IPI numbers and vCPU ids happen to match by default. This is no > longer the case though when setting the VSMT machine property to > a value that is different from (ie. bigger than) the number of > vCPUs per core (ie. -smp threads). Wrong IPIs end up being created > in KVM but the guest still uses the ones it has allocated and gets > very confused (see BugLink below). > > Fix this by creating the IPI at the only place where we have > its appropriate number : when the guest allocates it with the > H_INT_SET_SOURCE_CONFIG hcall. We detect this is an IPI because > it is < SPAPR_XIRQ_BASE and we get the vCPU id from the hcall > arguments. The EAS of the IPI is tracked in the kvm_enabled_cpus > list. It is now used instead of vcpu_id to filter unallocated IPIs > out in xive_source_is_valid(). It also allows to only reset the > IPI on the first call to H_INT_SET_SOURCE_CONFIG. > > Restore unmasked IPIs from the vCPU contexts in kvmppc_xive_post_load(). > Masked ones will be created when the guests eventually unmask them > with H_INT_SET_SOURCE_CONFIG. > > Reported-by: Satheesh Rajendran <sathn...@linux.vnet.ibm.com> > Fixes: acbdb9956fe9 ("spapr/xive: Allocate IPIs independently from the other > sources") > BugLink: https://bugs.launchpad.net/qemu/+bug/1900241 > Cc: c...@kaod.org > Signed-off-by: Greg Kurz <gr...@kaod.org>
I am quite certain this is correct but the complexity looks like a potential source of bugs. So I think it is simpler to revert the optimization introduced by acbdb9956fe9 and find a better solution covering SMT also. Thanks, C. > --- > hw/intc/spapr_xive_kvm.c | 141 > +++++++++++++++++++++++++++++++++++++++++----- > 1 file changed, 127 insertions(+), 14 deletions(-) > > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c > index 66bf4c06fe55..4e5871c3aac2 100644 > --- a/hw/intc/spapr_xive_kvm.c > +++ b/hw/intc/spapr_xive_kvm.c > @@ -30,6 +30,7 @@ > */ > typedef struct KVMEnabledCPU { > unsigned long vcpu_id; > + XiveEAS *ipi_eas; > QLIST_ENTRY(KVMEnabledCPU) node; > } KVMEnabledCPU; > > @@ -55,6 +56,7 @@ static void kvm_cpu_enable(CPUState *cs) > > enabled_cpu = g_malloc(sizeof(*enabled_cpu)); > enabled_cpu->vcpu_id = vcpu_id; > + enabled_cpu->ipi_eas = NULL; > QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node); > } > > @@ -156,26 +158,29 @@ int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, > Error **errp) > */ > typedef struct { > SpaprXive *xive; > + uint32_t lisn; > Error *err; > int rc; > } XiveInitIPI; > > static void kvmppc_xive_reset_ipi_on_cpu(CPUState *cs, run_on_cpu_data arg) > { > - unsigned long ipi = kvm_arch_vcpu_id(cs); > XiveInitIPI *s = arg.host_ptr; > + unsigned long ipi = s->lisn; > uint64_t state = 0; > > s->rc = kvm_device_access(s->xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi, > &state, true, &s->err); > } > > -static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp) > +static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, uint32_t > lisn, > + Error **errp) > { > XiveInitIPI s = { > .xive = xive, > .err = NULL, > .rc = 0, > + .lisn = lisn, > }; > > run_on_cpu(cs, kvmppc_xive_reset_ipi_on_cpu, RUN_ON_CPU_HOST_PTR(&s)); > @@ -214,12 +219,6 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) > return ret; > } > > - /* Create/reset the vCPU IPI */ > - ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp); > - if (ret < 0) { > - return ret; > - } > - > kvm_cpu_enable(tctx->cs); > return 0; > } > @@ -228,6 +227,62 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) > * XIVE Interrupt Source (KVM) > */ > > +static bool spapr_xive_is_ipi(uint32_t lisn) > +{ > + return lisn < SPAPR_XIRQ_BASE; > +} > + > +static bool kvm_ipi_is_enabled(SpaprXive *xive, uint32_t lisn) > +{ > + KVMEnabledCPU *enabled_cpu; > + > + g_assert(spapr_xive_is_ipi(lisn)); > + > + QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) { > + if (enabled_cpu->ipi_eas == &xive->eat[lisn]) { > + return true; > + } > + } > + return false; > +} > + > +static int kvm_ipi_enable(SpaprXive *xive, uint32_t lisn, uint32_t vcpu_id, > + Error **errp) > +{ > + KVMEnabledCPU *enabled_cpu; > + > + g_assert(spapr_xive_is_ipi(lisn)); > + > + QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) { > + if (enabled_cpu->vcpu_id == vcpu_id) { > + CPUState *cs = CPU(spapr_find_cpu(vcpu_id)); > + XiveEAS *eas = &xive->eat[lisn]; > + > + /* No change ? */ > + if (enabled_cpu->ipi_eas && enabled_cpu->ipi_eas == eas) { > + return 0; > + } > + > + /* XXX: abort ? */ > + if (!cs) { > + break; > + } > + > + /* Create/reset the vCPU IPI */ > + int ret = kvmppc_xive_reset_ipi(xive, cs, lisn, errp); > + if (ret < 0) { > + return ret; > + } > + > + enabled_cpu->ipi_eas = eas; > + return 0; > + } > + } > + > + error_setg(errp, "vCPU #%d not found", vcpu_id); > + return -ESRCH; > +} > + > int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS > *eas, > Error **errp) > { > @@ -248,6 +303,14 @@ int kvmppc_xive_set_source_config(SpaprXive *xive, > uint32_t lisn, XiveEAS *eas, > > spapr_xive_end_to_target(end_blk, end_idx, &server, &priority); > > + if (spapr_xive_is_ipi(lisn)) { > + /* Create the vCPU IPI */ > + int ret = kvm_ipi_enable(xive, lisn, server, errp); > + if (ret < 0) { > + return ret; > + } > + } > + > kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT & > KVM_XIVE_SOURCE_PRIORITY_MASK; > kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT & > @@ -280,7 +343,7 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int > srcno, Error **errp) > assert(xive->fd != -1); > > /* > - * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect() > + * The vCPU IPIs are now allocated in kvmppc_xive_set_source_config() > * and not with all sources in kvmppc_xive_source_reset() > */ > assert(srcno >= SPAPR_XIRQ_BASE); > @@ -300,12 +363,12 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int > srcno, Error **errp) > * To be valid, a source must have been claimed by the machine (valid > * entry in the EAS table) and if it is a vCPU IPI, the vCPU should > * have been enabled, which means the IPI has been allocated in > - * kvmppc_xive_cpu_connect(). > + * kvmppc_xive_set_source_config(). > */ > static bool xive_source_is_valid(SpaprXive *xive, int i) > { > return xive_eas_is_valid(&xive->eat[i]) && > - (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i)); > + (!spapr_xive_is_ipi(i) || kvm_ipi_is_enabled(xive, i)); > } > > static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp) > @@ -314,8 +377,8 @@ static int kvmppc_xive_source_reset(XiveSource *xsrc, > Error **errp) > int i; > > /* > - * Skip the vCPU IPIs. These are created/reset when the vCPUs are > - * connected in kvmppc_xive_cpu_connect() > + * Skip the vCPU IPIs. These are created/reset on-demand in > + * kvmppc_xive_set_source_config(). > */ > for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) { > int ret; > @@ -724,7 +787,57 @@ int kvmppc_xive_post_load(SpaprXive *xive, int > version_id) > } > > /* Restore the EAT */ > - for (i = 0; i < xive->nr_irqs; i++) { > + > + /* IPIs are restored from the appropriate vCPU context */ > + CPU_FOREACH(cs) { > + /* > + * The EAT has valid entries to accomodate all possible vCPUs, > + * but we only want to allocate in KVM the IPIs that were > + * actually allocated before migration. Let's consider the full > + * list of IPIs to find an EAS that matches the vCPU id. > + * > + * If an IPI appears unmasked in the EAT, it is a proof that the > + * guest did successfully call H_INT_SET_SOURCE_CONFIG and we > + * should thus create the IPI at the KVM level if the END index > + * matches the vCPU id. > + * > + * If an IPI appears masked in the EAT, then we don't know exactly > + * what happened before migration but we don't care. The IPI will > + * be created when the guest eventually unmasks it with a subsequent > + * call to H_INT_SET_SOURCE_CONFIG. > + */ > + for (i = 0; i < SPAPR_XIRQ_BASE; i++) { > + XiveEAS *eas = &xive->eat[i]; > + uint32_t end_idx; > + uint32_t end_blk; > + uint8_t priority; > + uint32_t server; > + > + if (!xive_eas_is_valid(eas)) { > + continue; > + } > + > + if (xive_eas_is_masked(eas)) { > + continue; > + } > + > + end_idx = xive_get_field64(EAS_END_INDEX, eas->w); > + end_blk = xive_get_field64(EAS_END_BLOCK, eas->w); > + spapr_xive_end_to_target(end_blk, end_idx, &server, &priority); > + if (server != kvm_arch_vcpu_id(cs)) { > + continue; > + } > + > + ret = kvmppc_xive_set_source_config(xive, i, eas, &local_err); > + if (ret < 0) { > + goto fail; > + } > + break; > + } > + } > + > + /* Now restore non-IPIs */ > + for (i = SPAPR_XIRQ_BASE; i < xive->nr_irqs; i++) { > if (!xive_source_is_valid(xive, i)) { > continue; > } > >