Recent commit acbdb9956fe9 introduced a dedicated path to create IPIs in
KVM. This is done in kvmppc_xive_cpu_connect(), under the assumption that
the IPI number is equal to the vCPU id. This assumption is wrong: the
guest chooses an arbitrary LISN from the "ibm,xive-lisn-ranges" property
and assigns it to a target vCPU with the H_INT_SET_SOURCE_CONFIG hcall.
This went unnoticed so far because IPI numbers and vCPU ids happen to
match by default. This is no longer the case when the VSMT machine
property is set to a value that differs from (i.e. is bigger than) the
number of vCPUs per core (i.e. -smp threads). Wrong IPIs end up being
created in KVM, but the guest still uses the ones it has allocated and
gets very confused (see BugLink below).
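For example (a sketch, not part of the patch; sketch_vcpu_id() below only
approximates the vCPU id spacing computed in hw/ppc/spapr.c):

    /* Approximate sPAPR vCPU id spacing (illustrative sketch only) */
    static int sketch_vcpu_id(int cpu_index, int smp_threads, int vsmt)
    {
        return (cpu_index / smp_threads) * vsmt + cpu_index % smp_threads;
    }

    /*
     * With -smp 4,threads=1 and vsmt=8 this yields vCPU ids 0, 8, 16, 24,
     * while the guest typically allocates IPI LISNs 0, 1, 2, 3 from
     * "ibm,xive-lisn-ranges". The old code would therefore create IPIs
     * 0, 8, 16, 24 in KVM even though the guest configures and uses
     * LISNs 0..3.
     */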
Fix this by creating the IPI at the only place where we know its actual
number: when the guest configures it with the H_INT_SET_SOURCE_CONFIG
hcall. We detect that the source is an IPI because its LISN is
< SPAPR_XIRQ_BASE, and we get the vCPU id from the hcall arguments. The
EAS of the IPI is tracked in the kvm_enabled_cpus list. It is now used
instead of the vCPU id to filter out unallocated IPIs in
xive_source_is_valid(), and it also allows us to reset the IPI only on
the first call to H_INT_SET_SOURCE_CONFIG. Restore unmasked IPIs from the
vCPU contexts in kvmppc_xive_post_load(). Masked ones will be created
when the guest eventually unmasks them with H_INT_SET_SOURCE_CONFIG.

Reported-by: Satheesh Rajendran <sathn...@linux.vnet.ibm.com>
Fixes: acbdb9956fe9 ("spapr/xive: Allocate IPIs independently from the other sources")
BugLink: https://bugs.launchpad.net/qemu/+bug/1900241
Cc: c...@kaod.org
Signed-off-by: Greg Kurz <gr...@kaod.org>
---
 hw/intc/spapr_xive_kvm.c | 141 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 127 insertions(+), 14 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 66bf4c06fe55..4e5871c3aac2 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -30,6 +30,7 @@
  */
 typedef struct KVMEnabledCPU {
     unsigned long vcpu_id;
+    XiveEAS *ipi_eas;
     QLIST_ENTRY(KVMEnabledCPU) node;
 } KVMEnabledCPU;
 
@@ -55,6 +56,7 @@ static void kvm_cpu_enable(CPUState *cs)
 
     enabled_cpu = g_malloc(sizeof(*enabled_cpu));
     enabled_cpu->vcpu_id = vcpu_id;
+    enabled_cpu->ipi_eas = NULL;
     QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
 }
 
@@ -156,26 +158,29 @@ int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
  */
 typedef struct {
     SpaprXive *xive;
+    uint32_t lisn;
     Error *err;
     int rc;
 } XiveInitIPI;
 
 static void kvmppc_xive_reset_ipi_on_cpu(CPUState *cs, run_on_cpu_data arg)
 {
-    unsigned long ipi = kvm_arch_vcpu_id(cs);
     XiveInitIPI *s = arg.host_ptr;
+    unsigned long ipi = s->lisn;
     uint64_t state = 0;
 
     s->rc = kvm_device_access(s->xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi,
                               &state, true, &s->err);
 }
 
-static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp)
+static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, uint32_t lisn,
+                                 Error **errp)
 {
     XiveInitIPI s = {
         .xive = xive,
         .err = NULL,
         .rc = 0,
+        .lisn = lisn,
     };
 
     run_on_cpu(cs, kvmppc_xive_reset_ipi_on_cpu, RUN_ON_CPU_HOST_PTR(&s));
@@ -214,12 +219,6 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
         return ret;
     }
 
-    /* Create/reset the vCPU IPI */
-    ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp);
-    if (ret < 0) {
-        return ret;
-    }
-
     kvm_cpu_enable(tctx->cs);
     return 0;
 }
@@ -228,6 +227,62 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
  * XIVE Interrupt Source (KVM)
  */
 
+static bool spapr_xive_is_ipi(uint32_t lisn)
+{
+    return lisn < SPAPR_XIRQ_BASE;
+}
+
+static bool kvm_ipi_is_enabled(SpaprXive *xive, uint32_t lisn)
+{
+    KVMEnabledCPU *enabled_cpu;
+
+    g_assert(spapr_xive_is_ipi(lisn));
+
+    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
+        if (enabled_cpu->ipi_eas == &xive->eat[lisn]) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static int kvm_ipi_enable(SpaprXive *xive, uint32_t lisn, uint32_t vcpu_id,
+                          Error **errp)
+{
+    KVMEnabledCPU *enabled_cpu;
+
+    g_assert(spapr_xive_is_ipi(lisn));
+
+    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
+        if (enabled_cpu->vcpu_id == vcpu_id) {
+            CPUState *cs = CPU(spapr_find_cpu(vcpu_id));
+            XiveEAS *eas = &xive->eat[lisn];
+
+            /* No change ? */
+            if (enabled_cpu->ipi_eas && enabled_cpu->ipi_eas == eas) {
+                return 0;
+            }
+
+            /* XXX: abort ? */
+            if (!cs) {
+                break;
+            }
+
+            /* Create/reset the vCPU IPI */
+            int ret = kvmppc_xive_reset_ipi(xive, cs, lisn, errp);
+            if (ret < 0) {
+                return ret;
+            }
+
+            enabled_cpu->ipi_eas = eas;
+            return 0;
+        }
+    }
+
+    error_setg(errp, "vCPU #%d not found", vcpu_id);
+    return -ESRCH;
+}
+
 int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
                                   Error **errp)
 {
@@ -248,6 +303,14 @@ int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
 
     spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
 
+    if (spapr_xive_is_ipi(lisn)) {
+        /* Create the vCPU IPI */
+        int ret = kvm_ipi_enable(xive, lisn, server, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
     kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
               KVM_XIVE_SOURCE_PRIORITY_MASK;
     kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
@@ -280,7 +343,7 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
     assert(xive->fd != -1);
 
     /*
-     * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect()
+     * The vCPU IPIs are now allocated in kvmppc_xive_set_source_config()
      * and not with all sources in kvmppc_xive_source_reset()
      */
     assert(srcno >= SPAPR_XIRQ_BASE);
@@ -300,12 +363,12 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
  * To be valid, a source must have been claimed by the machine (valid
  * entry in the EAS table) and if it is a vCPU IPI, the vCPU should
  * have been enabled, which means the IPI has been allocated in
- * kvmppc_xive_cpu_connect().
+ * kvmppc_xive_set_source_config().
  */
 static bool xive_source_is_valid(SpaprXive *xive, int i)
 {
     return xive_eas_is_valid(&xive->eat[i]) &&
-        (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i));
+        (!spapr_xive_is_ipi(i) || kvm_ipi_is_enabled(xive, i));
 }
 
 static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
 {
@@ -314,8 +377,8 @@ static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
     int i;
 
     /*
-     * Skip the vCPU IPIs. These are created/reset when the vCPUs are
-     * connected in kvmppc_xive_cpu_connect()
+     * Skip the vCPU IPIs. These are created/reset on-demand in
+     * kvmppc_xive_set_source_config().
      */
     for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) {
         int ret;
@@ -724,7 +787,57 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
     }
 
     /* Restore the EAT */
-    for (i = 0; i < xive->nr_irqs; i++) {
+
+    /* IPIs are restored from the appropriate vCPU context */
+    CPU_FOREACH(cs) {
+        /*
+         * The EAT has valid entries to accommodate all possible vCPUs,
+         * but we only want to allocate in KVM the IPIs that were
+         * actually allocated before migration. Let's consider the full
+         * list of IPIs to find an EAS that matches the vCPU id.
+         *
+         * If an IPI appears unmasked in the EAT, it is proof that the
+         * guest did successfully call H_INT_SET_SOURCE_CONFIG and we
+         * should thus create the IPI at the KVM level if the END index
+         * matches the vCPU id.
+         *
+         * If an IPI appears masked in the EAT, then we don't know exactly
+         * what happened before migration but we don't care. The IPI will
+         * be created when the guest eventually unmasks it with a subsequent
+         * call to H_INT_SET_SOURCE_CONFIG.
+         */
+        for (i = 0; i < SPAPR_XIRQ_BASE; i++) {
+            XiveEAS *eas = &xive->eat[i];
+            uint32_t end_idx;
+            uint32_t end_blk;
+            uint8_t priority;
+            uint32_t server;
+
+            if (!xive_eas_is_valid(eas)) {
+                continue;
+            }
+
+            if (xive_eas_is_masked(eas)) {
+                continue;
+            }
+
+            end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+            end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
+            spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+            if (server != kvm_arch_vcpu_id(cs)) {
+                continue;
+            }
+
+            ret = kvmppc_xive_set_source_config(xive, i, eas, &local_err);
+            if (ret < 0) {
+                goto fail;
+            }
+            break;
+        }
+    }
+
+    /* Now restore non-IPIs */
+    for (i = SPAPR_XIRQ_BASE; i < xive->nr_irqs; i++) {
         if (!xive_source_is_valid(xive, i)) {
             continue;
         }