Hi Mark, > On Sep 11, 2018, at 8:17 AM, Mark Rutland <[email protected]> wrote: > > [NOTICE: This email originated from an external sender. Please be mindful of > safe email handling and proprietary information protection practices.] > ________________________________________________________________________________________________________________________ > > On Wed, Aug 15, 2018 at 11:31:35AM -0700, Hoan Tran wrote: >> This patch adds CPU hotplug support where the PMU migrates the context to >> another online CPU when its CPU is offline. >> >> It fixes the below issue where the user does offline the CPU which is >> assigned >> to this PMU. >> >> Assuming, CPU0 is assigned for this PMU. When the user does offline CPU0 >> [root@(none) ~]# echo 0 > /sys/devices/system/cpu/cpu0/online >> This PMU does not work anymore and shows the below error. >> [root@(none) ~]# perf stat -a -e l3c0/cycle-count/,l3c0/write/ sleep 1 >> Error: >> The sys_perf_event_open() syscall returned with 19 (No such device) >> for event (l3c0/cycle-count/). >> /bin/dmesg may provide additional information. >> No CONFIG_PERF_EVENTS=y kernel support configured? >> >> With this patch, when CPU0 is offline, PMU migrates to another online CPU and >> works on that CPU. 
>> >> Signed-off-by: Hoan Tran <[email protected]> >> --- >> drivers/perf/xgene_pmu.c | 71 >> ++++++++++++++++++++++++++++++++++++++++++---- >> include/linux/cpuhotplug.h | 1 + >> 2 files changed, 66 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c >> index 0e31f13..248a3f7 100644 >> --- a/drivers/perf/xgene_pmu.c >> +++ b/drivers/perf/xgene_pmu.c >> @@ -21,6 +21,7 @@ >> >> #include <linux/acpi.h> >> #include <linux/clk.h> >> +#include <linux/cpuhotplug.h> >> #include <linux/cpumask.h> >> #include <linux/interrupt.h> >> #include <linux/io.h> >> @@ -130,12 +131,14 @@ struct xgene_pmu_ops { >> >> struct xgene_pmu { >> struct device *dev; >> + struct hlist_node node; >> int version; >> void __iomem *pcppmu_csr; >> u32 mcb_active_mask; >> u32 mc_active_mask; >> u32 l3c_active_mask; >> cpumask_t cpu; >> + int irq; >> raw_spinlock_t lock; >> const struct xgene_pmu_ops *ops; >> struct list_head l3cpmus; >> @@ -1806,6 +1809,53 @@ static const struct acpi_device_id >> xgene_pmu_acpi_match[] = { >> MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match); >> #endif >> >> +static int xgene_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) >> +{ >> + struct xgene_pmu *xgene_pmu = hlist_entry_safe(node, struct >> xgene_pmu, >> + node); >> + >> + if (cpumask_empty(&xgene_pmu->cpu)) >> + cpumask_set_cpu(cpu, &xgene_pmu->cpu); >> + >> + /* Overflow interrupt also should use the same CPU */ >> + WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu)); >> + >> + return 0; >> +} >> + >> +static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) >> +{ >> + struct xgene_pmu *xgene_pmu = hlist_entry_safe(node, struct >> xgene_pmu, >> + node); >> + struct xgene_pmu_dev_ctx *ctx; >> + unsigned int target; >> + >> + if (!cpumask_test_and_clear_cpu(cpu, &xgene_pmu->cpu)) >> + return 0; >> + target = cpumask_any_but(cpu_online_mask, cpu); >> + if (target >= nr_cpu_ids) >> + return 0; >> + >> + list_for_each_entry(ctx, 
&xgene_pmu->mcpmus, next) { >> + perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target); >> + } >> + list_for_each_entry(ctx, &xgene_pmu->mcbpmus, next) { >> + perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target); >> + } >> + list_for_each_entry(ctx, &xgene_pmu->l3cpmus, next) { >> + perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target); >> + } >> + list_for_each_entry(ctx, &xgene_pmu->iobpmus, next) { >> + perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target); >> + } >> + >> + cpumask_set_cpu(target, &xgene_pmu->cpu); >> + /* Overflow interrupt also should use the same CPU */ >> + WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu)); >> + >> + return 0; >> +} >> + >> static int xgene_pmu_probe(struct platform_device *pdev) >> { >> const struct xgene_pmu_data *dev_data; >> @@ -1815,6 +1865,14 @@ static int xgene_pmu_probe(struct platform_device >> *pdev) >> int irq, rc; >> int version; >> >> + /* Install a hook to update the reader CPU in case it goes offline */ >> + rc = cpuhp_setup_state_multi(CPUHP_AP_PERF_XGENE_ONLINE, >> + "CPUHP_AP_PERF_XGENE_ONLINE", >> + xgene_pmu_online_cpu, >> + xgene_pmu_offline_cpu); >> + if (rc) >> + return rc; >> + >> xgene_pmu = devm_kzalloc(&pdev->dev, sizeof(*xgene_pmu), GFP_KERNEL); >> if (!xgene_pmu) >> return -ENOMEM; >> @@ -1865,6 +1923,7 @@ static int xgene_pmu_probe(struct platform_device >> *pdev) >> dev_err(&pdev->dev, "No IRQ resource\n"); >> return -EINVAL; >> } >> + >> rc = devm_request_irq(&pdev->dev, irq, xgene_pmu_isr, >> IRQF_NOBALANCING | IRQF_NO_THREAD, >> dev_name(&pdev->dev), xgene_pmu); >> @@ -1873,6 +1932,8 @@ static int xgene_pmu_probe(struct platform_device >> *pdev) >> return rc; >> } >> >> + xgene_pmu->irq = irq; >> + >> raw_spin_lock_init(&xgene_pmu->lock); >> >> /* Check for active MCBs and MCUs */ >> @@ -1883,13 +1944,11 @@ static int xgene_pmu_probe(struct platform_device >> *pdev) >> xgene_pmu->mc_active_mask = 0x1; >> } >> >> - /* Pick one core to use for cpumask attributes */ >> 
- cpumask_set_cpu(smp_processor_id(), &xgene_pmu->cpu); >> - >> - /* Make sure that the overflow interrupt is handled by this CPU */ >> - rc = irq_set_affinity(irq, &xgene_pmu->cpu); >> + /* Add this instance to the list used by the hotplug callback */ >> + rc = cpuhp_state_add_instance(CPUHP_AP_PERF_XGENE_ONLINE, >> + &xgene_pmu->node); >> if (rc) { >> - dev_err(&pdev->dev, "Failed to set interrupt affinity!\n"); >> + dev_err(&pdev->dev, "Error %d registering hotplug", rc); >> return rc; >> } > > You also need to remove the cpuhp instances when unregistering the PMUs > in xgene_pmu_dev_cleanup().
Yes, I'll fix it. Thanks and Regards Hoan > > Otherwise this looks fine to me, on the assumption the system PMU is > accessible from all CPUs in the system (e.g. it's not local to a socket > or anything like that). > > Thanks, > Mark.

