Hi, Dhananjay > > > > > [...] > > > > > @@ -685,6 +774,13 @@ static void __init rapl_advertise(void) > > > rapl_pkg_domain_names[i], > > > rapl_hw_unit[i]); > > > } > > > } > > > + > > > + for (i = 0; i < NR_RAPL_CORE_DOMAINS; i++) { > > > + if (rapl_core_cntr_mask & (1 << i)) { > > > + pr_info("hw unit of domain %s 2^-%d > > > Joules\n", > > > + rapl_core_domain_names[i], > > > rapl_hw_unit[i]); > > > > rapl_hw_unit[] is for package pmu only and > > rapl_hw_unit[0] is rapl_hw_unit[PERF_RAPL_PP0] rather than > > rapl_hw_unit[PERF_RAPL_PER_CORE] > > > > you cannot use rapl_hw_unit[i] to represent per-core rapl domain > > unit. > > Yes right, I saw that all the elements in the rapl_hw_unit array were > actually > using the value from the same register "MSR_RAPL_POWER_UNIT" or > "MSR_AMD_RAPL_POWER_UNIT". > Except for the two quirks, > > 737 case > RAPL_UNIT_QUIRK_INTEL_HSW: > > > 738 rapl_hw_unit[PERF_RAPL_RAM] = > 16; > > > 739 > break; > > > 740 /* SPR uses a fixed energy unit for Psys domain. */ > 741 case RAPL_UNIT_QUIRK_INTEL_SPR: > 742 rapl_hw_unit[PERF_RAPL_PSYS] = 0; > 743 break; > > So, as for AMD systems the rapl_hw_unit[] elements will always have > the same value, I ended > up using the rapl_hw_unit[PERF_RAPL_PP0] for > rapl_hw_unit[PERF_RAPL_PER_CORE], but I do realize > it is quite hacky. So, better to do it cleanly and add a separate > array/variable for the core events. > yeah, that is much better. >
> > > > > > > > static struct rapl_model model_amd_hygon = { > > > .pkg_events = BIT(PERF_RAPL_PKG), > > > + .core_events = BIT(PERF_RAPL_PER_CORE), > > > .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, > > > - .rapl_msrs = amd_rapl_pkg_msrs, > > > + .rapl_pkg_msrs = amd_rapl_pkg_msrs, > > > + .rapl_core_msrs = amd_rapl_core_msrs, > > > }; > > > > > > static const struct x86_cpu_id rapl_model_match[] __initconst = > > > { > > > @@ -858,6 +957,11 @@ static int __init rapl_pmu_init(void) > > > { > > > const struct x86_cpu_id *id; > > > int ret; > > > + int nr_rapl_pmu = topology_max_packages() * > > > topology_max_dies_per_package(); > > > + int nr_cores = topology_max_packages() * > > > topology_num_cores_per_package(); > > > > I'd suggest either using two variables nr_pkgs/nr_cores, or reuse > > one > > variable nr_rapl_pmu for both pkg pmu and per-core pmu. > > I understand your point, but the problem with that is, there are > actually three scopes needed here > > Some Intel systems need a *die* scope for the rapl_pmus_pkg PMU > Some Intel systems and all AMD systems need a *package* scope for the > rapl_pmus_pkg PMU > And AMD systems need a *core* scope for the rapl_pmus_per_core PMU > > I think what we can do is three variables, nr_dies (for all Intel > systems as before), > nr_pkgs(for AMD systems rapl_pmus_pkg PMU) Not necessarily, we already uses rapl_pmus_pkg for intel systems, right? > and nr_cores(for rapl_pmus_per_core PMU) > > Sounds good? what about just one variable "count" and reuse it for every cases? 
> > > > > > + > > > + if (rapl_pmu_is_pkg_scope()) > > > + nr_rapl_pmu = topology_max_packages(); > > > > > > id = x86_match_cpu(rapl_model_match); > > > if (!id) > > > @@ -865,17 +969,34 @@ static int __init rapl_pmu_init(void) > > > > > > rapl_model = (struct rapl_model *) id->driver_data; > > > > > > - rapl_pkg_cntr_mask = perf_msr_probe(rapl_model- > > > >rapl_msrs, > > > PERF_RAPL_PKG_EVENTS_MAX, > > > + rapl_pkg_cntr_mask = perf_msr_probe(rapl_model- > > > > rapl_pkg_msrs, PERF_RAPL_PKG_EVENTS_MAX, > > > false, (void *) > > > &rapl_model- > > > > pkg_events); > > > > > > ret = rapl_check_hw_unit(); > > > if (ret) > > > return ret; > > > > > > - ret = init_rapl_pmus(&rapl_pmus_pkg); > > > + ret = init_rapl_pmus(&rapl_pmus_pkg, nr_rapl_pmu, > > > rapl_attr_groups, rapl_attr_update); > > > if (ret) > > > return ret; > > > > > > + if (rapl_model->core_events) { > > > + rapl_core_cntr_mask = perf_msr_probe(rapl_model- > > > > rapl_core_msrs, > > > + > > > PERF_RAPL_CORE_EVENTS_MAX, false, > > > + (void *) > > > &rapl_model->core_events); > > > + > > > + ret = init_rapl_pmus(&rapl_pmus_core, nr_cores, > > > + rapl_per_core_attr_groups, > > > rapl_per_core_attr_update); > > > + if (ret) { > > > + /* > > > + * If initialization of per_core PMU > > > fails, > > > reset per_core > > > + * flag, and continue with power PMU > > > initialization. > > > + */ > > > + pr_warn("Per-core PMU initialization > > > failed > > > (%d)\n", ret); > > > + rapl_model->core_events = 0UL; > > > + } > > > + } > > > + > > > /* > > > * Install callbacks. Core will call them for each online > > > cpu. 
> > > */ > > > @@ -889,6 +1010,20 @@ static int __init rapl_pmu_init(void) > > > if (ret) > > > goto out1; > > > > > > + if (rapl_model->core_events) { > > > + ret = perf_pmu_register(&rapl_pmus_core->pmu, > > > "power_per_core", -1); > > > + if (ret) { > > > + /* > > > + * If registration of per_core PMU fails, > > > cleanup per_core PMU > > > + * variables, reset the per_core flag and > > > keep the > > > + * power PMU untouched. > > > + */ > > > + pr_warn("Per-core PMU registration failed > > > (%d)\n", ret); > > > + cleanup_rapl_pmus(rapl_pmus_core); > > > + rapl_model->core_events = 0UL; > > > + } > > > + } > > > + > > > rapl_advertise(); > > > return 0; > > > > > > @@ -906,5 +1041,9 @@ static void __exit intel_rapl_exit(void) > > > cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE) > > > ; > > > perf_pmu_unregister(&rapl_pmus_pkg->pmu); > > > cleanup_rapl_pmus(rapl_pmus_pkg); > > > + if (rapl_model->core_events) { > > > + perf_pmu_unregister(&rapl_pmus_core->pmu); > > > + cleanup_rapl_pmus(rapl_pmus_core); > > > + } > > > > we do check rapl_pmus_core before accessing it, but we never check > > rapl_pmus_pkg because the previous code assumes it always exists. > > > > so could there be a problem if some one starts the per-core pmu > > when > > pkg pmu is unregistered and cleaned up? > > > > say, in rapl_pmu_event_init(), > > > > if (event->attr.type == rapl_pmus_pkg->pmu.type || > > (rapl_pmus_core && event->attr.type == rapl_pmus_core- > > >pmu.type)) > > > > this can break because rapl_pmus_pkg is freed, right? > > Hmm, I think this situation can't arise as whenever the power PMU > fails, we > directly go to the failure path and dont setup the per-core PMU(which > means > no one will be able to start the per-core PMU), > Please let me know if there is a scenario where this assumption can > fail. I mean if we do module unload and access power-per-core pmu at the same time, could there be a race? 
why not just unregister and clean up the per-core pmu before the pkg pmu? > thanks, rui