On Thu, 29 Jun 2017, Anju T Sudhakar wrote:
> +static void cleanup_all_core_imc_memory(struct imc_pmu *pmu_ptr)
> +{
> +     struct imc_mem_info *ptr;
> +
> +     for (ptr = pmu_ptr->mem_info; ptr; ptr++) {
> +             if (ptr->vbase[0])
> +                     free_pages((u64)ptr->vbase[0], 0);
> +     }

This loop is broken beyond repair. If pmu_ptr->mem_info is not NULL, then
ptr will happily increment past the end of the allocated array; it never
becomes NULL short of wrapping around the whole address space. Nothing
bounds the walk to the number of entries which were actually allocated.
Oh well.

> +     kfree(pmu_ptr->mem_info);
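
Something like the following, completely untested, at least confines the
walk to the entries which were actually allocated, reusing the nr_cores
computation from imc_mem_init():

static void cleanup_all_core_imc_memory(struct imc_pmu *pmu_ptr)
{
	int nr_cores = num_present_cpus() / threads_per_core;
	int i;

	if (!pmu_ptr->mem_info)
		return;

	/* Exactly nr_cores entries were allocated. Walk those and no more. */
	for (i = 0; i < nr_cores; i++) {
		if (pmu_ptr->mem_info[i].vbase[0])
			free_pages((u64)pmu_ptr->mem_info[i].vbase[0], 0);
	}
	kfree(pmu_ptr->mem_info);
}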

> +bool is_core_imc_mem_inited(int cpu)

This function is global because?

> +{
> +     struct imc_mem_info *mem_info;
> +     int core_id = (cpu / threads_per_core);
> +
> +     mem_info = &core_imc_pmu->mem_info[core_id];
> +     if ((mem_info->id == core_id) && (mem_info->vbase[0] != NULL))
> +             return true;
> +
> +     return false;
> +}
> +
> +/*
> + * imc_mem_init : Function to support memory allocation for core imc.
> + */
> +static int imc_mem_init(struct imc_pmu *pmu_ptr)
> +{

The function placement is horrible. This function belongs to the pmu init
stuff and wants to be placed there and not five pages away in the middle of
unrelated functions.

> +     int nr_cores;
> +
> +     if (pmu_ptr->imc_counter_mmaped)
> +             return 0;
> +     nr_cores = num_present_cpus() / threads_per_core;
> +     pmu_ptr->mem_info = kzalloc((sizeof(struct imc_mem_info) * nr_cores), GFP_KERNEL);
> +     if (!pmu_ptr->mem_info)
> +             return -ENOMEM;
> +     return 0;
> +}
> +static int core_imc_event_init(struct perf_event *event)
> +{
> +     int core_id, rc;
> +     u64 config = event->attr.config;
> +     struct imc_mem_info *pcmi;
> +     struct imc_pmu *pmu;
> +
> +     if (event->attr.type != event->pmu->type)
> +             return -ENOENT;
> +
> +     /* Sampling not supported */
> +     if (event->hw.sample_period)
> +             return -EINVAL;
> +
> +     /* unsupported modes and filters */
> +     if (event->attr.exclude_user   ||
> +         event->attr.exclude_kernel ||
> +         event->attr.exclude_hv     ||
> +         event->attr.exclude_idle   ||
> +         event->attr.exclude_host   ||
> +         event->attr.exclude_guest)
> +             return -EINVAL;
> +
> +     if (event->cpu < 0)
> +             return -EINVAL;
> +
> +     event->hw.idx = -1;
> +     pmu = imc_event_to_pmu(event);
> +
> +     /* Sanity check for config (event offset and rvalue) */
> +     if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size) ||
> +         ((config & IMC_EVENT_RVALUE_MASK) != 0))
> +             return -EINVAL;
> +
> +     if (!is_core_imc_mem_inited(event->cpu))
> +             return -ENODEV;
> +
> +     core_id = event->cpu / threads_per_core;
> +     pcmi = &pmu->mem_info[core_id];
> +     if ((pcmi->id != core_id) || (!pcmi->vbase[0]))
> +             return -ENODEV;
> +
> +     event->hw.event_base = (u64)pcmi->vbase[0] + (config & IMC_EVENT_OFFSET_MASK);
> +     /*
> +      * Core pmu units are enabled only when it is used.
> +      * See if this is triggered for the first time.
> +      * If yes, take the mutex lock and enable the core counters.
> +      * If not, just increment the count in core_events.
> +      */
> +     if (atomic_inc_return(&core_events[core_id]) == 1) {
> +             mutex_lock(&imc_core_reserve);
> +             rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
> +                                          get_hard_smp_processor_id(event->cpu));
> +             mutex_unlock(&imc_core_reserve);

The machinery here is racy as hell in several aspects.

CPU0                                   CPU1

atomic_inc_ret(core_events[0]) -> 1

preemption()
                                       atomic_inc_ret(core_events[0]) -> 2
                                       return 0;
                                       
                                       Uses the event, without counters
                                       being started until the preempted
                                       task comes on the CPU again.

Here is another one:

CPU0                                   CPU1

atomic_dec_ret(core_events[0]) -> 0
                                       atomic_inc_ret(core_events[0]) -> 1
                                       mutex_lock();
mutex_lock()                           start counter();
                                       mutex_unlock()

stop_counter();
mutex_unlock();
                                       Yay, another stale event!

Brilliant stuff that, or maybe not so much.
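
One way out, completely untested: make the refcount and the start/stop
call one atomic operation by doing both under the mutex. Then the
atomics can go away entirely. core_imc_counters_get()/put() and
MAX_CORES are made-up names for illustration, and
opal_imc_counters_stop() is assumed to exist as the counterpart of the
start call:

static DEFINE_MUTEX(imc_core_reserve);
static int core_events[MAX_CORES];	/* protected by imc_core_reserve */

static int core_imc_counters_get(int core_id, int cpu)
{
	int rc = 0;

	mutex_lock(&imc_core_reserve);
	/*
	 * Count and start must be atomic versus each other. Otherwise
	 * a concurrent event_init() can observe a non-zero count
	 * before the counters actually run.
	 */
	if (core_events[core_id]++ == 0)
		rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
					     get_hard_smp_processor_id(cpu));
	if (rc)
		core_events[core_id]--;
	mutex_unlock(&imc_core_reserve);
	return rc;
}

static void core_imc_counters_put(int core_id, int cpu)
{
	mutex_lock(&imc_core_reserve);
	/*
	 * Stop under the same mutex, so a concurrent get() cannot
	 * start the counter just to have it stopped underneath it.
	 */
	if (--core_events[core_id] == 0)
		opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
				       get_hard_smp_processor_id(cpu));
	mutex_unlock(&imc_core_reserve);
}

That also covers the second scenario: dec-to-zero plus stop and
inc-to-one plus start are each one critical section, so they cannot
interleave anymore.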

> +             if (rc) {
> +                     atomic_dec_return(&core_events[core_id]);

What's the point of using atomic_dec_return here if you ignore the return
value? Not that it matters much as the whole logic is crap anyway.

> +                     pr_err("IMC: Unable to start the counters for core %d\n", core_id);
> +                     return -ENODEV;
> +             }
> +     }
> @@ -410,22 +658,38 @@ int init_imc_pmu(struct imc_events *events, int idx,
>                struct imc_pmu *pmu_ptr)
>  {
>       int ret = -ENODEV;
> +

Sure, ret needs to stay initialized, just in case imc_mem_init()
magically fails to return anything, right?

> +     ret = imc_mem_init(pmu_ptr);
> +     if (ret)
> +             goto err_free;
>       /* Add cpumask and register for hotplug notification */
> -     if (atomic_inc_return(&nest_pmus) == 1) {
> -             /*
> -              * Nest imc pmu need only one cpu per chip, we initialize the
> -              * cpumask for the first nest imc pmu and use the same for the rest.
> -              * To handle the cpuhotplug callback unregister, we track
> -              * the number of nest pmus registers in "nest_pmus".
> -              * "nest_imc_cpumask_initialized" is set to zero during cpuhotplug
> -              * callback unregister.
> -              */
> -             ret = nest_pmu_cpumask_init();
> +     switch (pmu_ptr->domain) {
> +     case IMC_DOMAIN_NEST:
> +             if (atomic_inc_return(&nest_pmus) == 1) {
> +                     /*
> +                      * Nest imc pmu need only one cpu per chip, we initialize
> +                      * the cpumask for the first nest imc pmu and use the
> +                      * same for the rest.
> +                      * To handle the cpuhotplug callback unregister, we track
> +                      * the number of nest pmus registers in "nest_pmus".
> +                      * "nest_imc_cpumask_initialized" is set to zero during
> +                      * cpuhotplug callback unregister.
> +                      */
> +                     ret = nest_pmu_cpumask_init();
> +                     if (ret)
> +                             goto err_free;
> +                     mutex_lock(&imc_nest_inited_reserve);
> +                     nest_imc_cpumask_initialized = 1;
> +                     mutex_unlock(&imc_nest_inited_reserve);
> +             }
> +             break;
> +     case IMC_DOMAIN_CORE:
> +             ret = core_imc_pmu_cpumask_init();
>               if (ret)
> -                     goto err_free;
> -             mutex_lock(&imc_nest_inited_reserve);
> -             nest_imc_cpumask_initialized = 1;
> -             mutex_unlock(&imc_nest_inited_reserve);
> +                     return ret;

Oh, so now you replaced the goto with a plain return. What is actually
taking care of the cleanup if that fails?
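
The core case wants the same error path as the nest case, so the
imc_mem_init() allocation gets freed again. Something along these
lines, assuming the err_free path at the end of init_imc_pmu() cleans
up the core memory as well:

	case IMC_DOMAIN_CORE:
		ret = core_imc_pmu_cpumask_init();
		if (ret)
			/* Unwind the imc_mem_init() allocation */
			goto err_free;
		break;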

> +             break;
> +     default:
> +             return -1;      /* Unknown domain */
>       }
>       ret = update_events_in_group(events, idx, pmu_ptr);
>       if (ret)
> @@ -459,5 +723,10 @@ int init_imc_pmu(struct imc_events *events, int idx,
>                       mutex_unlock(&imc_nest_inited_reserve);
>               }
>       }
> +     /* For core_imc, we have allocated memory, we need to free it */
> +     if (pmu_ptr->domain == IMC_DOMAIN_CORE) {
> +             cleanup_all_core_imc_memory(pmu_ptr);
> +             cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE);

Cute. You clean up the memory stuff and then let the hotplug core invoke
the offline callbacks, which then operate on freed memory.
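
The teardown has to be the other way round, so the offline callbacks
are gone before the memory is:

	if (pmu_ptr->domain == IMC_DOMAIN_CORE) {
		/*
		 * Remove the hotplug state first. After that no offline
		 * callback can be invoked against mem_info anymore, so
		 * freeing it is safe.
		 */
		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE);
		cleanup_all_core_imc_memory(pmu_ptr);
	}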

Thanks,

        tglx
