* Gautham R. Shenoy <e...@linux.vnet.ibm.com> [2020-12-04 10:18:45]:

> From: "Gautham R. Shenoy" <e...@linux.vnet.ibm.com>

<snipped>

> 
>  static int parse_thread_groups(struct device_node *dn,
> -                            struct thread_groups *tg,
> -                            unsigned int property)
> +                            struct thread_groups_list *tglp)
>  {
> -     int i;
> -     u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
> +     int i = 0;
> +     u32 *thread_group_array;
>       u32 *thread_list;
>       size_t total_threads;
> -     int ret;
> +     int ret = 0, count;
> +     unsigned int property_idx = 0;

NIT:
tglx mentions in one of his recent comments to try to keep a reverse fir
tree ordering of variable declarations (longest lines first) where possible.
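
For reference, the declarations above in that ordering would look
roughly like this (purely illustrative):

	unsigned int property_idx = 0;
	u32 *thread_group_array;
	size_t total_threads;
	int ret = 0, count;
	u32 *thread_list;
	int i = 0;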

> 
> +     count = of_property_count_u32_elems(dn, "ibm,thread-groups");
> +     thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL);
>       ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> -                                      thread_group_array, 3);
> +                                      thread_group_array, count);
>       if (ret)
> -             return ret;
> -
> -     tg->property = thread_group_array[0];
> -     tg->nr_groups = thread_group_array[1];
> -     tg->threads_per_group = thread_group_array[2];
> -     if (tg->property != property ||
> -         tg->nr_groups < 1 ||
> -         tg->threads_per_group < 1)
> -             return -ENODATA;
> +             goto out_free;
> 
> -     total_threads = tg->nr_groups * tg->threads_per_group;
> +     while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) {
> +             int j;
> +             struct thread_groups *tg = &tglp->property_tgs[property_idx++];

NIT: same as above.

> 
> -     ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> -                                      thread_group_array,
> -                                      3 + total_threads);
> -     if (ret)
> -             return ret;
> +             tg->property = thread_group_array[i];
> +             tg->nr_groups = thread_group_array[i + 1];
> +             tg->threads_per_group = thread_group_array[i + 2];
> +             total_threads = tg->nr_groups * tg->threads_per_group;
> +
> +             thread_list = &thread_group_array[i + 3];
> 
> -     thread_list = &thread_group_array[3];
> +             for (j = 0; j < total_threads; j++)
> +                     tg->thread_list[j] = thread_list[j];
> +             i = i + 3 + total_threads;

        Can't we simply use memcpy instead?
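
	i.e. something like this (untested), since both arrays are u32:

	memcpy(tg->thread_list, thread_list,
	       total_threads * sizeof(u32));
	i += 3 + total_threads;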

> +     }
> 
> -     for (i = 0 ; i < total_threads; i++)
> -             tg->thread_list[i] = thread_list[i];
> +     tglp->nr_properties = property_idx;
> 
> -     return 0;
> +out_free:
> +     kfree(thread_group_array);
> +     return ret;
>  }
> 
>  /*
> @@ -805,24 +827,39 @@ static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
>       return -1;
>  }
> 
> -static int init_cpu_l1_cache_map(int cpu)
> +static int init_cpu_cache_map(int cpu, unsigned int cache_property)
> 
>  {
>       struct device_node *dn = of_get_cpu_node(cpu, NULL);
> -     struct thread_groups tg = {.property = 0,
> -                                .nr_groups = 0,
> -                                .threads_per_group = 0};
> +     struct thread_groups *tg = NULL;
>       int first_thread = cpu_first_thread_sibling(cpu);
>       int i, cpu_group_start = -1, err = 0;
> +     cpumask_var_t *mask;
> +     struct thread_groups_list *cpu_tgl = &tgl[cpu];

NIT: same as 1st comment.

> 
>       if (!dn)
>               return -ENODATA;
> 
> -     err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
> -     if (err)
> -             goto out;
> +     if (!(cache_property == THREAD_GROUP_SHARE_L1))
> +             return -EINVAL;
> 
> -     cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
> +     if (!cpu_tgl->nr_properties) {
> +             err = parse_thread_groups(dn, cpu_tgl);
> +             if (err)
> +                     goto out;
> +     }
> +
> +     for (i = 0; i < cpu_tgl->nr_properties; i++) {
> +             if (cpu_tgl->property_tgs[i].property == cache_property) {
> +                     tg = &cpu_tgl->property_tgs[i];
> +                     break;
> +             }
> +     }
> +
> +     if (!tg)
> +             return -EINVAL;
> +
> +     cpu_group_start = get_cpu_thread_group_start(cpu, tg);

This whole hunk should be moved to a new function and called before
init_cpu_cache_map. It will simplify the logic to a great extent.
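
Something along these lines, perhaps (an untested sketch; the helper
name get_thread_groups is just illustrative):

	static struct thread_groups *get_thread_groups(int cpu,
						       unsigned int cache_property,
						       int *err)
	{
		struct device_node *dn = of_get_cpu_node(cpu, NULL);
		struct thread_groups_list *cpu_tgl = &tgl[cpu];
		struct thread_groups *tg = NULL;
		int i;

		*err = 0;
		if (!dn) {
			*err = -ENODATA;
			return NULL;
		}

		/* Parse "ibm,thread-groups" once per CPU and cache the result. */
		if (!cpu_tgl->nr_properties) {
			*err = parse_thread_groups(dn, cpu_tgl);
			if (*err)
				goto out;
		}

		/* Pick out the entry matching the requested property. */
		for (i = 0; i < cpu_tgl->nr_properties; i++) {
			if (cpu_tgl->property_tgs[i].property == cache_property) {
				tg = &cpu_tgl->property_tgs[i];
				break;
			}
		}

		if (!tg)
			*err = -EINVAL;
	out:
		of_node_put(dn);
		return tg;
	}

init_cpu_cache_map() would then reduce to something like:

	tg = get_thread_groups(cpu, cache_property, &err);
	if (!tg)
		return err;

	cpu_group_start = get_cpu_thread_group_start(cpu, tg);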

> 
>       if (unlikely(cpu_group_start == -1)) {
>               WARN_ON_ONCE(1);
> @@ -830,11 +867,12 @@ static int init_cpu_l1_cache_map(int cpu)
>               goto out;
>       }
> 
> -     zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
> -                             GFP_KERNEL, cpu_to_node(cpu));
> +     mask = &per_cpu(cpu_l1_cache_map, cpu);
> +
> +     zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
> 

This hunk (and the next hunk) should be moved to the next patch.

>       for (i = first_thread; i < first_thread + threads_per_core; i++) {
> -             int i_group_start = get_cpu_thread_group_start(i, &tg);
> +             int i_group_start = get_cpu_thread_group_start(i, tg);
> 
>               if (unlikely(i_group_start == -1)) {
>                       WARN_ON_ONCE(1);
> @@ -843,7 +881,7 @@ static int init_cpu_l1_cache_map(int cpu)
>               }
> 
>               if (i_group_start == cpu_group_start)
> -                     cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
> +                     cpumask_set_cpu(i, *mask);
>       }
> 
>  out:
> @@ -924,7 +962,7 @@ static int init_big_cores(void)
>       int cpu;
> 
>       for_each_possible_cpu(cpu) {
> -             int err = init_cpu_l1_cache_map(cpu);
> +             int err = init_cpu_cache_map(cpu, THREAD_GROUP_SHARE_L1);
> 
>               if (err)
>                       return err;
> -- 
> 1.9.4
> 

-- 
Thanks and Regards
Srikar Dronamraju
