* Gautham R. Shenoy <e...@linux.vnet.ibm.com> [2020-12-04 10:18:45]:

> From: "Gautham R. Shenoy" <e...@linux.vnet.ibm.com>
<snipped>

> 
>  static int parse_thread_groups(struct device_node *dn,
> -                               struct thread_groups *tg,
> -                               unsigned int property)
> +                               struct thread_groups_list *tglp)
>  {
> -        int i;
> -        u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
> +        int i = 0;
> +        u32 *thread_group_array;
>          u32 *thread_list;
>          size_t total_threads;
> -        int ret;
> +        int ret = 0, count;
> +        unsigned int property_idx = 0;

NIT: tglx mentions in one of his recent comments to try to keep a reverse
fir tree ordering of variables where possible.

> 
> +        count = of_property_count_u32_elems(dn, "ibm,thread-groups");
> +        thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL);
>          ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> -                                         thread_group_array, 3);
> +                                         thread_group_array, count);
>          if (ret)
> -                return ret;
> -
> -        tg->property = thread_group_array[0];
> -        tg->nr_groups = thread_group_array[1];
> -        tg->threads_per_group = thread_group_array[2];
> -        if (tg->property != property ||
> -            tg->nr_groups < 1 ||
> -            tg->threads_per_group < 1)
> -                return -ENODATA;
> +                goto out_free;
> 
> -        total_threads = tg->nr_groups * tg->threads_per_group;
> +        while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) {
> +                int j;
> +                struct thread_groups *tg = &tglp->property_tgs[property_idx++];

NIT: same as above.

> 
> -        ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> -                                         thread_group_array,
> -                                         3 + total_threads);
> -        if (ret)
> -                return ret;
> +                tg->property = thread_group_array[i];
> +                tg->nr_groups = thread_group_array[i + 1];
> +                tg->threads_per_group = thread_group_array[i + 2];
> +                total_threads = tg->nr_groups * tg->threads_per_group;
> +
> +                thread_list = &thread_group_array[i + 3];
> 
> -        thread_list = &thread_group_array[3];
> +                for (j = 0; j < total_threads; j++)
> +                        tg->thread_list[j] = thread_list[j];
> +                i = i + 3 + total_threads;

Can't we simply use memcpy instead?

> +        }
> 
> -        for (i = 0 ; i < total_threads; i++)
> -                tg->thread_list[i] = thread_list[i];
> +        tglp->nr_properties = property_idx;
> 
> -        return 0;
> +out_free:
> +        kfree(thread_group_array);
> +        return ret;
>  }
> 
>  /*
> @@ -805,24 +827,39 @@ static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
>          return -1;
>  }
> 
> -static int init_cpu_l1_cache_map(int cpu)
> +static int init_cpu_cache_map(int cpu, unsigned int cache_property)
> 
>  {
>          struct device_node *dn = of_get_cpu_node(cpu, NULL);
> -        struct thread_groups tg = {.property = 0,
> -                                   .nr_groups = 0,
> -                                   .threads_per_group = 0};
> +        struct thread_groups *tg = NULL;
>          int first_thread = cpu_first_thread_sibling(cpu);
>          int i, cpu_group_start = -1, err = 0;
> +        cpumask_var_t *mask;
> +        struct thread_groups_list *cpu_tgl = &tgl[cpu];

NIT: same as 1st comment.

> 
>          if (!dn)
>                  return -ENODATA;
> 
> -        err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
> -        if (err)
> -                goto out;
> +        if (!(cache_property == THREAD_GROUP_SHARE_L1))
> +                return -EINVAL;
> 
> -        cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
> +        if (!cpu_tgl->nr_properties) {
> +                err = parse_thread_groups(dn, cpu_tgl);
> +                if (err)
> +                        goto out;
> +        }
> +
> +        for (i = 0; i < cpu_tgl->nr_properties; i++) {
> +                if (cpu_tgl->property_tgs[i].property == cache_property) {
> +                        tg = &cpu_tgl->property_tgs[i];
> +                        break;
> +                }
> +        }
> +
> +        if (!tg)
> +                return -EINVAL;
> +
> +        cpu_group_start = get_cpu_thread_group_start(cpu, tg);

This whole hunk should be moved to a new function and called before
init_cpu_cache_map. It will simplify the logic to a great extent.
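Something along the lines of the below completely untested sketch is what I
had in mind (the helper name get_thread_groups() and the *err out-parameter
are only illustrative):

static struct thread_groups *get_thread_groups(int cpu, int group_property,
                                               int *err)
{
        struct device_node *dn = of_get_cpu_node(cpu, NULL);
        struct thread_groups_list *cpu_tgl = &tgl[cpu];
        struct thread_groups *tg = NULL;
        int i;

        *err = 0;
        if (!dn) {
                *err = -ENODATA;
                return NULL;
        }

        /* Parse ibm,thread-groups only once per CPU and cache the result. */
        if (!cpu_tgl->nr_properties) {
                *err = parse_thread_groups(dn, cpu_tgl);
                if (*err)
                        goto out;
        }

        /* Pick the thread_groups entry matching the requested property. */
        for (i = 0; i < cpu_tgl->nr_properties; i++) {
                if (cpu_tgl->property_tgs[i].property == group_property) {
                        tg = &cpu_tgl->property_tgs[i];
                        break;
                }
        }

        if (!tg)
                *err = -EINVAL;
out:
        of_node_put(dn);
        return tg;
}

init_cpu_cache_map() would then only have to call this helper for the
requested cache_property and build the cpumask from the result.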
> 
>          if (unlikely(cpu_group_start == -1)) {
>                  WARN_ON_ONCE(1);
> @@ -830,11 +867,12 @@ static int init_cpu_l1_cache_map(int cpu)
>                  goto out;
>          }
> 
> -        zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
> -                                GFP_KERNEL, cpu_to_node(cpu));
> +        mask = &per_cpu(cpu_l1_cache_map, cpu);
> +
> +        zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
> 

This hunk (and the next hunk) should be moved to the next patch.

>          for (i = first_thread; i < first_thread + threads_per_core; i++) {
> -                int i_group_start = get_cpu_thread_group_start(i, &tg);
> +                int i_group_start = get_cpu_thread_group_start(i, tg);
> 
>                  if (unlikely(i_group_start == -1)) {
>                          WARN_ON_ONCE(1);
> @@ -843,7 +881,7 @@ static int init_cpu_l1_cache_map(int cpu)
>                  }
> 
>                  if (i_group_start == cpu_group_start)
> -                        cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
> +                        cpumask_set_cpu(i, *mask);
>          }
> 
>  out:
> @@ -924,7 +962,7 @@ static int init_big_cores(void)
>          int cpu;
> 
>          for_each_possible_cpu(cpu) {
> -                int err = init_cpu_l1_cache_map(cpu);
> +                int err = init_cpu_cache_map(cpu, THREAD_GROUP_SHARE_L1);
> 
>                  if (err)
>                          return err;
> -- 
> 1.9.4
> 

-- 
Thanks and Regards
Srikar Dronamraju