> On 05-Apr-2022, at 11:22 PM, Ian Rogers wrote:
>
> On Fri, Apr 1, 2022 at 11:59 AM Athira Rajeev
> wrote:
>>
>> perf bench numa testcase fails on systems with CPUs
>> more than 1K.
>>
>> Testcase: perf bench numa mem -p 1 -t 3 -P 512 -s 100 -zZ0qcm --thp 1
>> Snippet of code:
>> <<>>
>> perf: bench/numa.c:302: bind_to_node: Assertion `!(ret)' failed.
>> Aborted (core dumped)
>> <<>>
>>
>> bind_to_node function uses "sched_getaffinity" to save the original
>> cpumask and this call is returning EINVAL (invalid argument).
>> This happens because the default mask size in glibc is 1024.
>> To overcome this 1024-CPU mask size limitation of cpu_set_t,
>> change the mask size using the CPU_*_S macros, i.e., use CPU_ALLOC to
>> allocate the cpumask and CPU_ALLOC_SIZE for its size. Apart from fixing this
>> for "orig_mask", apply the same logic to "mask" as well, which is used to
>> set affinity, so that the mask size is large enough to represent the number
>> of possible CPUs in the system.
>>
>> sched_getaffinity is used in one more place in perf numa bench. It
>> is in the "bind_to_cpu" function. Apply the same logic there also. Though
>> currently no failure is reported from there, it is ideal to change
>> getaffinity to work with system configurations having more CPUs
>> than the default mask size supported by glibc.
>>
>> Also fix "sched_setaffinity" to use a mask size which is large enough
>> to represent the number of possible CPUs in the system.
>>
>> Fixed all places where "bind_cpumask", which is part of "struct
>> thread_data", is used such that bind_cpumask works in all configurations.
>>
>> Reported-by: Disha Goel
>> Signed-off-by: Athira Rajeev
>> ---
>> tools/perf/bench/numa.c | 109 +---
>> 1 file changed, 81 insertions(+), 28 deletions(-)
>>
>> diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
>> index f2640179ada9..333896907e45 100644
>> --- a/tools/perf/bench/numa.c
>> +++ b/tools/perf/bench/numa.c
>> @@ -54,7 +54,7 @@
>>
>> struct thread_data {
>>int curr_cpu;
>> - cpu_set_t bind_cpumask;
>> + cpu_set_t *bind_cpumask;
>>int bind_node;
>>u8 *process_data;
>>int process_nr;
>> @@ -266,46 +266,75 @@ static bool node_has_cpus(int node)
>>return ret;
>> }
>>
>> -static cpu_set_t bind_to_cpu(int target_cpu)
>> +static cpu_set_t *bind_to_cpu(int target_cpu)
>> {
>> - cpu_set_t orig_mask, mask;
>> + int nrcpus = numa_num_possible_cpus();
>> + cpu_set_t *orig_mask, *mask;
>> + size_t size;
>>int ret;
>>
>> - ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
>> - BUG_ON(ret);
>> + orig_mask = CPU_ALLOC(nrcpus);
>> + BUG_ON(!orig_mask);
>> + size = CPU_ALLOC_SIZE(nrcpus);
>> + CPU_ZERO_S(size, orig_mask);
>> +
>> + ret = sched_getaffinity(0, size, orig_mask);
>> + if (ret) {
>> + CPU_FREE(orig_mask);
>> + BUG_ON(ret);
>> + }
>>
>> - CPU_ZERO(&mask);
>> + mask = CPU_ALLOC(nrcpus);
>> + BUG_ON(!mask);
>> + CPU_ZERO_S(size, mask);
>>
>>if (target_cpu == -1) {
>>int cpu;
>>
>>for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
>> - CPU_SET(cpu, &mask);
>> + CPU_SET_S(cpu, size, mask);
>>} else {
>>BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
>> - CPU_SET(target_cpu, &mask);
>> + CPU_SET_S(target_cpu, size, mask);
>>}
>>
>> - ret = sched_setaffinity(0, sizeof(mask), &mask);
>> - BUG_ON(ret);
>> + ret = sched_setaffinity(0, size, mask);
>> + if (ret) {
>> + CPU_FREE(mask);
>> + BUG_ON(ret);
>> + }
>> +
>> + CPU_FREE(mask);
>
> This all looks good, a nit here it could it be a little shorter as:
> ret = sched_setaffinity(0, size, mask);
> CPU_FREE(mask);
> BUG_ON(ret);
>
> Thanks,
> Ian
>
Thanks for the review Ian.
Sure, I will address this change in V2
Thanks
Athira
>>
>>return orig_mask;
>> }
>>
>> -static cpu_set_t bind_to_node(int target_node)
>> +static cpu_set_t *bind_to_node(int target_node)
>> {
>> - cpu_set_t orig_mask, mask;
>> + int nrcpus = numa_num_possible_cpus();
>> + cpu_set_t *orig_mask, *mask;
>> + size_t size;
>>int cpu;
>>int ret;
>>
>> - ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
>> - BUG_ON(ret);
>> + orig_mask = CPU_ALLOC(nrcpus);
>> + BUG_ON(!orig_mask);
>> + size = CPU_ALLOC_SIZE(nrcpus);
>> + CPU_ZERO_S(size, orig_mask);
>> +
>> + ret = sched_getaffinity(0, size, orig_mask);
>> + if (ret) {
>> + CPU_FREE(orig_mask);
>> + BUG_ON(ret);
>> + }
>>
>> - CPU_ZERO(&mask);
>> + mask = CPU_A