Re: [PATCH 3/4] tools/perf: Fix perf numa bench to fix usage of affinity for machines with #CPUs > 1K

2022-04-06 Thread Athira Rajeev



> On 05-Apr-2022, at 11:22 PM, Ian Rogers  wrote:
> 
> On Fri, Apr 1, 2022 at 11:59 AM Athira Rajeev
>  wrote:
>> 
>> perf bench numa testcase fails on systems with more than
>> 1K CPUs.
>> 
>> Testcase: perf bench numa mem -p 1 -t 3 -P 512 -s 100 -zZ0qcm --thp  1
>> Snippet of code:
>> <<>>
>> perf: bench/numa.c:302: bind_to_node: Assertion `!(ret)' failed.
>> Aborted (core dumped)
>> <<>>
>> 
>> bind_to_node function uses "sched_getaffinity" to save the original
>> cpumask, and this call returns EINVAL (invalid argument).
>> This happens because the default mask size in glibc is 1024.
>> To overcome this 1024-CPU mask size limitation of cpu_set_t,
>> change the mask size using the CPU_*_S macros, i.e., use CPU_ALLOC to
>> allocate the cpumask and CPU_ALLOC_SIZE for its size. Apart from fixing
>> this for "orig_mask", apply the same logic to "mask" as well, which is
>> used for sched_setaffinity, so that the mask size is large enough to
>> represent the number of possible CPUs in the system.
>> 
>> sched_getaffinity is used in one more place in perf numa bench,
>> in the "bind_to_cpu" function. Apply the same logic there as well.
>> Though no failure has been reported from there so far, getaffinity
>> should likewise work on system configurations with more CPUs than
>> the default mask size supported by glibc.
>> 
>> Also fix "sched_setaffinity" to use a mask size large enough
>> to represent the number of possible CPUs in the system.
>> 
>> Fixed all places where "bind_cpumask", which is part of "struct
>> thread_data", is used, so that bind_cpumask works in all configurations.
>> 
>> Reported-by: Disha Goel 
>> Signed-off-by: Athira Rajeev 
>> ---
>> tools/perf/bench/numa.c | 109 +---
>> 1 file changed, 81 insertions(+), 28 deletions(-)
>> 
>> diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
>> index f2640179ada9..333896907e45 100644
>> --- a/tools/perf/bench/numa.c
>> +++ b/tools/perf/bench/numa.c
>> @@ -54,7 +54,7 @@
>> 
>> struct thread_data {
>>int curr_cpu;
>> -   cpu_set_t   bind_cpumask;
>> +   cpu_set_t   *bind_cpumask;
>>int bind_node;
>>u8  *process_data;
>>int process_nr;
>> @@ -266,46 +266,75 @@ static bool node_has_cpus(int node)
>>return ret;
>> }
>> 
>> -static cpu_set_t bind_to_cpu(int target_cpu)
>> +static cpu_set_t *bind_to_cpu(int target_cpu)
>> {
>> -   cpu_set_t orig_mask, mask;
>> +   int nrcpus = numa_num_possible_cpus();
>> +   cpu_set_t *orig_mask, *mask;
>> +   size_t size;
>>int ret;
>> 
>> -   ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
>> -   BUG_ON(ret);
>> +   orig_mask = CPU_ALLOC(nrcpus);
>> +   BUG_ON(!orig_mask);
>> +   size = CPU_ALLOC_SIZE(nrcpus);
>> +   CPU_ZERO_S(size, orig_mask);
>> +
>> +   ret = sched_getaffinity(0, size, orig_mask);
>> +   if (ret) {
>> +   CPU_FREE(orig_mask);
>> +   BUG_ON(ret);
>> +   }
>> 
>> -   CPU_ZERO(&mask);
>> +   mask = CPU_ALLOC(nrcpus);
>> +   BUG_ON(!mask);
>> +   CPU_ZERO_S(size, mask);
>> 
>>if (target_cpu == -1) {
>>int cpu;
>> 
>>for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
>> -   CPU_SET(cpu, &mask);
>> +   CPU_SET_S(cpu, size, mask);
>>} else {
>>BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
>> -   CPU_SET(target_cpu, &mask);
>> +   CPU_SET_S(target_cpu, size, mask);
>>}
>> 
>> -   ret = sched_setaffinity(0, sizeof(mask), &mask);
>> -   BUG_ON(ret);
>> +   ret = sched_setaffinity(0, size, mask);
>> +   if (ret) {
>> +   CPU_FREE(mask);
>> +   BUG_ON(ret);
>> +   }
>> +
>> +   CPU_FREE(mask);
> 
> This all looks good; a nit: here it could be a little shorter as:
> ret = sched_setaffinity(0, size, mask);
> CPU_FREE(mask);
> BUG_ON(ret);
> 
> Thanks,
> Ian
> 

Thanks for the review Ian.
Sure, I will address this change in V2.

Thanks
Athira
>> 
>>return orig_mask;
>> }
>> 
>> -static cpu_set_t bind_to_node(int target_node)
>> +static cpu_set_t *bind_to_node(int target_node)
>> {
>> -   cpu_set_t orig_mask, mask;
>> +   int nrcpus = numa_num_possible_cpus();
>> +   cpu_set_t *orig_mask, *mask;
>> +   size_t size;
>>int cpu;
>>int ret;
>> 
>> -   ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
>> -   BUG_ON(ret);
>> +   orig_mask = CPU_ALLOC(nrcpus);
>> +   BUG_ON(!orig_mask);
>> +   size = CPU_ALLOC_SIZE(nrcpus);
>> +   CPU_ZERO_S(size, orig_mask);
>> +
>> +   ret = sched_getaffinity(0, size, orig_mask);
>> +   if (ret) {
>> +   CPU_FREE(orig_mask);
>> +   BUG_ON(ret);
>> +   }
>> 
>> -   CPU_ZERO(&mask);
>> +   mask = CPU_ALLOC(nrcpus);

[PATCH 3/4] tools/perf: Fix perf numa bench to fix usage of affinity for machines with #CPUs > 1K

2022-04-01 Thread Athira Rajeev
perf bench numa testcase fails on systems with more than
1K CPUs.

Testcase: perf bench numa mem -p 1 -t 3 -P 512 -s 100 -zZ0qcm --thp  1
Snippet of code:
<<>>
perf: bench/numa.c:302: bind_to_node: Assertion `!(ret)' failed.
Aborted (core dumped)
<<>>

bind_to_node function uses "sched_getaffinity" to save the original
cpumask, and this call returns EINVAL (invalid argument).
This happens because the default mask size in glibc is 1024.
To overcome this 1024-CPU mask size limitation of cpu_set_t,
change the mask size using the CPU_*_S macros, i.e., use CPU_ALLOC to
allocate the cpumask and CPU_ALLOC_SIZE for its size. Apart from fixing
this for "orig_mask", apply the same logic to "mask" as well, which is
used for sched_setaffinity, so that the mask size is large enough to
represent the number of possible CPUs in the system.
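For reference, a minimal standalone sketch of the dynamic-mask pattern
described above. It uses sysconf(_SC_NPROCESSORS_CONF) in place of
libnuma's numa_num_possible_cpus() so it builds without -lnuma; the two
can report different counts, so this only approximates what the patch does:

#define _GNU_SOURCE
#include <assert.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int nrcpus = (int)sysconf(_SC_NPROCESSORS_CONF);
	size_t size = CPU_ALLOC_SIZE(nrcpus);	/* bytes needed for nrcpus bits */
	cpu_set_t *orig_mask = CPU_ALLOC(nrcpus);

	assert(orig_mask);
	CPU_ZERO_S(size, orig_mask);

	/* As described above, a fixed-size cpu_set_t can fail here with
	 * EINVAL on >1K-CPU systems; with CPU_ALLOC_SIZE the kernel's
	 * mask always fits. */
	if (sched_getaffinity(0, size, orig_mask))
		perror("sched_getaffinity");
	else
		printf("CPUs in affinity mask: %d\n",
		       CPU_COUNT_S(size, orig_mask));

	CPU_FREE(orig_mask);
	return 0;
}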

sched_getaffinity is used in one more place in perf numa bench,
in the "bind_to_cpu" function. Apply the same logic there as well.
Though no failure has been reported from there so far, getaffinity
should likewise work on system configurations with more CPUs than
the default mask size supported by glibc.

Also fix "sched_setaffinity" to use a mask size large enough
to represent the number of possible CPUs in the system.
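The set side follows the same pattern; a hedged sketch (the helper name
and the choice to pin to a single caller-supplied CPU are illustrative,
not taken from the patch):

#define _GNU_SOURCE
#include <assert.h>
#include <sched.h>

static int pin_to_cpu(int cpu, int nrcpus)
{
	size_t size = CPU_ALLOC_SIZE(nrcpus);
	cpu_set_t *mask = CPU_ALLOC(nrcpus);
	int ret;

	assert(mask);
	CPU_ZERO_S(size, mask);
	CPU_SET_S(cpu, size, mask);	/* the _S variant honours the real size */

	ret = sched_setaffinity(0, size, mask);
	CPU_FREE(mask);			/* release the mask whatever the outcome */
	return ret;
}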

Fixed all places where "bind_cpumask", which is part of "struct
thread_data", is used, so that bind_cpumask works in all configurations.

Reported-by: Disha Goel 
Signed-off-by: Athira Rajeev 
---
 tools/perf/bench/numa.c | 109 +---
 1 file changed, 81 insertions(+), 28 deletions(-)

diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index f2640179ada9..333896907e45 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -54,7 +54,7 @@
 
 struct thread_data {
int curr_cpu;
-   cpu_set_t   bind_cpumask;
+   cpu_set_t   *bind_cpumask;
int bind_node;
u8  *process_data;
int process_nr;
@@ -266,46 +266,75 @@ static bool node_has_cpus(int node)
return ret;
 }
 
-static cpu_set_t bind_to_cpu(int target_cpu)
+static cpu_set_t *bind_to_cpu(int target_cpu)
 {
-   cpu_set_t orig_mask, mask;
+   int nrcpus = numa_num_possible_cpus();
+   cpu_set_t *orig_mask, *mask;
+   size_t size;
int ret;
 
-   ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
-   BUG_ON(ret);
+   orig_mask = CPU_ALLOC(nrcpus);
+   BUG_ON(!orig_mask);
+   size = CPU_ALLOC_SIZE(nrcpus);
+   CPU_ZERO_S(size, orig_mask);
+
+   ret = sched_getaffinity(0, size, orig_mask);
+   if (ret) {
+   CPU_FREE(orig_mask);
+   BUG_ON(ret);
+   }
 
-   CPU_ZERO(&mask);
+   mask = CPU_ALLOC(nrcpus);
+   BUG_ON(!mask);
+   CPU_ZERO_S(size, mask);
 
if (target_cpu == -1) {
int cpu;
 
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-   CPU_SET(cpu, &mask);
+   CPU_SET_S(cpu, size, mask);
} else {
BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
-   CPU_SET(target_cpu, &mask);
+   CPU_SET_S(target_cpu, size, mask);
}
 
-   ret = sched_setaffinity(0, sizeof(mask), &mask);
-   BUG_ON(ret);
+   ret = sched_setaffinity(0, size, mask);
+   if (ret) {
+   CPU_FREE(mask);
+   BUG_ON(ret);
+   }
+
+   CPU_FREE(mask);
 
return orig_mask;
 }
 
-static cpu_set_t bind_to_node(int target_node)
+static cpu_set_t *bind_to_node(int target_node)
 {
-   cpu_set_t orig_mask, mask;
+   int nrcpus = numa_num_possible_cpus();
+   cpu_set_t *orig_mask, *mask;
+   size_t size;
int cpu;
int ret;
 
-   ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
-   BUG_ON(ret);
+   orig_mask = CPU_ALLOC(nrcpus);
+   BUG_ON(!orig_mask);
+   size = CPU_ALLOC_SIZE(nrcpus);
+   CPU_ZERO_S(size, orig_mask);
+
+   ret = sched_getaffinity(0, size, orig_mask);
+   if (ret) {
+   CPU_FREE(orig_mask);
+   BUG_ON(ret);
+   }
 
-   CPU_ZERO(&mask);
+   mask = CPU_ALLOC(nrcpus);
+   BUG_ON(!mask);
+   CPU_ZERO_S(size, mask);
 
if (target_node == NUMA_NO_NODE) {
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-   CPU_SET(cpu, &mask);
+   CPU_SET_S(cpu, size, mask);
} else {
struct bitmask *cpumask = numa_allocate_cpumask();
 
@@ -313,24 +342,33 @@ static cpu_set_t bind_to_node(int target_node)
if (!numa_node_to_cpus(target_node, cpumask)) {
for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
if (numa_bitmask_isbitset(cpumask, cpu))
-   CPU_SET(cpu, &mask);
+   CPU_SET_S(cpu, size, mask);