Re: [PATCH] cpufreq:powernv: Fix init_chip_info initialization in numa=off

2021-07-27 Thread Pratik Sampat




On 27/07/21 11:46 am, Gautham R Shenoy wrote:

On Mon, Jul 26, 2021 at 10:37:57PM +0530, Pratik R. Sampat wrote:

In the numa=off kernel command-line configuration init_chip_info() loops
around the number of chips and attempts to copy the cpumask of that node
which is NULL for all iterations after the first chip.

Hence, store the cpu mask for each chip instead of derving cpumask from
node while populating the "chips" struct array and copy that to the
chips[i].mask

Cc: sta...@vger.kernel.org
Fixes: 053819e0bf84 ("cpufreq: powernv: Handle throttling due to Pmax capping at 
chip level")
Signed-off-by: Pratik R. Sampat 
Reported-by: Shirisha Ganta 
---
  drivers/cpufreq/powernv-cpufreq.c | 15 +--
  1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c 
b/drivers/cpufreq/powernv-cpufreq.c
index 005600cef273..8ec10d9aed8f 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -1046,12 +1046,20 @@ static int init_chip_info(void)
unsigned int *chip;
unsigned int cpu, i;
unsigned int prev_chip_id = UINT_MAX;
+   cpumask_t *chip_cpu_mask;
int ret = 0;

chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL);
if (!chip)
return -ENOMEM;

+   /* Allocate a chip cpu mask large enough to fit mask for all chips */
+   chip_cpu_mask = kcalloc(32, sizeof(cpumask_t), GFP_KERNEL);

I suppose by 32 you mean the maximum number of chips possible. You
could use a #define for that.


ack, I could #define the constant.


Otherwise, the patch looks good to me.

Reviewed-by: Gautham R. Shenoy 


Thanks
Pratik




+   if (!chip_cpu_mask) {
+   ret = -ENOMEM;
+   goto free_and_return;
+   }
+
for_each_possible_cpu(cpu) {
unsigned int id = cpu_to_chip_id(cpu);

@@ -1059,22 +1067,25 @@ static int init_chip_info(void)
prev_chip_id = id;
chip[nr_chips++] = id;
}
+   cpumask_set_cpu(cpu, _cpu_mask[nr_chips-1]);
}

chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
if (!chips) {
ret = -ENOMEM;
-   goto free_and_return;
+   goto out_chip_cpu_mask;
}

for (i = 0; i < nr_chips; i++) {
chips[i].id = chip[i];
-   cpumask_copy([i].mask, cpumask_of_node(chip[i]));
+   cpumask_copy([i].mask, _cpu_mask[i]);
INIT_WORK([i].throttle, powernv_cpufreq_work_fn);
for_each_cpu(cpu, [i].mask)
per_cpu(chip_info, cpu) =  [i];
}

+out_chip_cpu_mask:
+   kfree(chip_cpu_mask);
  free_and_return:
kfree(chip);
return ret;
--
2.31.1





Re: [PATCH] cpufreq:powernv: Fix init_chip_info initialization in numa=off

2021-07-27 Thread Gautham R Shenoy
On Mon, Jul 26, 2021 at 10:37:57PM +0530, Pratik R. Sampat wrote:
> In the numa=off kernel command-line configuration init_chip_info() loops
> around the number of chips and attempts to copy the cpumask of that node
> which is NULL for all iterations after the first chip.
> 
> Hence, store the cpu mask for each chip instead of derving cpumask from
> node while populating the "chips" struct array and copy that to the
> chips[i].mask
> 
> Cc: sta...@vger.kernel.org
> Fixes: 053819e0bf84 ("cpufreq: powernv: Handle throttling due to Pmax capping 
> at chip level")
> Signed-off-by: Pratik R. Sampat 
> Reported-by: Shirisha Ganta 
> ---
>  drivers/cpufreq/powernv-cpufreq.c | 15 +--
>  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cpufreq/powernv-cpufreq.c 
> b/drivers/cpufreq/powernv-cpufreq.c
> index 005600cef273..8ec10d9aed8f 100644
> --- a/drivers/cpufreq/powernv-cpufreq.c
> +++ b/drivers/cpufreq/powernv-cpufreq.c
> @@ -1046,12 +1046,20 @@ static int init_chip_info(void)
>   unsigned int *chip;
>   unsigned int cpu, i;
>   unsigned int prev_chip_id = UINT_MAX;
> + cpumask_t *chip_cpu_mask;
>   int ret = 0;
> 
>   chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL);
>   if (!chip)
>   return -ENOMEM;
> 
> + /* Allocate a chip cpu mask large enough to fit mask for all chips */
> + chip_cpu_mask = kcalloc(32, sizeof(cpumask_t), GFP_KERNEL);

I suppose by 32 you mean the maximum number of chips possible. You
could use a #define for that.

Otherwise, the patch looks good to me.

Reviewed-by: Gautham R. Shenoy 



> + if (!chip_cpu_mask) {
> + ret = -ENOMEM;
> + goto free_and_return;
> + }
> +
>   for_each_possible_cpu(cpu) {
>   unsigned int id = cpu_to_chip_id(cpu);
> 
> @@ -1059,22 +1067,25 @@ static int init_chip_info(void)
>   prev_chip_id = id;
>   chip[nr_chips++] = id;
>   }
> + cpumask_set_cpu(cpu, _cpu_mask[nr_chips-1]);
>   }
> 
>   chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
>   if (!chips) {
>   ret = -ENOMEM;
> - goto free_and_return;
> + goto out_chip_cpu_mask;
>   }
> 
>   for (i = 0; i < nr_chips; i++) {
>   chips[i].id = chip[i];
> - cpumask_copy([i].mask, cpumask_of_node(chip[i]));
> + cpumask_copy([i].mask, _cpu_mask[i]);
>   INIT_WORK([i].throttle, powernv_cpufreq_work_fn);
>   for_each_cpu(cpu, [i].mask)
>   per_cpu(chip_info, cpu) =  [i];
>   }
> 
> +out_chip_cpu_mask:
> + kfree(chip_cpu_mask);
>  free_and_return:
>   kfree(chip);
>   return ret;
> -- 
> 2.31.1
> 


[PATCH] cpufreq:powernv: Fix init_chip_info initialization in numa=off

2021-07-26 Thread Pratik R. Sampat
In the numa=off kernel command-line configuration init_chip_info() loops
around the number of chips and attempts to copy the cpumask of that node
which is NULL for all iterations after the first chip.

Hence, store the cpu mask for each chip instead of derving cpumask from
node while populating the "chips" struct array and copy that to the
chips[i].mask

Cc: sta...@vger.kernel.org
Fixes: 053819e0bf84 ("cpufreq: powernv: Handle throttling due to Pmax capping 
at chip level")
Signed-off-by: Pratik R. Sampat 
Reported-by: Shirisha Ganta 
---
 drivers/cpufreq/powernv-cpufreq.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c 
b/drivers/cpufreq/powernv-cpufreq.c
index 005600cef273..8ec10d9aed8f 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -1046,12 +1046,20 @@ static int init_chip_info(void)
unsigned int *chip;
unsigned int cpu, i;
unsigned int prev_chip_id = UINT_MAX;
+   cpumask_t *chip_cpu_mask;
int ret = 0;
 
chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL);
if (!chip)
return -ENOMEM;
 
+   /* Allocate a chip cpu mask large enough to fit mask for all chips */
+   chip_cpu_mask = kcalloc(32, sizeof(cpumask_t), GFP_KERNEL);
+   if (!chip_cpu_mask) {
+   ret = -ENOMEM;
+   goto free_and_return;
+   }
+
for_each_possible_cpu(cpu) {
unsigned int id = cpu_to_chip_id(cpu);
 
@@ -1059,22 +1067,25 @@ static int init_chip_info(void)
prev_chip_id = id;
chip[nr_chips++] = id;
}
+   cpumask_set_cpu(cpu, _cpu_mask[nr_chips-1]);
}
 
chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
if (!chips) {
ret = -ENOMEM;
-   goto free_and_return;
+   goto out_chip_cpu_mask;
}
 
for (i = 0; i < nr_chips; i++) {
chips[i].id = chip[i];
-   cpumask_copy([i].mask, cpumask_of_node(chip[i]));
+   cpumask_copy([i].mask, _cpu_mask[i]);
INIT_WORK([i].throttle, powernv_cpufreq_work_fn);
for_each_cpu(cpu, [i].mask)
per_cpu(chip_info, cpu) =  [i];
}
 
+out_chip_cpu_mask:
+   kfree(chip_cpu_mask);
 free_and_return:
kfree(chip);
return ret;
-- 
2.31.1



Re: [PATCH] cpufreq:powernv: Fix init_chip_info initialization in numa=off

2021-07-08 Thread Gautham R Shenoy
Hello Pratik,

On Tue, Jun 15, 2021 at 10:39:49AM +0530, Pratik R. Sampat wrote:
> In the numa=off kernel command-line configuration init_chip_info() loops
> around the number of chips and attempts to copy the cpumask of that node
> which is NULL for all iterations after the first chip.

Thanks for taking a look into this. Indeed there is an issue here
because the code here assumes that node_mask as a proxy for the
chip_mask. This assumption breaks when run with numa=off, since there will only 
be a
single node, but multiple chips.


> 
> Hence adding a check to bail out after the first initialization if there
> is only one node.
> 
> Fixes: 053819e0bf84 ("cpufreq: powernv: Handle throttling due to Pmax capping 
> at chip level")
> Signed-off-by: Pratik R. Sampat 
> Reported-by: Shirisha Ganta 
> ---
>  drivers/cpufreq/powernv-cpufreq.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/cpufreq/powernv-cpufreq.c 
> b/drivers/cpufreq/powernv-cpufreq.c
> index e439b43c19eb..663f9c4b5e3a 100644
> --- a/drivers/cpufreq/powernv-cpufreq.c
> +++ b/drivers/cpufreq/powernv-cpufreq.c
> @@ -1078,6 +1078,8 @@ static int init_chip_info(void)
>   INIT_WORK([i].throttle, powernv_cpufreq_work_fn);
>   for_each_cpu(cpu, [i].mask)
>   per_cpu(chip_info, cpu) =  [i];
> + if (num_possible_nodes() == 1)
> + break;

With this we will only initialize the chip[0].throttle work function,
while for the rest of the chips chip[i].throttle will be
uninitialized. While we may be running in the numa=off mode, the fact
remains that those other chips do exist and they may experiencing
throttling, during which they will try to schedule work for chip[i] in
order to take corrective action, which will fail.

Hence a more correct approach may be to maintain a chip[i] mask
independent of the node mask.





>   }
>  
>  free_and_return:
> -- 
> 2.30.2
> 


[PATCH] cpufreq:powernv: Fix init_chip_info initialization in numa=off

2021-06-14 Thread Pratik R. Sampat
In the numa=off kernel command-line configuration init_chip_info() loops
around the number of chips and attempts to copy the cpumask of that node
which is NULL for all iterations after the first chip.

Hence adding a check to bail out after the first initialization if there
is only one node.

Fixes: 053819e0bf84 ("cpufreq: powernv: Handle throttling due to Pmax capping 
at chip level")
Signed-off-by: Pratik R. Sampat 
Reported-by: Shirisha Ganta 
---
 drivers/cpufreq/powernv-cpufreq.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/cpufreq/powernv-cpufreq.c 
b/drivers/cpufreq/powernv-cpufreq.c
index e439b43c19eb..663f9c4b5e3a 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -1078,6 +1078,8 @@ static int init_chip_info(void)
INIT_WORK([i].throttle, powernv_cpufreq_work_fn);
for_each_cpu(cpu, [i].mask)
per_cpu(chip_info, cpu) =  [i];
+   if (num_possible_nodes() == 1)
+   break;
}
 
 free_and_return:
-- 
2.30.2