Re: [PATCH 3/3] powerpc/pseries/cpuhp: delete add/remove_by_count code

2021-09-21 Thread kernel test robot
Hi Nathan,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on linus/master v5.15-rc2 next-20210920]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Nathan-Lynch/CPU-DLPAR-hotplug-for-v5-16/20210920-215907
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allmodconfig (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/72ea4c8a5398a4a72da34051a66f260ab0154f57
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Nathan-Lynch/CPU-DLPAR-hotplug-for-v5-16/20210920-215907
git checkout 72ea4c8a5398a4a72da34051a66f260ab0154f57
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross 
ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   arch/powerpc/platforms/pseries/hotplug-cpu.c: In function 'dlpar_cpu':
>> arch/powerpc/platforms/pseries/hotplug-cpu.c:746:13: error: variable 'count' 
>> set but not used [-Werror=unused-but-set-variable]
 746 | u32 count, drc_index;
 | ^
   cc1: all warnings being treated as errors


vim +/count +746 arch/powerpc/platforms/pseries/hotplug-cpu.c

ac71380071d19d Nathan Fontenot 2015-12-16  743  
ac71380071d19d Nathan Fontenot 2015-12-16  744  int dlpar_cpu(struct 
pseries_hp_errorlog *hp_elog)
ac71380071d19d Nathan Fontenot 2015-12-16  745  {
ac71380071d19d Nathan Fontenot 2015-12-16 @746  u32 count, 
drc_index;
ac71380071d19d Nathan Fontenot 2015-12-16  747  int rc;
ac71380071d19d Nathan Fontenot 2015-12-16  748  
ac71380071d19d Nathan Fontenot 2015-12-16  749  count = 
hp_elog->_drc_u.drc_count;
ac71380071d19d Nathan Fontenot 2015-12-16  750  drc_index = 
hp_elog->_drc_u.drc_index;
ac71380071d19d Nathan Fontenot 2015-12-16  751  
ac71380071d19d Nathan Fontenot 2015-12-16  752  
lock_device_hotplug();
ac71380071d19d Nathan Fontenot 2015-12-16  753  
ac71380071d19d Nathan Fontenot 2015-12-16  754  switch 
(hp_elog->action) {
ac71380071d19d Nathan Fontenot 2015-12-16  755  case 
PSERIES_HP_ELOG_ACTION_REMOVE:
72ea4c8a5398a4 Nathan Lynch2021-09-20  756  if 
(hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
ac71380071d19d Nathan Fontenot 2015-12-16  757  
rc = dlpar_cpu_remove_by_index(drc_index);
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16  758  
/*
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16  759  
 * Setting the isolation state of an UNISOLATED/CONFIGURED
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16  760  
 * device to UNISOLATE is a no-op, but the hypervisor can
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16  761  
 * use it as a hint that the CPU removal failed.
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16  762  
 */
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16  763  
if (rc)
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16  764  
dlpar_unisolate_drc(drc_index);
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16  765  }
ac71380071d19d Nathan Fontenot 2015-12-16  766  else
ac71380071d19d Nathan Fontenot 2015-12-16  767  
rc = -EINVAL;
ac71380071d19d Nathan Fontenot 2015-12-16  768  break;
90edf184b9b727 Nathan Fontenot 2015-12-16  769  case 
PSERIES_HP_ELOG_ACTION_ADD:
72ea4c8a5398a4 Nathan Lynch2021-09-20  770  if 
(hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
90edf184b9b727 Nathan Fontenot 2015-12-16  771  
rc = dlpar_cpu_add(drc_index);
90edf184b9b727 Nathan Fontenot 2015-12-16  772  else
90edf184b9b727 Nathan Fontenot 2015-12-16  773  
rc = -EINVAL;
90edf184b9b727 Nathan Fontenot 2015-12-16  774  break;
ac71380071d19d Nathan Fontenot 2015-12-16  775  default:
ac71380071d19d Nathan Fontenot 2015-12-16  776  
pr_err("Invalid action (%d) specified\n", hp_elog->action);

Re: [PATCH 3/3] powerpc/pseries/cpuhp: delete add/remove_by_count code

2021-09-20 Thread Nathan Lynch
Daniel Henrique Barboza  writes:

> On 9/20/21 10:55, Nathan Lynch wrote:
>> The core DLPAR code supports two actions (add and remove) and three
>> subtypes of action:
>> 
>> * By DRC index: the action is attempted on a single specified resource.
>>This is the usual case for processors.
>> * By indexed count: the action is attempted on a range of resources
>>beginning at the specified index. This is implemented only by the memory
>>DLPAR code.
>> * By count: the lower layer (CPU or memory) is responsible for locating the
>>specified number of resources to which the action can be applied.
>> 
>> I cannot find any evidence of the "by count" subtype being used by drmgr or
>> qemu for processors. And when I try to exercise this code, the add case
>> does not work:
>
>
> Just to clarify: did you check both CPU and memory cases and found out that 
> the
> 'by count' subtype isn't used with CPUs, but drmgr has some cases in which
> 'by count' is used with LMBs?

Yes, drmgr uses both the 'by count' and the 'by index' methods for
memory currently on PowerVM.

> I'm asking because I worked with a part of the LMB removal code a few months 
> ago,
> and got stuck in a situation in which the 'by count' and 'by indexed count' 
> are
> similar enough to feel repetitive, but distinct enough to not be easily 
> reduced
> into a single function. If drmgr wasn't using the 'by count' subtypes for LMBs
> that would be a good chance for more code redux.

The 'by count' method is definitely used for memory on PowerVM. I was
under the impression that the 'by indexed count' method was used by qemu
for memory sometimes; I'm pretty sure it's not used on PowerVM.

>> Summary:
>> 
>> * This code has not worked reliably since its introduction.
>> * There is no evidence that it is used.
>> * It contains questionable rollback behaviors in error paths which are
>>difficult to test.
>> 
>> So let's remove it.
>> 
>> Signed-off-by: Nathan Lynch 
>> Fixes: ac71380071d1 ("powerpc/pseries: Add CPU dlpar remove functionality")
>> Fixes: 90edf184b9b7 ("powerpc/pseries: Add CPU dlpar add functionality")
>> Fixes: b015f6bc9547 ("powerpc/pseries: Add cpu DLPAR support for drc-info 
>> property")
>> ---
>
> Tested with a QEMU pseries guest, no issues found.
>
>
> Reviewed-by: Daniel Henrique Barboza 
> Tested-by: Daniel Henrique Barboza 

Thanks!


Re: [PATCH 3/3] powerpc/pseries/cpuhp: delete add/remove_by_count code

2021-09-20 Thread Daniel Henrique Barboza




On 9/20/21 10:55, Nathan Lynch wrote:

The core DLPAR code supports two actions (add and remove) and three
subtypes of action:

* By DRC index: the action is attempted on a single specified resource.
   This is the usual case for processors.
* By indexed count: the action is attempted on a range of resources
   beginning at the specified index. This is implemented only by the memory
   DLPAR code.
* By count: the lower layer (CPU or memory) is responsible for locating the
   specified number of resources to which the action can be applied.

I cannot find any evidence of the "by count" subtype being used by drmgr or
qemu for processors. And when I try to exercise this code, the add case
does not work:



Just to clarify: did you check both CPU and memory cases and found out that the
'by count' subtype isn't used with CPUs, but drmgr has some cases in which
'by count' is used with LMBs?

I'm asking because I worked with a part of the LMB removal code a few months 
ago,
and got stuck in a situation in which the 'by count' and 'by indexed count' are
similar enough to feel repetitive, but distinct enough to not be easily reduced
into a single function. If drmgr wasn't using the 'by count' subtypes for LMBs
that would be a good chance for more code redux.




   $ ppc64_cpu --smt ; nproc
   SMT=8
   24
   $ printf "cpu remove count 2" > /sys/kernel/dlpar
   $ nproc
   8
   $ printf "cpu add count 2" > /sys/kernel/dlpar
   -bash: printf: write error: Invalid argument
   $ dmesg | tail -2
   pseries-hotplug-cpu: Failed to find enough CPUs (1 of 2) to add
   dlpar: Could not handle DLPAR request "cpu add count 2"
   $ nproc
   8
   $ drmgr -c cpu -a -q 2 # this uses the by-index method
   Validating CPU DLPAR capability...yes.
   CPU 1
   CPU 17
   $ nproc
   24

This is because find_drc_info_cpus_to_add() does not increment drc_index
appropriately during its search.

This is not hard to fix. But the _by_count() functions also have the
property that they attempt to roll back all prior operations if the entire
request cannot be satisfied, even though the rollback itself can encounter
errors. It's not possible to provide transaction-like behavior at this
level, and it's undesirable to have code that can only pretend to do that.
Any users of these functions cannot know what the state of the system is in
the error case. And the error paths are, to my knowledge, impossible to
test without adding custom error injection code.

Summary:

* This code has not worked reliably since its introduction.
* There is no evidence that it is used.
* It contains questionable rollback behaviors in error paths which are
   difficult to test.

So let's remove it.

Signed-off-by: Nathan Lynch 
Fixes: ac71380071d1 ("powerpc/pseries: Add CPU dlpar remove functionality")
Fixes: 90edf184b9b7 ("powerpc/pseries: Add CPU dlpar add functionality")
Fixes: b015f6bc9547 ("powerpc/pseries: Add cpu DLPAR support for drc-info 
property")
---


Tested with a QEMU pseries guest, no issues found.


Reviewed-by: Daniel Henrique Barboza 
Tested-by: Daniel Henrique Barboza 




  arch/powerpc/platforms/pseries/hotplug-cpu.c | 218 +--
  1 file changed, 2 insertions(+), 216 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 87a0fbe9cf12..768997261ce8 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -741,216 +741,6 @@ static int dlpar_cpu_remove_by_index(u32 drc_index)
return rc;
  }
  
-static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)

-{
-   struct device_node *dn;
-   int cpus_found = 0;
-   int rc;
-
-   /* We want to find cpus_to_remove + 1 CPUs to ensure we do not
-* remove the last CPU.
-*/
-   for_each_node_by_type(dn, "cpu") {
-   cpus_found++;
-
-   if (cpus_found > cpus_to_remove) {
-   of_node_put(dn);
-   break;
-   }
-
-   /* Note that cpus_found is always 1 ahead of the index
-* into the cpu_drcs array, so we use cpus_found - 1
-*/
-   rc = of_property_read_u32(dn, "ibm,my-drc-index",
- _drcs[cpus_found - 1]);
-   if (rc) {
-   pr_warn("Error occurred getting drc-index for %pOFn\n",
-   dn);
-   of_node_put(dn);
-   return -1;
-   }
-   }
-
-   if (cpus_found < cpus_to_remove) {
-   pr_warn("Failed to find enough CPUs (%d of %d) to remove\n",
-   cpus_found, cpus_to_remove);
-   } else if (cpus_found == cpus_to_remove) {
-   pr_warn("Cannot remove all CPUs\n");
-   }
-
-   return cpus_found;
-}
-
-static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
-{
-   u32 

[PATCH 3/3] powerpc/pseries/cpuhp: delete add/remove_by_count code

2021-09-20 Thread Nathan Lynch
The core DLPAR code supports two actions (add and remove) and three
subtypes of action:

* By DRC index: the action is attempted on a single specified resource.
  This is the usual case for processors.
* By indexed count: the action is attempted on a range of resources
  beginning at the specified index. This is implemented only by the memory
  DLPAR code.
* By count: the lower layer (CPU or memory) is responsible for locating the
  specified number of resources to which the action can be applied.

I cannot find any evidence of the "by count" subtype being used by drmgr or
qemu for processors. And when I try to exercise this code, the add case
does not work:

  $ ppc64_cpu --smt ; nproc
  SMT=8
  24
  $ printf "cpu remove count 2" > /sys/kernel/dlpar
  $ nproc
  8
  $ printf "cpu add count 2" > /sys/kernel/dlpar
  -bash: printf: write error: Invalid argument
  $ dmesg | tail -2
  pseries-hotplug-cpu: Failed to find enough CPUs (1 of 2) to add
  dlpar: Could not handle DLPAR request "cpu add count 2"
  $ nproc
  8
  $ drmgr -c cpu -a -q 2 # this uses the by-index method
  Validating CPU DLPAR capability...yes.
  CPU 1
  CPU 17
  $ nproc
  24

This is because find_drc_info_cpus_to_add() does not increment drc_index
appropriately during its search.

This is not hard to fix. But the _by_count() functions also have the
property that they attempt to roll back all prior operations if the entire
request cannot be satisfied, even though the rollback itself can encounter
errors. It's not possible to provide transaction-like behavior at this
level, and it's undesirable to have code that can only pretend to do that.
Any users of these functions cannot know what the state of the system is in
the error case. And the error paths are, to my knowledge, impossible to
test without adding custom error injection code.

Summary:

* This code has not worked reliably since its introduction.
* There is no evidence that it is used.
* It contains questionable rollback behaviors in error paths which are
  difficult to test.

So let's remove it.

Signed-off-by: Nathan Lynch 
Fixes: ac71380071d1 ("powerpc/pseries: Add CPU dlpar remove functionality")
Fixes: 90edf184b9b7 ("powerpc/pseries: Add CPU dlpar add functionality")
Fixes: b015f6bc9547 ("powerpc/pseries: Add cpu DLPAR support for drc-info 
property")
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 218 +--
 1 file changed, 2 insertions(+), 216 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 87a0fbe9cf12..768997261ce8 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -741,216 +741,6 @@ static int dlpar_cpu_remove_by_index(u32 drc_index)
return rc;
 }
 
-static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
-{
-   struct device_node *dn;
-   int cpus_found = 0;
-   int rc;
-
-   /* We want to find cpus_to_remove + 1 CPUs to ensure we do not
-* remove the last CPU.
-*/
-   for_each_node_by_type(dn, "cpu") {
-   cpus_found++;
-
-   if (cpus_found > cpus_to_remove) {
-   of_node_put(dn);
-   break;
-   }
-
-   /* Note that cpus_found is always 1 ahead of the index
-* into the cpu_drcs array, so we use cpus_found - 1
-*/
-   rc = of_property_read_u32(dn, "ibm,my-drc-index",
- _drcs[cpus_found - 1]);
-   if (rc) {
-   pr_warn("Error occurred getting drc-index for %pOFn\n",
-   dn);
-   of_node_put(dn);
-   return -1;
-   }
-   }
-
-   if (cpus_found < cpus_to_remove) {
-   pr_warn("Failed to find enough CPUs (%d of %d) to remove\n",
-   cpus_found, cpus_to_remove);
-   } else if (cpus_found == cpus_to_remove) {
-   pr_warn("Cannot remove all CPUs\n");
-   }
-
-   return cpus_found;
-}
-
-static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
-{
-   u32 *cpu_drcs;
-   int cpus_found;
-   int cpus_removed = 0;
-   int i, rc;
-
-   pr_debug("Attempting to hot-remove %d CPUs\n", cpus_to_remove);
-
-   cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL);
-   if (!cpu_drcs)
-   return -EINVAL;
-
-   cpus_found = find_dlpar_cpus_to_remove(cpu_drcs, cpus_to_remove);
-   if (cpus_found <= cpus_to_remove) {
-   kfree(cpu_drcs);
-   return -EINVAL;
-   }
-
-   for (i = 0; i < cpus_to_remove; i++) {
-   rc = dlpar_cpu_remove_by_index(cpu_drcs[i]);
-   if (rc)
-   break;
-
-   cpus_removed++;
-   }
-
-   if (cpus_removed != cpus_to_remove) {
-   pr_warn("CPU