[PATCH] powerpc/mm/drmem: Silence drmem_init() early return

2024-06-03 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

It's not an error or noteworthy condition if the
"ibm,dynamic-reconfiguration-memory" node isn't present.

Drop the needless message.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/mm/drmem.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index c110ab8fa8a3..8dd7b340d51f 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -491,10 +491,8 @@ static int __init drmem_init(void)
const __be32 *prop;
 
dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
-   if (!dn) {
-   pr_info("No dynamic reconfiguration memory found\n");
+   if (!dn)
return 0;
-   }
 
if (init_drmem_lmb_size(dn)) {
of_node_put(dn);

---
base-commit: be2fc65d66e0406cc9d39d40becaecdf4ee765f3
change-id: 20240603-silence-drmem_init-da4577e80b4c

Best regards,
-- 
Nathan Lynch 




[PATCH] powerpc/crypto: Add generated P8 asm to .gitignore

2024-06-03 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Looks like drivers/crypto/vmx/.gitignore should have been merged into
arch/powerpc/crypto/.gitignore as part of commit
109303336a0c ("crypto: vmx - Move to arch/powerpc/crypto") so that all
generated asm files are ignored.

Signed-off-by: Nathan Lynch 
Fixes: 109303336a0c ("crypto: vmx - Move to arch/powerpc/crypto")
---
 arch/powerpc/crypto/.gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/crypto/.gitignore b/arch/powerpc/crypto/.gitignore
index e1094f08f713..e9fe73aac8b6 100644
--- a/arch/powerpc/crypto/.gitignore
+++ b/arch/powerpc/crypto/.gitignore
@@ -1,3 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 aesp10-ppc.S
+aesp8-ppc.S
 ghashp10-ppc.S
+ghashp8-ppc.S

---
base-commit: be2fc65d66e0406cc9d39d40becaecdf4ee765f3
change-id: 20240603-powerpc-crypto-ignore-p8-asm-4d9f59da003e

Best regards,
-- 
Nathan Lynch 




[PATCH] powerpc/prom: Add CPU info to hardware description string later

2024-06-03 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

cur_cpu_spec->cpu_name is appended to ppc_hw_desc before cur_cpu_spec
has taken on its final value. This is illustrated on pseries by
comparing the CPU name as reported at boot ("POWER8E (raw)") to the
contents of /proc/cpuinfo ("POWER8 (architected)"):

  $ dmesg | grep Hardware
  Hardware name: IBM,8408-E8E POWER8E (raw) 0x4b0201 0xf04 \
of:IBM,FW860.50 (SV860_146) hv:phyp pSeries

  $ grep -m 1 ^cpu /proc/cpuinfo
  cpu : POWER8 (architected), altivec supported

Some 44x models would appear to be affected as well; see
identical_pvr_fixup().

This results in incorrect CPU information in stack dumps --
ppc_hw_desc is an input to dump_stack_set_arch_desc().

Delay gathering the CPU name until after all potential calls to
identify_cpu().

Signed-off-by: Nathan Lynch 
Fixes: bd649d40e0f2 ("powerpc: Add PVR & CPU name to hardware description")
---
Originally sent as part of an RFC series to update the hardware
description at runtime for live migration:

https://lore.kernel.org/linuxppc-dev/20240118-update-dump-stack-arch-str-v1-0-5c0f98d01...@linux.ibm.com/

Sending this separately as a standalone fix before I take that effort
up again.
---
 arch/powerpc/kernel/prom.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 60819751e55e..0be07ed407c7 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -331,6 +331,7 @@ static int __init early_init_dt_scan_cpus(unsigned long 
node,
  void *data)
 {
const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+   const __be32 *cpu_version = NULL;
const __be32 *prop;
const __be32 *intserv;
int i, nthreads;
@@ -420,7 +421,7 @@ static int __init early_init_dt_scan_cpus(unsigned long 
node,
prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
if (prop && (be32_to_cpup(prop) & 0xff00) == 0x0f00) {
identify_cpu(0, be32_to_cpup(prop));
-   seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(prop));
+   cpu_version = prop;
}
 
check_cpu_feature_properties(node);
@@ -431,6 +432,12 @@ static int __init early_init_dt_scan_cpus(unsigned long 
node,
}
 
identical_pvr_fixup(node);
+
+   // We can now add the CPU name & PVR to the hardware description
+   seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR));
+   if (cpu_version)
+   seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(cpu_version));
+
init_mmu_slb_size(node);
 
 #ifdef CONFIG_PPC64
@@ -881,9 +888,6 @@ void __init early_init_devtree(void *params)
 
dt_cpu_ftrs_scan();
 
-   // We can now add the CPU name & PVR to the hardware description
-   seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, 
mfspr(SPRN_PVR));
-
/* Retrieve CPU related informations from the flat tree
 * (altivec support, boot CPU ID, ...)
 */

---
base-commit: be2fc65d66e0406cc9d39d40becaecdf4ee765f3
change-id: 20240603-fix-cpu-hwdesc-1dc055b3f93f

Best regards,
-- 
Nathan Lynch 




[PATCH] powerpc/rtas: Prevent Spectre v1 gadget construction in sys_rtas()

2024-05-30 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Smatch warns:

  arch/powerpc/kernel/rtas.c:1932 __do_sys_rtas() warn: potential
  spectre issue 'args.args' [r] (local cap)

The 'nargs' and 'nret' locals come directly from a user-supplied
buffer and are used as indexes into a small stack-based array and as
inputs to copy_to_user() after they are subject to bounds checks.

Use array_index_nospec() after the bounds checks to clamp these values
for speculative execution.

Signed-off-by: Nathan Lynch 
Reported-by: Breno Leitao 
---
Based on a change originally submitted by Breno Leitao in 2018:

https://lore.kernel.org/linuxppc-dev/1534876926-21849-1-git-send-email-lei...@debian.org/

I've used a Reported-by: tag to credit Breno, let me know if you would
prefer a different tag (perhaps Co-developed-by?)
---
 arch/powerpc/kernel/rtas.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 8064d9c3de86..f7e86e09c49f 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include <linux/nospec.h>
 #include 
 #include 
 #include 
@@ -1916,6 +1917,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
|| nargs + nret > ARRAY_SIZE(args.args))
return -EINVAL;
 
+   nargs = array_index_nospec(nargs, ARRAY_SIZE(args.args));
+   nret = array_index_nospec(nret, ARRAY_SIZE(args.args) - nargs);
+
/* Copy in args. */
if (copy_from_user(args.args, uargs->args,
   nargs * sizeof(rtas_arg_t)) != 0)

---
base-commit: be2fc65d66e0406cc9d39d40becaecdf4ee765f3
change-id: 20240530-sys_rtas-nargs-nret-a188eaf5a500

Best regards,
-- 
Nathan Lynch 




[PATCH v2] powerpc/pseries/lparcfg: drop error message from guest name lookup

2024-05-24 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

It's not an error or exceptional situation when the hosting
environment does not expose a name for the LP/guest via RTAS or the
device tree. This happens with qemu when run without the '-name'
option. The message also lacks a newline. Remove it.

Signed-off-by: Nathan Lynch 
Fixes: eddaa9a40275 ("powerpc/pseries: read the lpar name from the firmware")
---
Changes in v2:
- Added Fixes: tag
- Link to v1: https://lore.kernel.org/r/20240104-lparcfg-updates-v1-1-66a0184a4...@linux.ibm.com
---
 arch/powerpc/platforms/pseries/lparcfg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lparcfg.c 
b/arch/powerpc/platforms/pseries/lparcfg.c
index 6e7029640c0c..62da20f9700a 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -371,8 +371,8 @@ static int read_dt_lpar_name(struct seq_file *m)
 
 static void read_lpar_name(struct seq_file *m)
 {
-   if (read_rtas_lpar_name(m) && read_dt_lpar_name(m))
-   pr_err_once("Error can't get the LPAR name");
+   if (read_rtas_lpar_name(m))
+   read_dt_lpar_name(m);
 }
 
 #define SPLPAR_MAXLENGTH 1026*(sizeof(char))

---
base-commit: 61700f816e6f58f6b1aaa881a69a784d146e30f0
change-id: 20231212-lparcfg-updates-ef15437c6570

Best regards,
-- 
Nathan Lynch 




[PATCH] powerpc/pseries: Enforce hcall result buffer validity and size

2024-04-08 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

plpar_hcall(), plpar_hcall9(), and related functions expect callers to
provide valid result buffers of certain minimum size. Currently this
is communicated only through comments in the code and the compiler has
no idea.

For example, if I write a bug like this:

  long retbuf[PLPAR_HCALL_BUFSIZE]; // should be PLPAR_HCALL9_BUFSIZE
  plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, ...);

This compiles with no diagnostics emitted, but likely results in stack
corruption at runtime when plpar_hcall9() stores results past the end
of the array. (To be clear this is a contrived example and I have not
found a real instance yet.)

To make this class of error less likely, we can use explicitly-sized
array parameters instead of pointers in the declarations for the hcall
APIs. When compiled with -Warray-bounds[1], the code above now
provokes a diagnostic like this:

error: array argument is too small;
is of size 32, callee requires at least 72 [-Werror,-Warray-bounds]
   60 | plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf,
  | ^   ~~

[1] Enabled for LLVM builds but not GCC for now. See commit
0da6e5fd6c37 ("gcc: disable '-Warray-bounds' for gcc-13 too") and
related changes.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/hvcall.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index a41e542ba94d..39cd1ca4ccb9 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -524,7 +524,7 @@ long plpar_hcall_norets_notrace(unsigned long opcode, ...);
  * Used for all but the craziest of phyp interfaces (see plpar_hcall9)
  */
 #define PLPAR_HCALL_BUFSIZE 4
-long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...);
+long plpar_hcall(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL_BUFSIZE], ...);
 
 /**
  * plpar_hcall_raw: - Make a hypervisor call without calculating hcall stats
@@ -538,7 +538,7 @@ long plpar_hcall(unsigned long opcode, unsigned long 
*retbuf, ...);
  * plpar_hcall, but plpar_hcall_raw works in real mode and does not
  * calculate hypervisor call statistics.
  */
-long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...);
+long plpar_hcall_raw(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL_BUFSIZE], ...);
 
 /**
  * plpar_hcall9: - Make a pseries hypervisor call with up to 9 return arguments
@@ -549,8 +549,8 @@ long plpar_hcall_raw(unsigned long opcode, unsigned long 
*retbuf, ...);
  * PLPAR_HCALL9_BUFSIZE to size the return argument buffer.
  */
 #define PLPAR_HCALL9_BUFSIZE 9
-long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...);
-long plpar_hcall9_raw(unsigned long opcode, unsigned long *retbuf, ...);
+long plpar_hcall9(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL9_BUFSIZE], ...);
+long plpar_hcall9_raw(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL9_BUFSIZE], ...);
 
 /* pseries hcall tracing */
 extern struct static_key hcall_tracepoint_key;

---
base-commit: bfe51886ca544956eb4ff924d1937ac01d0ca9c8
change-id: 20240408-pseries-hvcall-retbuf-c47c4d70d847

Best regards,
-- 
Nathan Lynch 




[PATCH v2] selftests/powerpc/papr-vpd: Fix missing variable initialization

2024-04-04 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The "close handle without consuming VPD" testcase has inconsistent
results because it fails to initialize the location code object it
passes to ioctl() to create a VPD handle. Initialize the location code
to the empty string as intended.

Signed-off-by: Nathan Lynch 
Reported-by: Geetika Moolchandani 
Fixes: 9118c5d32bdd ("powerpc/selftests: Add test for papr-vpd")
---
Changes in v2:
- Add Fixes: and Reported-by: tags.
- Link to v1: https://lore.kernel.org/r/20240404-papr-vpd-test-uninit-lc-v1-1-04cc22d7f...@linux.ibm.com
---
 tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c 
b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
index 505294da1b9f..d6f99eb9be65 100644
--- a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
+++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
@@ -154,7 +154,7 @@ static int dev_papr_vpd_null_handle(void)
 static int papr_vpd_close_handle_without_reading(void)
 {
const int devfd = open(DEVPATH, O_RDONLY);
-   struct papr_location_code lc;
+   struct papr_location_code lc = { .str = "", };
int fd;
 
SKIP_IF_MSG(devfd < 0 && errno == ENOENT,

---
base-commit: bfe51886ca544956eb4ff924d1937ac01d0ca9c8
change-id: 20240403-papr-vpd-test-uninit-lc-306ce6cd5167

Best regards,
-- 
Nathan Lynch 




[PATCH] selftests/powerpc/papr-vpd: Fix missing variable initialization

2024-04-04 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The "close handle without consuming VPD" testcase has inconsistent
results because it fails to initialize the location code object it
passes to ioctl() to create a VPD handle. Initialize the location code
to the empty string as intended.

Signed-off-by: Nathan Lynch 
---
 tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c 
b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
index 505294da1b9f..d6f99eb9be65 100644
--- a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
+++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
@@ -154,7 +154,7 @@ static int dev_papr_vpd_null_handle(void)
 static int papr_vpd_close_handle_without_reading(void)
 {
const int devfd = open(DEVPATH, O_RDONLY);
-   struct papr_location_code lc;
+   struct papr_location_code lc = { .str = "", };
int fd;
 
SKIP_IF_MSG(devfd < 0 && errno == ENOENT,

---
base-commit: bfe51886ca544956eb4ff924d1937ac01d0ca9c8
change-id: 20240403-papr-vpd-test-uninit-lc-306ce6cd5167

Best regards,
-- 
Nathan Lynch 




[PATCH] powerpc/rtas: use correct function name for resetting TCE tables

2024-02-22 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The PAPR spec spells the function name as

  "ibm,reset-pe-dma-windows"

but in practice firmware uses the singular form:

  "ibm,reset-pe-dma-window"

in the device tree. Since we have the wrong spelling in the RTAS
function table, reverse lookups (token -> name) fail and warn:

  unexpected failed lookup for token 86
  WARNING: CPU: 1 PID: 545 at arch/powerpc/kernel/rtas.c:659 __do_enter_rtas_trace+0x2a4/0x2b4
  CPU: 1 PID: 545 Comm: systemd-udevd Not tainted 6.8.0-rc4 #30
  Hardware name: IBM,9105-22A POWER10 (raw) 0x800200 0xf06 of:IBM,FW1060.00 (NL1060_028) hv:phyp pSeries
  NIP [c00417f0] __do_enter_rtas_trace+0x2a4/0x2b4
  LR [c00417ec] __do_enter_rtas_trace+0x2a0/0x2b4
  Call Trace:
   __do_enter_rtas_trace+0x2a0/0x2b4 (unreliable)
   rtas_call+0x1f8/0x3e0
   enable_ddw.constprop.0+0x4d0/0xc84
   dma_iommu_dma_supported+0xe8/0x24c
   dma_set_mask+0x5c/0xd8
   mlx5_pci_init.constprop.0+0xf0/0x46c [mlx5_core]
   probe_one+0xfc/0x32c [mlx5_core]
   local_pci_probe+0x68/0x12c
   pci_call_probe+0x68/0x1ec
   pci_device_probe+0xbc/0x1a8
   really_probe+0x104/0x570
   __driver_probe_device+0xb8/0x224
   driver_probe_device+0x54/0x130
   __driver_attach+0x158/0x2b0
   bus_for_each_dev+0xa8/0x120
   driver_attach+0x34/0x48
   bus_add_driver+0x174/0x304
   driver_register+0x8c/0x1c4
   __pci_register_driver+0x68/0x7c
   mlx5_init+0xb8/0x118 [mlx5_core]
   do_one_initcall+0x60/0x388
   do_init_module+0x7c/0x2a4
   init_module_from_file+0xb4/0x108
   idempotent_init_module+0x184/0x34c
   sys_finit_module+0x90/0x114

And oopses are possible when lockdep is enabled or the RTAS
tracepoints are active, since those paths dereference the result of
the lookup.

Use the correct spelling to match firmware's behavior, adjusting the
related constants to match.

Signed-off-by: Nathan Lynch 
Reported-by: Gaurav Batra 
Fixes: 8252b88294d2 ("powerpc/rtas: improve function information lookups")
---
 arch/powerpc/include/asm/rtas.h | 4 ++--
 arch/powerpc/kernel/rtas.c  | 9 +++++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 9bb2210c8d44..065ffd1b2f8a 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -69,7 +69,7 @@ enum rtas_function_index {
RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE,
RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2,
RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW,
-   RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS,
+   RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW,
RTAS_FNIDX__IBM_SCAN_LOG_DUMP,
RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR,
RTAS_FNIDX__IBM_SET_EEH_OPTION,
@@ -164,7 +164,7 @@ typedef struct {
 #define RTAS_FN_IBM_READ_SLOT_RESET_STATE 
rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE)
 #define RTAS_FN_IBM_READ_SLOT_RESET_STATE2
rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2)
 #define RTAS_FN_IBM_REMOVE_PE_DMA_WINDOW  
rtas_fn_handle(RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW)
-#define RTAS_FN_IBM_RESET_PE_DMA_WINDOWS  
rtas_fn_handle(RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS)
+#define RTAS_FN_IBM_RESET_PE_DMA_WINDOW   
rtas_fn_handle(RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW)
 #define RTAS_FN_IBM_SCAN_LOG_DUMP 
rtas_fn_handle(RTAS_FNIDX__IBM_SCAN_LOG_DUMP)
 #define RTAS_FN_IBM_SET_DYNAMIC_INDICATOR 
rtas_fn_handle(RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR)
 #define RTAS_FN_IBM_SET_EEH_OPTION
rtas_fn_handle(RTAS_FNIDX__IBM_SET_EEH_OPTION)
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 7e793b503e29..8064d9c3de86 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -375,8 +375,13 @@ static struct rtas_function rtas_function_table[] 
__ro_after_init = {
[RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = {
.name = "ibm,remove-pe-dma-window",
},
-   [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS] = {
-   .name = "ibm,reset-pe-dma-windows",
+   [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW] = {
+   /*
+* Note: PAPR+ v2.13 7.3.31.4.1 spells this as
+* "ibm,reset-pe-dma-windows" (plural), but RTAS
+* implementations use the singular form in practice.
+*/
+   .name = "ibm,reset-pe-dma-window",
},
[RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = {
.name = "ibm,scan-log-dump",

---
base-commit: b22ea627225b53ec7ce25c19d6df9fa8217d1643
change-id: 20240222-rtas-fix-ibm-reset-pe-dma-window-745dd1824011

Best regards,
-- 
Nathan Lynch 



[PATCH] powerpc/pseries/papr-sysparm: use u8 arrays for payloads

2024-02-02 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Some PAPR system parameter values are formatted by firmware as
nul-terminated strings (e.g. LPAR name, shared processor attributes).
But the values returned for other parameters, such as processor module
info and TLB block invalidate characteristics, are binary data with
parameter-specific layouts. So char[] isn't the appropriate type for
the general case. Use u8/__u8.

Signed-off-by: Nathan Lynch 
Fixes: 905b9e48786e ("powerpc/pseries/papr-sysparm: Expose character device to user space")
---
I'd like to get this in for v6.8 so the uapi header has the change for
its first point release.
---
 arch/powerpc/include/asm/papr-sysparm.h  | 2 +-
 arch/powerpc/include/uapi/asm/papr-sysparm.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/papr-sysparm.h 
b/arch/powerpc/include/asm/papr-sysparm.h
index 0dbbff59101d..c3cd5b131033 100644
--- a/arch/powerpc/include/asm/papr-sysparm.h
+++ b/arch/powerpc/include/asm/papr-sysparm.h
@@ -32,7 +32,7 @@ typedef struct {
  */
 struct papr_sysparm_buf {
__be16 len;
-   char val[PAPR_SYSPARM_MAX_OUTPUT];
+   u8 val[PAPR_SYSPARM_MAX_OUTPUT];
 };
 
 struct papr_sysparm_buf *papr_sysparm_buf_alloc(void);
diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h 
b/arch/powerpc/include/uapi/asm/papr-sysparm.h
index 9f9a0f267ea5..f733467b1534 100644
--- a/arch/powerpc/include/uapi/asm/papr-sysparm.h
+++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h
@@ -14,7 +14,7 @@ enum {
 struct papr_sysparm_io_block {
__u32 parameter;
__u16 length;
-   char data[PAPR_SYSPARM_MAX_OUTPUT];
+   __u8 data[PAPR_SYSPARM_MAX_OUTPUT];
 };
 
 /**

---
base-commit: 44a1aad2fe6c10bfe0589d8047057b10a4c18a19
change-id: 20240201-papr-sysparm-ioblock-data-use-u8-10d283cb6f1c

Best regards,
-- 
Nathan Lynch 



[PATCH RFC 0/5] dump_stack: Allow runtime updates of the hardware description

2024-01-18 Thread Nathan Lynch via B4 Relay
When the kernel emits a stack trace, typically it includes a hardware
description string, e.g.

  Kernel panic - not syncing: sysrq triggered crash
  CPU: 6 PID: 46433 Comm: bash Tainted: GW  6.7.0-rc2+ #83
> Hardware name: IBM,9040-MR9 POWER9 (architected) 0x4e2102 0xf05 
> of:IBM,FW950.01 (VM950_047) hv:phyp pSeries
  Call Trace:
   dump_stack_lvl+0xc4/0x170 (unreliable)
   panic+0x39c/0x584
   sysrq_handle_crash+0x80/0xe0
   __handle_sysrq+0x208/0x4bc
   [...]

This string is a statically allocated buffer populated during boot by
arch code calling dump_stack_set_arch_desc(). For most platforms this
is sufficient.

But the string may become inaccurate on the IBM PowerVM platform due
to live migration between machine models and firmware versions. Stack
dumps emitted after a migration reflect the machine on which the
kernel booted, not necessarily the machine on which it is currently
running. This is potentially confusing for anyone investigating
kernel issues on the platform.

To address this, this series introduces a new function that safely
updates the hardware description string and updates the powerpc
pseries platform code to call it after a migration. The series also
includes changes addressing minor latent issues identified during the
implementation.

Platforms which do not need the new functionality remain unchanged.

For this initial version at least, the powerpc/pseries part includes
some "self-test" code that 1. verifies that reconstructing the
hardware description string late in boot matches the one that was
built earlier, and 2. fully exercises the update path before any
migrations occur. This could be dropped or made configurable in the
future.

Signed-off-by: Nathan Lynch 
---
Nathan Lynch (5):
  dump_stack: Make arch description buffer __ro_after_init
  dump_stack: Allow update of arch description string at runtime
  powerpc/prom: Add CPU info to hardware description string later
  powerpc/pseries: Prepare pseries_add_hw_description() for runtime use
  powerpc/pseries: Update hardware description string after migration

 arch/powerpc/kernel/prom.c| 12 +++--
 arch/powerpc/platforms/pseries/mobility.c |  5 ++
 arch/powerpc/platforms/pseries/pseries.h  |  1 +
 arch/powerpc/platforms/pseries/setup.c| 80 +--
 include/linux/printk.h|  5 ++
 lib/dump_stack.c  | 57 --
 6 files changed, 146 insertions(+), 14 deletions(-)
---
base-commit: 44a1aad2fe6c10bfe0589d8047057b10a4c18a19
change-id: 20240111-update-dump-stack-arch-str-7f0880d23f30

Best regards,
-- 
Nathan Lynch 



[PATCH RFC 5/5] powerpc/pseries: Update hardware description string after migration

2024-01-18 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Introduce code that rebuilds the short hardware description printed by
stack traces. This sort of duplicates some code from boot (prom.c
mainly), but that code populates the string as early as possible using
APIs that aren't available later. So sharing all the code between the
boot and runtime versions isn't feasible.

To prevent "drift" between the boot and runtime versions, rebuild the
description using the new runtime APIs in a late initcall and warn if
it doesn't match the one built earlier. The initcall also invokes
dump_stack_update_arch_desc() twice to fully exercise it before any
partition migration occurs. These checks could be dropped or made
configurable later.

Call pseries_update_hw_description() immediately after updating the
device tree when resuming from a partition migration.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/mobility.c |  5 +++
 arch/powerpc/platforms/pseries/pseries.h  |  1 +
 arch/powerpc/platforms/pseries/setup.c| 70 +++
 3 files changed, 76 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index 1798f0f14d58..ff573cb5aee5 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -378,6 +378,11 @@ void post_mobility_fixup(void)
rc = pseries_devicetree_update(MIGRATION_SCOPE);
if (rc)
pr_err("device tree update failed: %d\n", rc);
+   /*
+* Rebuild the hardware description printed in stack traces
+* using the updated device tree.
+*/
+   pseries_update_hw_description();
 
cacheinfo_rebuild();
 
diff --git a/arch/powerpc/platforms/pseries/pseries.h 
b/arch/powerpc/platforms/pseries/pseries.h
index bba4ad192b0f..810a64fccc7e 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -56,6 +56,7 @@ extern int dlpar_acquire_drc(u32 drc_index);
 extern int dlpar_release_drc(u32 drc_index);
 extern int dlpar_unisolate_drc(u32 drc_index);
 extern void post_mobility_fixup(void);
+void pseries_update_hw_description(void);
 
 void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog);
 int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog);
diff --git a/arch/powerpc/platforms/pseries/setup.c 
b/arch/powerpc/platforms/pseries/setup.c
index 9ae1951f8312..72177411026e 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -1034,6 +1034,76 @@ static void pseries_add_hw_description(struct seq_buf 
*sb)
seq_buf_printf(sb, "hv:phyp ");
 }
 
+static void pseries_rebuild_hw_desc(struct seq_buf *sb)
+{
+   struct device_node *cpudn, *root;
+   const char *model;
+   u32 cpu_version;
+
+   seq_buf_clear(sb);
+
+   root = of_find_node_by_path("/");
+   if (!of_property_read_string(root, "model", &model))
+   seq_buf_printf(sb, "%s ", model);
+   of_node_put(root);
+
+   seq_buf_printf(sb, "%s 0x%04lx ", cur_cpu_spec->cpu_name, 
mfspr(SPRN_PVR));
+
+   cpudn = of_get_next_cpu_node(NULL);
+   if (!of_property_read_u32(cpudn, "cpu-version", &cpu_version)) {
+   if ((cpu_version & 0xff00) == 0x0f00)
+   seq_buf_printf(sb, "0x%04x ", cpu_version);
+   }
+   of_node_put(cpudn);
+
+   pseries_add_hw_description(sb);
+
+   seq_buf_puts(sb, ppc_md.name);
+}
+
+void pseries_update_hw_description(void)
+{
+   struct seq_buf sb = { // todo: use DECLARE_SEQ_BUF() once it's fixed
+   .buffer = (char[128]) { 0 },
+   .size = sizeof(char[128]),
+   };
+
+   pseries_rebuild_hw_desc(&sb);
+   dump_stack_update_arch_desc("%s", seq_buf_str(&sb));
+}
+
+static int __init pseries_test_update_hw_desc(void)
+{
+   struct seq_buf sb = { // todo: use DECLARE_SEQ_BUF() once it's fixed
+   .buffer = (char[128]) { 0 },
+   .size = sizeof(char[128]),
+   };
+   bool mismatch;
+
+   /*
+* Ensure the rebuilt description matches the one built during
+* boot.
+*/
+   pseries_rebuild_hw_desc(&sb);
+
+   mismatch = strcmp(seq_buf_str(&ppc_hw_desc), seq_buf_str(&sb));
+   if (WARN(mismatch, "rebuilt hardware description string mismatch")) {
+   pr_err("  boot:'%s'\n", ppc_hw_desc.buffer);
+   pr_err("  runtime: '%s'\n", sb.buffer);
+   return -EINVAL;
+   }
+
+   /*
+* Invoke dump_stack_update_arch_desc() *twice* to ensure it
+* exercises the free path.
+*/
+   dump_stack_update_arch_desc("%s", sb.buffer);
+   dump_stack_update_arch_desc("%s", sb.buffer);
+
+   return 0;
+}
+late_initcall(pseries_test_update_hw_desc);
+
 /*
  * Early initialization.  Relocation is on but do not reference unbolted pages
  */

-- 
2.43.0



[PATCH RFC 2/5] dump_stack: Allow update of arch description string at runtime

2024-01-18 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The IBM PowerVM platform (targeted by powerpc/pseries) exposes the
physical machine model and firmware version to partitions (guests),
and this information is used to populate the arch description string,
e.g.

  IBM,8408-E8E POWER8E (raw) 0x4b0201 0xf04 \
of:IBM,FW860.50 (SV860_146) hv:phyp pSeries

The platform supports live migration of partitions between different
machine models and firmware versions, so the arch description string
set at boot can become inaccurate, potentially misleading anyone who's
analyzing stack traces produced after a migration.

Introduce an RCU-guarded pointer to the current arch description
string, initializing it to the static buffer populated at boot. Add to
dump_stack_print_info() an RCU read-side critical section that accesses
the buffer through this pointer. The majority of architectures which
don't need to update the string after boot incur only an additional
indirection.

As for platforms which do need that ability, they can use
dump_stack_update_arch_desc(), which allocates and formats a new
buffer, updates the pointer, and if appropriate frees the previous
buffer.

Signed-off-by: Nathan Lynch 
---
 include/linux/printk.h |  5 +
 lib/dump_stack.c   | 54 ++
 2 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 8ef499ab3c1e..6138ae019d2a 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -187,6 +187,7 @@ u32 log_buf_len_get(void);
 void log_buf_vmcoreinfo_setup(void);
 void __init setup_log_buf(int early);
 __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...);
+__printf(1, 2) void dump_stack_update_arch_desc(const char *fmt, ...);
 void dump_stack_print_info(const char *log_lvl);
 void show_regs_print_info(const char *log_lvl);
 extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
@@ -253,6 +254,10 @@ static inline __printf(1, 2) void 
dump_stack_set_arch_desc(const char *fmt, ...)
 {
 }
 
+static inline __printf(1, 2) void dump_stack_update_arch_desc(const char *fmt, ...)
+{
+}
+
 static inline void dump_stack_print_info(const char *log_lvl)
 {
 }
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index 1057f102f6f2..bd497e7797ee 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -8,15 +8,18 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 
 static char dump_stack_arch_desc_str[128] __ro_after_init;
+static const char *dump_stack_arch_desc_ptr = dump_stack_arch_desc_str;
 
 /**
  * dump_stack_set_arch_desc - set arch-specific str to show with task dumps
@@ -28,7 +31,7 @@ static char dump_stack_arch_desc_str[128] __ro_after_init;
  * arch wants to make use of such an ID string, it should initialize this
  * as soon as possible during boot.
  */
-void __init dump_stack_set_arch_desc(const char *fmt, ...)
+void dump_stack_set_arch_desc(const char *fmt, ...)
 {
va_list args;
 
@@ -38,6 +41,45 @@ void __init dump_stack_set_arch_desc(const char *fmt, ...)
va_end(args);
 }
 
+/**
+ * dump_stack_update_arch_desc() - Update the arch description string at runtime.
+ * @fmt: printf-style format string
+ * @...: arguments for the format string
+ *
+ * A runtime counterpart of dump_stack_set_arch_desc(). Arch code
+ * should use this when the arch description set at boot potentially
+ * has become inaccurate, such as after a guest migration.
+ *
+ * Context: May sleep.
+ */
+void dump_stack_update_arch_desc(const char *fmt, ...)
+{
+   static DEFINE_SPINLOCK(arch_desc_update_lock);
+   const char *old;
+   const char *new;
+   va_list args;
+
+   va_start(args, fmt);
+   new = kvasprintf(GFP_KERNEL, fmt, args);
+   va_end(args);
+
+   if (!new)
+   return;
+
+   spin_lock(&arch_desc_update_lock);
+   old = rcu_replace_pointer(dump_stack_arch_desc_ptr, new,
+ lockdep_is_held(&arch_desc_update_lock));
+   spin_unlock(&arch_desc_update_lock);
+
+   /*
+* Avoid freeing the static buffer initialized during boot.
+*/
+   if (old == dump_stack_arch_desc_str)
+   return;
+
+   kfree_rcu_mightsleep(old);
+}
+
 #if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID)
 #define BUILD_ID_FMT " %20phN"
 #define BUILD_ID_VAL vmlinux_build_id
@@ -55,6 +97,8 @@ void __init dump_stack_set_arch_desc(const char *fmt, ...)
  */
 void dump_stack_print_info(const char *log_lvl)
 {
+   const char *arch_str;
+
printk("%sCPU: %d PID: %d Comm: %.20s %s%s %s %.*s" BUILD_ID_FMT "\n",
   log_lvl, raw_smp_processor_id(), current->pid, current->comm,
   kexec_crash_loaded() ? "Kdump: loaded " : "",
@@ -63,9 +107,11 @@ void dump_stack_print_info(const char *log_lvl)
   (int)strcspn(init_utsname()->version, " "),
   init_utsname()->version, BUILD_ID_

[PATCH RFC 3/5] powerpc/prom: Add CPU info to hardware description string later

2024-01-18 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

cur_cpu_spec->cpu_name is appended to ppc_hw_desc before cur_cpu_spec
has taken on its final value. This is illustrated on pseries by
comparing the CPU name as reported at boot ("POWER8E (raw)") to the
contents of /proc/cpuinfo ("POWER8 (architected)"):

  $ dmesg | grep Hardware
  Hardware name: IBM,8408-E8E POWER8E (raw) 0x4b0201 0xf04 \
of:IBM,FW860.50 (SV860_146) hv:phyp pSeries

  $ grep -m 1 ^cpu /proc/cpuinfo
  cpu : POWER8 (architected), altivec supported

Some 44x models would appear to be affected as well; see
identical_pvr_fixup().

This results in incorrect CPU information in stack dumps --
ppc_hw_desc is an input to dump_stack_set_arch_desc().

Delay gathering the CPU name until after all potential calls to
identify_cpu().

Signed-off-by: Nathan Lynch 
Fixes: bd649d40e0f2 ("powerpc: Add PVR & CPU name to hardware description")
---
 arch/powerpc/kernel/prom.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 0b5878c3125b..c12b4434336f 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -327,6 +327,7 @@ static int __init early_init_dt_scan_cpus(unsigned long 
node,
  void *data)
 {
const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+   const __be32 *cpu_version = NULL;
const __be32 *prop;
const __be32 *intserv;
int i, nthreads;
@@ -398,7 +399,7 @@ static int __init early_init_dt_scan_cpus(unsigned long 
node,
prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
if (prop && (be32_to_cpup(prop) & 0xff00) == 0x0f00) {
identify_cpu(0, be32_to_cpup(prop));
-   seq_buf_printf(&ppc_hw_desc, "0x%04x ", 
be32_to_cpup(prop));
+   cpu_version = prop;
}
 
check_cpu_feature_properties(node);
@@ -409,6 +410,12 @@ static int __init early_init_dt_scan_cpus(unsigned long 
node,
}
 
identical_pvr_fixup(node);
+
+   // We can now add the CPU name & PVR to the hardware description
+   seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, 
mfspr(SPRN_PVR));
+   if (cpu_version)
+   seq_buf_printf(&ppc_hw_desc, "0x%04x ", 
be32_to_cpup(cpu_version));
+
init_mmu_slb_size(node);
 
 #ifdef CONFIG_PPC64
@@ -846,9 +853,6 @@ void __init early_init_devtree(void *params)
 
dt_cpu_ftrs_scan();
 
-   // We can now add the CPU name & PVR to the hardware description
-   seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, 
mfspr(SPRN_PVR));
-
/* Retrieve CPU related informations from the flat tree
 * (altivec support, boot CPU ID, ...)
 */

-- 
2.43.0



[PATCH RFC 4/5] powerpc/pseries: Prepare pseries_add_hw_description() for runtime use

2024-01-18 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

pseries_add_hw_description() will be used after boot to update the
hardware description string emitted in stack dumps. Remove the __init
and make it take a seq_buf * parameter instead of referencing
ppc_hw_desc directly.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/setup.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/setup.c 
b/arch/powerpc/platforms/pseries/setup.c
index ecea85c74c43..9ae1951f8312 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -1007,7 +1007,7 @@ static void __init pSeries_cmo_feature_init(void)
pr_debug(" <- fw_cmo_feature_init()\n");
 }
 
-static void __init pseries_add_hw_description(void)
+static void pseries_add_hw_description(struct seq_buf *sb)
 {
struct device_node *dn;
const char *s;
@@ -1015,7 +1015,7 @@ static void __init pseries_add_hw_description(void)
dn = of_find_node_by_path("/openprom");
if (dn) {
if (of_property_read_string(dn, "model", &s) == 0)
-   seq_buf_printf(&ppc_hw_desc, "of:%s ", s);
+   seq_buf_printf(sb, "of:%s ", s);
 
of_node_put(dn);
}
@@ -1023,7 +1023,7 @@ static void __init pseries_add_hw_description(void)
dn = of_find_node_by_path("/hypervisor");
if (dn) {
if (of_property_read_string(dn, "compatible", &s) == 0)
-   seq_buf_printf(&ppc_hw_desc, "hv:%s ", s);
+   seq_buf_printf(sb, "hv:%s ", s);
 
of_node_put(dn);
return;
@@ -1031,7 +1031,7 @@ static void __init pseries_add_hw_description(void)
 
if (of_property_read_bool(of_root, "ibm,powervm-partition") ||
of_property_read_bool(of_root, "ibm,fw-net-version"))
-   seq_buf_printf(&ppc_hw_desc, "hv:phyp ");
+   seq_buf_printf(sb, "hv:phyp ");
 }
 
 /*
@@ -1041,7 +1041,7 @@ static void __init pseries_init(void)
 {
pr_debug(" -> pseries_init()\n");
 
-   pseries_add_hw_description();
+   pseries_add_hw_description(&ppc_hw_desc);
 
 #ifdef CONFIG_HVC_CONSOLE
if (firmware_has_feature(FW_FEATURE_LPAR))

-- 
2.43.0



[PATCH RFC 1/5] dump_stack: Make arch description buffer __ro_after_init

2024-01-18 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The static hardware description buffer is populated by arch code
during boot and should not change afterwards, so mark it
__ro_after_init.

Signed-off-by: Nathan Lynch 
---
 lib/dump_stack.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index 83471e81501a..1057f102f6f2 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -6,6 +6,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -15,7 +16,7 @@
 #include 
 #include 
 
-static char dump_stack_arch_desc_str[128];
+static char dump_stack_arch_desc_str[128] __ro_after_init;
 
 /**
  * dump_stack_set_arch_desc - set arch-specific str to show with task dumps

-- 
2.43.0



[PATCH RFC] powerpc/pseries: exploit H_PAGE_SET_UNUSED for partition migration

2024-01-11 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Although the H_PAGE_INIT hcall's H_PAGE_SET_UNUSED historically has
been tied to the cooperative memory overcommit (CMO) platform feature,
the flag also is treated by the PowerVM hypervisor as a hint that the
page contents need not be copied to the destination during a live
partition migration.

Use the "ibm,migratable-partition" root node property to determine
whether this partition/guest can be migrated. Mark freed pages unused
if so (or if CMO is in use, as before).

Signed-off-by: Nathan Lynch 
---
Several things yet to improve here:

* powerpc's arch_free_page()/HAVE_ARCH_FREE_PAGE should be decoupled
  from CONFIG_PPC_SMLPAR.

* powerpc's arch_free_page() could be made to use a static key if
  justified.

* I have not yet measured the overhead this introduces, nor have I
  measured the benefit to a live migration.

To date, I have smoke tested it by doing a live migration and
performing a build on a kernel with the change, to ensure it doesn't
introduce obvious memory corruption or anything. It hasn't blown up
yet :-)

This will be a possibly significant behavior change in that we will be
flagging pages unused where we typically did not before. Until now,
having CMO enabled was the only way to do this, and I don't think that
feature is used all that much?

Posting this as RFC to see if there are any major concerns.
---
 arch/powerpc/platforms/pseries/lpar.c | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c 
b/arch/powerpc/platforms/pseries/lpar.c
index 4561667832ed..b264371d8e12 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1772,17 +1773,25 @@ static void pSeries_set_page_state(struct page *page, 
int order,
}
 }
 
+static bool migratable_partition;
+
 void arch_free_page(struct page *page, int order)
 {
-   if (radix_enabled())
-   return;
-   if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO))
-   return;
-
-   pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED);
+   if (migratable_partition ||
+   (firmware_has_feature(FW_FEATURE_CMO) && cmo_free_hint_flag))
+   pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED);
 }
 EXPORT_SYMBOL(arch_free_page);
 
+static int __init check_migratable_partition(void)
+{
+   struct device_node *root = of_find_node_by_path("/");
+   migratable_partition = !!of_find_property(root, 
"ibm,migratable-partition", NULL);
+   of_node_put(root);
+   return 0;
+}
+machine_device_initcall(pseries, check_migratable_partition);
+
 #endif /* CONFIG_PPC_SMLPAR */
 #endif /* CONFIG_PPC_BOOK3S_64 */
 

---
base-commit: 44a1aad2fe6c10bfe0589d8047057b10a4c18a19
change-id: 20240111-h_page_set_unused-for-lpm-d9027c672fe8

Best regards,
-- 
Nathan Lynch 



[PATCH] powerpc/pseries/lparcfg: drop error message from guest name lookup

2024-01-04 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

It's not an error or exceptional situation when the hosting
environment does not expose a name for the LP/guest via RTAS or the
device tree. This happens with qemu when run without the '-name'
option. The message also lacks a newline. Remove it.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/lparcfg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lparcfg.c 
b/arch/powerpc/platforms/pseries/lparcfg.c
index 1c151d77e74b..ff02ade17a75 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -357,8 +357,8 @@ static int read_dt_lpar_name(struct seq_file *m)
 
 static void read_lpar_name(struct seq_file *m)
 {
-   if (read_rtas_lpar_name(m) && read_dt_lpar_name(m))
-   pr_err_once("Error can't get the LPAR name");
+   if (read_rtas_lpar_name(m))
+   read_dt_lpar_name(m);
 }
 
 #define SPLPAR_MAXLENGTH 1026*(sizeof(char))

---
base-commit: 44a1aad2fe6c10bfe0589d8047057b10a4c18a19
change-id: 20231212-lparcfg-updates-ef15437c6570

Best regards,
-- 
Nathan Lynch 



[PATCH v6 13/13] powerpc/selftests: Add test for papr-sysparm

2023-12-12 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Consistently testing system parameter access is a bit difficult by
nature -- the set of parameters available depends on the model and
system configuration, and updating a parameter should be considered a
destructive operation reserved for the admin.

So we validate some of the error paths and retrieve the SPLPAR
characteristics string, but not much else.

Signed-off-by: Nathan Lynch 
---
 tools/testing/selftests/powerpc/Makefile   |   1 +
 .../selftests/powerpc/papr_sysparm/.gitignore  |   1 +
 .../selftests/powerpc/papr_sysparm/Makefile|  12 ++
 .../selftests/powerpc/papr_sysparm/papr_sysparm.c  | 196 +
 4 files changed, 210 insertions(+)

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 05fc68d446c2..c376151982c4 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -33,6 +33,7 @@ SUB_DIRS = alignment  \
   math \
   papr_attributes  \
   papr_vpd \
+  papr_sysparm \
   ptrace   \
   security \
   mce
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/.gitignore 
b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
new file mode 100644
index ..f2a69bf59d40
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
@@ -0,0 +1 @@
+/papr_sysparm
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile 
b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
new file mode 100644
index ..7f79e437634a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+   $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_sysparm
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_sysparm: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c 
b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
new file mode 100644
index ..d5436de5b8ed
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-sysparm"
+
+static int open_close(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int get_splpar(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = 20, // SPLPAR characteristics
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != 0);
+   FAIL_IF(sp.length == 0);
+   FAIL_IF(sp.length > sizeof(sp.data));
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int get_bad_parameter(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = UINT32_MAX, // there are only ~60 specified 
parameters
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != -1);
+   FAIL_IF(errno != EOPNOTSUPP);
+
+   // Ensure the buffer is unchanged
+   FAIL_IF(sp.length != 0);
+   for (size_t i = 0; i < ARRAY_SIZE(sp.data); ++i)
+   FAIL_IF(sp.data[i] != 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int check_efault_common(unsigned long cmd)
+{
+   const int devfd = open(DEVPATH, O_RDWR);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, cmd, NULL) != -1);
+   FAIL_IF(errno != EFAULT);
+
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int check_efault_get(void)
+{
+   return check_efault_common(PAPR_SYSPARM_IOC_GET);
+}
+
+static int check_efault_set(void)
+{
+   return check_efault_common(PAPR_SYSPARM_IOC_SET);
+}
+
+static int set_hmc0(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = 0, // HMC0, not a settable parameter
+   };
+   const int devfd = open(DEVPATH, O_RDWR);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, PAPR_SYSP

[PATCH v6 12/13] powerpc/selftests: Add test for papr-vpd

2023-12-12 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Add selftests for /dev/papr-vpd, exercising the common expected use
cases:

* Retrieve all VPD by passing an empty location code.
* Retrieve the "system VPD" by passing a location code derived from DT
  root node properties, as done by the vpdupdate command.

The tests also verify that certain intended properties of the driver
hold:

* Passing an unterminated location code to PAPR_VPD_CREATE_HANDLE gets
  EINVAL.
* Passing a NULL location code pointer to PAPR_VPD_CREATE_HANDLE gets
  EFAULT.
* Closing the device node without first issuing a
  PAPR_VPD_CREATE_HANDLE command to it succeeds.
* Releasing a handle without first consuming any data from it
  succeeds.
* Re-reading the contents of a handle returns the same data as the
  first time.

Some minimal validation of the returned data is performed.

The tests are skipped on systems where the papr-vpd driver does not
initialize, making this useful only on PowerVM LPARs at this point.

Signed-off-by: Nathan Lynch 
---
 tools/testing/selftests/powerpc/Makefile   |   1 +
 .../testing/selftests/powerpc/papr_vpd/.gitignore  |   1 +
 tools/testing/selftests/powerpc/papr_vpd/Makefile  |  12 +
 .../testing/selftests/powerpc/papr_vpd/papr_vpd.c  | 352 +
 4 files changed, 366 insertions(+)

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 7ea42fa02eab..05fc68d446c2 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -32,6 +32,7 @@ SUB_DIRS = alignment  \
   vphn \
   math \
   papr_attributes  \
+  papr_vpd \
   ptrace   \
   security \
   mce
diff --git a/tools/testing/selftests/powerpc/papr_vpd/.gitignore 
b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
new file mode 100644
index ..49285031a656
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
@@ -0,0 +1 @@
+/papr_vpd
diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile 
b/tools/testing/selftests/powerpc/papr_vpd/Makefile
new file mode 100644
index ..06b719703bfd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+   $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_vpd
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_vpd: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c 
b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
new file mode 100644
index ..98cbb9109ee6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-vpd"
+
+static int dev_papr_vpd_open_close(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int dev_papr_vpd_get_handle_all(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+   struct papr_location_code lc = { .str = "", };
+   off_t size;
+   int fd;
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   errno = 0;
+   fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+   FAIL_IF(errno != 0);
+   FAIL_IF(fd < 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   size = lseek(fd, 0, SEEK_END);
+   FAIL_IF(size <= 0);
+
+   void *buf = malloc((size_t)size);
+   FAIL_IF(!buf);
+
+   ssize_t consumed = pread(fd, buf, size, 0);
+   FAIL_IF(consumed != size);
+
+   /* Ensure EOF */
+   FAIL_IF(read(fd, buf, size) != 0);
+   FAIL_IF(close(fd));
+
+   /* Verify that the buffer looks like VPD */
+   static const char needle[] = "System VPD";
+   FAIL_IF(!memmem(buf, size, needle, strlen(needle)));
+
+   return 0;
+}
+
+static int dev_papr_vpd_get_handle_byte_at_a_time(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+   struct papr_location_code lc = { .str = "", };
+   int fd;
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   errno = 0;
+   fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+   FAIL_IF(errno != 0);
+   FAIL_IF(fd < 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   size_t consumed = 0;
+   while (1) {
+   ssize_t res;
+   char c;
+
+   errno = 0;
+   res = read(fd, &c, sizeof(c));
+   FAIL_IF(res > sizeof(c))

[PATCH v6 11/13] powerpc/pseries/papr-sysparm: Expose character device to user space

2023-12-12 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Until now the papr_sysparm APIs have been kernel-internal. But user
space needs access to PAPR system parameters too. The only method
available to user space today to get or set system parameters is using
sys_rtas() and /dev/mem to pass RTAS-addressable buffers between user
space and firmware. This is incompatible with lockdown and should be
deprecated.

So provide an alternative ABI to user space in the form of a
/dev/papr-sysparm character device with just two ioctl commands (get
and set). The data payloads involved are small enough to fit in the
ioctl argument buffer, making the code relatively simple.

Exposing the system parameters through sysfs has been considered but
it would be too awkward:

* The kernel currently does not have to contain an exhaustive list of
  defined system parameters. This is a convenient property to maintain
  because we don't have to update the kernel whenever a new parameter
  is added to PAPR. Exporting a named attribute in sysfs for each
  parameter would negate this.

* Some system parameters are text-based and some are not.

* Retrieval of at least one system parameter requires input data,
  which a simple read-oriented interface can't support.

Signed-off-by: Nathan Lynch 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   2 +
 arch/powerpc/include/asm/papr-sysparm.h|  17 ++-
 arch/powerpc/include/uapi/asm/papr-sysparm.h   |  58 
 arch/powerpc/platforms/pseries/papr-sysparm.c  | 158 -
 4 files changed, 227 insertions(+), 8 deletions(-)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index a950545bf7cd..d8b6cb1a3636 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -351,6 +351,8 @@ Code  Seq#Include File  
 Comments
  

 0xB2  00 arch/powerpc/include/uapi/asm/papr-vpd.h
powerpc/pseries VPD API
  

+0xB2  01-02  arch/powerpc/include/uapi/asm/papr-sysparm.h
powerpc/pseries system parameter API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/asm/papr-sysparm.h 
b/arch/powerpc/include/asm/papr-sysparm.h
index f5fdbd8ae9db..0dbbff59101d 100644
--- a/arch/powerpc/include/asm/papr-sysparm.h
+++ b/arch/powerpc/include/asm/papr-sysparm.h
@@ -2,8 +2,10 @@
 #ifndef _ASM_POWERPC_PAPR_SYSPARM_H
 #define _ASM_POWERPC_PAPR_SYSPARM_H
 
+#include 
+
 typedef struct {
-   const u32 token;
+   u32 token;
 } papr_sysparm_t;
 
 #define mk_papr_sysparm(x_) ((papr_sysparm_t){ .token = x_, })
@@ -20,11 +22,14 @@ typedef struct {
 #define PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRSmk_papr_sysparm(50)
 #define PAPR_SYSPARM_LPAR_NAME mk_papr_sysparm(55)
 
-enum {
-   PAPR_SYSPARM_MAX_INPUT  = 1024,
-   PAPR_SYSPARM_MAX_OUTPUT = 4000,
-};
-
+/**
+ * struct papr_sysparm_buf - RTAS work area layout for system parameter 
functions.
+ *
+ * This is the memory layout of the buffers passed to/from
+ * ibm,get-system-parameter and ibm,set-system-parameter. It is
+ * distinct from the papr_sysparm_io_block structure that is passed
+ * between user space and the kernel.
+ */
 struct papr_sysparm_buf {
__be16 len;
char val[PAPR_SYSPARM_MAX_OUTPUT];
diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h 
b/arch/powerpc/include/uapi/asm/papr-sysparm.h
new file mode 100644
index ..9f9a0f267ea5
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_SYSPARM_H_
+#define _UAPI_PAPR_SYSPARM_H_
+
+#include 
+#include 
+#include 
+
+enum {
+   PAPR_SYSPARM_MAX_INPUT  = 1024,
+   PAPR_SYSPARM_MAX_OUTPUT = 4000,
+};
+
+struct papr_sysparm_io_block {
+   __u32 parameter;
+   __u16 length;
+   char data[PAPR_SYSPARM_MAX_OUTPUT];
+};
+
+/**
+ * PAPR_SYSPARM_IOC_GET - Retrieve the value of a PAPR system parameter.
+ *
+ * Uses _IOWR because of one corner case: Retrieving the value of the
+ * "OS Service Entitlement Status" parameter (60) requires the caller
+ * to supply input data (a date string) in the buffer passed to
+ * firmware. So the @length and @data of the incoming
+ * papr_sysparm_io_block are always used to initialize the work area
+ * supplied to ibm,get-system-parameter. No other parameters are known
+ * to pa

[PATCH v6 06/13] powerpc/rtas: Facilitate high-level call sequences

2023-12-12 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

On RTAS platforms there is a general restriction that the OS must not
enter RTAS on more than one CPU at a time. This low-level
serialization requirement is satisfied by holding a spin
lock (rtas_lock) across most RTAS function invocations.

However, some pseries RTAS functions require multiple successive calls
to complete a logical operation. Beginning a new call sequence for such a
function may disrupt any other sequences of that function already in
progress. Safe and reliable use of these functions effectively
requires higher-level serialization beyond what is already done at the
level of RTAS entry and exit.

Where a sequence-based RTAS function is invoked only through
sys_rtas(), with no in-kernel users, there is no issue as far as the
kernel is concerned. User space is responsible for appropriately
serializing its call sequences. (Whether user space code actually
takes measures to prevent sequence interleaving is another matter.)
Examples of such functions currently include ibm,platform-dump and
ibm,get-vpd.

But where a sequence-based RTAS function has both user space and
in-kernel users, there is a hazard. Even if the in-kernel call sites
of such a function serialize their sequences correctly, a user of
sys_rtas() can invoke the same function at any time, potentially
disrupting a sequence in progress.

So in order to prevent disruption of kernel-based RTAS call sequences,
they must serialize not only with themselves but also with sys_rtas()
users, somehow. Preferably without adding more function-specific hacks
to sys_rtas(). This is a prerequisite for adding an in-kernel call
sequence of ibm,get-vpd, which is in a change to follow.

Note that it has never been feasible for the kernel to prevent
sys_rtas()-based sequences from being disrupted because control
returns to user space on every call. sys_rtas()-based users of these
functions have always been, and continue to be, responsible for
coordinating their call sequences with other users, even those which
may invoke the RTAS functions through less direct means than
sys_rtas(). This is an unavoidable consequence of exposing
sequence-based RTAS functions through sys_rtas().

* Add an optional mutex member to struct rtas_function.

* Statically define a mutex for each RTAS function with known call
  sequence serialization requirements, and assign its address to the
  .lock member of the corresponding function table entry, along with
  justifying commentary.

* In sys_rtas(), if the table entry for the RTAS function being
  called has a populated lock member, acquire it before taking
  rtas_lock and entering RTAS.

* Kernel-based RTAS call sequences are expected to access the
  appropriate mutex explicitly by name. For example, a user of the
  ibm,activate-firmware RTAS function would do:

int token = rtas_function_token(RTAS_FN_IBM_ACTIVATE_FIRMWARE);
int fwrc;

mutex_lock(&rtas_ibm_activate_firmware_lock);

do {
fwrc = rtas_call(token, 0, 1, NULL);
} while (rtas_busy_delay(fwrc));

mutex_unlock(&rtas_ibm_activate_firmware_lock);

There should be no perceivable change introduced here except that
concurrent callers of the same RTAS function via sys_rtas() may block
on a mutex instead of spinning on rtas_lock.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h |  3 ++
 arch/powerpc/kernel/rtas.c  | 83 +
 2 files changed, 86 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 08d19e6904f7..9bb2210c8d44 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -3,6 +3,7 @@
 #define _POWERPC_RTAS_H
 #ifdef __KERNEL__
 
+#include 
 #include 
 #include 
 #include 
@@ -512,6 +513,8 @@ extern char rtas_data_buf[RTAS_DATA_BUF_SIZE];
 /* RMO buffer reserved for user-space RTAS use */
 extern unsigned long rtas_rmo_buf;
 
+extern struct mutex rtas_ibm_get_vpd_lock;
+
 #define GLOBAL_INTERRUPT_QUEUE 9005
 
 /**
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index ca5bb0b994ac..4d28983e8b1d 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -70,14 +71,33 @@ struct rtas_filter {
  *ppc64le, and we want to keep it that way. It does
  *not make sense for this to be set when @filter
  *is NULL.
+ * @lock: Pointer to an optional dedicated per-function mutex. This
+ *should be set for functions that require multiple calls in
+ *sequence to complete a single operation, and such sequences
+ *will disrupt each other if allowed to interleave. Users of
+ *this function are required to hold the associated lock for
+ *the duration of the call sequence. Add an explanatory
+ *comment to the fun

[PATCH v6 10/13] powerpc/pseries/papr-sysparm: Validate buffer object lengths

2023-12-12 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The ability to get and set system parameters will be exposed to user
space, so let's get a little more strict about malformed
papr_sysparm_buf objects.

* Create accessors for the length field of struct papr_sysparm_buf.
  The length is always stored in MSB order and this is better than
  spreading the necessary conversions all over.

* Reject attempts to submit invalid buffers to RTAS.

* Warn if RTAS returns a buffer with an invalid length, clamping the
  returned length to a safe value that won't overrun the buffer.

These are meant as precautionary measures to mitigate both firmware
and kernel bugs in this area, should they arise, but I am not aware of
any.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/papr-sysparm.c | 47 +++
 1 file changed, 47 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c 
b/arch/powerpc/platforms/pseries/papr-sysparm.c
index fedc61599e6c..a1e7aeac7416 100644
--- a/arch/powerpc/platforms/pseries/papr-sysparm.c
+++ b/arch/powerpc/platforms/pseries/papr-sysparm.c
@@ -23,6 +23,46 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf)
kfree(buf);
 }
 
+static size_t papr_sysparm_buf_get_length(const struct papr_sysparm_buf *buf)
+{
+   return be16_to_cpu(buf->len);
+}
+
+static void papr_sysparm_buf_set_length(struct papr_sysparm_buf *buf, size_t 
length)
+{
+   WARN_ONCE(length > sizeof(buf->val),
+ "bogus length %zu, clamping to safe value", length);
+   length = min(sizeof(buf->val), length);
+   buf->len = cpu_to_be16(length);
+}
+
+/*
+ * For use on buffers returned from ibm,get-system-parameter before
+ * returning them to callers. Ensures the encoded length of valid data
+ * cannot overrun buf->val[].
+ */
+static void papr_sysparm_buf_clamp_length(struct papr_sysparm_buf *buf)
+{
+   papr_sysparm_buf_set_length(buf, papr_sysparm_buf_get_length(buf));
+}
+
+/*
+ * Perform some basic diligence on the system parameter buffer before
+ * submitting it to RTAS.
+ */
+static bool papr_sysparm_buf_can_submit(const struct papr_sysparm_buf *buf)
+{
+   /*
+* Firmware ought to reject buffer lengths that exceed the
+* maximum specified in PAPR, but there's no reason for the
+* kernel to allow them either.
+*/
+   if (papr_sysparm_buf_get_length(buf) > sizeof(buf->val))
+   return false;
+
+   return true;
+}
+
 /**
  * papr_sysparm_get() - Retrieve the value of a PAPR system parameter.
  * @param: PAPR system parameter token as described in
@@ -63,6 +103,9 @@ int papr_sysparm_get(papr_sysparm_t param, struct 
papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
 
+   if (!papr_sysparm_buf_can_submit(buf))
+   return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
 
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));
@@ -77,6 +120,7 @@ int papr_sysparm_get(papr_sysparm_t param, struct 
papr_sysparm_buf *buf)
case 0:
ret = 0;
memcpy(buf, rtas_work_area_raw_buf(work_area), sizeof(*buf));
+   papr_sysparm_buf_clamp_length(buf);
break;
case -3: /* parameter not implemented */
ret = -EOPNOTSUPP;
@@ -115,6 +159,9 @@ int papr_sysparm_set(papr_sysparm_t param, const struct 
papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
 
+   if (!papr_sysparm_buf_can_submit(buf))
+   return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
 
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));

-- 
2.41.0



[PATCH v6 09/13] powerpc/pseries: Add papr-vpd character driver for VPD retrieval

2023-12-12 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

PowerVM LPARs may retrieve Vital Product Data (VPD) for system
components using the ibm,get-vpd RTAS function.

We can expose this to user space with a /dev/papr-vpd character
device, where the programming model is:

  struct papr_location_code plc = { .str = "", }; /* obtain all VPD */
  int devfd = open("/dev/papr-vpd", O_RDONLY);
  int vpdfd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &plc);
  size_t size = lseek(vpdfd, 0, SEEK_END);
  char *buf = malloc(size);
  pread(vpdfd, buf, size, 0);

When a file descriptor is obtained from ioctl(PAPR_VPD_IOC_CREATE_HANDLE),
the file contains the result of a complete ibm,get-vpd sequence. The
file contents are immutable from the POV of user space. To get a new
view of the VPD, the client must create a new handle.

This design choice insulates user space from most of the complexities
that ibm,get-vpd brings:

* ibm,get-vpd must be called more than once to obtain complete
  results.

* Only one ibm,get-vpd call sequence should be in progress at a time;
  interleaved sequences will disrupt each other. Callers must have a
  protocol for serializing their use of the function.

* A call sequence in progress may receive a "VPD changed, try again"
  status, requiring the client to abandon the sequence and start
  over.

The memory required for the VPD buffers seems acceptable, around 20KB
for all VPD on one of my systems. And the value of the
/rtas/ibm,vpd-size DT property (the estimated maximum size of VPD) is
consistently 300KB across various systems I've checked.

I've implemented support for this new ABI in the rtas_get_vpd()
function in librtas, which the vpdupdate command currently uses to
populate its VPD database. I've verified that an unmodified vpdupdate
binary generates an identical database when using a librtas.so that
prefers the new ABI.

Along with the papr-vpd.h header exposed to user space, this
introduces a common papr-miscdev.h uapi header to share a base ioctl
ID with similar drivers to come.

Tested-by: Michal Suchánek 
Signed-off-by: Nathan Lynch 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   2 +
 arch/powerpc/include/uapi/asm/papr-miscdev.h   |   9 +
 arch/powerpc/include/uapi/asm/papr-vpd.h   |  22 +
 arch/powerpc/platforms/pseries/Makefile|   1 +
 arch/powerpc/platforms/pseries/papr-vpd.c  | 541 +
 5 files changed, 575 insertions(+)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 4ea5b837399a..a950545bf7cd 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -349,6 +349,8 @@ Code  Seq#Include File  
 Comments
  

 0xB1  00-1F  PPPoX
  

+0xB2  00 arch/powerpc/include/uapi/asm/papr-vpd.h
powerpc/pseries VPD API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/uapi/asm/papr-miscdev.h 
b/arch/powerpc/include/uapi/asm/papr-miscdev.h
new file mode 100644
index ..49a2a270b7f3
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-miscdev.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_MISCDEV_H_
+#define _UAPI_PAPR_MISCDEV_H_
+
+enum {
+   PAPR_MISCDEV_IOC_ID = 0xb2,
+};
+
+#endif /* _UAPI_PAPR_MISCDEV_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-vpd.h 
b/arch/powerpc/include/uapi/asm/papr-vpd.h
new file mode 100644
index ..1c88e87cb420
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-vpd.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_VPD_H_
+#define _UAPI_PAPR_VPD_H_
+
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+struct papr_location_code {
+   /*
+* PAPR+ v2.13 12.3.2.4 Converged Location Code Rules - Length
+* Restrictions. 79 characters plus nul.
+*/
+   char str[80];
+};
+
+/*
+ * ioctl for /dev/papr-vpd. Returns a VPD handle fd corresponding to
+ * the location code.
+ */
+#define PAPR_VPD_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 0, struct 
papr_location_code)
+
+#endif /* _UAPI_PAPR_VPD_H_ */
diff --git a/arch/powerpc/platforms/pseries/Makefile 
b/arch/powerpc/platforms/pseries/Makefile
index 1476c5e4433c..f936962a2946 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -4,6 +4,7

[PATCH v6 08/13] powerpc/rtas: Warn if per-function lock isn't held

2023-12-12 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

If the function descriptor has a populated lock member, then callers
are required to hold it across calls. Now that the firmware activation
sequence is appropriately guarded, we can warn when the requirement
isn't satisfied.

__do_enter_rtas_trace() gets reorganized a bit as a result of
performing the function descriptor lookup unconditionally now.

Reviewed-by: Aneesh Kumar K.V (IBM) 
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 72f6b5a402dd..7e793b503e29 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -671,28 +671,25 @@ static void __do_enter_rtas(struct rtas_args *args)
 
 static void __do_enter_rtas_trace(struct rtas_args *args)
 {
-   const char *name = NULL;
+   const struct rtas_function *func = 
rtas_token_to_function(be32_to_cpu(args->token));
 
-   if (args == &rtas_args)
-   lockdep_assert_held(&rtas_lock);
/*
-* If the tracepoints that consume the function name aren't
-* active, avoid the lookup.
+* If there is a per-function lock, it must be held by the
+* caller.
 */
-   if ((trace_rtas_input_enabled() || trace_rtas_output_enabled())) {
-   const s32 token = be32_to_cpu(args->token);
-   const struct rtas_function *func = 
rtas_token_to_function(token);
+   if (func->lock)
+   lockdep_assert_held(func->lock);
 
-   name = func->name;
-   }
+   if (args == &rtas_args)
+   lockdep_assert_held(&rtas_lock);
 
-   trace_rtas_input(args, name);
+   trace_rtas_input(args, func->name);
trace_rtas_ll_entry(args);
 
__do_enter_rtas(args);
 
trace_rtas_ll_exit(args);
-   trace_rtas_output(args, name);
+   trace_rtas_output(args, func->name);
 }
 
 static void do_enter_rtas(struct rtas_args *args)

-- 
2.41.0



[PATCH v6 02/13] powerpc/rtas: Add for_each_rtas_function() iterator

2023-12-12 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Add a convenience macro for iterating over every element of the
internal function table and convert the one site that can use it. An
additional user of the macro is anticipated in changes to follow.

Reviewed-by: Aneesh Kumar K.V (IBM) 
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index ce37dc9860ef..ae9b10c954a1 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -454,6 +454,11 @@ static struct rtas_function rtas_function_table[] 
__ro_after_init = {
},
 };
 
+#define for_each_rtas_function(funcp)   \
+   for (funcp = &rtas_function_table[0];   \
+funcp < &rtas_function_table[ARRAY_SIZE(rtas_function_table)]; \
+++funcp)
+
 /*
  * Nearly all RTAS calls need to be serialized. All uses of the
  * default rtas_args block must hold rtas_lock.
@@ -525,10 +530,10 @@ static DEFINE_XARRAY(rtas_token_to_function_xarray);
 
 static int __init rtas_token_to_function_xarray_init(void)
 {
+   const struct rtas_function *func;
int err = 0;
 
-   for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) {
-   const struct rtas_function *func = &rtas_function_table[i];
+   for_each_rtas_function(func) {
const s32 token = func->token;
 
if (token == RTAS_UNKNOWN_SERVICE)

-- 
2.41.0



[PATCH v6 04/13] powerpc/rtas: Add function return status constants

2023-12-12 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Not all of the generic RTAS function statuses specified in PAPR have
symbolic constants and descriptions in rtas.h. Fix this, providing a
little more background, slightly updating the existing wording, and
improving the formatting.

Reviewed-by: Aneesh Kumar K.V (IBM) 
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index a7110ed52e25..08d19e6904f7 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -201,12 +201,25 @@ typedef struct {
 /* Memory set aside for sys_rtas to use with calls that need a work area. */
 #define RTAS_USER_REGION_SIZE (64 * 1024)
 
-/* RTAS return status codes */
-#define RTAS_HARDWARE_ERROR-1/* Hardware Error */
-#define RTAS_BUSY  -2/* RTAS Busy */
-#define RTAS_INVALID_PARAMETER -3/* Invalid indicator/domain/sensor etc. */
-#define RTAS_EXTENDED_DELAY_MIN9900
-#define RTAS_EXTENDED_DELAY_MAX9905
+/*
+ * Common RTAS function return values, derived from the table "RTAS
+ * Status Word Values" in PAPR+ v2.13 7.2.8: "Return Codes". If a
+ * function can return a value in this table then generally it has the
+ * meaning listed here. More extended commentary in the documentation
+ * for rtas_call().
+ *
+ * RTAS functions may use negative and positive numbers not in this
+ * set for function-specific error and success conditions,
+ * respectively.
+ */
+#define RTAS_SUCCESS 0 /* Success. */
+#define RTAS_HARDWARE_ERROR -1 /* Hardware or other unspecified 
error. */
+#define RTAS_BUSY   -2 /* Retry immediately. */
+#define RTAS_INVALID_PARAMETER  -3 /* Invalid indicator/domain/sensor 
etc. */
+#define RTAS_UNEXPECTED_STATE_CHANGE-7 /* Seems limited to EEH and slot 
reset. */
+#define RTAS_EXTENDED_DELAY_MIN   9900 /* Retry after delaying for ~1ms. */
+#define RTAS_EXTENDED_DELAY_MAX   9905 /* Retry after delaying for ~100s. 
*/
+#define RTAS_ML_ISOLATION_ERROR  -9000 /* Multi-level isolation error. */
 
 /* statuses specific to ibm,suspend-me */
 #define RTAS_SUSPEND_ABORTED 9000 /* Suspension aborted */

-- 
2.41.0



[PATCH v6 03/13] powerpc/rtas: Fall back to linear search on failed token->function lookup

2023-12-12 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Enabling any of the powerpc:rtas_* tracepoints at boot is likely to
result in an oops on RTAS platforms. For example, booting a QEMU
pseries model with 'trace_event=powerpc:rtas_input' in the command
line leads to:

  BUG: Kernel NULL pointer dereference on read at 0x0008
  Oops: Kernel access of bad area, sig: 7 [#1]
  NIP [c004231c] do_enter_rtas+0x1bc/0x460
  LR [c004231c] do_enter_rtas+0x1bc/0x460
  Call Trace:
do_enter_rtas+0x1bc/0x460 (unreliable)
rtas_call+0x22c/0x4a0
rtas_get_boot_time+0x80/0x14c
read_persistent_clock64+0x124/0x150
read_persistent_wall_and_boot_offset+0x28/0x58
timekeeping_init+0x70/0x348
start_kernel+0xa0c/0xc1c
start_here_common+0x1c/0x20

(This is preceded by a warning for the failed lookup in
rtas_token_to_function().)

This happens when __do_enter_rtas_trace() attempts a token to function
descriptor lookup before the xarray containing the mappings has been
set up.

Fall back to linear scan of the table if rtas_token_to_function_xarray
is empty.

Fixes: 24098f580e2b ("powerpc/rtas: add tracepoints around RTAS entry")
Reviewed-by: Aneesh Kumar K.V (IBM) 
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index ae9b10c954a1..f60a8e7bd5ed 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -572,11 +572,21 @@ static const struct rtas_function 
*rtas_token_to_function(s32 token)
return NULL;
 
func = rtas_token_to_function_untrusted(token);
+   if (func)
+   return func;
+   /*
+* Fall back to linear scan in case the reverse mapping hasn't
+* been initialized yet.
+*/
+   if (xa_empty(&rtas_token_to_function_xarray)) {
+   for_each_rtas_function(func) {
+   if (func->token == token)
+   return func;
+   }
+   }
 
-   if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token))
-   return NULL;
-
-   return func;
+   WARN_ONCE(true, "unexpected failed lookup for token %d", token);
+   return NULL;
 }
 
 /* This is here deliberately so it's only used in this file */

-- 
2.41.0



[PATCH v6 00/13] powerpc/pseries: New character devices for system parameters and VPD

2023-12-12 Thread Nathan Lynch via B4 Relay
Add character devices that expose PAPR-specific system parameters and
VPD to user space.

The problem: important platform features are enabled on Linux VMs
through the powerpc-specific rtas() syscall in combination with
writeable mappings of /dev/mem. In typical usage, this is encapsulated
behind APIs provided by the librtas library. This paradigm is
incompatible with lockdown, which prohibits /dev/mem access. It also
is too low-level in many cases: a single logical operation may require
multiple sys_rtas() calls in succession to complete. This carries the
risk that a process may exit while leaving an operation unfinished. It
also means that callers must coordinate their use of the syscall for
functions that cannot tolerate multiple concurrent clients, such as
ibm,get-vpd.

These are the general problems, but it's difficult to formulate a
similarly general solution in the form of a single replacement ABI for
sys_rtas(). Instead, each platform facility we expose to user space
needs a specific interface that forms the kernel-user interactions at
a higher level than individual RTAS calls.

I've identified system parameter support as a high priority for this
effort, since the software that communicates with the management
console relies on it and therefore does not work at all with lockdown
enabled on current kernels. VPD retrieval is also important (for
licensing/entitlement things I think?), and serves as a good initial
example of encapsulating sequence-based RTAS calls. So this series
proposes a model for incrementally solving these issues by introducing
a small pseries-specific "driver" for each of these platform
functions. The new drivers expose these facilities to user space in
ways that are compatible with lockdown, and they require no
coordination between their clients.

In preparation, per-function mutexes are added to the core RTAS code
to serialize access to sequence-based RTAS functions. These prevent
kernel-based sequences from interfering with each other, and they
prevent sys_rtas() users from disrupting kernel-based users. The RTAS
core enforces correct lock usage when lockdep is enabled.

Both drivers could potentially support poll() methods to notify
clients of changes to parameters or VPD that happen due to partition
migration and other events. But that should be safe to leave for
later, assuming there's any interest.

I have made changes to librtas to prefer the new interfaces and
verified that existing clients work correctly with the new code. A
draft PR for that work is here:

https://github.com/ibm-power-utilities/librtas/pull/36

The user-space ABI has not changed since v1 of this series.

I expect to propose at least one more small driver in this style for
platform dump retrieval in a separate submission in the future. Other
facilities may follow as needs are identified.

---
Changes in v6:
- Require FMODE_WRITE for setting system parameters in papr-sysparm
  and add a corresponding testcase.
- Update existing sysparm testcases to open the device writable when
  attempting to set parameters.
- Link to v5: 
https://lore.kernel.org/r/20231207-papr-sys_rtas-vs-lockdown-v5-0-2ce965636...@linux.ibm.com

Changes in v5:
- Add to the front of the queue another fix for a latent bug where
  sys_rtas() users can trigger a spurious warning backtrace.
- Merge "powerpc/uapi: Export papr-miscdev.h header" into "Add
  papr-vpd character driver for VPD retrieval" so we don't temporarily
  expose the PAPR_MISCDEV_IOC_ID constant via UAPI without also
  updating the ioctl-number documentation. (Michael Ellerman)
- Drop rtas_function_{un}lock() in favor of more explicit use of the new
  per-function mutexes; make rtas_ibm_get_vpd_lock extern for use by
  papr-vpd.
- Assert that rtas_ibm_get_vpd_lock is held in rtas_ibm_get_vpd().
- Add example usage to "powerpc/rtas: Facilitate high-level call
  sequences". (Aneesh Kumar K.V)
- Drop the now-unnecessary 04/13 "powerpc/rtas: Factor out function
  descriptor lookup".
- Include document version when citing the PAPR+ specification
  throughout. (Michael Ellerman)
- Add missing include directives to papr-vpd and papr-sysparm. (Michal
  Suchánek)
- Fix spurious testcase failure in environments without a working
  ibm,set-system-parameter RTAS function. (Michael Ellerman)
- Link to v4: 
https://lore.kernel.org/r/20231117-papr-sys_rtas-vs-lockdown-v4-0-b794d8cb8...@linux.ibm.com

Changes in v4:
- Fix latent issue in rtas_token_to_function() which causes boot-time
  crashes.
- More small preparatory changes: a function table iterator and
  additional symbolic constants for RTAS function return values.
- Use symbolic constants for ibm,get-vpd statuses in papr-vpd.c.
- Add commentary to papr_vpd_ioc_create_handle() explaining choice to
  retrieve all VPD at file handle creation time instead of deferring
  it to the read handler.
- Rebase on current powerpc/next.
- Link to v3: 
https://lore.kernel.org/r/20231025-papr-sys_rtas-vs-lockdown-v3-0-5eb04559e...@linux.ibm.com

[PATCH v6 01/13] powerpc/rtas: Avoid warning on invalid token argument to sys_rtas()

2023-12-12 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

rtas_token_to_function() WARNs when passed an invalid token; it's
meant to catch bugs in kernel-based users of RTAS functions. However,
user space controls the token value passed to rtas_token_to_function()
by block_rtas_call(), so user space with sufficient privilege to use
sys_rtas() can trigger the warnings at will:

  unexpected failed lookup for token 2048
  WARNING: CPU: 20 PID: 2247 at arch/powerpc/kernel/rtas.c:556
rtas_token_to_function+0xfc/0x110
  ...
  NIP rtas_token_to_function+0xfc/0x110
  LR  rtas_token_to_function+0xf8/0x110
  Call Trace:
rtas_token_to_function+0xf8/0x110 (unreliable)
sys_rtas+0x188/0x880
system_call_exception+0x268/0x530
system_call_common+0x160/0x2c4

It's desirable to continue warning on bogus tokens in
rtas_token_to_function(). Currently it is used to look up RTAS
function descriptors when tracing, where we know there has to have
been a successful descriptor lookup by different means already, and it
would be a serious inconsistency for the reverse lookup to fail.

So instead of weakening rtas_token_to_function()'s contract by
removing the warnings, introduce rtas_token_to_function_untrusted(),
which has no opinion on failed lookups. Convert block_rtas_call() and
rtas_token_to_function() to use it.

Fixes: 8252b88294d2 ("powerpc/rtas: improve function information lookups")
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index c49f078382a9..ce37dc9860ef 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -544,6 +544,21 @@ static int __init rtas_token_to_function_xarray_init(void)
 }
 arch_initcall(rtas_token_to_function_xarray_init);
 
+/*
+ * For use by sys_rtas(), where the token value is provided by user
+ * space and we don't want to warn on failed lookups.
+ */
+static const struct rtas_function *rtas_token_to_function_untrusted(s32 token)
+{
+   return xa_load(&rtas_token_to_function_xarray, token);
+}
+
+/*
+ * Reverse lookup for deriving the function descriptor from a
+ * known-good token value in contexts where the former is not already
+ * available. @token must be valid, e.g. derived from the result of a
+ * prior lookup against the function table.
+ */
 static const struct rtas_function *rtas_token_to_function(s32 token)
 {
const struct rtas_function *func;
@@ -551,7 +566,7 @@ static const struct rtas_function 
*rtas_token_to_function(s32 token)
if (WARN_ONCE(token < 0, "invalid token %d", token))
return NULL;
 
-   func = xa_load(&rtas_token_to_function_xarray, token);
+   func = rtas_token_to_function_untrusted(token);
 
if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token))
return NULL;
@@ -1721,7 +1736,7 @@ static bool block_rtas_call(int token, int nargs,
 * If this token doesn't correspond to a function the kernel
 * understands, you're not allowed to call it.
 */
-   func = rtas_token_to_function(token);
+   func = rtas_token_to_function_untrusted(token);
if (!func)
goto err;
/*

-- 
2.41.0



[PATCH v6 07/13] powerpc/rtas: Serialize firmware activation sequences

2023-12-12 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Use rtas_ibm_activate_firmware_lock to prevent interleaving call
sequences of the ibm,activate-firmware RTAS function, which typically
requires multiple calls to complete the update. While the spec does
not specifically prohibit interleaved sequences, there's almost
certainly no advantage to allowing them.

Reviewed-by: Aneesh Kumar K.V (IBM) 
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 4d28983e8b1d..72f6b5a402dd 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1734,10 +1734,14 @@ void rtas_activate_firmware(void)
return;
}
 
+   mutex_lock(&rtas_ibm_activate_firmware_lock);
+
do {
fwrc = rtas_call(token, 0, 1, NULL);
} while (rtas_busy_delay(fwrc));
 
+   mutex_unlock(&rtas_ibm_activate_firmware_lock);
+
if (fwrc)
pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
 }

-- 
2.41.0



[PATCH v6 05/13] powerpc/rtas: Move token validation from block_rtas_call() to sys_rtas()

2023-12-12 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The rtas system call handler sys_rtas() delegates certain input
validation steps to a helper function: block_rtas_call(). One of these
steps ensures that the user-supplied token value maps to a known RTAS
function. This is done by performing a "reverse" token-to-function
lookup via rtas_token_to_function_untrusted() to obtain an
rtas_function object.

In changes to come, sys_rtas() itself will need the function
descriptor for the token. To prepare:

* Move the lookup and validation up into sys_rtas() and pass the
  resulting rtas_function pointer to block_rtas_call(), which is
  otherwise unconcerned with the token value.

* Change block_rtas_call() to report the RTAS function name instead of
  the token value on validation failures, since it can now rely on
  having a valid function descriptor.

One behavior change is that sys_rtas() now silently errors out when
passed a bad token, before calling block_rtas_call(). So we will no
longer log "RTAS call blocked - exploit attempt?" on invalid
tokens. This is consistent with how sys_rtas() currently handles other
"metadata" (nargs and nret), while block_rtas_call() is primarily
concerned with validating the arguments to be passed to specific RTAS
functions.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index f60a8e7bd5ed..ca5bb0b994ac 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1738,24 +1738,18 @@ static bool in_rmo_buf(u32 base, u32 end)
end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE);
 }
 
-static bool block_rtas_call(int token, int nargs,
+static bool block_rtas_call(const struct rtas_function *func, int nargs,
struct rtas_args *args)
 {
-   const struct rtas_function *func;
const struct rtas_filter *f;
-   const bool is_platform_dump = token == 
rtas_function_token(RTAS_FN_IBM_PLATFORM_DUMP);
-   const bool is_config_conn = token == 
rtas_function_token(RTAS_FN_IBM_CONFIGURE_CONNECTOR);
+   const bool is_platform_dump =
+   func == &rtas_function_table[RTAS_FNIDX__IBM_PLATFORM_DUMP];
+   const bool is_config_conn =
+   func == 
&rtas_function_table[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR];
u32 base, size, end;
 
/*
-* If this token doesn't correspond to a function the kernel
-* understands, you're not allowed to call it.
-*/
-   func = rtas_token_to_function_untrusted(token);
-   if (!func)
-   goto err;
-   /*
-* And only functions with filters attached are allowed.
+* Only functions with filters attached are allowed.
 */
f = func->filter;
if (!f)
@@ -1812,14 +1806,15 @@ static bool block_rtas_call(int token, int nargs,
return false;
 err:
pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n");
-   pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n",
-  token, nargs, current->comm);
+   pr_err_ratelimited("sys_rtas: %s nargs=%d (called by %s)\n",
+  func->name, nargs, current->comm);
return true;
 }
 
 /* We assume to be passed big endian arguments */
 SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
 {
+   const struct rtas_function *func;
struct pin_cookie cookie;
struct rtas_args args;
unsigned long flags;
@@ -1849,13 +1844,18 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
   nargs * sizeof(rtas_arg_t)) != 0)
return -EFAULT;
 
-   if (token == RTAS_UNKNOWN_SERVICE)
+   /*
+* If this token doesn't correspond to a function the kernel
+* understands, you're not allowed to call it.
+*/
+   func = rtas_token_to_function_untrusted(token);
+   if (!func)
return -EINVAL;
 
args.rets = &args.args[nargs];
memset(args.rets, 0, nret * sizeof(rtas_arg_t));
 
-   if (block_rtas_call(token, nargs, &args))
+   if (block_rtas_call(func, nargs, &args))
return -EINVAL;
 
if (token_is_restricted_errinjct(token)) {

-- 
2.41.0



[PATCH v5 09/13] powerpc/pseries: Add papr-vpd character driver for VPD retrieval

2023-12-07 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

PowerVM LPARs may retrieve Vital Product Data (VPD) for system
components using the ibm,get-vpd RTAS function.

We can expose this to user space with a /dev/papr-vpd character
device, where the programming model is:

  struct papr_location_code plc = { .str = "", }; /* obtain all VPD */
  int devfd = open("/dev/papr-vpd", O_RDONLY);
  int vpdfd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &plc);
  size_t size = lseek(vpdfd, 0, SEEK_END);
  char *buf = malloc(size);
  pread(vpdfd, buf, size, 0);

When a file descriptor is obtained from ioctl(PAPR_VPD_IOC_CREATE_HANDLE),
the file contains the result of a complete ibm,get-vpd sequence. The
file contents are immutable from the POV of user space. To get a new
view of the VPD, the client must create a new handle.

This design choice insulates user space from most of the complexities
that ibm,get-vpd brings:

* ibm,get-vpd must be called more than once to obtain complete
  results.

* Only one ibm,get-vpd call sequence should be in progress at a time;
  interleaved sequences will disrupt each other. Callers must have a
  protocol for serializing their use of the function.

* A call sequence in progress may receive a "VPD changed, try again"
  status, requiring the client to abandon the sequence and start
  over.

The memory required for the VPD buffers seems acceptable, around 20KB
for all VPD on one of my systems. And the value of the
/rtas/ibm,vpd-size DT property (the estimated maximum size of VPD) is
consistently 300KB across various systems I've checked.

I've implemented support for this new ABI in the rtas_get_vpd()
function in librtas, which the vpdupdate command currently uses to
populate its VPD database. I've verified that an unmodified vpdupdate
binary generates an identical database when using a librtas.so that
prefers the new ABI.

Along with the papr-vpd.h header exposed to user space, this
introduces a common papr-miscdev.h uapi header to share a base ioctl
ID with similar drivers to come.

Tested-by: Michal Suchánek 
Signed-off-by: Nathan Lynch 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   2 +
 arch/powerpc/include/uapi/asm/papr-miscdev.h   |   9 +
 arch/powerpc/include/uapi/asm/papr-vpd.h   |  22 +
 arch/powerpc/platforms/pseries/Makefile|   1 +
 arch/powerpc/platforms/pseries/papr-vpd.c  | 541 +
 5 files changed, 575 insertions(+)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 4ea5b837399a..a950545bf7cd 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -349,6 +349,8 @@ Code  Seq#Include File  
 Comments
  

 0xB1  00-1F  PPPoX
  

+0xB2  00 arch/powerpc/include/uapi/asm/papr-vpd.h
powerpc/pseries VPD API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/uapi/asm/papr-miscdev.h 
b/arch/powerpc/include/uapi/asm/papr-miscdev.h
new file mode 100644
index ..49a2a270b7f3
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-miscdev.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_MISCDEV_H_
+#define _UAPI_PAPR_MISCDEV_H_
+
+enum {
+   PAPR_MISCDEV_IOC_ID = 0xb2,
+};
+
+#endif /* _UAPI_PAPR_MISCDEV_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-vpd.h 
b/arch/powerpc/include/uapi/asm/papr-vpd.h
new file mode 100644
index ..1c88e87cb420
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-vpd.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_VPD_H_
+#define _UAPI_PAPR_VPD_H_
+
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+struct papr_location_code {
+   /*
+* PAPR+ v2.13 12.3.2.4 Converged Location Code Rules - Length
+* Restrictions. 79 characters plus nul.
+*/
+   char str[80];
+};
+
+/*
+ * ioctl for /dev/papr-vpd. Returns a VPD handle fd corresponding to
+ * the location code.
+ */
+#define PAPR_VPD_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 0, struct 
papr_location_code)
+
+#endif /* _UAPI_PAPR_VPD_H_ */
diff --git a/arch/powerpc/platforms/pseries/Makefile 
b/arch/powerpc/platforms/pseries/Makefile
index 1476c5e4433c..f936962a2946 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -4,6 +4,7

[PATCH v5 12/13] powerpc/selftests: Add test for papr-vpd

2023-12-07 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Add selftests for /dev/papr-vpd, exercising the common expected use
cases:

* Retrieve all VPD by passing an empty location code.
* Retrieve the "system VPD" by passing a location code derived from DT
  root node properties, as done by the vpdupdate command.

The tests also verify that certain intended properties of the driver
hold:

* Passing an unterminated location code to PAPR_VPD_IOC_CREATE_HANDLE
  gets EINVAL.
* Passing a NULL location code pointer to PAPR_VPD_IOC_CREATE_HANDLE
  gets EFAULT.
* Closing the device node without first issuing a
  PAPR_VPD_IOC_CREATE_HANDLE command to it succeeds.
* Releasing a handle without first consuming any data from it
  succeeds.
* Re-reading the contents of a handle returns the same data as the
  first time.

Some minimal validation of the returned data is performed.

The tests are skipped on systems where the papr-vpd driver does not
initialize, making this useful only on PowerVM LPARs at this point.

Signed-off-by: Nathan Lynch 
---
 tools/testing/selftests/powerpc/Makefile   |   1 +
 .../testing/selftests/powerpc/papr_vpd/.gitignore  |   1 +
 tools/testing/selftests/powerpc/papr_vpd/Makefile  |  12 +
 .../testing/selftests/powerpc/papr_vpd/papr_vpd.c  | 352 +
 4 files changed, 366 insertions(+)

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 7ea42fa02eab..05fc68d446c2 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -32,6 +32,7 @@ SUB_DIRS = alignment  \
   vphn \
   math \
   papr_attributes  \
+  papr_vpd \
   ptrace   \
   security \
   mce
diff --git a/tools/testing/selftests/powerpc/papr_vpd/.gitignore b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
new file mode 100644
index 000000000000..49285031a656
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
@@ -0,0 +1 @@
+/papr_vpd
diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile b/tools/testing/selftests/powerpc/papr_vpd/Makefile
new file mode 100644
index 000000000000..06b719703bfd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+   $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_vpd
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_vpd: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
new file mode 100644
index 000000000000..98cbb9109ee6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <asm/papr-vpd.h>
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-vpd"
+
+static int dev_papr_vpd_open_close(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int dev_papr_vpd_get_handle_all(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+   struct papr_location_code lc = { .str = "", };
+   off_t size;
+   int fd;
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   errno = 0;
+   fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+   FAIL_IF(errno != 0);
+   FAIL_IF(fd < 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   size = lseek(fd, 0, SEEK_END);
+   FAIL_IF(size <= 0);
+
+   void *buf = malloc((size_t)size);
+   FAIL_IF(!buf);
+
+   ssize_t consumed = pread(fd, buf, size, 0);
+   FAIL_IF(consumed != size);
+
+   /* Ensure EOF */
+   FAIL_IF(read(fd, buf, size) != 0);
+   FAIL_IF(close(fd));
+
+   /* Verify that the buffer looks like VPD */
+   static const char needle[] = "System VPD";
+   FAIL_IF(!memmem(buf, size, needle, strlen(needle)));
+
+   return 0;
+}
+
+static int dev_papr_vpd_get_handle_byte_at_a_time(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+   struct papr_location_code lc = { .str = "", };
+   int fd;
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   errno = 0;
+   fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+   FAIL_IF(errno != 0);
+   FAIL_IF(fd < 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   size_t consumed = 0;
+   while (1) {
+   ssize_t res;
+   char c;
+
+   errno = 0;
+   res = read(fd, &c, sizeof(c));
+   FAIL_IF(res > sizeof(c))

[PATCH v5 13/13] powerpc/selftests: Add test for papr-sysparm

2023-12-07 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Consistently testing system parameter access is a bit difficult by
nature -- the set of parameters available depends on the model and
system configuration, and updating a parameter should be considered a
destructive operation reserved for the admin.

So we validate some of the error paths and retrieve the SPLPAR
characteristics string, but not much else.

Signed-off-by: Nathan Lynch 
---
 tools/testing/selftests/powerpc/Makefile   |   1 +
 .../selftests/powerpc/papr_sysparm/.gitignore  |   1 +
 .../selftests/powerpc/papr_sysparm/Makefile|  12 ++
 .../selftests/powerpc/papr_sysparm/papr_sysparm.c  | 165 +
 4 files changed, 179 insertions(+)

diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 05fc68d446c2..c376151982c4 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -33,6 +33,7 @@ SUB_DIRS = alignment  \
   math \
   papr_attributes  \
   papr_vpd \
+  papr_sysparm \
   ptrace   \
   security \
   mce
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/.gitignore b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
new file mode 100644
index 000000000000..f2a69bf59d40
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
@@ -0,0 +1 @@
+/papr_sysparm
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
new file mode 100644
index 000000000000..7f79e437634a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+   $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_sysparm
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_sysparm: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
new file mode 100644
index 000000000000..9d4850c25aed
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <asm/papr-sysparm.h>
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-sysparm"
+
+static int open_close(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int get_splpar(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = 20, // SPLPAR characteristics
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != 0);
+   FAIL_IF(sp.length == 0);
+   FAIL_IF(sp.length > sizeof(sp.data));
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int get_bad_parameter(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = UINT32_MAX, // there are only ~60 specified parameters
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != -1);
+   FAIL_IF(errno != EOPNOTSUPP);
+
+   // Ensure the buffer is unchanged
+   FAIL_IF(sp.length != 0);
+   for (size_t i = 0; i < ARRAY_SIZE(sp.data); ++i)
+   FAIL_IF(sp.data[i] != 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int check_efault_common(unsigned long cmd)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, cmd, NULL) != -1);
+   FAIL_IF(errno != EFAULT);
+
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int check_efault_get(void)
+{
+   return check_efault_common(PAPR_SYSPARM_IOC_GET);
+}
+
+static int check_efault_set(void)
+{
+   return check_efault_common(PAPR_SYSPARM_IOC_SET);
+}
+
+static int set_hmc0(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = 0, // HMC0, not a settable parameter
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, PAPR_

[PATCH v5 00/13] powerpc/pseries: New character devices for system parameters and VPD

2023-12-07 Thread Nathan Lynch via B4 Relay
Add character devices that expose PAPR-specific system parameters and
VPD to user space.

The problem: important platform features are enabled on Linux VMs
through the powerpc-specific rtas() syscall in combination with
writeable mappings of /dev/mem. In typical usage, this is encapsulated
behind APIs provided by the librtas library. This paradigm is
incompatible with lockdown, which prohibits /dev/mem access. It also
is too low-level in many cases: a single logical operation may require
multiple sys_rtas() calls in succession to complete. This carries the
risk that a process may exit while leaving an operation unfinished. It
also means that callers must coordinate their use of the syscall for
functions that cannot tolerate multiple concurrent clients, such as
ibm,get-vpd.

These are the general problems, but it's difficult to formulate a
similarly general solution in the form of a single replacement ABI for
sys_rtas(). Instead, each platform facility we expose to user space
needs a specific interface that forms the kernel-user interactions at
a higher level than individual RTAS calls.

I've identified system parameter support as a high priority for this
effort, since the software that communicates with the management
console relies on it and therefore does not work at all with lockdown
enabled on current kernels. VPD retrieval is also important (for
licensing/entitlement things I think?), and serves as a good initial
example of encapsulating sequence-based RTAS calls. So this series
proposes a model for incrementally solving these issues by introducing
a small pseries-specific "driver" for each of these platform
functions. The new drivers expose these facilities to user space in
ways that are compatible with lockdown, and they require no
coordination between their clients.

In preparation, per-function mutexes are added to the core RTAS code
to serialize access to sequence-based RTAS functions. These prevent
kernel-based sequences from interfering with each other, and they
prevent sys_rtas() users from disrupting kernel-based users. The RTAS
core enforces correct lock usage when lockdep is enabled.

Both drivers could potentially support poll() methods to notify
clients of changes to parameters or VPD that happen due to partition
migration and other events. But that should be safe to leave for
later, assuming there's any interest.

I have made changes to librtas to prefer the new interfaces and
verified that existing clients work correctly with the new code. A
draft PR for that work is here:

https://github.com/ibm-power-utilities/librtas/pull/36

The user-space ABI has not changed since v1 of this series.

I expect to propose at least one more small driver in this style for
platform dump retrieval in a separate submission in the future. Other
facilities may follow as needs are identified.

---
Changes in v5:
- Add to the front of the queue another fix for a latent bug where
  sys_rtas() users can trigger a spurious warning backtrace.
- Merge "powerpc/uapi: Export papr-miscdev.h header" into "Add
  papr-vpd character driver for VPD retrieval" so we don't temporarily
  expose the PAPR_MISCDEV_IOC_ID constant via UAPI without also
  updating the ioctl-number documentation. (Michael Ellerman)
- Drop rtas_function_{un}lock() in favor of more explicit use of the new
  per-function mutexes; make rtas_ibm_get_vpd_lock extern for use by
  papr-vpd.
- Assert that rtas_ibm_get_vpd_lock is held in rtas_ibm_get_vpd().
- Add example usage to "powerpc/rtas: Facilitate high-level call
  sequences". (Aneesh Kumar K.V)
- Drop the now-unnecessary 04/13 "powerpc/rtas: Factor out function
  descriptor lookup".
- Include document version when citing the PAPR+ specification
  throughout. (Michael Ellerman)
- Add missing include directives to papr-vpd and papr-sysparm. (Michal
  Suchánek)
- Fix spurious testcase failure in environments without a working
  ibm,set-system-parameter RTAS function. (Michael Ellerman)
- Link to v4: 
https://lore.kernel.org/r/20231117-papr-sys_rtas-vs-lockdown-v4-0-b794d8cb8...@linux.ibm.com

Changes in v4:
- Fix latent issue in rtas_token_to_function() which causes boot-time
  crashes.
- More small preparatory changes: a function table iterator and
  additional symbolic constants for RTAS function return values.
- Use symbolic constants for ibm,get-vpd statuses in papr-vpd.c.
- Add commentary to papr_vpd_ioc_create_handle() explaining choice to
  retrieve all VPD at file handle creation time instead of deferring
  it to the read handler.
- Rebase on current powerpc/next.
- Link to v3: 
https://lore.kernel.org/r/20231025-papr-sys_rtas-vs-lockdown-v3-0-5eb04559e...@linux.ibm.com

Changes in v3:
- Add new rtas_function_lock()/unlock() APIs and convert existing code
  to use them.
- Convert papr-vpd to use rtas_function_lock()/unlock() instead of
  having sys_rtas() obtain a driver-private mutex.
- Rebase on current powerpc/next.
- Link to v2: 
https://lore.kernel.org/r/20231013-papr-sys_rtas-vs-lockdow

[PATCH v5 03/13] powerpc/rtas: Fall back to linear search on failed token->function lookup

2023-12-07 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Enabling any of the powerpc:rtas_* tracepoints at boot is likely to
result in an oops on RTAS platforms. For example, booting a QEMU
pseries model with 'trace_event=powerpc:rtas_input' in the command
line leads to:

  BUG: Kernel NULL pointer dereference on read at 0x0008
  Oops: Kernel access of bad area, sig: 7 [#1]
  NIP [c004231c] do_enter_rtas+0x1bc/0x460
  LR [c004231c] do_enter_rtas+0x1bc/0x460
  Call Trace:
do_enter_rtas+0x1bc/0x460 (unreliable)
rtas_call+0x22c/0x4a0
rtas_get_boot_time+0x80/0x14c
read_persistent_clock64+0x124/0x150
read_persistent_wall_and_boot_offset+0x28/0x58
timekeeping_init+0x70/0x348
start_kernel+0xa0c/0xc1c
start_here_common+0x1c/0x20

(This is preceded by a warning for the failed lookup in
rtas_token_to_function().)

This happens when __do_enter_rtas_trace() attempts a token to function
descriptor lookup before the xarray containing the mappings has been
set up.

Fall back to linear scan of the table if rtas_token_to_function_xarray
is empty.

Fixes: 24098f580e2b ("powerpc/rtas: add tracepoints around RTAS entry")
Reviewed-by: Aneesh Kumar K.V (IBM) 
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index ae9b10c954a1..f60a8e7bd5ed 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -572,11 +572,21 @@ static const struct rtas_function *rtas_token_to_function(s32 token)
return NULL;
 
func = rtas_token_to_function_untrusted(token);
+   if (func)
+   return func;
+   /*
+* Fall back to linear scan in case the reverse mapping hasn't
+* been initialized yet.
+*/
+   if (xa_empty(&rtas_token_to_function_xarray)) {
+   for_each_rtas_function(func) {
+   if (func->token == token)
+   return func;
+   }
+   }
 
-   if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token))
-   return NULL;
-
-   return func;
+   WARN_ONCE(true, "unexpected failed lookup for token %d", token);
+   return NULL;
 }
 
 /* This is here deliberately so it's only used in this file */

-- 
2.41.0



[PATCH v5 05/13] powerpc/rtas: Move token validation from block_rtas_call() to sys_rtas()

2023-12-07 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The rtas system call handler sys_rtas() delegates certain input
validation steps to a helper function: block_rtas_call(). One of these
steps ensures that the user-supplied token value maps to a known RTAS
function. This is done by performing a "reverse" token-to-function
lookup via rtas_token_to_function_untrusted() to obtain an
rtas_function object.

In changes to come, sys_rtas() itself will need the function
descriptor for the token. To prepare:

* Move the lookup and validation up into sys_rtas() and pass the
  resulting rtas_function pointer to block_rtas_call(), which is
  otherwise unconcerned with the token value.

* Change block_rtas_call() to report the RTAS function name instead of
  the token value on validation failures, since it can now rely on
  having a valid function descriptor.

One behavior change is that sys_rtas() now silently errors out when
passed a bad token, before calling block_rtas_call(). So we will no
longer log "RTAS call blocked - exploit attempt?" on invalid
tokens. This is consistent with how sys_rtas() currently handles other
"metadata" (nargs and nret), while block_rtas_call() is primarily
concerned with validating the arguments to be passed to specific RTAS
functions.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index f60a8e7bd5ed..ca5bb0b994ac 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1738,24 +1738,18 @@ static bool in_rmo_buf(u32 base, u32 end)
end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE);
 }
 
-static bool block_rtas_call(int token, int nargs,
+static bool block_rtas_call(const struct rtas_function *func, int nargs,
struct rtas_args *args)
 {
-   const struct rtas_function *func;
const struct rtas_filter *f;
-   const bool is_platform_dump = token == rtas_function_token(RTAS_FN_IBM_PLATFORM_DUMP);
-   const bool is_config_conn = token == rtas_function_token(RTAS_FN_IBM_CONFIGURE_CONNECTOR);
+   const bool is_platform_dump =
+   func == &rtas_function_table[RTAS_FNIDX__IBM_PLATFORM_DUMP];
+   const bool is_config_conn =
+   func == &rtas_function_table[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR];
u32 base, size, end;
 
/*
-* If this token doesn't correspond to a function the kernel
-* understands, you're not allowed to call it.
-*/
-   func = rtas_token_to_function_untrusted(token);
-   if (!func)
-   goto err;
-   /*
-* And only functions with filters attached are allowed.
+* Only functions with filters attached are allowed.
 */
f = func->filter;
if (!f)
@@ -1812,14 +1806,15 @@ static bool block_rtas_call(int token, int nargs,
return false;
 err:
pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n");
-   pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n",
-  token, nargs, current->comm);
+   pr_err_ratelimited("sys_rtas: %s nargs=%d (called by %s)\n",
+  func->name, nargs, current->comm);
return true;
 }
 
 /* We assume to be passed big endian arguments */
 SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
 {
+   const struct rtas_function *func;
struct pin_cookie cookie;
struct rtas_args args;
unsigned long flags;
@@ -1849,13 +1844,18 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
   nargs * sizeof(rtas_arg_t)) != 0)
return -EFAULT;
 
-   if (token == RTAS_UNKNOWN_SERVICE)
+   /*
+* If this token doesn't correspond to a function the kernel
+* understands, you're not allowed to call it.
+*/
+   func = rtas_token_to_function_untrusted(token);
+   if (!func)
return -EINVAL;
 
args.rets = &args.args[nargs];
memset(args.rets, 0, nret * sizeof(rtas_arg_t));
 
-   if (block_rtas_call(token, nargs, &args))
+   if (block_rtas_call(func, nargs, &args))
return -EINVAL;
 
if (token_is_restricted_errinjct(token)) {

-- 
2.41.0



[PATCH v5 11/13] powerpc/pseries/papr-sysparm: Expose character device to user space

2023-12-07 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Until now the papr_sysparm APIs have been kernel-internal. But user
space needs access to PAPR system parameters too. The only method
available to user space today to get or set system parameters is using
sys_rtas() and /dev/mem to pass RTAS-addressable buffers between user
space and firmware. This is incompatible with lockdown and should be
deprecated.

So provide an alternative ABI to user space in the form of a
/dev/papr-sysparm character device with just two ioctl commands (get
and set). The data payloads involved are small enough to fit in the
ioctl argument buffer, making the code relatively simple.

Exposing the system parameters through sysfs has been considered but
it would be too awkward:

* The kernel currently does not have to contain an exhaustive list of
  defined system parameters. This is a convenient property to maintain
  because we don't have to update the kernel whenever a new parameter
  is added to PAPR. Exporting a named attribute in sysfs for each
  parameter would negate this.

* Some system parameters are text-based and some are not.

* Retrieval of at least one system parameter requires input data,
  which a simple read-oriented interface can't support.

Signed-off-by: Nathan Lynch 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   2 +
 arch/powerpc/include/asm/papr-sysparm.h|  17 ++-
 arch/powerpc/include/uapi/asm/papr-sysparm.h   |  58 
 arch/powerpc/platforms/pseries/papr-sysparm.c  | 155 -
 4 files changed, 224 insertions(+), 8 deletions(-)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index a950545bf7cd..d8b6cb1a3636 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -351,6 +351,8 @@ Code  Seq#Include File  
 Comments
  

 0xB2  00 arch/powerpc/include/uapi/asm/papr-vpd.h
powerpc/pseries VPD API
  

+0xB2  01-02  arch/powerpc/include/uapi/asm/papr-sysparm.h
powerpc/pseries system parameter API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/asm/papr-sysparm.h b/arch/powerpc/include/asm/papr-sysparm.h
index f5fdbd8ae9db..0dbbff59101d 100644
--- a/arch/powerpc/include/asm/papr-sysparm.h
+++ b/arch/powerpc/include/asm/papr-sysparm.h
@@ -2,8 +2,10 @@
 #ifndef _ASM_POWERPC_PAPR_SYSPARM_H
 #define _ASM_POWERPC_PAPR_SYSPARM_H
 
+#include <uapi/asm/papr-sysparm.h>
+
 typedef struct {
-   const u32 token;
+   u32 token;
 } papr_sysparm_t;
 
 #define mk_papr_sysparm(x_) ((papr_sysparm_t){ .token = x_, })
@@ -20,11 +22,14 @@ typedef struct {
 #define PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRSmk_papr_sysparm(50)
 #define PAPR_SYSPARM_LPAR_NAME mk_papr_sysparm(55)
 
-enum {
-   PAPR_SYSPARM_MAX_INPUT  = 1024,
-   PAPR_SYSPARM_MAX_OUTPUT = 4000,
-};
-
+/**
+ * struct papr_sysparm_buf - RTAS work area layout for system parameter functions.
+ *
+ * This is the memory layout of the buffers passed to/from
+ * ibm,get-system-parameter and ibm,set-system-parameter. It is
+ * distinct from the papr_sysparm_io_block structure that is passed
+ * between user space and the kernel.
+ */
 struct papr_sysparm_buf {
__be16 len;
char val[PAPR_SYSPARM_MAX_OUTPUT];
diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h b/arch/powerpc/include/uapi/asm/papr-sysparm.h
new file mode 100644
index 000000000000..9f9a0f267ea5
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_SYSPARM_H_
+#define _UAPI_PAPR_SYSPARM_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+enum {
+   PAPR_SYSPARM_MAX_INPUT  = 1024,
+   PAPR_SYSPARM_MAX_OUTPUT = 4000,
+};
+
+struct papr_sysparm_io_block {
+   __u32 parameter;
+   __u16 length;
+   char data[PAPR_SYSPARM_MAX_OUTPUT];
+};
+
+/**
+ * PAPR_SYSPARM_IOC_GET - Retrieve the value of a PAPR system parameter.
+ *
+ * Uses _IOWR because of one corner case: Retrieving the value of the
+ * "OS Service Entitlement Status" parameter (60) requires the caller
+ * to supply input data (a date string) in the buffer passed to
+ * firmware. So the @length and @data of the incoming
+ * papr_sysparm_io_block are always used to initialize the work area
+ * supplied to ibm,get-system-parameter. No other parameters are known
+ * to pa

[PATCH v5 07/13] powerpc/rtas: Serialize firmware activation sequences

2023-12-07 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Use rtas_ibm_activate_firmware_lock to prevent interleaving call
sequences of the ibm,activate-firmware RTAS function, which typically
requires multiple calls to complete the update. While the spec does
not specifically prohibit interleaved sequences, there's almost
certainly no advantage to allowing them.

Reviewed-by: Aneesh Kumar K.V (IBM) 
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 4d28983e8b1d..72f6b5a402dd 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1734,10 +1734,14 @@ void rtas_activate_firmware(void)
return;
}
 
+   mutex_lock(&rtas_ibm_activate_firmware_lock);
+
do {
fwrc = rtas_call(token, 0, 1, NULL);
} while (rtas_busy_delay(fwrc));
 
+   mutex_unlock(&rtas_ibm_activate_firmware_lock);
+
if (fwrc)
pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
 }

-- 
2.41.0



[PATCH v5 01/13] powerpc/rtas: Avoid warning on invalid token argument to sys_rtas()

2023-12-07 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

rtas_token_to_function() WARNs when passed an invalid token; it's
meant to catch bugs in kernel-based users of RTAS functions. However,
user space controls the token value passed to rtas_token_to_function()
by block_rtas_call(), so user space with sufficient privilege to use
sys_rtas() can trigger the warnings at will:

  unexpected failed lookup for token 2048
  WARNING: CPU: 20 PID: 2247 at arch/powerpc/kernel/rtas.c:556
rtas_token_to_function+0xfc/0x110
  ...
  NIP rtas_token_to_function+0xfc/0x110
  LR  rtas_token_to_function+0xf8/0x110
  Call Trace:
rtas_token_to_function+0xf8/0x110 (unreliable)
sys_rtas+0x188/0x880
system_call_exception+0x268/0x530
system_call_common+0x160/0x2c4

It's desirable to continue warning on bogus tokens in
rtas_token_to_function(). Currently it is used to look up RTAS
function descriptors when tracing, where we know there has to have
been a successful descriptor lookup by different means already, and it
would be a serious inconsistency for the reverse lookup to fail.

So instead of weakening rtas_token_to_function()'s contract by
removing the warnings, introduce rtas_token_to_function_untrusted(),
which has no opinion on failed lookups. Convert block_rtas_call() and
rtas_token_to_function() to use it.

Fixes: 8252b88294d2 ("powerpc/rtas: improve function information lookups")
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index c49f078382a9..ce37dc9860ef 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -544,6 +544,21 @@ static int __init rtas_token_to_function_xarray_init(void)
 }
 arch_initcall(rtas_token_to_function_xarray_init);
 
+/*
+ * For use by sys_rtas(), where the token value is provided by user
+ * space and we don't want to warn on failed lookups.
+ */
+static const struct rtas_function *rtas_token_to_function_untrusted(s32 token)
+{
+   return xa_load(&rtas_token_to_function_xarray, token);
+}
+
+/*
+ * Reverse lookup for deriving the function descriptor from a
+ * known-good token value in contexts where the former is not already
+ * available. @token must be valid, e.g. derived from the result of a
+ * prior lookup against the function table.
+ */
 static const struct rtas_function *rtas_token_to_function(s32 token)
 {
const struct rtas_function *func;
@@ -551,7 +566,7 @@ static const struct rtas_function *rtas_token_to_function(s32 token)
if (WARN_ONCE(token < 0, "invalid token %d", token))
return NULL;
 
-   func = xa_load(&rtas_token_to_function_xarray, token);
+   func = rtas_token_to_function_untrusted(token);
 
if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token))
return NULL;
@@ -1721,7 +1736,7 @@ static bool block_rtas_call(int token, int nargs,
 * If this token doesn't correspond to a function the kernel
 * understands, you're not allowed to call it.
 */
-   func = rtas_token_to_function(token);
+   func = rtas_token_to_function_untrusted(token);
if (!func)
goto err;
/*

-- 
2.41.0



[PATCH v5 02/13] powerpc/rtas: Add for_each_rtas_function() iterator

2023-12-07 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Add a convenience macro for iterating over every element of the
internal function table and convert the one site that can use it. An
additional user of the macro is anticipated in changes to follow.

Reviewed-by: Aneesh Kumar K.V (IBM) 
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index ce37dc9860ef..ae9b10c954a1 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -454,6 +454,11 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
},
 };
 
+#define for_each_rtas_function(funcp)   \
+   for (funcp = &rtas_function_table[0];   \
+funcp < &rtas_function_table[ARRAY_SIZE(rtas_function_table)]; \
+++funcp)
+
 /*
  * Nearly all RTAS calls need to be serialized. All uses of the
  * default rtas_args block must hold rtas_lock.
@@ -525,10 +530,10 @@ static DEFINE_XARRAY(rtas_token_to_function_xarray);
 
 static int __init rtas_token_to_function_xarray_init(void)
 {
+   const struct rtas_function *func;
int err = 0;
 
-   for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) {
-   const struct rtas_function *func = &rtas_function_table[i];
+   for_each_rtas_function(func) {
const s32 token = func->token;
 
if (token == RTAS_UNKNOWN_SERVICE)

-- 
2.41.0



[PATCH v5 10/13] powerpc/pseries/papr-sysparm: Validate buffer object lengths

2023-12-07 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The ability to get and set system parameters will be exposed to user
space, so let's get a little more strict about malformed
papr_sysparm_buf objects.

* Create accessors for the length field of struct papr_sysparm_buf.
  The length is always stored in MSB order and this is better than
  spreading the necessary conversions all over.

* Reject attempts to submit invalid buffers to RTAS.

* Warn if RTAS returns a buffer with an invalid length, clamping the
  returned length to a safe value that won't overrun the buffer.

These are meant as precautionary measures to mitigate both firmware
and kernel bugs in this area, should they arise, but I am not aware of
any.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/papr-sysparm.c | 47 +++
 1 file changed, 47 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c b/arch/powerpc/platforms/pseries/papr-sysparm.c
index fedc61599e6c..a1e7aeac7416 100644
--- a/arch/powerpc/platforms/pseries/papr-sysparm.c
+++ b/arch/powerpc/platforms/pseries/papr-sysparm.c
@@ -23,6 +23,46 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf)
kfree(buf);
 }
 
+static size_t papr_sysparm_buf_get_length(const struct papr_sysparm_buf *buf)
+{
+   return be16_to_cpu(buf->len);
+}
+
+static void papr_sysparm_buf_set_length(struct papr_sysparm_buf *buf, size_t length)
+{
+   WARN_ONCE(length > sizeof(buf->val),
+ "bogus length %zu, clamping to safe value", length);
+   length = min(sizeof(buf->val), length);
+   buf->len = cpu_to_be16(length);
+}
+
+/*
+ * For use on buffers returned from ibm,get-system-parameter before
+ * returning them to callers. Ensures the encoded length of valid data
+ * cannot overrun buf->val[].
+ */
+static void papr_sysparm_buf_clamp_length(struct papr_sysparm_buf *buf)
+{
+   papr_sysparm_buf_set_length(buf, papr_sysparm_buf_get_length(buf));
+}
+
+/*
+ * Perform some basic diligence on the system parameter buffer before
+ * submitting it to RTAS.
+ */
+static bool papr_sysparm_buf_can_submit(const struct papr_sysparm_buf *buf)
+{
+   /*
+* Firmware ought to reject buffer lengths that exceed the
+* maximum specified in PAPR, but there's no reason for the
+* kernel to allow them either.
+*/
+   if (papr_sysparm_buf_get_length(buf) > sizeof(buf->val))
+   return false;
+
+   return true;
+}
+
 /**
  * papr_sysparm_get() - Retrieve the value of a PAPR system parameter.
  * @param: PAPR system parameter token as described in
@@ -63,6 +103,9 @@ int papr_sysparm_get(papr_sysparm_t param, struct 
papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
 
+   if (!papr_sysparm_buf_can_submit(buf))
+   return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
 
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));
@@ -77,6 +120,7 @@ int papr_sysparm_get(papr_sysparm_t param, struct 
papr_sysparm_buf *buf)
case 0:
ret = 0;
memcpy(buf, rtas_work_area_raw_buf(work_area), sizeof(*buf));
+   papr_sysparm_buf_clamp_length(buf);
break;
case -3: /* parameter not implemented */
ret = -EOPNOTSUPP;
@@ -115,6 +159,9 @@ int papr_sysparm_set(papr_sysparm_t param, const struct 
papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
 
+   if (!papr_sysparm_buf_can_submit(buf))
+   return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
 
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));

-- 
2.41.0



[PATCH v5 08/13] powerpc/rtas: Warn if per-function lock isn't held

2023-12-07 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

If the function descriptor has a populated lock member, then callers
are required to hold it across calls. Now that the firmware activation
sequence is appropriately guarded, we can warn when the requirement
isn't satisfied.

__do_enter_rtas_trace() gets reorganized a bit as a result of
performing the function descriptor lookup unconditionally now.

Reviewed-by: Aneesh Kumar K.V (IBM) 
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 72f6b5a402dd..7e793b503e29 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -671,28 +671,25 @@ static void __do_enter_rtas(struct rtas_args *args)
 
 static void __do_enter_rtas_trace(struct rtas_args *args)
 {
-   const char *name = NULL;
+   const struct rtas_function *func = 
rtas_token_to_function(be32_to_cpu(args->token));
 
-   if (args == &rtas_args)
-   lockdep_assert_held(&rtas_lock);
/*
-* If the tracepoints that consume the function name aren't
-* active, avoid the lookup.
+* If there is a per-function lock, it must be held by the
+* caller.
 */
-   if ((trace_rtas_input_enabled() || trace_rtas_output_enabled())) {
-   const s32 token = be32_to_cpu(args->token);
-   const struct rtas_function *func = 
rtas_token_to_function(token);
+   if (func->lock)
+   lockdep_assert_held(func->lock);
 
-   name = func->name;
-   }
+   if (args == &rtas_args)
+   lockdep_assert_held(&rtas_lock);
 
-   trace_rtas_input(args, name);
+   trace_rtas_input(args, func->name);
trace_rtas_ll_entry(args);
 
__do_enter_rtas(args);
 
trace_rtas_ll_exit(args);
-   trace_rtas_output(args, name);
+   trace_rtas_output(args, func->name);
 }
 
 static void do_enter_rtas(struct rtas_args *args)

-- 
2.41.0



[PATCH v5 04/13] powerpc/rtas: Add function return status constants

2023-12-07 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Not all of the generic RTAS function statuses specified in PAPR have
symbolic constants and descriptions in rtas.h. Fix this, providing a
little more background, slightly updating the existing wording, and
improving the formatting.

Reviewed-by: Aneesh Kumar K.V (IBM) 
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index a7110ed52e25..08d19e6904f7 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -201,12 +201,25 @@ typedef struct {
 /* Memory set aside for sys_rtas to use with calls that need a work area. */
 #define RTAS_USER_REGION_SIZE (64 * 1024)
 
-/* RTAS return status codes */
-#define RTAS_HARDWARE_ERROR-1/* Hardware Error */
-#define RTAS_BUSY  -2/* RTAS Busy */
-#define RTAS_INVALID_PARAMETER -3/* Invalid indicator/domain/sensor etc. */
-#define RTAS_EXTENDED_DELAY_MIN9900
-#define RTAS_EXTENDED_DELAY_MAX9905
+/*
+ * Common RTAS function return values, derived from the table "RTAS
+ * Status Word Values" in PAPR+ v2.13 7.2.8: "Return Codes". If a
+ * function can return a value in this table then generally it has the
+ * meaning listed here. More extended commentary in the documentation
+ * for rtas_call().
+ *
+ * RTAS functions may use negative and positive numbers not in this
+ * set for function-specific error and success conditions,
+ * respectively.
+ */
+#define RTAS_SUCCESS 0 /* Success. */
+#define RTAS_HARDWARE_ERROR -1 /* Hardware or other unspecified 
error. */
+#define RTAS_BUSY   -2 /* Retry immediately. */
+#define RTAS_INVALID_PARAMETER  -3 /* Invalid indicator/domain/sensor 
etc. */
+#define RTAS_UNEXPECTED_STATE_CHANGE-7 /* Seems limited to EEH and slot 
reset. */
+#define RTAS_EXTENDED_DELAY_MIN   9900 /* Retry after delaying for ~1ms. */
+#define RTAS_EXTENDED_DELAY_MAX   9905 /* Retry after delaying for ~100s. 
*/
+#define RTAS_ML_ISOLATION_ERROR  -9000 /* Multi-level isolation error. */
 
 /* statuses specific to ibm,suspend-me */
 #define RTAS_SUSPEND_ABORTED 9000 /* Suspension aborted */

-- 
2.41.0



[PATCH v5 06/13] powerpc/rtas: Facilitate high-level call sequences

2023-12-07 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

On RTAS platforms there is a general restriction that the OS must not
enter RTAS on more than one CPU at a time. This low-level
serialization requirement is satisfied by holding a spin
lock (rtas_lock) across most RTAS function invocations.

However, some pseries RTAS functions require multiple successive calls
to complete a logical operation. Beginning a new call sequence for such a
function may disrupt any other sequences of that function already in
progress. Safe and reliable use of these functions effectively
requires higher-level serialization beyond what is already done at the
level of RTAS entry and exit.

Where a sequence-based RTAS function is invoked only through
sys_rtas(), with no in-kernel users, there is no issue as far as the
kernel is concerned. User space is responsible for appropriately
serializing its call sequences. (Whether user space code actually
takes measures to prevent sequence interleaving is another matter.)
Examples of such functions currently include ibm,platform-dump and
ibm,get-vpd.

But where a sequence-based RTAS function has both user space and
in-kernel users, there is a hazard. Even if the in-kernel call sites
of such a function serialize their sequences correctly, a user of
sys_rtas() can invoke the same function at any time, potentially
disrupting a sequence in progress.

So in order to prevent disruption of kernel-based RTAS call sequences,
they must serialize not only with themselves but also with sys_rtas()
users, somehow. Preferably without adding more function-specific hacks
to sys_rtas(). This is a prerequisite for adding an in-kernel call
sequence of ibm,get-vpd, which is in a change to follow.

Note that it has never been feasible for the kernel to prevent
sys_rtas()-based sequences from being disrupted because control
returns to user space on every call. sys_rtas()-based users of these
functions have always been, and continue to be, responsible for
coordinating their call sequences with other users, even those which
may invoke the RTAS functions through less direct means than
sys_rtas(). This is an unavoidable consequence of exposing
sequence-based RTAS functions through sys_rtas().

* Add an optional mutex member to struct rtas_function.

* Statically define a mutex for each RTAS function with known call
  sequence serialization requirements, and assign its address to the
  .lock member of the corresponding function table entry, along with
  justifying commentary.

* In sys_rtas(), if the table entry for the RTAS function being
  called has a populated lock member, acquire it before taking
  rtas_lock and entering RTAS.

* Kernel-based RTAS call sequences are expected to access the
  appropriate mutex explicitly by name. For example, a user of the
  ibm,activate-firmware RTAS function would do:

int token = rtas_function_token(RTAS_FN_IBM_ACTIVATE_FIRMWARE);
int fwrc;

mutex_lock(&rtas_ibm_activate_firmware_lock);

do {
fwrc = rtas_call(token, 0, 1, NULL);
} while (rtas_busy_delay(fwrc));

mutex_unlock(&rtas_ibm_activate_firmware_lock);

There should be no perceivable change introduced here except that
concurrent callers of the same RTAS function via sys_rtas() may block
on a mutex instead of spinning on rtas_lock.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h |  3 ++
 arch/powerpc/kernel/rtas.c  | 83 +
 2 files changed, 86 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 08d19e6904f7..9bb2210c8d44 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -3,6 +3,7 @@
 #define _POWERPC_RTAS_H
 #ifdef __KERNEL__
 
+#include 
 #include 
 #include 
 #include 
@@ -512,6 +513,8 @@ extern char rtas_data_buf[RTAS_DATA_BUF_SIZE];
 /* RMO buffer reserved for user-space RTAS use */
 extern unsigned long rtas_rmo_buf;
 
+extern struct mutex rtas_ibm_get_vpd_lock;
+
 #define GLOBAL_INTERRUPT_QUEUE 9005
 
 /**
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index ca5bb0b994ac..4d28983e8b1d 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -70,14 +71,33 @@ struct rtas_filter {
  *ppc64le, and we want to keep it that way. It does
  *not make sense for this to be set when @filter
  *is NULL.
+ * @lock: Pointer to an optional dedicated per-function mutex. This
+ *should be set for functions that require multiple calls in
+ *sequence to complete a single operation, and such sequences
+ *will disrupt each other if allowed to interleave. Users of
+ *this function are required to hold the associated lock for
+ *the duration of the call sequence. Add an explanatory
+ *comment to the fun

[PATCH] powerpc/rtas_pci: rename and properly expose config access APIs

2023-11-27 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The rtas_read_config() and rtas_write_config() functions in
kernel/rtas_pci.c have external linkage and two users in arch/powerpc:
the rtas_pci code itself and the pseries platform's "enhanced error
handling" (EEH) support code.

The prototypes for these functions in asm/ppc-pci.h have until now
been guarded by CONFIG_EEH since the only external caller is the
pseries EEH code. However, this presumably has always generated
warnings when built with !CONFIG_EEH and -Wmissing-prototypes:

  arch/powerpc/kernel/rtas_pci.c:46:5: error: no previous prototype for
  function 'rtas_read_config' [-Werror,-Wmissing-prototypes]
 46 | int rtas_read_config(struct pci_dn *pdn, int where,
   int size, u32 *val)

  arch/powerpc/kernel/rtas_pci.c:98:5: error: no previous prototype for
  function 'rtas_write_config' [-Werror,-Wmissing-prototypes]
 98 | int rtas_write_config(struct pci_dn *pdn, int where,
int size, u32 val)

The introduction of commit c6345dfa6e3e ("Makefile.extrawarn: turn on
missing-prototypes globally") forces the issue.

The efika and chrp platform code have (static) functions with the same
names but different signatures. We may as well eliminate the potential
for conflicts and confusion by renaming the globally visible versions
as their prototypes get moved out of the CONFIG_EEH-guarded region;
their current names are too generic anyway. Since they operate on
objects of the type 'struct pci_dn *', give them the slightly more
verbose prefix "rtas_pci_dn_" and fix up all the call sites.

Fixes: c6345dfa6e3e ("Makefile.extrawarn: turn on missing-prototypes globally")
Reported-by: Linux Kernel Functional Testing 
Closes: 
https://lore.kernel.org/linuxppc-dev/ca+g9fyt0llxtjsz+hkf3fhm-kf0zqanrhus+zvzga3o+wt2...@mail.gmail.com/
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/ppc-pci.h   |  5 +++--
 arch/powerpc/kernel/rtas_pci.c   |  8 
 arch/powerpc/platforms/pseries/eeh_pseries.c | 18 +-
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-pci.h 
b/arch/powerpc/include/asm/ppc-pci.h
index d9fcff575027..ce2b1b5eebdd 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -35,6 +35,9 @@ extern void init_pci_config_tokens (void);
 extern unsigned long get_phb_buid (struct device_node *);
 extern int rtas_setup_phb(struct pci_controller *phb);
 
+int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val);
+int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val);
+
 #ifdef CONFIG_EEH
 
 void eeh_addr_cache_insert_dev(struct pci_dev *dev);
@@ -44,8 +47,6 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
 int eeh_pci_enable(struct eeh_pe *pe, int function);
 int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed);
 void eeh_save_bars(struct eeh_dev *edev);
-int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
-int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
 void eeh_pe_state_mark(struct eeh_pe *pe, int state);
 void eeh_pe_mark_isolated(struct eeh_pe *pe);
 void eeh_pe_state_clear(struct eeh_pe *pe, int state, bool include_passed);
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index e1fdc7473b72..fccf96e897f6 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -43,7 +43,7 @@ static inline int config_access_valid(struct pci_dn *dn, int 
where)
return 0;
 }
 
-int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
 {
int returnval = -1;
unsigned long buid, addr;
@@ -87,7 +87,7 @@ static int rtas_pci_read_config(struct pci_bus *bus,
pdn = pci_get_pdn_by_devfn(bus, devfn);
 
/* Validity of pdn is checked in here */
-   ret = rtas_read_config(pdn, where, size, val);
+   ret = rtas_pci_dn_read_config(pdn, where, size, val);
if (*val == EEH_IO_ERROR_VALUE(size) &&
eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
return PCIBIOS_DEVICE_NOT_FOUND;
@@ -95,7 +95,7 @@ static int rtas_pci_read_config(struct pci_bus *bus,
return ret;
 }
 
-int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val)
 {
unsigned long buid, addr;
int ret;
@@ -134,7 +134,7 @@ static int rtas_pci_write_config(struct pci_bus *bus,
pdn = pci_get_pdn_by_devfn(bus, devfn);
 
/* Validity of pdn is checked in here. */
-   return rtas_write_config(pdn, where, size, val);
+   return rtas_pci_dn_write_config(pdn, where, size, val);
 }
 
 static struct pci_ops rtas_pci_ops = {
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
b/arch/powerpc/platforms/pseries/eeh_p

[PATCH v4 04/13] powerpc/rtas: Factor out function descriptor lookup

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Move the function descriptor table lookup out of rtas_function_token()
into a separate routine for use in new code to follow. No functional
change.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 31 +++
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index f0051881348a..1fc0b3fffdd1 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -469,29 +469,36 @@ static struct rtas_function rtas_function_table[] 
__ro_after_init = {
 static DEFINE_RAW_SPINLOCK(rtas_lock);
 static struct rtas_args rtas_args;
 
-/**
- * rtas_function_token() - RTAS function token lookup.
- * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN.
- *
- * Context: Any context.
- * Return: the token value for the function if implemented by this platform,
- * otherwise RTAS_UNKNOWN_SERVICE.
- */
-s32 rtas_function_token(const rtas_fn_handle_t handle)
+static struct rtas_function *rtas_function_lookup(const rtas_fn_handle_t 
handle)
 {
const size_t index = handle.index;
const bool out_of_bounds = index >= ARRAY_SIZE(rtas_function_table);
 
if (WARN_ONCE(out_of_bounds, "invalid function index %zu", index))
-   return RTAS_UNKNOWN_SERVICE;
+   return NULL;
/*
 * Various drivers attempt token lookups on non-RTAS
 * platforms.
 */
if (!rtas.dev)
-   return RTAS_UNKNOWN_SERVICE;
+   return NULL;
+
+   return &rtas_function_table[index];
+}
+
+/**
+ * rtas_function_token() - RTAS function token lookup.
+ * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN.
+ *
+ * Context: Any context.
+ * Return: the token value for the function if implemented by this platform,
+ * otherwise RTAS_UNKNOWN_SERVICE.
+ */
+s32 rtas_function_token(const rtas_fn_handle_t handle)
+{
+   const struct rtas_function *func = rtas_function_lookup(handle);
 
-   return rtas_function_table[index].token;
+   return func ? func->token : RTAS_UNKNOWN_SERVICE;
 }
 EXPORT_SYMBOL_GPL(rtas_function_token);
 

-- 
2.41.0



[PATCH v4 02/13] powerpc/rtas: Fall back to linear search on failed token->function lookup

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Enabling any of the powerpc:rtas_* tracepoints at boot is likely to
result in an oops on RTAS platforms. For example, booting a QEMU
pseries model with 'trace_event=powerpc:rtas_input' in the command
line leads to:

  BUG: Kernel NULL pointer dereference on read at 0x0008
  Oops: Kernel access of bad area, sig: 7 [#1]
  NIP [c004231c] do_enter_rtas+0x1bc/0x460
  LR [c004231c] do_enter_rtas+0x1bc/0x460
  Call Trace:
do_enter_rtas+0x1bc/0x460 (unreliable)
rtas_call+0x22c/0x4a0
rtas_get_boot_time+0x80/0x14c
read_persistent_clock64+0x124/0x150
read_persistent_wall_and_boot_offset+0x28/0x58
timekeeping_init+0x70/0x348
start_kernel+0xa0c/0xc1c
start_here_common+0x1c/0x20

(This is preceded by a warning for the failed lookup in
rtas_token_to_function().)

This happens when __do_enter_rtas_trace() attempts a token to function
descriptor lookup before the xarray containing the mappings has been
set up.

Fall back to linear scan of the table if rtas_token_to_function_xarray
is empty.

Signed-off-by: Nathan Lynch 
Fixes: 24098f580e2b ("powerpc/rtas: add tracepoints around RTAS entry")
---
 arch/powerpc/kernel/rtas.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 1ad1869e2e96..f0051881348a 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -557,11 +557,21 @@ static const struct rtas_function 
*rtas_token_to_function(s32 token)
return NULL;
 
func = xa_load(&rtas_token_to_function_xarray, token);
+   if (func)
+   return func;
+   /*
+* Fall back to linear scan in case the reverse mapping hasn't
+* been initialized yet.
+*/
+   if (xa_empty(&rtas_token_to_function_xarray)) {
+   for_each_rtas_function(func) {
+   if (func->token == token)
+   return func;
+   }
+   }
 
-   if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token))
-   return NULL;
-
-   return func;
+   WARN_ONCE(true, "unexpected failed lookup for token %d", token);
+   return NULL;
 }
 
 /* This is here deliberately so it's only used in this file */

-- 
2.41.0



[PATCH v4 08/13] powerpc/uapi: Export papr-miscdev.h header

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Allocate one identifying code (the first column of the ioctl-number
table) for the collection of PAPR miscdev drivers to share.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/uapi/asm/papr-miscdev.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/include/uapi/asm/papr-miscdev.h 
b/arch/powerpc/include/uapi/asm/papr-miscdev.h
new file mode 100644
index ..49a2a270b7f3
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-miscdev.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_MISCDEV_H_
+#define _UAPI_PAPR_MISCDEV_H_
+
+enum {
+   PAPR_MISCDEV_IOC_ID = 0xb2,
+};
+
+#endif /* _UAPI_PAPR_MISCDEV_H_ */

-- 
2.41.0



[PATCH v4 06/13] powerpc/rtas: Serialize firmware activation sequences

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Use the function lock API to prevent interleaving call sequences of
the ibm,activate-firmware RTAS function, which typically requires
multiple calls to complete the update. While the spec does not
specifically prohibit interleaved sequences, there's almost certainly
no advantage to allowing them.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 52f2242d0c28..e38ba05ad613 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1753,10 +1753,14 @@ void rtas_activate_firmware(void)
return;
}
 
+   rtas_function_lock(RTAS_FN_IBM_ACTIVATE_FIRMWARE);
+
do {
fwrc = rtas_call(token, 0, 1, NULL);
} while (rtas_busy_delay(fwrc));
 
+   rtas_function_unlock(RTAS_FN_IBM_ACTIVATE_FIRMWARE);
+
if (fwrc)
pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
 }

-- 
2.41.0



[PATCH v4 09/13] powerpc/pseries: Add papr-vpd character driver for VPD retrieval

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

PowerVM LPARs may retrieve Vital Product Data (VPD) for system
components using the ibm,get-vpd RTAS function.

We can expose this to user space with a /dev/papr-vpd character
device, where the programming model is:

  struct papr_location_code plc = { .str = "", }; /* obtain all VPD */
  int devfd = open("/dev/papr-vpd", O_RDONLY);
  int vpdfd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &plc);
  size_t size = lseek(vpdfd, 0, SEEK_END);
  char *buf = malloc(size);
  pread(vpdfd, buf, size, 0);

When a file descriptor is obtained from ioctl(PAPR_VPD_CREATE_HANDLE),
the file contains the result of a complete ibm,get-vpd sequence. The
file contents are immutable from the POV of user space. To get a new
view of the VPD, the client must create a new handle.

This design choice insulates user space from most of the complexities
that ibm,get-vpd brings:

* ibm,get-vpd must be called more than once to obtain complete
  results.

* Only one ibm,get-vpd call sequence should be in progress at a time;
  interleaved sequences will disrupt each other. Callers must have a
  protocol for serializing their use of the function.

* A call sequence in progress may receive a "VPD changed, try again"
  status, requiring the client to abandon the sequence and start
  over.

The memory required for the VPD buffers seems acceptable, around 20KB
for all VPD on one of my systems. And the value of the
/rtas/ibm,vpd-size DT property (the estimated maximum size of VPD) is
consistently 300KB across various systems I've checked.

I've implemented support for this new ABI in the rtas_get_vpd()
function in librtas, which the vpdupdate command currently uses to
populate its VPD database. I've verified that an unmodified vpdupdate
binary generates an identical database when using a librtas.so that
prefers the new ABI.

Note that the driver needs to serialize its call sequences with legacy
sys_rtas(ibm,get-vpd) callers, so it exposes its internal lock for
sys_rtas.

Signed-off-by: Nathan Lynch 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   2 +
 arch/powerpc/include/uapi/asm/papr-vpd.h   |  22 +
 arch/powerpc/platforms/pseries/Makefile|   1 +
 arch/powerpc/platforms/pseries/papr-vpd.c  | 536 +
 4 files changed, 561 insertions(+)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 4ea5b837399a..a950545bf7cd 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -349,6 +349,8 @@ Code  Seq#Include File  
 Comments
  

 0xB1  00-1F  PPPoX
  

+0xB2  00 arch/powerpc/include/uapi/asm/papr-vpd.h
powerpc/pseries VPD API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/uapi/asm/papr-vpd.h 
b/arch/powerpc/include/uapi/asm/papr-vpd.h
new file mode 100644
index ..b62e4f897a70
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-vpd.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_VPD_H_
+#define _UAPI_PAPR_VPD_H_
+
+#include 
+#include 
+
+struct papr_location_code {
+   /*
+* PAPR+ 12.3.2.4 Converged Location Code Rules - Length
+* Restrictions. 79 characters plus nul.
+*/
+   char str[80];
+};
+
+/*
+ * ioctl for /dev/papr-vpd. Returns a VPD handle fd corresponding to
+ * the location code.
+ */
+#define PAPR_VPD_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 0, struct 
papr_location_code)
+
+#endif /* _UAPI_PAPR_VPD_H_ */
diff --git a/arch/powerpc/platforms/pseries/Makefile 
b/arch/powerpc/platforms/pseries/Makefile
index 1476c5e4433c..f936962a2946 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -4,6 +4,7 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
 
 obj-y  := lpar.o hvCall.o nvram.o reconfig.o \
   of_helpers.o rtas-work-area.o papr-sysparm.o \
+  papr-vpd.o \
   setup.o iommu.o event_sources.o ras.o \
   firmware.o power.o dlpar.o mobility.o rng.o \
   pci.o pci_dlpar.o eeh_pseries.o msi.o \
diff --git a/arch/powerpc/platforms/pseries/papr-vpd.c 
b/arch/powerpc/platforms/pseries/papr-vpd.c
new file mode 100644
i

[PATCH v4 11/13] powerpc/pseries/papr-sysparm: Expose character device to user space

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Until now the papr_sysparm APIs have been kernel-internal. But user
space needs access to PAPR system parameters too. The only method
available to user space today to get or set system parameters is using
sys_rtas() and /dev/mem to pass RTAS-addressable buffers between user
space and firmware. This is incompatible with lockdown and should be
deprecated.

So provide an alternative ABI to user space in the form of a
/dev/papr-sysparm character device with just two ioctl commands (get
and set). The data payloads involved are small enough to fit in the
ioctl argument buffer, making the code relatively simple.

Exposing the system parameters through sysfs has been considered but
it would be too awkward:

* The kernel currently does not have to contain an exhaustive list of
  defined system parameters. This is a convenient property to maintain
  because we don't have to update the kernel whenever a new parameter
  is added to PAPR. Exporting a named attribute in sysfs for each
  parameter would negate this.

* Some system parameters are text-based and some are not.

* Retrieval of at least one system parameter requires input data,
  which a simple read-oriented interface can't support.

Signed-off-by: Nathan Lynch 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   2 +
 arch/powerpc/include/asm/papr-sysparm.h|  17 ++-
 arch/powerpc/include/uapi/asm/papr-sysparm.h   |  58 
 arch/powerpc/platforms/pseries/papr-sysparm.c  | 154 -
 4 files changed, 223 insertions(+), 8 deletions(-)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index a950545bf7cd..d8b6cb1a3636 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -351,6 +351,8 @@ Code  Seq#Include File  
 Comments
  

 0xB2  00 arch/powerpc/include/uapi/asm/papr-vpd.h
powerpc/pseries VPD API
  

+0xB2  01-02  arch/powerpc/include/uapi/asm/papr-sysparm.h
powerpc/pseries system parameter API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/asm/papr-sysparm.h 
b/arch/powerpc/include/asm/papr-sysparm.h
index f5fdbd8ae9db..0dbbff59101d 100644
--- a/arch/powerpc/include/asm/papr-sysparm.h
+++ b/arch/powerpc/include/asm/papr-sysparm.h
@@ -2,8 +2,10 @@
 #ifndef _ASM_POWERPC_PAPR_SYSPARM_H
 #define _ASM_POWERPC_PAPR_SYSPARM_H
 
+#include 
+
 typedef struct {
-   const u32 token;
+   u32 token;
 } papr_sysparm_t;
 
 #define mk_papr_sysparm(x_) ((papr_sysparm_t){ .token = x_, })
@@ -20,11 +22,14 @@ typedef struct {
 #define PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRSmk_papr_sysparm(50)
 #define PAPR_SYSPARM_LPAR_NAME mk_papr_sysparm(55)
 
-enum {
-   PAPR_SYSPARM_MAX_INPUT  = 1024,
-   PAPR_SYSPARM_MAX_OUTPUT = 4000,
-};
-
+/**
+ * struct papr_sysparm_buf - RTAS work area layout for system parameter 
functions.
+ *
+ * This is the memory layout of the buffers passed to/from
+ * ibm,get-system-parameter and ibm,set-system-parameter. It is
+ * distinct from the papr_sysparm_io_block structure that is passed
+ * between user space and the kernel.
+ */
 struct papr_sysparm_buf {
__be16 len;
char val[PAPR_SYSPARM_MAX_OUTPUT];
diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h 
b/arch/powerpc/include/uapi/asm/papr-sysparm.h
new file mode 100644
index ..9f9a0f267ea5
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_SYSPARM_H_
+#define _UAPI_PAPR_SYSPARM_H_
+
+#include 
+#include 
+#include 
+
+enum {
+   PAPR_SYSPARM_MAX_INPUT  = 1024,
+   PAPR_SYSPARM_MAX_OUTPUT = 4000,
+};
+
+struct papr_sysparm_io_block {
+   __u32 parameter;
+   __u16 length;
+   char data[PAPR_SYSPARM_MAX_OUTPUT];
+};
+
+/**
+ * PAPR_SYSPARM_IOC_GET - Retrieve the value of a PAPR system parameter.
+ *
+ * Uses _IOWR because of one corner case: Retrieving the value of the
+ * "OS Service Entitlement Status" parameter (60) requires the caller
+ * to supply input data (a date string) in the buffer passed to
+ * firmware. So the @length and @data of the incoming
+ * papr_sysparm_io_block are always used to initialize the work area
+ * supplied to ibm,get-system-parameter. No other parameters are known
+ * to pa

[PATCH v4 12/13] powerpc/selftests: Add test for papr-vpd

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Add selftests for /dev/papr-vpd, exercising the common expected use
cases:

* Retrieve all VPD by passing an empty location code.
* Retrieve the "system VPD" by passing a location code derived from DT
  root node properties, as done by the vpdupdate command.

The tests also verify that certain intended properties of the driver
hold:

* Passing an unterminated location code to PAPR_VPD_CREATE_HANDLE gets
  EINVAL.
* Passing a NULL location code pointer to PAPR_VPD_CREATE_HANDLE gets
  EFAULT.
* Closing the device node without first issuing a
  PAPR_VPD_CREATE_HANDLE command to it succeeds.
* Releasing a handle without first consuming any data from it
  succeeds.
* Re-reading the contents of a handle returns the same data as the
  first time.

Some minimal validation of the returned data is performed.

The tests are skipped on systems where the papr-vpd driver does not
initialize, making this useful only on PowerVM LPARs at this point.

Signed-off-by: Nathan Lynch 
---
 tools/testing/selftests/powerpc/Makefile   |   1 +
 .../testing/selftests/powerpc/papr_vpd/.gitignore  |   1 +
 tools/testing/selftests/powerpc/papr_vpd/Makefile  |  12 +
 .../testing/selftests/powerpc/papr_vpd/papr_vpd.c  | 352 +
 4 files changed, 366 insertions(+)

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 7ea42fa02eab..05fc68d446c2 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -32,6 +32,7 @@ SUB_DIRS = alignment  \
   vphn \
   math \
   papr_attributes  \
+  papr_vpd \
   ptrace   \
   security \
   mce
diff --git a/tools/testing/selftests/powerpc/papr_vpd/.gitignore 
b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
new file mode 100644
index ..49285031a656
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
@@ -0,0 +1 @@
+/papr_vpd
diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile 
b/tools/testing/selftests/powerpc/papr_vpd/Makefile
new file mode 100644
index ..06b719703bfd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+   $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_vpd
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_vpd: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c 
b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
new file mode 100644
index ..98cbb9109ee6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-vpd"
+
+static int dev_papr_vpd_open_close(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int dev_papr_vpd_get_handle_all(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+   struct papr_location_code lc = { .str = "", };
+   off_t size;
+   int fd;
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   errno = 0;
+   fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+   FAIL_IF(errno != 0);
+   FAIL_IF(fd < 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   size = lseek(fd, 0, SEEK_END);
+   FAIL_IF(size <= 0);
+
+   void *buf = malloc((size_t)size);
+   FAIL_IF(!buf);
+
+   ssize_t consumed = pread(fd, buf, size, 0);
+   FAIL_IF(consumed != size);
+
+   /* Ensure EOF */
+   FAIL_IF(read(fd, buf, size) != 0);
+   FAIL_IF(close(fd));
+
+   /* Verify that the buffer looks like VPD */
+   static const char needle[] = "System VPD";
+   FAIL_IF(!memmem(buf, size, needle, strlen(needle)));
+
+   return 0;
+}
+
+static int dev_papr_vpd_get_handle_byte_at_a_time(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+   struct papr_location_code lc = { .str = "", };
+   int fd;
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   errno = 0;
+   fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+   FAIL_IF(errno != 0);
+   FAIL_IF(fd < 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   size_t consumed = 0;
+   while (1) {
+   ssize_t res;
+   char c;
+
+   errno = 0;
+   res = read(fd, &c, sizeof(c));
+   FAIL_IF(res > sizeof(c))

[PATCH v4 13/13] powerpc/selftests: Add test for papr-sysparm

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Consistently testing system parameter access is a bit difficult by
nature -- the set of parameters available depends on the model and
system configuration, and updating a parameter should be considered a
destructive operation reserved for the admin.

So we validate some of the error paths and retrieve the SPLPAR
characteristics string, but not much else.

Signed-off-by: Nathan Lynch 
---
 tools/testing/selftests/powerpc/Makefile   |   1 +
 .../selftests/powerpc/papr_sysparm/.gitignore  |   1 +
 .../selftests/powerpc/papr_sysparm/Makefile|  12 ++
 .../selftests/powerpc/papr_sysparm/papr_sysparm.c  | 164 +
 4 files changed, 178 insertions(+)

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 05fc68d446c2..c376151982c4 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -33,6 +33,7 @@ SUB_DIRS = alignment  \
   math \
   papr_attributes  \
   papr_vpd \
+  papr_sysparm \
   ptrace   \
   security \
   mce
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/.gitignore 
b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
new file mode 100644
index ..f2a69bf59d40
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
@@ -0,0 +1 @@
+/papr_sysparm
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile 
b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
new file mode 100644
index ..7f79e437634a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+   $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_sysparm
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_sysparm: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c 
b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
new file mode 100644
index ..fc25c03e8bc7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-sysparm"
+
+static int open_close(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int get_splpar(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = 20, // SPLPAR characteristics
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != 0);
+   FAIL_IF(sp.length == 0);
+   FAIL_IF(sp.length > sizeof(sp.data));
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int get_bad_parameter(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = UINT32_MAX, // there are only ~60 specified 
parameters
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != -1);
+   FAIL_IF(errno != EOPNOTSUPP);
+
+   // Ensure the buffer is unchanged
+   FAIL_IF(sp.length != 0);
+   for (size_t i = 0; i < ARRAY_SIZE(sp.data); ++i)
+   FAIL_IF(sp.data[i] != 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int check_efault_common(unsigned long cmd)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, cmd, NULL) != -1);
+   FAIL_IF(errno != EFAULT);
+
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int check_efault_get(void)
+{
+   return check_efault_common(PAPR_SYSPARM_IOC_GET);
+}
+
+static int check_efault_set(void)
+{
+   return check_efault_common(PAPR_SYSPARM_IOC_SET);
+}
+
+static int set_hmc0(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = 0, // HMC0, not a settable parameter
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, PAPR_

[PATCH v4 01/13] powerpc/rtas: Add for_each_rtas_function() iterator

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Add a convenience macro for iterating over every element of the
internal function table and convert the one site that can use it. An
additional user of the macro is anticipated in changes to follow.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index eddc031c4b95..1ad1869e2e96 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -454,6 +454,11 @@ static struct rtas_function rtas_function_table[] 
__ro_after_init = {
},
 };
 
+#define for_each_rtas_function(funcp)   \
+   for (funcp = &rtas_function_table[0];   \
+funcp < &rtas_function_table[ARRAY_SIZE(rtas_function_table)]; \
+++funcp)
+
 /*
  * Nearly all RTAS calls need to be serialized. All uses of the
  * default rtas_args block must hold rtas_lock.
@@ -525,10 +530,10 @@ static DEFINE_XARRAY(rtas_token_to_function_xarray);
 
 static int __init rtas_token_to_function_xarray_init(void)
 {
+   const struct rtas_function *func;
int err = 0;
 
-   for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) {
-   const struct rtas_function *func = &rtas_function_table[i];
+   for_each_rtas_function(func) {
const s32 token = func->token;
 
if (token == RTAS_UNKNOWN_SERVICE)

-- 
2.41.0



[PATCH v4 00/13] powerpc/pseries: New character devices for system parameters and VPD

2023-11-17 Thread Nathan Lynch via B4 Relay
Add character devices that expose PAPR-specific system parameters and
VPD to user space.

The problem: important platform features are enabled on Linux VMs
through the powerpc-specific rtas() syscall in combination with
writeable mappings of /dev/mem. In typical usage, this is encapsulated
behind APIs provided by the librtas library. This paradigm is
incompatible with lockdown, which prohibits /dev/mem access. It also
is too low-level in many cases: a single logical operation may require
multiple sys_rtas() calls in succession to complete. This carries the
risk that a process may exit while leaving an operation unfinished. It
also means that callers must coordinate their use of the syscall for
functions that cannot tolerate multiple concurrent clients, such as
ibm,get-vpd.

The solution presented here is to add a pair of small pseries-specific
"drivers," one for VPD and one for system parameters. The new drivers
expose these facilities to user space in ways that are compatible with
lockdown and require no coordination between their clients.

Since the ibm,get-vpd call sequence performed by the papr-vpd driver
must be serialized against all other uses of the function, the series
begins by adding some new APIs to the core RTAS support code for this
purpose.

Both drivers could potentially support poll() methods to notify
clients of changes to parameters or VPD that happen due to partition
migration and other events. But that should be safe to leave for
later, assuming there's any interest.

I have made changes to librtas to prefer the new interfaces and
verified that existing clients work correctly with the new code. A
draft PR for that work is here:

https://github.com/ibm-power-utilities/librtas/pull/36

The user-space ABI has not changed since v1 of this series.

I expect to propose at least one more small driver in this style for
platform dump retrieval in a separate submission in the future.

---
Changes in v4:
- Fix latent issue in rtas_token_to_function() which causes boot-time
  crashes.
- More small preparatory changes: a function table iterator and
  additional symbolic constants for RTAS function return values.
- Use symbolic constants for ibm,get-vpd statuses in papr-vpd.c.
- Add commentary to papr_vpd_ioc_create_handle() explaining choice to
  retrieve all VPD at file handle creation time instead of deferring
  it to the read handler.
- Rebase on current powerpc/next.
- Link to v3: 
https://lore.kernel.org/r/20231025-papr-sys_rtas-vs-lockdown-v3-0-5eb04559e...@linux.ibm.com

Changes in v3:
- Add new rtas_function_lock()/unlock() APIs and convert existing code
  to use them.
- Convert papr-vpd to use rtas_function_lock()/unlock() instead of
  having sys_rtas() obtain a driver-private mutex.
- Rebase on current powerpc/next.
- Link to v2: 
https://lore.kernel.org/r/20231013-papr-sys_rtas-vs-lockdown-v2-0-ead01ce01...@linux.ibm.com

Changes in v2:
- Fix unused-but-set variable warning in papr-sysparm code.
- Rebase on powerpc/next branch.
- Link to v1: 
https://lore.kernel.org/r/20231006-papr-sys_rtas-vs-lockdown-v1-0-3a36bfb66...@linux.ibm.com

Changes in v1 vs initial RFC:
- Add papr-sysparm driver and tests.
- Add a papr-miscdev.h uapi header.
- Prevent sys_rtas() from interfering with papr-vpd call sequences.
- Handle -4 ("VPD changed") status in papr-vpd.
- Include string_helpers.h in papr-vpd.c, per Michal Suchánek
- Link to RFC: 
https://lore.kernel.org/r/20230822-papr-sys_rtas-vs-lockdown-v1-0-932623cf3...@linux.ibm.com

---
Nathan Lynch (13):
  powerpc/rtas: Add for_each_rtas_function() iterator
  powerpc/rtas: Fall back to linear search on failed token->function lookup
  powerpc/rtas: Add function return status constants
  powerpc/rtas: Factor out function descriptor lookup
  powerpc/rtas: Facilitate high-level call sequences
  powerpc/rtas: Serialize firmware activation sequences
  powerpc/rtas: Warn if per-function lock isn't held
  powerpc/uapi: Export papr-miscdev.h header
  powerpc/pseries: Add papr-vpd character driver for VPD retrieval
  powerpc/pseries/papr-sysparm: Validate buffer object lengths
  powerpc/pseries/papr-sysparm: Expose character device to user space
  powerpc/selftests: Add test for papr-vpd
  powerpc/selftests: Add test for papr-sysparm

 Documentation/userspace-api/ioctl/ioctl-number.rst |   4 +
 arch/powerpc/include/asm/papr-sysparm.h|  17 +-
 arch/powerpc/include/asm/rtas.h|  27 +-
 arch/powerpc/include/uapi/asm/papr-miscdev.h   |   9 +
 arch/powerpc/include/uapi/asm/papr-sysparm.h   |  58 +++
 arch/powerpc/include/uapi/asm/papr-vpd.h   |  22 +
 arch/powerpc/kernel/rtas.c | 182 +--
 arch/powerpc/platforms/pseries/Makefile|   1 +
 arch/powerpc/platforms/pseries/papr-sysparm.c  | 201 +++-
 arch/powerpc/platforms/pseries/papr-vpd.c  | 536 +
 tools/testing/selftests/powerpc/Makefile  

[PATCH v4 05/13] powerpc/rtas: Facilitate high-level call sequences

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

On RTAS platforms there is a general restriction that the OS must not
enter RTAS on more than one CPU at a time. This low-level
serialization requirement is satisfied by holding a spin
lock (rtas_lock) across most RTAS function invocations.

However, some pseries RTAS functions require multiple successive calls
to complete a logical operation. Beginning a new call sequence for such a
function may disrupt any other sequences of that function already in
progress. Safe and reliable use of these functions effectively
requires higher-level serialization beyond what is already done at the
level of RTAS entry and exit.

Where a sequence-based RTAS function is invoked only through
sys_rtas(), with no in-kernel users, there is no issue as far as the
kernel is concerned. User space is responsible for appropriately
serializing its call sequences. (Whether user space code actually
takes measures to prevent sequence interleaving is another matter.)
Examples of such functions currently include ibm,platform-dump and
ibm,get-vpd.

But where a sequence-based RTAS function has both user space and
in-kernel users, there is a hazard. Even if the in-kernel call sites
of such a function serialize their sequences correctly, a user of
sys_rtas() can invoke the same function at any time, potentially
disrupting a sequence in progress.

So in order to prevent disruption of kernel-based RTAS call sequences,
they must serialize not only with themselves but also with sys_rtas()
users, somehow. Preferably without adding global locks or adding more
function-specific hacks to sys_rtas(). This is a prerequisite for
adding an in-kernel call sequence of ibm,get-vpd, which is in a change
to follow.

Note that it has never been feasible for the kernel to prevent
sys_rtas()-based sequences from being disrupted because control
returns to user space on every call. sys_rtas()-based users of these
functions have always been, and continue to be, responsible for
coordinating their call sequences with other users, even those which
may invoke the RTAS functions through less direct means than
sys_rtas(). This is an unavoidable consequence of exposing
sequence-based RTAS functions through sys_rtas().

* Add new rtas_function_lock() and rtas_function_unlock() APIs for use
  with sequence-based RTAS functions.

* Add an optional per-function mutex to struct rtas_function. When this
  member is set, kernel-internal callers of the RTAS function are
  required to guard their call sequences with rtas_function_lock() and
  rtas_function_unlock(). This requirement will be enforced in a later
  change, after all affected call sites are updated.

* Populate the lock members of function table entries where
  serialization of call sequences is known to be necessary, along with
  justifying commentary.

* In sys_rtas(), acquire the per-function mutex when it is present.

There should be no perceivable change introduced here except that
concurrent callers of the same RTAS function via sys_rtas() may block
on a mutex instead of spinning on rtas_lock. Changes to follow will
add rtas_function_lock()/unlock() pairs to kernel-based call
sequences.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h |   2 +
 arch/powerpc/kernel/rtas.c  | 101 +++-
 2 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index b73010583a8d..9a20caba6858 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -421,6 +421,8 @@ static inline bool rtas_function_implemented(const 
rtas_fn_handle_t handle)
 {
return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE;
 }
+void rtas_function_lock(rtas_fn_handle_t handle);
+void rtas_function_unlock(rtas_fn_handle_t handle);
 extern int rtas_token(const char *service);
 extern int rtas_service_present(const char *service);
 extern int rtas_call(int token, int, int, int *, ...);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 1fc0b3fffdd1..52f2242d0c28 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -70,14 +71,34 @@ struct rtas_filter {
  *ppc64le, and we want to keep it that way. It does
  *not make sense for this to be set when @filter
  *is NULL.
+ * @lock: Pointer to an optional dedicated per-function mutex. This
+ *should be set for functions that require multiple calls in
+ *sequence to complete a single operation, and such sequences
+ *will disrupt each other if allowed to interleave. Users of
+ *this function are required to hold the associated lock for
+ *the duration of the call sequence. Add an explanatory
+ *comment to the function table entry if setting this member.
  */
 stru

[PATCH v4 03/13] powerpc/rtas: Add function return status constants

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Not all of the generic RTAS function statuses specified in PAPR have
symbolic constants and descriptions in rtas.h. Fix this, providing a
little more background, slightly updating the existing wording, and
improving the formatting.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index c697c3c74694..b73010583a8d 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -201,12 +201,25 @@ typedef struct {
 /* Memory set aside for sys_rtas to use with calls that need a work area. */
 #define RTAS_USER_REGION_SIZE (64 * 1024)
 
-/* RTAS return status codes */
-#define RTAS_HARDWARE_ERROR-1/* Hardware Error */
-#define RTAS_BUSY  -2/* RTAS Busy */
-#define RTAS_INVALID_PARAMETER -3/* Invalid indicator/domain/sensor etc. */
-#define RTAS_EXTENDED_DELAY_MIN9900
-#define RTAS_EXTENDED_DELAY_MAX9905
+/*
+ * Common RTAS function return values, derived from the table "RTAS
+ * Status Word Values" in PAPR+ 7.2.8: "Return Codes". If a function
+ * can return a value in this table then generally it has the meaning
+ * listed here. More extended commentary in the documentation for
+ * rtas_call().
+ *
+ * RTAS functions may use negative and positive numbers not in this
+ * set for function-specific error and success conditions,
+ * respectively.
+ */
+#define RTAS_SUCCESS 0 /* Success. */
+#define RTAS_HARDWARE_ERROR -1 /* Hardware or other unspecified 
error. */
+#define RTAS_BUSY   -2 /* Retry immediately. */
+#define RTAS_INVALID_PARAMETER  -3 /* Invalid indicator/domain/sensor 
etc. */
+#define RTAS_UNEXPECTED_STATE_CHANGE-7 /* Seems limited to EEH and slot 
reset. */
+#define RTAS_EXTENDED_DELAY_MIN   9900 /* Retry after delaying for ~1ms. */
+#define RTAS_EXTENDED_DELAY_MAX   9905 /* Retry after delaying for ~100s. 
*/
+#define RTAS_ML_ISOLATION_ERROR  -9000 /* Multi-level isolation error. */
 
 /* statuses specific to ibm,suspend-me */
 #define RTAS_SUSPEND_ABORTED 9000 /* Suspension aborted */

-- 
2.41.0



[PATCH v4 10/13] powerpc/pseries/papr-sysparm: Validate buffer object lengths

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The ability to get and set system parameters will be exposed to user
space, so let's get a little more strict about malformed
papr_sysparm_buf objects.

* Create accessors for the length field of struct papr_sysparm_buf.
  The length is always stored in MSB order and this is better than
  spreading the necessary conversions all over.

* Reject attempts to submit invalid buffers to RTAS.

* Warn if RTAS returns a buffer with an invalid length, clamping the
  returned length to a safe value that won't overrun the buffer.

These are meant as precautionary measures to mitigate both firmware
and kernel bugs in this area, should they arise, but I am not aware of
any.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/papr-sysparm.c | 47 +++
 1 file changed, 47 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c 
b/arch/powerpc/platforms/pseries/papr-sysparm.c
index fedc61599e6c..a1e7aeac7416 100644
--- a/arch/powerpc/platforms/pseries/papr-sysparm.c
+++ b/arch/powerpc/platforms/pseries/papr-sysparm.c
@@ -23,6 +23,46 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf)
kfree(buf);
 }
 
+static size_t papr_sysparm_buf_get_length(const struct papr_sysparm_buf *buf)
+{
+   return be16_to_cpu(buf->len);
+}
+
+static void papr_sysparm_buf_set_length(struct papr_sysparm_buf *buf, size_t 
length)
+{
+   WARN_ONCE(length > sizeof(buf->val),
+ "bogus length %zu, clamping to safe value", length);
+   length = min(sizeof(buf->val), length);
+   buf->len = cpu_to_be16(length);
+}
+
+/*
+ * For use on buffers returned from ibm,get-system-parameter before
+ * returning them to callers. Ensures the encoded length of valid data
+ * cannot overrun buf->val[].
+ */
+static void papr_sysparm_buf_clamp_length(struct papr_sysparm_buf *buf)
+{
+   papr_sysparm_buf_set_length(buf, papr_sysparm_buf_get_length(buf));
+}
+
+/*
+ * Perform some basic diligence on the system parameter buffer before
+ * submitting it to RTAS.
+ */
+static bool papr_sysparm_buf_can_submit(const struct papr_sysparm_buf *buf)
+{
+   /*
+* Firmware ought to reject buffer lengths that exceed the
+* maximum specified in PAPR, but there's no reason for the
+* kernel to allow them either.
+*/
+   if (papr_sysparm_buf_get_length(buf) > sizeof(buf->val))
+   return false;
+
+   return true;
+}
+
 /**
  * papr_sysparm_get() - Retrieve the value of a PAPR system parameter.
  * @param: PAPR system parameter token as described in
@@ -63,6 +103,9 @@ int papr_sysparm_get(papr_sysparm_t param, struct 
papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
 
+   if (!papr_sysparm_buf_can_submit(buf))
+   return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
 
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));
@@ -77,6 +120,7 @@ int papr_sysparm_get(papr_sysparm_t param, struct 
papr_sysparm_buf *buf)
case 0:
ret = 0;
memcpy(buf, rtas_work_area_raw_buf(work_area), sizeof(*buf));
+   papr_sysparm_buf_clamp_length(buf);
break;
case -3: /* parameter not implemented */
ret = -EOPNOTSUPP;
@@ -115,6 +159,9 @@ int papr_sysparm_set(papr_sysparm_t param, const struct 
papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
 
+   if (!papr_sysparm_buf_can_submit(buf))
+   return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
 
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));

-- 
2.41.0



[PATCH v4 07/13] powerpc/rtas: Warn if per-function lock isn't held

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

If the function descriptor has a populated lock member, then callers
are required to hold it across calls. Now that the firmware activation
sequence is appropriately guarded, we can warn when the requirement
isn't satisfied.

__do_enter_rtas_trace() gets reorganized a bit as a result of
performing the function descriptor lookup unconditionally now.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index e38ba05ad613..deb6289fcf9c 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -685,28 +685,25 @@ static void __do_enter_rtas(struct rtas_args *args)
 
 static void __do_enter_rtas_trace(struct rtas_args *args)
 {
-   const char *name = NULL;
+   struct rtas_function *func = 
rtas_token_to_function(be32_to_cpu(args->token));
 
-   if (args == &rtas_args)
-   lockdep_assert_held(&rtas_lock);
/*
-* If the tracepoints that consume the function name aren't
-* active, avoid the lookup.
+* If there is a per-function lock, it must be held by the
+* caller.
 */
-   if ((trace_rtas_input_enabled() || trace_rtas_output_enabled())) {
-   const s32 token = be32_to_cpu(args->token);
-   const struct rtas_function *func = 
rtas_token_to_function(token);
+   if (func->lock)
+   WARN_ON(!mutex_is_locked(func->lock));
 
-   name = func->name;
-   }
+   if (args == &rtas_args)
+   lockdep_assert_held(&rtas_lock);
 
-   trace_rtas_input(args, name);
+   trace_rtas_input(args, func->name);
trace_rtas_ll_entry(args);
 
__do_enter_rtas(args);
 
trace_rtas_ll_exit(args);
-   trace_rtas_output(args, name);
+   trace_rtas_output(args, func->name);
 }
 
 static void do_enter_rtas(struct rtas_args *args)

-- 
2.41.0



[PATCH v2 2/5] powerpc/rtas: Remove unused rtas_service_present()

2023-11-14 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

rtas_service_present() has no more users.

rtas_function_implemented() is now the appropriate API for determining
whether a given RTAS function is available to call.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h | 1 -
 arch/powerpc/kernel/rtas.c  | 5 -
 2 files changed, 6 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 3bf7f0a4b07e..c6568a647cd0 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -409,7 +409,6 @@ static inline bool rtas_function_implemented(const 
rtas_fn_handle_t handle)
return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE;
 }
 extern int rtas_token(const char *service);
-extern int rtas_service_present(const char *service);
 extern int rtas_call(int token, int, int, int *, ...);
 void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
int nret, ...);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index eddc031c4b95..b5b340a91157 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -900,11 +900,6 @@ int rtas_token(const char *service)
 }
 EXPORT_SYMBOL_GPL(rtas_token);
 
-int rtas_service_present(const char *service)
-{
-   return rtas_token(service) != RTAS_UNKNOWN_SERVICE;
-}
-
 #ifdef CONFIG_RTAS_ERROR_LOGGING
 
 static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX;

-- 
2.41.0



[PATCH v2 5/5] powerpc/rtas: Remove 'extern' from function declarations in rtas.h

2023-11-14 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

This header occasionally gains new function declarations without the
leading extern in accordance with current style rules. Leaving the
legacy externs in place is making the header more difficult to read
over time because of the inconsistency. Remove them, fixing up
checkpatch issues with unnamed parameters (rtas_call) and bracket
alignment (early_init_dt_scan_rtas) that get raised as a result of
touching the code.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h | 53 -
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 1bed6be8ada3..a7110ed52e25 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -408,42 +408,41 @@ static inline bool rtas_function_implemented(const 
rtas_fn_handle_t handle)
 {
return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE;
 }
-extern int rtas_token(const char *service);
-extern int rtas_call(int token, int, int, int *, ...);
+int rtas_token(const char *service);
+int rtas_call(int token, int nargs, int nret, int *outputs, ...);
 void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
int nret, ...);
-extern void __noreturn rtas_restart(char *cmd);
-extern void rtas_power_off(void);
-extern void __noreturn rtas_halt(void);
-extern void rtas_os_term(char *str);
+void __noreturn rtas_restart(char *cmd);
+void rtas_power_off(void);
+void __noreturn rtas_halt(void);
+void rtas_os_term(char *str);
 void rtas_activate_firmware(void);
-extern int rtas_get_sensor(int sensor, int index, int *state);
-extern int rtas_get_sensor_fast(int sensor, int index, int *state);
-extern int rtas_get_power_level(int powerdomain, int *level);
-extern int rtas_set_power_level(int powerdomain, int level, int *setlevel);
-extern bool rtas_indicator_present(int token, int *maxindex);
-extern int rtas_set_indicator(int indicator, int index, int new_value);
-extern int rtas_set_indicator_fast(int indicator, int index, int new_value);
-extern void rtas_progress(char *s, unsigned short hex);
+int rtas_get_sensor(int sensor, int index, int *state);
+int rtas_get_sensor_fast(int sensor, int index, int *state);
+int rtas_get_power_level(int powerdomain, int *level);
+int rtas_set_power_level(int powerdomain, int level, int *setlevel);
+bool rtas_indicator_present(int token, int *maxindex);
+int rtas_set_indicator(int indicator, int index, int new_value);
+int rtas_set_indicator_fast(int indicator, int index, int new_value);
+void rtas_progress(char *s, unsigned short hex);
 int rtas_ibm_suspend_me(int *fw_status);
 int rtas_error_rc(int rtas_rc);
 
 struct rtc_time;
-extern time64_t rtas_get_boot_time(void);
-extern void rtas_get_rtc_time(struct rtc_time *rtc_time);
-extern int rtas_set_rtc_time(struct rtc_time *rtc_time);
+time64_t rtas_get_boot_time(void);
+void rtas_get_rtc_time(struct rtc_time *rtc_time);
+int rtas_set_rtc_time(struct rtc_time *rtc_time);
 
-extern unsigned int rtas_busy_delay_time(int status);
+unsigned int rtas_busy_delay_time(int status);
 bool rtas_busy_delay(int status);
 
-extern int early_init_dt_scan_rtas(unsigned long node,
-   const char *uname, int depth, void *data);
+int early_init_dt_scan_rtas(unsigned long node, const char *uname, int depth, 
void *data);
 
-extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
+void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
 
 #ifdef CONFIG_PPC_PSERIES
 extern time64_t last_rtas_event;
-extern int clobbering_unread_rtas_event(void);
+int clobbering_unread_rtas_event(void);
 int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);
 #else
 static inline int clobbering_unread_rtas_event(void) { return 0; }
@@ -454,7 +453,7 @@ static inline int rtas_syscall_dispatch_ibm_suspend_me(u64 
handle)
 #endif
 
 #ifdef CONFIG_PPC_RTAS_DAEMON
-extern void rtas_cancel_event_scan(void);
+void rtas_cancel_event_scan(void);
 #else
 static inline void rtas_cancel_event_scan(void) { }
 #endif
@@ -479,7 +478,7 @@ static inline void rtas_cancel_event_scan(void) { }
  *  for all rtas calls that require an error buffer argument.
  *  This includes 'check-exception' and 'rtas-last-error'.
  */
-extern int rtas_get_error_log_max(void);
+int rtas_get_error_log_max(void);
 
 /* Event Scan Parameters */
 #define EVENT_SCAN_ALL_EVENTS  0xf000
@@ -518,8 +517,8 @@ static inline u32 rtas_config_addr(int busno, int devfn, 
int reg)
(devfn << 8) | (reg & 0xff);
 }
 
-extern void rtas_give_timebase(void);
-extern void rtas_take_timebase(void);
+void rtas_give_timebase(void);
+void rtas_take_timebase(void);
 
 #ifdef CONFIG_PPC_RTAS
 static inline int page_is_rtas_user_buf(unsigned long pfn)
@@ -532,7 +531,7 @@ static inline int page_is_rtas_user_buf(unsigned long pfn)
 
 /* Not the best place to put pSeries_coalesce_init, will be fixed when we
  * move so

[PATCH v2 3/5] powerpc/rtas: Move post_mobility_fixup() declaration to pseries

2023-11-14 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

This is a pseries-specific function declaration that doesn't belong in
rtas.h. Move it to the pseries platform code and adjust
pseries/suspend.c accordingly.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h  | 1 -
 arch/powerpc/platforms/pseries/pseries.h | 1 +
 arch/powerpc/platforms/pseries/suspend.c | 1 +
 3 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index c6568a647cd0..2365668fc13e 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -444,7 +444,6 @@ extern void pSeries_log_error(char *buf, unsigned int 
err_type, int fatal);
 #ifdef CONFIG_PPC_PSERIES
 extern time64_t last_rtas_event;
 extern int clobbering_unread_rtas_event(void);
-extern void post_mobility_fixup(void);
 int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);
 #else
 static inline int clobbering_unread_rtas_event(void) { return 0; }
diff --git a/arch/powerpc/platforms/pseries/pseries.h 
b/arch/powerpc/platforms/pseries/pseries.h
index 8376f03f932a..1c2d736f600d 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -55,6 +55,7 @@ extern int dlpar_detach_node(struct device_node *);
 extern int dlpar_acquire_drc(u32 drc_index);
 extern int dlpar_release_drc(u32 drc_index);
 extern int dlpar_unisolate_drc(u32 drc_index);
+void post_mobility_fixup(void);
 
 void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog);
 int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog);
diff --git a/arch/powerpc/platforms/pseries/suspend.c 
b/arch/powerpc/platforms/pseries/suspend.c
index 5c43435472cc..382003dfdb9a 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include "pseries.h"
 
 static struct device suspend_dev;
 

-- 
2.41.0



[PATCH v2 0/5] powerpc/rtas: Trivial and coding style fixes

2023-11-14 Thread Nathan Lynch via B4 Relay
* Make minor coding style adjustments for readability.
* Remove rtas_service_present() and an old call_rtas() declaration.
* Move a pseries-specific function prototype to pseries code.

---
Changes in v2:
- Address various checkpatch issues missed in v1.
- Drop kernel-doc fixes already applied.
- Cc recent contributors in this area.
- Link to v1: https://lore.kernel.org/r/20231106-rtas-trivial-v1-0-61847655c...@linux.ibm.com

---
Nathan Lynch (5):
  powerpc/rtas: Drop declaration of undefined call_rtas() function
  powerpc/rtas: Remove unused rtas_service_present()
  powerpc/rtas: Move post_mobility_fixup() declaration to pseries
  powerpc/rtas: Remove trailing space
  powerpc/rtas: Remove 'extern' from function declarations in rtas.h

 arch/powerpc/include/asm/rtas.h  | 63 +++-
 arch/powerpc/kernel/rtas.c   | 23 +---
 arch/powerpc/platforms/pseries/pseries.h |  1 +
 arch/powerpc/platforms/pseries/suspend.c |  1 +
 4 files changed, 40 insertions(+), 48 deletions(-)
---
base-commit: 707df298cbde200b939c70be2577b20775fe3345
change-id: 20231025-rtas-trivial-2c22ce853f46

Best regards,
-- 
Nathan Lynch 



[PATCH v2 1/5] powerpc/rtas: Drop declaration of undefined call_rtas() function

2023-11-14 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The call_rtas() function has never been a part of arch/powerpc, and
its implementation was removed from arch/ppc by commit 0a26b1364f14
("ppc: Remove CHRP, POWER3 and POWER4 support from arch/ppc").

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index c697c3c74694..3bf7f0a4b07e 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -542,8 +542,6 @@ static inline void pSeries_coalesce_init(void) { }
 static inline void rtas_initialize(void) { }
 #endif
 
-extern int call_rtas(const char *, int, int, unsigned long *, ...);
-
 #ifdef CONFIG_HV_PERF_CTRS
 void read_24x7_sys_info(void);
 #else

-- 
2.41.0



[PATCH v2 4/5] powerpc/rtas: Remove trailing space

2023-11-14 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Use scripts/cleanfile to remove instances of trailing space in the
core RTAS code and header.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h |  6 +++---
 arch/powerpc/kernel/rtas.c  | 18 +-
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 2365668fc13e..1bed6be8ada3 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -268,7 +268,7 @@ typedef struct {
 #define RTAS_TYPE_DEALLOC  0xE3
 #define RTAS_TYPE_DUMP 0xE4
 #define RTAS_TYPE_HOTPLUG  0xE5
-/* I don't add PowerMGM events right now, this is a different topic */ 
+/* I don't add PowerMGM events right now, this is a different topic */
 #define RTAS_TYPE_PMGM_POWER_SW_ON 0x60
 #define RTAS_TYPE_PMGM_POWER_SW_OFF0x61
 #define RTAS_TYPE_PMGM_LID_OPEN0x62
@@ -461,7 +461,7 @@ static inline void rtas_cancel_event_scan(void) { }
 
 /* Error types logged.  */
 #define ERR_FLAG_ALREADY_LOGGED0x0
-#define ERR_FLAG_BOOT  0x1 /* log was pulled from NVRAM on boot */
+#define ERR_FLAG_BOOT  0x1 /* log was pulled from NVRAM on boot */
 #define ERR_TYPE_RTAS_LOG  0x2 /* from rtas event-scan */
 #define ERR_TYPE_KERNEL_PANIC  0x4 /* from die()/panic() */
 #define ERR_TYPE_KERNEL_PANIC_GZ 0x8   /* ditto, compressed */
@@ -471,7 +471,7 @@ static inline void rtas_cancel_event_scan(void) { }
(ERR_TYPE_RTAS_LOG | ERR_TYPE_KERNEL_PANIC | ERR_TYPE_KERNEL_PANIC_GZ)
 
 #define RTAS_DEBUG KERN_DEBUG "RTAS: "
- 
+
 #define RTAS_ERROR_LOG_MAX 2048
 
 /*
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b5b340a91157..c49f078382a9 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -670,7 +670,7 @@ static void call_rtas_display_status_delay(char c)
static int pending_newline = 0;  /* did last write end with unprinted 
newline? */
static int width = 16;
 
-   if (c == '\n') {
+   if (c == '\n') {
while (width-- > 0)
call_rtas_display_status(' ');
width = 16;
@@ -680,7 +680,7 @@ static void call_rtas_display_status_delay(char c)
if (pending_newline) {
call_rtas_display_status('\r');
call_rtas_display_status('\n');
-   } 
+   }
pending_newline = 0;
if (width--) {
call_rtas_display_status(c);
@@ -820,7 +820,7 @@ void rtas_progress(char *s, unsigned short hex)
else
rtas_call(display_character, 1, 1, NULL, '\r');
}
- 
+
if (row_width)
width = row_width[current_line];
else
@@ -840,9 +840,9 @@ void rtas_progress(char *s, unsigned short hex)
spin_unlock(&progress_lock);
return;
}
- 
+
/* RTAS wants CR-LF, not just LF */
- 
+
if (*os == '\n') {
rtas_call(display_character, 1, 1, NULL, '\r');
rtas_call(display_character, 1, 1, NULL, '\n');
@@ -852,7 +852,7 @@ void rtas_progress(char *s, unsigned short hex)
 */
rtas_call(display_character, 1, 1, NULL, *os);
}
- 
+
if (row_width)
width = row_width[current_line];
else
@@ -861,15 +861,15 @@ void rtas_progress(char *s, unsigned short hex)
width--;
rtas_call(display_character, 1, 1, NULL, *os);
}
- 
+
os++;
- 
+
/* if we overwrite the screen length */
if (width <= 0)
while ((*os != 0) && (*os != '\n') && (*os != '\r'))
os++;
}
- 
+
spin_unlock(&progress_lock);
 }
 EXPORT_SYMBOL_GPL(rtas_progress);  /* needed by rtas_flash module 
*/

-- 
2.41.0



[PATCH 0/3] powerpc/pseries/memhp: Fix minor bugs and improve error logging

2023-11-14 Thread Nathan Lynch via B4 Relay
This includes a fix for an array bounds read overrun that can be
triggered when debug messages are enabled.

---
Nathan Lynch (3):
  powerpc/pseries/memhp: Fix access beyond end of drmem array
  powerpc/pseries/memhp: Remove unbalanced dlpar_release_drc() call
  powerpc/pseries/memhp: Log more error conditions in add path

 arch/powerpc/platforms/pseries/hotplug-memory.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)
---
base-commit: 707df298cbde200b939c70be2577b20775fe3345
change-id: 20231114-pseries-memhp-fixes-185988dde2a0

Best regards,
-- 
Nathan Lynch 



[PATCH 3/3] powerpc/pseries/memhp: Log more error conditions in add path

2023-11-14 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

When an add operation for multiple LMBs fails, there is currently
little indication from the kernel of what went wrong. Be a little more
verbose about error conditions in the add paths.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/hotplug-memory.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index ba883c1b9f6d..4896920514f3 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -208,8 +208,10 @@ static int dlpar_change_lmb_state(struct drmem_lmb *lmb, 
bool online)
int rc;
 
mem_block = lmb_to_memblock(lmb);
-   if (!mem_block)
+   if (!mem_block) {
+   pr_err("Failed memory block lookup for LMB 0x%x\n", 
lmb->drc_index);
return -EINVAL;
+   }
 
if (online && mem_block->dev.offline)
rc = device_online(&mem_block->dev);
@@ -575,6 +577,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
 
rc = update_lmb_associativity_index(lmb);
if (rc) {
+   pr_err("Failed to configure LMB 0x%x\n", lmb->drc_index);
return rc;
}
 
@@ -588,12 +591,14 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
/* Add the memory */
rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_MEMMAP_ON_MEMORY);
if (rc) {
+   pr_err("Failed to add LMB 0x%x to node %u", lmb->drc_index, 
nid);
invalidate_lmb_associativity_index(lmb);
return rc;
}
 
rc = dlpar_online_lmb(lmb);
if (rc) {
+   pr_err("Failed to online LMB 0x%x on node %u\n", 
lmb->drc_index, nid);
__remove_memory(lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
} else {

-- 
2.41.0



[PATCH 1/3] powerpc/pseries/memhp: Fix access beyond end of drmem array

2023-11-14 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

dlpar_memory_remove_by_index() may access beyond the bounds of the
drmem lmb array when the LMB lookup fails to match an entry with the
given DRC index. When the search fails, the cursor is left pointing to
&drmem_info->lmbs[drmem_info->n_lmbs], which is one element past the
last valid entry in the array. The debug message at the end of the
function then dereferences this pointer:

pr_debug("Failed to hot-remove memory at %llx\n",
 lmb->base_addr);

This was found by inspection and confirmed with KASAN:

  pseries-hotplug-mem: Attempting to hot-remove LMB, drc index 1234
  ==
  BUG: KASAN: slab-out-of-bounds in dlpar_memory+0x298/0x1658
  Read of size 8 at addr c00364e97fd0 by task bash/949

  dump_stack_lvl+0xa4/0xfc (unreliable)
  print_report+0x214/0x63c
  kasan_report+0x140/0x2e0
  __asan_load8+0xa8/0xe0
  dlpar_memory+0x298/0x1658
  handle_dlpar_errorlog+0x130/0x1d0
  dlpar_store+0x18c/0x3e0
  kobj_attr_store+0x68/0xa0
  sysfs_kf_write+0xc4/0x110
  kernfs_fop_write_iter+0x26c/0x390
  vfs_write+0x2d4/0x4e0
  ksys_write+0xac/0x1a0
  system_call_exception+0x268/0x530
  system_call_vectored_common+0x15c/0x2ec

  Allocated by task 1:
   kasan_save_stack+0x48/0x80
   kasan_set_track+0x34/0x50
   kasan_save_alloc_info+0x34/0x50
   __kasan_kmalloc+0xd0/0x120
   __kmalloc+0x8c/0x320
   kmalloc_array.constprop.0+0x48/0x5c
   drmem_init+0x2a0/0x41c
   do_one_initcall+0xe0/0x5c0
   kernel_init_freeable+0x4ec/0x5a0
   kernel_init+0x30/0x1e0
   ret_from_kernel_user_thread+0x14/0x1c

  The buggy address belongs to the object at c00364e8
   which belongs to the cache kmalloc-128k of size 131072
  The buggy address is located 0 bytes to the right of
   allocated 98256-byte region [c00364e8, c00364e97fd0)

  ==
  pseries-hotplug-mem: Failed to hot-remove memory at 0

Log failed lookups with a separate message and dereference the
cursor only when it points to a valid entry.

Signed-off-by: Nathan Lynch 
Fixes: 51925fb3c5c9 ("powerpc/pseries: Implement memory hotplug remove in the kernel")
---
 arch/powerpc/platforms/pseries/hotplug-memory.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index a43bfb01720a..6f2eebae7bee 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -436,14 +436,15 @@ static int dlpar_memory_remove_by_index(u32 drc_index)
}
}
 
-   if (!lmb_found)
+   if (!lmb_found) {
+   pr_debug("Failed to look up LMB for drc index %x\n", drc_index);
rc = -EINVAL;
-
-   if (rc)
+   } else if (rc) {
pr_debug("Failed to hot-remove memory at %llx\n",
 lmb->base_addr);
-   else
+   } else {
pr_debug("Memory at %llx was hot-removed\n", lmb->base_addr);
+   }
 
return rc;
 }

-- 
2.41.0



[PATCH 2/3] powerpc/pseries/memhp: Remove unbalanced dlpar_release_drc() call

2023-11-14 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Callers of dlpar_add_lmb() are responsible for first acquiring the DRC
and releasing it if dlpar_add_lmb() fails.

However, dlpar_add_lmb() performs a dlpar_release_drc() in one error
branch.  There is no corresponding dlpar_acquire_drc() in the
function, nor is there any stated justification. None of the other
error paths in dlpar_add_lmb() release the DRC.

This is a potential source of redundant attempts to release DRCs,
which is likely benign, but is confusing and inconsistent. Remove it.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/hotplug-memory.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 6f2eebae7bee..ba883c1b9f6d 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -575,7 +575,6 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
 
rc = update_lmb_associativity_index(lmb);
if (rc) {
-   dlpar_release_drc(lmb->drc_index);
return rc;
}
 

-- 
2.41.0



[PATCH 5/7] powerpc/rtas: Move post_mobility_fixup() declaration to pseries

2023-11-06 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

This is a pseries-specific function declaration that doesn't belong in
rtas.h. Move it to the pseries platform code and adjust
pseries/suspend.c accordingly.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h  | 1 -
 arch/powerpc/platforms/pseries/pseries.h | 1 +
 arch/powerpc/platforms/pseries/suspend.c | 1 +
 3 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index c6568a647cd0..2365668fc13e 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -444,7 +444,6 @@ extern void pSeries_log_error(char *buf, unsigned int 
err_type, int fatal);
 #ifdef CONFIG_PPC_PSERIES
 extern time64_t last_rtas_event;
 extern int clobbering_unread_rtas_event(void);
-extern void post_mobility_fixup(void);
 int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);
 #else
 static inline int clobbering_unread_rtas_event(void) { return 0; }
diff --git a/arch/powerpc/platforms/pseries/pseries.h 
b/arch/powerpc/platforms/pseries/pseries.h
index 8376f03f932a..bba4ad192b0f 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -55,6 +55,7 @@ extern int dlpar_detach_node(struct device_node *);
 extern int dlpar_acquire_drc(u32 drc_index);
 extern int dlpar_release_drc(u32 drc_index);
 extern int dlpar_unisolate_drc(u32 drc_index);
+extern void post_mobility_fixup(void);
 
 void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog);
 int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog);
diff --git a/arch/powerpc/platforms/pseries/suspend.c 
b/arch/powerpc/platforms/pseries/suspend.c
index 5c43435472cc..382003dfdb9a 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include "pseries.h"
 
 static struct device suspend_dev;
 

-- 
2.41.0



[PATCH 3/7] powerpc/rtas: Drop declaration of undefined call_rtas() function

2023-11-06 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The call_rtas() function has never been a part of arch/powerpc, and
its implementation was removed from arch/ppc by 0a26b1364f14 ("ppc:
Remove CHRP, POWER3 and POWER4 support from arch/ppc").

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index c697c3c74694..3bf7f0a4b07e 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -542,8 +542,6 @@ static inline void pSeries_coalesce_init(void) { }
 static inline void rtas_initialize(void) { }
 #endif
 
-extern int call_rtas(const char *, int, int, unsigned long *, ...);
-
 #ifdef CONFIG_HV_PERF_CTRS
 void read_24x7_sys_info(void);
 #else

-- 
2.41.0



[PATCH 0/7] powerpc/rtas: Trivial, coding style, and kernel-doc fixes

2023-11-06 Thread Nathan Lynch via B4 Relay
* Fix recently introduced kernel-doc warnings.
* Make minor coding style adjustments for readability.
* Remove rtas_service_present() and an old call_rtas() declaration.
* Move a pseries-specific function prototype to pseries code.

---
Nathan Lynch (7):
  powerpc/pseries/rtas-work-area: Fix rtas_work_area_reserve_arena() 
kernel-doc
  powerpc/rtas: Fix ppc_rtas_rmo_buf_show() kernel-doc
  powerpc/rtas: Drop declaration of undefined call_rtas() function
  powerpc/rtas: Remove unused rtas_service_present()
  powerpc/rtas: Move post_mobility_fixup() declaration to pseries
  powerpc/rtas: Remove trailing space
  powerpc/rtas: Remove 'extern' from function declarations in rtas.h

 arch/powerpc/include/asm/rtas.h | 62 -
 arch/powerpc/kernel/rtas-proc.c |  2 +
 arch/powerpc/kernel/rtas.c  | 23 -
 arch/powerpc/platforms/pseries/pseries.h|  1 +
 arch/powerpc/platforms/pseries/rtas-work-area.c |  1 +
 arch/powerpc/platforms/pseries/suspend.c|  1 +
 6 files changed, 43 insertions(+), 47 deletions(-)
---
base-commit: 303d77a6e1707498f09c9d8ee91b1dc07ca315a5
change-id: 20231025-rtas-trivial-2c22ce853f46

Best regards,
-- 
Nathan Lynch 



[PATCH 6/7] powerpc/rtas: Remove trailing space

2023-11-06 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Use scripts/cleanfile to remove instances of trailing space in the
core RTAS code and header.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h |  6 +++---
 arch/powerpc/kernel/rtas.c  | 18 +-
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 2365668fc13e..1bed6be8ada3 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -268,7 +268,7 @@ typedef struct {
 #define RTAS_TYPE_DEALLOC  0xE3
 #define RTAS_TYPE_DUMP 0xE4
 #define RTAS_TYPE_HOTPLUG  0xE5
-/* I don't add PowerMGM events right now, this is a different topic */ 
+/* I don't add PowerMGM events right now, this is a different topic */
 #define RTAS_TYPE_PMGM_POWER_SW_ON 0x60
 #define RTAS_TYPE_PMGM_POWER_SW_OFF0x61
 #define RTAS_TYPE_PMGM_LID_OPEN0x62
@@ -461,7 +461,7 @@ static inline void rtas_cancel_event_scan(void) { }
 
 /* Error types logged.  */
 #define ERR_FLAG_ALREADY_LOGGED0x0
-#define ERR_FLAG_BOOT  0x1 /* log was pulled from NVRAM on boot */
+#define ERR_FLAG_BOOT  0x1 /* log was pulled from NVRAM on boot */
 #define ERR_TYPE_RTAS_LOG  0x2 /* from rtas event-scan */
 #define ERR_TYPE_KERNEL_PANIC  0x4 /* from die()/panic() */
 #define ERR_TYPE_KERNEL_PANIC_GZ 0x8   /* ditto, compressed */
@@ -471,7 +471,7 @@ static inline void rtas_cancel_event_scan(void) { }
(ERR_TYPE_RTAS_LOG | ERR_TYPE_KERNEL_PANIC | ERR_TYPE_KERNEL_PANIC_GZ)
 
 #define RTAS_DEBUG KERN_DEBUG "RTAS: "
- 
+
 #define RTAS_ERROR_LOG_MAX 2048
 
 /*
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b5b340a91157..c49f078382a9 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -670,7 +670,7 @@ static void call_rtas_display_status_delay(char c)
static int pending_newline = 0;  /* did last write end with unprinted 
newline? */
static int width = 16;
 
-   if (c == '\n') {
+   if (c == '\n') {
while (width-- > 0)
call_rtas_display_status(' ');
width = 16;
@@ -680,7 +680,7 @@ static void call_rtas_display_status_delay(char c)
if (pending_newline) {
call_rtas_display_status('\r');
call_rtas_display_status('\n');
-   } 
+   }
pending_newline = 0;
if (width--) {
call_rtas_display_status(c);
@@ -820,7 +820,7 @@ void rtas_progress(char *s, unsigned short hex)
else
rtas_call(display_character, 1, 1, NULL, '\r');
}
- 
+
if (row_width)
width = row_width[current_line];
else
@@ -840,9 +840,9 @@ void rtas_progress(char *s, unsigned short hex)
spin_unlock(&progress_lock);
return;
}
- 
+
/* RTAS wants CR-LF, not just LF */
- 
+
if (*os == '\n') {
rtas_call(display_character, 1, 1, NULL, '\r');
rtas_call(display_character, 1, 1, NULL, '\n');
@@ -852,7 +852,7 @@ void rtas_progress(char *s, unsigned short hex)
 */
rtas_call(display_character, 1, 1, NULL, *os);
}
- 
+
if (row_width)
width = row_width[current_line];
else
@@ -861,15 +861,15 @@ void rtas_progress(char *s, unsigned short hex)
width--;
rtas_call(display_character, 1, 1, NULL, *os);
}
- 
+
os++;
- 
+
/* if we overwrite the screen length */
if (width <= 0)
while ((*os != 0) && (*os != '\n') && (*os != '\r'))
os++;
}
- 
+
spin_unlock(&progress_lock);
 }
 EXPORT_SYMBOL_GPL(rtas_progress);  /* needed by rtas_flash module 
*/

-- 
2.41.0



[PATCH 4/7] powerpc/rtas: Remove unused rtas_service_present()

2023-11-06 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

rtas_service_present() has no more users.

rtas_function_implemented() is now the appropriate API for determining
whether a given RTAS function is available to call.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h | 1 -
 arch/powerpc/kernel/rtas.c  | 5 -
 2 files changed, 6 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 3bf7f0a4b07e..c6568a647cd0 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -409,7 +409,6 @@ static inline bool rtas_function_implemented(const 
rtas_fn_handle_t handle)
return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE;
 }
 extern int rtas_token(const char *service);
-extern int rtas_service_present(const char *service);
 extern int rtas_call(int token, int, int, int *, ...);
 void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
int nret, ...);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index eddc031c4b95..b5b340a91157 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -900,11 +900,6 @@ int rtas_token(const char *service)
 }
 EXPORT_SYMBOL_GPL(rtas_token);
 
-int rtas_service_present(const char *service)
-{
-   return rtas_token(service) != RTAS_UNKNOWN_SERVICE;
-}
-
 #ifdef CONFIG_RTAS_ERROR_LOGGING
 
 static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX;

-- 
2.41.0



[PATCH 1/7] powerpc/pseries/rtas-work-area: Fix rtas_work_area_reserve_arena() kernel-doc

2023-11-06 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

From a W=1 build:

>> arch/powerpc/platforms/pseries/rtas-work-area.c:189: warning: Function parameter or member 'limit' not described in 'rtas_work_area_reserve_arena'

Add the missing description of the limit parameter.

Signed-off-by: Nathan Lynch 
Reported-by: kernel test robot 
Closes: https://lore.kernel.org/oe-kbuild-all/202309131221.bm1pg96n-...@intel.com/
---
 arch/powerpc/platforms/pseries/rtas-work-area.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/pseries/rtas-work-area.c 
b/arch/powerpc/platforms/pseries/rtas-work-area.c
index b37d52f40360..7fe34bee84d8 100644
--- a/arch/powerpc/platforms/pseries/rtas-work-area.c
+++ b/arch/powerpc/platforms/pseries/rtas-work-area.c
@@ -184,6 +184,7 @@ machine_arch_initcall(pseries, 
rtas_work_area_allocator_init);
 
 /**
  * rtas_work_area_reserve_arena() - Reserve memory suitable for RTAS work 
areas.
+ * @limit: Upper limit for memblock allocation.
  */
 void __init rtas_work_area_reserve_arena(const phys_addr_t limit)
 {

-- 
2.41.0



[PATCH 7/7] powerpc/rtas: Remove 'extern' from function declarations in rtas.h

2023-11-06 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

This header occasionally gains new function declarations without the
leading extern in accordance with current style rules. Leaving the
legacy externs in place is making the header more difficult to read
over time because of the inconsistency. Remove them.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h | 52 -
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 1bed6be8ada3..6e32ad62e936 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -408,42 +408,42 @@ static inline bool rtas_function_implemented(const 
rtas_fn_handle_t handle)
 {
return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE;
 }
-extern int rtas_token(const char *service);
-extern int rtas_call(int token, int, int, int *, ...);
+int rtas_token(const char *service);
+int rtas_call(int token, int, int, int *, ...);
 void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
int nret, ...);
-extern void __noreturn rtas_restart(char *cmd);
-extern void rtas_power_off(void);
-extern void __noreturn rtas_halt(void);
-extern void rtas_os_term(char *str);
+void __noreturn rtas_restart(char *cmd);
+void rtas_power_off(void);
+void __noreturn rtas_halt(void);
+void rtas_os_term(char *str);
 void rtas_activate_firmware(void);
-extern int rtas_get_sensor(int sensor, int index, int *state);
-extern int rtas_get_sensor_fast(int sensor, int index, int *state);
-extern int rtas_get_power_level(int powerdomain, int *level);
-extern int rtas_set_power_level(int powerdomain, int level, int *setlevel);
-extern bool rtas_indicator_present(int token, int *maxindex);
-extern int rtas_set_indicator(int indicator, int index, int new_value);
-extern int rtas_set_indicator_fast(int indicator, int index, int new_value);
-extern void rtas_progress(char *s, unsigned short hex);
+int rtas_get_sensor(int sensor, int index, int *state);
+int rtas_get_sensor_fast(int sensor, int index, int *state);
+int rtas_get_power_level(int powerdomain, int *level);
+int rtas_set_power_level(int powerdomain, int level, int *setlevel);
+bool rtas_indicator_present(int token, int *maxindex);
+int rtas_set_indicator(int indicator, int index, int new_value);
+int rtas_set_indicator_fast(int indicator, int index, int new_value);
+void rtas_progress(char *s, unsigned short hex);
 int rtas_ibm_suspend_me(int *fw_status);
 int rtas_error_rc(int rtas_rc);
 
 struct rtc_time;
-extern time64_t rtas_get_boot_time(void);
-extern void rtas_get_rtc_time(struct rtc_time *rtc_time);
-extern int rtas_set_rtc_time(struct rtc_time *rtc_time);
+time64_t rtas_get_boot_time(void);
+void rtas_get_rtc_time(struct rtc_time *rtc_time);
+int rtas_set_rtc_time(struct rtc_time *rtc_time);
 
-extern unsigned int rtas_busy_delay_time(int status);
+unsigned int rtas_busy_delay_time(int status);
 bool rtas_busy_delay(int status);
 
-extern int early_init_dt_scan_rtas(unsigned long node,
+int early_init_dt_scan_rtas(unsigned long node,
const char *uname, int depth, void *data);
 
-extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
+void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
 
 #ifdef CONFIG_PPC_PSERIES
 extern time64_t last_rtas_event;
-extern int clobbering_unread_rtas_event(void);
+int clobbering_unread_rtas_event(void);
 int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);
 #else
 static inline int clobbering_unread_rtas_event(void) { return 0; }
@@ -454,7 +454,7 @@ static inline int rtas_syscall_dispatch_ibm_suspend_me(u64 
handle)
 #endif
 
 #ifdef CONFIG_PPC_RTAS_DAEMON
-extern void rtas_cancel_event_scan(void);
+void rtas_cancel_event_scan(void);
 #else
 static inline void rtas_cancel_event_scan(void) { }
 #endif
@@ -479,7 +479,7 @@ static inline void rtas_cancel_event_scan(void) { }
  *  for all rtas calls that require an error buffer argument.
  *  This includes 'check-exception' and 'rtas-last-error'.
  */
-extern int rtas_get_error_log_max(void);
+int rtas_get_error_log_max(void);
 
 /* Event Scan Parameters */
 #define EVENT_SCAN_ALL_EVENTS  0xf000
@@ -518,8 +518,8 @@ static inline u32 rtas_config_addr(int busno, int devfn, 
int reg)
(devfn << 8) | (reg & 0xff);
 }
 
-extern void rtas_give_timebase(void);
-extern void rtas_take_timebase(void);
+void rtas_give_timebase(void);
+void rtas_take_timebase(void);
 
 #ifdef CONFIG_PPC_RTAS
 static inline int page_is_rtas_user_buf(unsigned long pfn)
@@ -532,7 +532,7 @@ static inline int page_is_rtas_user_buf(unsigned long pfn)
 
 /* Not the best place to put pSeries_coalesce_init, will be fixed when we
  * move some of the rtas suspend-me stuff to pseries */
-extern void pSeries_coalesce_init(void);
+void pSeries_coalesce_init(void);
 void rtas_initialize(void);
 #else
 static inline int page_is_rtas_user_buf(unsigned long pfn) { ret

[PATCH 2/7] powerpc/rtas: Fix ppc_rtas_rmo_buf_show() kernel-doc

2023-11-06 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

From a W=1 build:

>> arch/powerpc/kernel/rtas-proc.c:771: warning: Function parameter or member 'm' not described in 'ppc_rtas_rmo_buf_show'
>> arch/powerpc/kernel/rtas-proc.c:771: warning: Function parameter or member 'v' not described in 'ppc_rtas_rmo_buf_show'

Add the missing parameter descriptions.

Signed-off-by: Nathan Lynch 
Reported-by: kernel test robot 
Closes: https://lore.kernel.org/oe-kbuild-all/202309211645.1lvwmbv4-...@intel.com/
---
 arch/powerpc/kernel/rtas-proc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 9454b8395b6a..f38df72e64b8 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -752,6 +752,8 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, 
void *v)
 
 /**
  * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space.
+ * @m: seq_file output target.
+ * @v: Unused.
  *
  * Base + size description of a range of RTAS-addressable memory set
  * aside for user space to use as work area(s) for certain RTAS

-- 
2.41.0



[PATCH v3 10/10] powerpc/selftests: Add test for papr-sysparm

2023-10-25 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Consistently testing system parameter access is a bit difficult by
nature -- the set of parameters available depends on the model and
system configuration, and updating a parameter should be considered a
destructive operation reserved for the admin.

So we validate some of the error paths and retrieve the SPLPAR
characteristics string, but not much else.

Signed-off-by: Nathan Lynch 
---
 tools/testing/selftests/powerpc/Makefile   |   1 +
 .../selftests/powerpc/papr_sysparm/.gitignore  |   1 +
 .../selftests/powerpc/papr_sysparm/Makefile|  12 ++
 .../selftests/powerpc/papr_sysparm/papr_sysparm.c  | 164 +
 4 files changed, 178 insertions(+)

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 05fc68d446c2..c376151982c4 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -33,6 +33,7 @@ SUB_DIRS = alignment  \
   math \
   papr_attributes  \
   papr_vpd \
+  papr_sysparm \
   ptrace   \
   security \
   mce
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/.gitignore 
b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
new file mode 100644
index ..f2a69bf59d40
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
@@ -0,0 +1 @@
+/papr_sysparm
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile 
b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
new file mode 100644
index ..7f79e437634a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+   $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_sysparm
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_sysparm: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c 
b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
new file mode 100644
index ..fc25c03e8bc7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-sysparm"
+
+static int open_close(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int get_splpar(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = 20, // SPLPAR characteristics
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != 0);
+   FAIL_IF(sp.length == 0);
+   FAIL_IF(sp.length > sizeof(sp.data));
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int get_bad_parameter(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = UINT32_MAX, // there are only ~60 specified 
parameters
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != -1);
+   FAIL_IF(errno != EOPNOTSUPP);
+
+   // Ensure the buffer is unchanged
+   FAIL_IF(sp.length != 0);
+   for (size_t i = 0; i < ARRAY_SIZE(sp.data); ++i)
+   FAIL_IF(sp.data[i] != 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int check_efault_common(unsigned long cmd)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, cmd, NULL) != -1);
+   FAIL_IF(errno != EFAULT);
+
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int check_efault_get(void)
+{
+   return check_efault_common(PAPR_SYSPARM_IOC_GET);
+}
+
+static int check_efault_set(void)
+{
+   return check_efault_common(PAPR_SYSPARM_IOC_SET);
+}
+
+static int set_hmc0(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = 0, // HMC0, not a settable parameter
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, PAPR_

[PATCH v3 04/10] powerpc/rtas: Warn if per-function lock isn't held

2023-10-25 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

If the function descriptor has a populated lock member, then callers
are required to hold it across calls. Now that the firmware activation
sequence is appropriately guarded, we can warn when the requirement
isn't satisfied.

__do_enter_rtas_trace() gets reorganized a bit as a result of
performing the function descriptor lookup unconditionally now.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 864d140e7247..0a0ae81fac99 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -670,28 +670,25 @@ static void __do_enter_rtas(struct rtas_args *args)
 
 static void __do_enter_rtas_trace(struct rtas_args *args)
 {
-   const char *name = NULL;
+   struct rtas_function *func = 
rtas_token_to_function(be32_to_cpu(args->token));
 
-   if (args == &rtas_args)
-   lockdep_assert_held(&rtas_lock);
/*
-* If the tracepoints that consume the function name aren't
-* active, avoid the lookup.
+* If there is a per-function lock, it must be held by the
+* caller.
 */
-   if ((trace_rtas_input_enabled() || trace_rtas_output_enabled())) {
-   const s32 token = be32_to_cpu(args->token);
-   const struct rtas_function *func = 
rtas_token_to_function(token);
+   if (func->lock)
+   WARN_ON(!mutex_is_locked(func->lock));
 
-   name = func->name;
-   }
+   if (args == &rtas_args)
+   lockdep_assert_held(&rtas_lock);
 
-   trace_rtas_input(args, name);
+   trace_rtas_input(args, func->name);
trace_rtas_ll_entry(args);
 
__do_enter_rtas(args);
 
trace_rtas_ll_exit(args);
-   trace_rtas_output(args, name);
+   trace_rtas_output(args, func->name);
 }
 
 static void do_enter_rtas(struct rtas_args *args)

-- 
2.41.0



[PATCH v3 06/10] powerpc/pseries: Add papr-vpd character driver for VPD retrieval

2023-10-25 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

PowerVM LPARs may retrieve Vital Product Data (VPD) for system
components using the ibm,get-vpd RTAS function.

We can expose this to user space with a /dev/papr-vpd character
device, where the programming model is:

  struct papr_location_code plc = { .str = "", }; /* obtain all VPD */
  int devfd = open("/dev/papr-vpd", O_RDONLY);
  int vpdfd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &plc);
  size_t size = lseek(vpdfd, 0, SEEK_END);
  char *buf = malloc(size);
  pread(vpdfd, buf, size, 0);

When a file descriptor is obtained from ioctl(PAPR_VPD_IOC_CREATE_HANDLE),
the file contains the result of a complete ibm,get-vpd sequence. The
file contents are immutable from the POV of user space. To get a new
view of the VPD, the client must create a new handle.

This design choice insulates user space from most of the complexities
that ibm,get-vpd brings:

* ibm,get-vpd must be called more than once to obtain complete
  results.

* Only one ibm,get-vpd call sequence should be in progress at a time;
  interleaved sequences will disrupt each other. Callers must have a
  protocol for serializing their use of the function.

* A call sequence in progress may receive a "VPD changed, try again"
  status, requiring the client to abandon the sequence and start
  over.

The memory required for the VPD buffers seems acceptable, around 20KB
for all VPD on one of my systems. And the value of the
/rtas/ibm,vpd-size DT property (the estimated maximum size of VPD) is
consistently 300KB across various systems I've checked.

I've implemented support for this new ABI in the rtas_get_vpd()
function in librtas, which the vpdupdate command currently uses to
populate its VPD database. I've verified that an unmodified vpdupdate
binary generates an identical database when using a librtas.so that
prefers the new ABI.

Note that the driver needs to serialize its call sequences with legacy
sys_rtas(ibm,get-vpd) callers, so it exposes its internal lock for
sys_rtas.

Signed-off-by: Nathan Lynch 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   2 +
 arch/powerpc/include/uapi/asm/papr-vpd.h   |  22 +
 arch/powerpc/platforms/pseries/Makefile|   1 +
 arch/powerpc/platforms/pseries/papr-vpd.c  | 522 +
 4 files changed, 547 insertions(+)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 4ea5b837399a..a950545bf7cd 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -349,6 +349,8 @@ Code  Seq#Include File  
 Comments
  

 0xB1  00-1F  PPPoX
  

+0xB2  00 arch/powerpc/include/uapi/asm/papr-vpd.h
powerpc/pseries VPD API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/uapi/asm/papr-vpd.h 
b/arch/powerpc/include/uapi/asm/papr-vpd.h
new file mode 100644
index ..b62e4f897a70
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-vpd.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_VPD_H_
+#define _UAPI_PAPR_VPD_H_
+
+#include 
+#include 
+
+struct papr_location_code {
+   /*
+* PAPR+ 12.3.2.4 Converged Location Code Rules - Length
+* Restrictions. 79 characters plus nul.
+*/
+   char str[80];
+};
+
+/*
+ * ioctl for /dev/papr-vpd. Returns a VPD handle fd corresponding to
+ * the location code.
+ */
+#define PAPR_VPD_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 0, struct 
papr_location_code)
+
+#endif /* _UAPI_PAPR_VPD_H_ */
diff --git a/arch/powerpc/platforms/pseries/Makefile 
b/arch/powerpc/platforms/pseries/Makefile
index 53c3b91af2f7..cbcaa102e2b4 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -4,6 +4,7 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
 
 obj-y  := lpar.o hvCall.o nvram.o reconfig.o \
   of_helpers.o rtas-work-area.o papr-sysparm.o \
+  papr-vpd.o \
   setup.o iommu.o event_sources.o ras.o \
   firmware.o power.o dlpar.o mobility.o rng.o \
   pci.o pci_dlpar.o eeh_pseries.o msi.o \
diff --git a/arch/powerpc/platforms/pseries/papr-vpd.c 
b/arch/powerpc/platforms/pseries/papr-vpd.c
new file mode 100644
i

[PATCH v3 03/10] powerpc/rtas: Serialize firmware activation sequences

2023-10-25 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Use the function lock API to prevent interleaving call sequences of
the ibm,activate-firmware RTAS function, which typically requires
multiple calls to complete the update. While the spec does not
specifically prohibit interleaved sequences, there's almost certainly
no advantage to allowing them.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 579a2c475bb6..864d140e7247 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1738,10 +1738,14 @@ void rtas_activate_firmware(void)
return;
}
 
+   rtas_function_lock(RTAS_FN_IBM_ACTIVATE_FIRMWARE);
+
do {
fwrc = rtas_call(token, 0, 1, NULL);
} while (rtas_busy_delay(fwrc));
 
+   rtas_function_unlock(RTAS_FN_IBM_ACTIVATE_FIRMWARE);
+
if (fwrc)
pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
 }

-- 
2.41.0



[PATCH v3 00/10] powerpc/pseries: New character devices for system parameters and VPD

2023-10-25 Thread Nathan Lynch via B4 Relay
Add character devices that expose PAPR-specific system parameters and
VPD to user space.

The problem: important platform features are enabled on Linux VMs
through the powerpc-specific rtas() syscall in combination with
writeable mappings of /dev/mem. In typical usage, this is encapsulated
behind APIs provided by the librtas library. This paradigm is
incompatible with lockdown, which prohibits /dev/mem access. It also
is too low-level in many cases: a single logical operation may require
multiple sys_rtas() calls in succession to complete. This carries the
risk that a process may exit while leaving an operation unfinished. It
also means that callers must coordinate their use of the syscall for
functions that cannot tolerate multiple concurrent clients, such as
ibm,get-vpd.

The solution presented here is to add a pair of small pseries-specific
"drivers," one for VPD and one for system parameters. The new drivers
expose these facilities to user space in ways that are compatible with
lockdown and require no coordination between their clients.

Since the ibm,get-vpd call sequence performed by the papr-vpd driver
must be serialized against all other uses of the function, the series
begins by adding some new APIs to the core RTAS support code for this
purpose.

Both drivers could potentially support poll() methods to notify
clients of changes to parameters or VPD that happen due to partition
migration and other events. But that should be safe to leave for
later, assuming there's any interest.

I have made changes to librtas to prefer the new interfaces and
verified that existing clients work correctly with the new code. A
draft PR for that work is here:

https://github.com/ibm-power-utilities/librtas/pull/36

The user-space ABI has not changed since v1 of this series.

I expect to propose at least one more small driver in this style for
platform dump retrieval in a separate submission in the future.

---
Changes in v3:
- Add new rtas_function_lock()/unlock() APIs and convert existing code
  to use them.
- Convert papr-vpd to use rtas_function_lock()/unlock() instead of
  having sys_rtas() obtain a driver-private mutex.
- Rebase on current powerpc/next.
- Link to v2: 
https://lore.kernel.org/r/20231013-papr-sys_rtas-vs-lockdown-v2-0-ead01ce01...@linux.ibm.com

Changes in v2:
- Fix unused-but-set variable warning in papr-sysparm code.
- Rebase on powerpc/next branch.
- Link to v1: 
https://lore.kernel.org/r/20231006-papr-sys_rtas-vs-lockdown-v1-0-3a36bfb66...@linux.ibm.com

Changes in v1 vs initial RFC:
- Add papr-sysparm driver and tests.
- Add a papr-miscdev.h uapi header.
- Prevent sys_rtas() from interfering with papr-vpd call sequences.
- Handle -4 ("VPD changed") status in papr-vpd.
- Include string_helpers.h in papr-vpd.c, per Michal Suchánek
- Link to RFC: 
https://lore.kernel.org/r/20230822-papr-sys_rtas-vs-lockdown-v1-0-932623cf3...@linux.ibm.com

---
Nathan Lynch (10):
  powerpc/rtas: Factor out function descriptor lookup
  powerpc/rtas: Facilitate high-level call sequences
  powerpc/rtas: Serialize firmware activation sequences
  powerpc/rtas: Warn if per-function lock isn't held
  powerpc/uapi: Export papr-miscdev.h header
  powerpc/pseries: Add papr-vpd character driver for VPD retrieval
  powerpc/pseries/papr-sysparm: Validate buffer object lengths
  powerpc/pseries/papr-sysparm: Expose character device to user space
  powerpc/selftests: Add test for papr-vpd
  powerpc/selftests: Add test for papr-sysparm

 Documentation/userspace-api/ioctl/ioctl-number.rst |   4 +
 arch/powerpc/include/asm/papr-sysparm.h|  17 +-
 arch/powerpc/include/asm/rtas.h|   2 +
 arch/powerpc/include/uapi/asm/papr-miscdev.h   |   9 +
 arch/powerpc/include/uapi/asm/papr-sysparm.h   |  58 +++
 arch/powerpc/include/uapi/asm/papr-vpd.h   |  22 +
 arch/powerpc/kernel/rtas.c | 157 ++-
 arch/powerpc/platforms/pseries/Makefile|   1 +
 arch/powerpc/platforms/pseries/papr-sysparm.c  | 201 +++-
 arch/powerpc/platforms/pseries/papr-vpd.c  | 522 +
 tools/testing/selftests/powerpc/Makefile   |   2 +
 .../selftests/powerpc/papr_sysparm/.gitignore  |   1 +
 .../selftests/powerpc/papr_sysparm/Makefile|  12 +
 .../selftests/powerpc/papr_sysparm/papr_sysparm.c  | 164 +++
 .../testing/selftests/powerpc/papr_vpd/.gitignore  |   1 +
 tools/testing/selftests/powerpc/papr_vpd/Makefile  |  12 +
 .../testing/selftests/powerpc/papr_vpd/papr_vpd.c  | 352 ++
 17 files changed, 1503 insertions(+), 34 deletions(-)
---
base-commit: 36e826b568e412f61d68fedc02a67b4d8b7583cc
change-id: 20230817-papr-sys_rtas-vs-lockdown-5c54505db792

Best regards,
-- 
Nathan Lynch 



[PATCH v3 01/10] powerpc/rtas: Factor out function descriptor lookup

2023-10-25 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Move the function descriptor table lookup out of rtas_function_token()
into a separate routine for use in new code to follow. No functional
change.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 31 +++
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index eddc031c4b95..e2fac171bf69 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -464,29 +464,36 @@ static struct rtas_function rtas_function_table[] 
__ro_after_init = {
 static DEFINE_RAW_SPINLOCK(rtas_lock);
 static struct rtas_args rtas_args;
 
-/**
- * rtas_function_token() - RTAS function token lookup.
- * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN.
- *
- * Context: Any context.
- * Return: the token value for the function if implemented by this platform,
- * otherwise RTAS_UNKNOWN_SERVICE.
- */
-s32 rtas_function_token(const rtas_fn_handle_t handle)
+static struct rtas_function *rtas_function_lookup(const rtas_fn_handle_t 
handle)
 {
const size_t index = handle.index;
const bool out_of_bounds = index >= ARRAY_SIZE(rtas_function_table);
 
if (WARN_ONCE(out_of_bounds, "invalid function index %zu", index))
-   return RTAS_UNKNOWN_SERVICE;
+   return NULL;
/*
 * Various drivers attempt token lookups on non-RTAS
 * platforms.
 */
if (!rtas.dev)
-   return RTAS_UNKNOWN_SERVICE;
+   return NULL;
+
+   return &rtas_function_table[index];
+}
+
+/**
+ * rtas_function_token() - RTAS function token lookup.
+ * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN.
+ *
+ * Context: Any context.
+ * Return: the token value for the function if implemented by this platform,
+ * otherwise RTAS_UNKNOWN_SERVICE.
+ */
+s32 rtas_function_token(const rtas_fn_handle_t handle)
+{
+   const struct rtas_function *func = rtas_function_lookup(handle);
 
-   return rtas_function_table[index].token;
+   return func ? func->token : RTAS_UNKNOWN_SERVICE;
 }
 EXPORT_SYMBOL_GPL(rtas_function_token);
 

-- 
2.41.0



[PATCH v3 09/10] powerpc/selftests: Add test for papr-vpd

2023-10-25 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Add selftests for /dev/papr-vpd, exercising the common expected use
cases:

* Retrieve all VPD by passing an empty location code.
* Retrieve the "system VPD" by passing a location code derived from DT
  root node properties, as done by the vpdupdate command.

The tests also verify that certain intended properties of the driver
hold:

* Passing an unterminated location code to PAPR_VPD_IOC_CREATE_HANDLE
  gets EINVAL.
* Passing a NULL location code pointer to PAPR_VPD_IOC_CREATE_HANDLE
  gets EFAULT.
* Closing the device node without first issuing a
  PAPR_VPD_IOC_CREATE_HANDLE command to it succeeds.
* Releasing a handle without first consuming any data from it
  succeeds.
* Re-reading the contents of a handle returns the same data as the
  first time.

Some minimal validation of the returned data is performed.

The tests are skipped on systems where the papr-vpd driver does not
initialize, making this useful only on PowerVM LPARs at this point.

Signed-off-by: Nathan Lynch 
---
 tools/testing/selftests/powerpc/Makefile   |   1 +
 .../testing/selftests/powerpc/papr_vpd/.gitignore  |   1 +
 tools/testing/selftests/powerpc/papr_vpd/Makefile  |  12 +
 .../testing/selftests/powerpc/papr_vpd/papr_vpd.c  | 352 +
 4 files changed, 366 insertions(+)

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 7ea42fa02eab..05fc68d446c2 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -32,6 +32,7 @@ SUB_DIRS = alignment  \
   vphn \
   math \
   papr_attributes  \
+  papr_vpd \
   ptrace   \
   security \
   mce
diff --git a/tools/testing/selftests/powerpc/papr_vpd/.gitignore 
b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
new file mode 100644
index ..49285031a656
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
@@ -0,0 +1 @@
+/papr_vpd
diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile 
b/tools/testing/selftests/powerpc/papr_vpd/Makefile
new file mode 100644
index ..06b719703bfd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+   $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_vpd
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_vpd: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c 
b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
new file mode 100644
index ..98cbb9109ee6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-vpd"
+
+static int dev_papr_vpd_open_close(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int dev_papr_vpd_get_handle_all(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+   struct papr_location_code lc = { .str = "", };
+   off_t size;
+   int fd;
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   errno = 0;
+   fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+   FAIL_IF(errno != 0);
+   FAIL_IF(fd < 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   size = lseek(fd, 0, SEEK_END);
+   FAIL_IF(size <= 0);
+
+   void *buf = malloc((size_t)size);
+   FAIL_IF(!buf);
+
+   ssize_t consumed = pread(fd, buf, size, 0);
+   FAIL_IF(consumed != size);
+
+   /* Ensure EOF */
+   FAIL_IF(read(fd, buf, size) != 0);
+   FAIL_IF(close(fd));
+
+   /* Verify that the buffer looks like VPD */
+   static const char needle[] = "System VPD";
+   FAIL_IF(!memmem(buf, size, needle, strlen(needle)));
+
+   return 0;
+}
+
+static int dev_papr_vpd_get_handle_byte_at_a_time(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+   struct papr_location_code lc = { .str = "", };
+   int fd;
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   errno = 0;
+   fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+   FAIL_IF(errno != 0);
+   FAIL_IF(fd < 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   size_t consumed = 0;
+   while (1) {
+   ssize_t res;
+   char c;
+
+   errno = 0;
+   res = read(fd, &c, sizeof(c));
+   FAIL_IF(res > sizeof(c))

[PATCH v3 02/10] powerpc/rtas: Facilitate high-level call sequences

2023-10-25 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

On RTAS platforms there is a general restriction that the OS must not
enter RTAS on more than one CPU at a time. This low-level
serialization requirement is satisfied by holding a spin
lock (rtas_lock) across most RTAS function invocations.

However, some pseries RTAS functions require multiple successive calls
to complete a logical operation. Beginning a new call sequence for such a
function may disrupt any other sequences of that function already in
progress. Safe and reliable use of these functions effectively
requires higher-level serialization beyond what is already done at the
level of RTAS entry and exit.

Where a sequence-based RTAS function is invoked only through
sys_rtas(), with no in-kernel users, there is no issue as far as the
kernel is concerned. User space is responsible for appropriately
serializing its call sequences. (Whether user space code actually
takes measures to prevent sequence interleaving is another matter.)
Examples of such functions currently include ibm,platform-dump and
ibm,get-vpd.

But where a sequence-based RTAS function has both user space and
in-kernel users, there is a hazard. Even if the in-kernel call sites
of such a function serialize their sequences correctly, a user of
sys_rtas() can invoke the same function at any time, potentially
disrupting a sequence in progress.

So in order to prevent disruption of kernel-based RTAS call sequences,
they must serialize not only with themselves but also with sys_rtas()
users, somehow. Preferably without adding global locks or adding more
function-specific hacks to sys_rtas(). This is a prerequisite for
adding an in-kernel call sequence of ibm,get-vpd, which is in a change
to follow.

Note that it has never been feasible for the kernel to prevent
sys_rtas()-based sequences from being disrupted because control
returns to user space on every call. sys_rtas()-based users of these
functions have always been, and continue to be, responsible for
coordinating their call sequences with other users, even those which
may invoke the RTAS functions through less direct means than
sys_rtas(). This is an unavoidable consequence of exposing
sequence-based RTAS functions through sys_rtas().

* Add new rtas_function_lock() and rtas_function_unlock() APIs for use
  with sequence-based RTAS functions.

* Add an optional per-function mutex to struct rtas_function. When this
  member is set, kernel-internal callers of the RTAS function are
  required to guard their call sequences with rtas_function_lock() and
  rtas_function_unlock(). This requirement will be enforced in a later
  change, after all affected call sites are updated.

* Populate the lock members of function table entries where
  serialization of call sequences is known to be necessary, along with
  justifying commentary.

* In sys_rtas(), acquire the per-function mutex when it is present.

There should be no perceivable change introduced here except that
concurrent callers of the same RTAS function via sys_rtas() may block
on a mutex instead of spinning on rtas_lock. Changes to follow will
add rtas_function_lock()/unlock() pairs to kernel-based call
sequences.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h |   2 +
 arch/powerpc/kernel/rtas.c  | 101 +++-
 2 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index c697c3c74694..382627854034 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -408,6 +408,8 @@ static inline bool rtas_function_implemented(const 
rtas_fn_handle_t handle)
 {
return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE;
 }
+void rtas_function_lock(rtas_fn_handle_t handle);
+void rtas_function_unlock(rtas_fn_handle_t handle);
 extern int rtas_token(const char *service);
 extern int rtas_service_present(const char *service);
 extern int rtas_call(int token, int, int, int *, ...);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index e2fac171bf69..579a2c475bb6 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -70,14 +71,34 @@ struct rtas_filter {
  *ppc64le, and we want to keep it that way. It does
  *not make sense for this to be set when @filter
  *is NULL.
+ * @lock: Pointer to an optional dedicated per-function mutex. This
+ *should be set for functions that require multiple calls in
+ *sequence to complete a single operation, and such sequences
+ *will disrupt each other if allowed to interleave. Users of
+ *this function are required to hold the associated lock for
+ *the duration of the call sequence. Add an explanatory
+ *comment to the function table entry if setting this member.
  */
 stru

[PATCH v3 07/10] powerpc/pseries/papr-sysparm: Validate buffer object lengths

2023-10-25 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The ability to get and set system parameters will be exposed to user
space, so let's get a little more strict about malformed
papr_sysparm_buf objects.

* Create accessors for the length field of struct papr_sysparm_buf.
  The length is always stored in MSB order and this is better than
  spreading the necessary conversions all over.

* Reject attempts to submit invalid buffers to RTAS.

* Warn if RTAS returns a buffer with an invalid length, clamping the
  returned length to a safe value that won't overrun the buffer.

These are meant as precautionary measures to mitigate both firmware
and kernel bugs in this area, should they arise, but I am not aware of
any.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/papr-sysparm.c | 47 +++
 1 file changed, 47 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c 
b/arch/powerpc/platforms/pseries/papr-sysparm.c
index fedc61599e6c..a1e7aeac7416 100644
--- a/arch/powerpc/platforms/pseries/papr-sysparm.c
+++ b/arch/powerpc/platforms/pseries/papr-sysparm.c
@@ -23,6 +23,46 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf)
kfree(buf);
 }
 
+static size_t papr_sysparm_buf_get_length(const struct papr_sysparm_buf *buf)
+{
+   return be16_to_cpu(buf->len);
+}
+
+static void papr_sysparm_buf_set_length(struct papr_sysparm_buf *buf, size_t 
length)
+{
+   WARN_ONCE(length > sizeof(buf->val),
+ "bogus length %zu, clamping to safe value", length);
+   length = min(sizeof(buf->val), length);
+   buf->len = cpu_to_be16(length);
+}
+
+/*
+ * For use on buffers returned from ibm,get-system-parameter before
+ * returning them to callers. Ensures the encoded length of valid data
+ * cannot overrun buf->val[].
+ */
+static void papr_sysparm_buf_clamp_length(struct papr_sysparm_buf *buf)
+{
+   papr_sysparm_buf_set_length(buf, papr_sysparm_buf_get_length(buf));
+}
+
+/*
+ * Perform some basic diligence on the system parameter buffer before
+ * submitting it to RTAS.
+ */
+static bool papr_sysparm_buf_can_submit(const struct papr_sysparm_buf *buf)
+{
+   /*
+* Firmware ought to reject buffer lengths that exceed the
+* maximum specified in PAPR, but there's no reason for the
+* kernel to allow them either.
+*/
+   if (papr_sysparm_buf_get_length(buf) > sizeof(buf->val))
+   return false;
+
+   return true;
+}
+
 /**
  * papr_sysparm_get() - Retrieve the value of a PAPR system parameter.
  * @param: PAPR system parameter token as described in
@@ -63,6 +103,9 @@ int papr_sysparm_get(papr_sysparm_t param, struct 
papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
 
+   if (!papr_sysparm_buf_can_submit(buf))
+   return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
 
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));
@@ -77,6 +120,7 @@ int papr_sysparm_get(papr_sysparm_t param, struct 
papr_sysparm_buf *buf)
case 0:
ret = 0;
memcpy(buf, rtas_work_area_raw_buf(work_area), sizeof(*buf));
+   papr_sysparm_buf_clamp_length(buf);
break;
case -3: /* parameter not implemented */
ret = -EOPNOTSUPP;
@@ -115,6 +159,9 @@ int papr_sysparm_set(papr_sysparm_t param, const struct 
papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
 
+   if (!papr_sysparm_buf_can_submit(buf))
+   return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
 
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));

-- 
2.41.0



[PATCH v3 05/10] powerpc/uapi: Export papr-miscdev.h header

2023-10-25 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Allocate one identifying code (the first column of the ioctl-number
table) for the collection of PAPR miscdev drivers to share.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/uapi/asm/papr-miscdev.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/include/uapi/asm/papr-miscdev.h 
b/arch/powerpc/include/uapi/asm/papr-miscdev.h
new file mode 100644
index ..49a2a270b7f3
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-miscdev.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_MISCDEV_H_
+#define _UAPI_PAPR_MISCDEV_H_
+
+enum {
+   PAPR_MISCDEV_IOC_ID = 0xb2,
+};
+
+#endif /* _UAPI_PAPR_MISCDEV_H_ */

-- 
2.41.0



[PATCH v3 08/10] powerpc/pseries/papr-sysparm: Expose character device to user space

2023-10-25 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Until now the papr_sysparm APIs have been kernel-internal. But user
space needs access to PAPR system parameters too. The only method
available to user space today to get or set system parameters is using
sys_rtas() and /dev/mem to pass RTAS-addressable buffers between user
space and firmware. This is incompatible with lockdown and should be
deprecated.

So provide an alternative ABI to user space in the form of a
/dev/papr-sysparm character device with just two ioctl commands (get
and set). The data payloads involved are small enough to fit in the
ioctl argument buffer, making the code relatively simple.

Exposing the system parameters through sysfs has been considered but
it would be too awkward:

* The kernel currently does not have to contain an exhaustive list of
  defined system parameters. This is a convenient property to maintain
  because we don't have to update the kernel whenever a new parameter
  is added to PAPR. Exporting a named attribute in sysfs for each
  parameter would negate this.

* Some system parameters are text-based and some are not.

* Retrieval of at least one system parameter requires input data,
  which a simple read-oriented interface can't support.

Signed-off-by: Nathan Lynch 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   2 +
 arch/powerpc/include/asm/papr-sysparm.h|  17 ++-
 arch/powerpc/include/uapi/asm/papr-sysparm.h   |  58 
 arch/powerpc/platforms/pseries/papr-sysparm.c  | 154 -
 4 files changed, 223 insertions(+), 8 deletions(-)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index a950545bf7cd..d8b6cb1a3636 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -351,6 +351,8 @@ Code  Seq#Include File  
 Comments
  

 0xB2  00 arch/powerpc/include/uapi/asm/papr-vpd.h
powerpc/pseries VPD API
  

+0xB2  01-02  arch/powerpc/include/uapi/asm/papr-sysparm.h
powerpc/pseries system parameter API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/asm/papr-sysparm.h 
b/arch/powerpc/include/asm/papr-sysparm.h
index f5fdbd8ae9db..0dbbff59101d 100644
--- a/arch/powerpc/include/asm/papr-sysparm.h
+++ b/arch/powerpc/include/asm/papr-sysparm.h
@@ -2,8 +2,10 @@
 #ifndef _ASM_POWERPC_PAPR_SYSPARM_H
 #define _ASM_POWERPC_PAPR_SYSPARM_H
 
+#include 
+
 typedef struct {
-   const u32 token;
+   u32 token;
 } papr_sysparm_t;
 
 #define mk_papr_sysparm(x_) ((papr_sysparm_t){ .token = x_, })
@@ -20,11 +22,14 @@ typedef struct {
 #define PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRSmk_papr_sysparm(50)
 #define PAPR_SYSPARM_LPAR_NAME mk_papr_sysparm(55)
 
-enum {
-   PAPR_SYSPARM_MAX_INPUT  = 1024,
-   PAPR_SYSPARM_MAX_OUTPUT = 4000,
-};
-
+/**
+ * struct papr_sysparm_buf - RTAS work area layout for system parameter 
functions.
+ *
+ * This is the memory layout of the buffers passed to/from
+ * ibm,get-system-parameter and ibm,set-system-parameter. It is
+ * distinct from the papr_sysparm_io_block structure that is passed
+ * between user space and the kernel.
+ */
 struct papr_sysparm_buf {
__be16 len;
char val[PAPR_SYSPARM_MAX_OUTPUT];
diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h 
b/arch/powerpc/include/uapi/asm/papr-sysparm.h
new file mode 100644
index ..9f9a0f267ea5
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_SYSPARM_H_
+#define _UAPI_PAPR_SYSPARM_H_
+
+#include 
+#include 
+#include 
+
+enum {
+   PAPR_SYSPARM_MAX_INPUT  = 1024,
+   PAPR_SYSPARM_MAX_OUTPUT = 4000,
+};
+
+struct papr_sysparm_io_block {
+   __u32 parameter;
+   __u16 length;
+   char data[PAPR_SYSPARM_MAX_OUTPUT];
+};
+
+/**
+ * PAPR_SYSPARM_IOC_GET - Retrieve the value of a PAPR system parameter.
+ *
+ * Uses _IOWR because of one corner case: Retrieving the value of the
+ * "OS Service Entitlement Status" parameter (60) requires the caller
+ * to supply input data (a date string) in the buffer passed to
+ * firmware. So the @length and @data of the incoming
+ * papr_sysparm_io_block are always used to initialize the work area
+ * supplied to ibm,get-system-parameter. No other parameters are known
+ * to pa

[PATCH v2 7/7] powerpc/selftests: add test for papr-sysparm

2023-10-13 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Consistently testing system parameter access is a bit difficult by
nature -- the set of parameters available depends on the model and
system configuration, and updating a parameter should be considered a
destructive operation reserved for the admin.

So we validate some of the error paths and retrieve the SPLPAR
characteristics string, but not much else.

Signed-off-by: Nathan Lynch 
---
 tools/testing/selftests/powerpc/Makefile   |   1 +
 .../selftests/powerpc/papr_sysparm/.gitignore  |   1 +
 .../selftests/powerpc/papr_sysparm/Makefile|  12 ++
 .../selftests/powerpc/papr_sysparm/papr_sysparm.c  | 164 +
 4 files changed, 178 insertions(+)

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 7de972612786..2f3fa91a1f52 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -33,6 +33,7 @@ SUB_DIRS = alignment  \
   math \
   papr_attributes  \
   papr_vpd \
+  papr_sysparm \
   ptrace   \
   security \
   mce
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/.gitignore 
b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
new file mode 100644
index ..f2a69bf59d40
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
@@ -0,0 +1 @@
+/papr_sysparm
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile 
b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
new file mode 100644
index ..7f79e437634a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+   $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_sysparm
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_sysparm: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c 
b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
new file mode 100644
index ..fc25c03e8bc7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-sysparm"
+
+static int open_close(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int get_splpar(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = 20, // SPLPAR characteristics
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != 0);
+   FAIL_IF(sp.length == 0);
+   FAIL_IF(sp.length > sizeof(sp.data));
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int get_bad_parameter(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = UINT32_MAX, // there are only ~60 specified 
parameters
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != -1);
+   FAIL_IF(errno != EOPNOTSUPP);
+
+   // Ensure the buffer is unchanged
+   FAIL_IF(sp.length != 0);
+   for (size_t i = 0; i < ARRAY_SIZE(sp.data); ++i)
+   FAIL_IF(sp.data[i] != 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int check_efault_common(unsigned long cmd)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, cmd, NULL) != -1);
+   FAIL_IF(errno != EFAULT);
+
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int check_efault_get(void)
+{
+   return check_efault_common(PAPR_SYSPARM_IOC_GET);
+}
+
+static int check_efault_set(void)
+{
+   return check_efault_common(PAPR_SYSPARM_IOC_SET);
+}
+
+static int set_hmc0(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = 0, // HMC0, not a settable parameter
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, PAPR_

[PATCH v2 2/7] powerpc/pseries: papr-vpd char driver for VPD retrieval

2023-10-13 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

PowerVM LPARs may retrieve Vital Product Data (VPD) for system
components using the ibm,get-vpd RTAS function.

We can expose this to user space with a /dev/papr-vpd character
device, where the programming model is:

  struct papr_location_code plc = { .str = "", }; /* obtain all VPD */
  int devfd = open("/dev/papr-vpd", O_RDONLY);
  int vpdfd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &plc);
  size_t size = lseek(vpdfd, 0, SEEK_END);
  char *buf = malloc(size);
  pread(vpdfd, buf, size, 0);

When a file descriptor is obtained from ioctl(PAPR_VPD_CREATE_HANDLE),
the file contains the result of a complete ibm,get-vpd sequence. The
file contents are immutable from the POV of user space. To get a new
view of the VPD, the client must create a new handle.

This design choice insulates user space from most of the complexities
that ibm,get-vpd brings:

* ibm,get-vpd must be called more than once to obtain complete
  results.

* Only one ibm,get-vpd call sequence should be in progress at a time;
  interleaved sequences will disrupt each other. Callers must have a
  protocol for serializing their use of the function.

* A call sequence in progress may receive a "VPD changed, try again"
  status, requiring the client to abandon the sequence and start
  over.

The memory required for the VPD buffers seems acceptable, around 20KB
for all VPD on one of my systems. And the value of the
/rtas/ibm,vpd-size DT property (the estimated maximum size of VPD) is
consistently 300KB across various systems I've checked.

I've implemented support for this new ABI in the rtas_get_vpd()
function in librtas, which the vpdupdate command currently uses to
populate its VPD database. I've verified that an unmodified vpdupdate
binary generates an identical database when using a librtas.so that
prefers the new ABI.

Note that the driver needs to serialize its call sequences with legacy
sys_rtas(ibm,get-vpd) callers, so it exposes its internal lock for
sys_rtas.

Signed-off-by: Nathan Lynch 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   2 +
 arch/powerpc/include/asm/papr-vpd.h|  18 +
 arch/powerpc/include/uapi/asm/papr-vpd.h   |  22 +
 arch/powerpc/platforms/pseries/Makefile|   1 +
 arch/powerpc/platforms/pseries/papr-vpd.c  | 542 +
 5 files changed, 585 insertions(+)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 4ea5b837399a..a950545bf7cd 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -349,6 +349,8 @@ Code  Seq#Include File  
 Comments
  

 0xB1  00-1F  PPPoX
  

+0xB2  00 arch/powerpc/include/uapi/asm/papr-vpd.h
powerpc/pseries VPD API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/asm/papr-vpd.h 
b/arch/powerpc/include/asm/papr-vpd.h
new file mode 100644
index ..bce13e63ae6b
--- /dev/null
+++ b/arch/powerpc/include/asm/papr-vpd.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_PAPR_VPD_H
+#define _ASM_POWERPC_PAPR_VPD_H
+
+/*
+ * This is here only so sys_rtas() can avoid disrupting VPD sequences
+ * in progress in the papr-vpd driver. There's no other reason that
+ * the rest of the kernel should call into papr-vpd.
+ */
+#ifdef CONFIG_PPC_PSERIES
+void papr_vpd_mutex_lock(void);
+void papr_vpd_mutex_unlock(void);
+#else  /* CONFIG_PPC_PSERIES */
+static inline void papr_vpd_mutex_lock(void) {}
+static inline void papr_vpd_mutex_unlock(void) {}
+#endif /* CONFIG_PPC_PSERIES */
+
+#endif /* _ASM_POWERPC_PAPR_VPD_H */
diff --git a/arch/powerpc/include/uapi/asm/papr-vpd.h 
b/arch/powerpc/include/uapi/asm/papr-vpd.h
new file mode 100644
index ..b62e4f897a70
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-vpd.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_VPD_H_
+#define _UAPI_PAPR_VPD_H_
+
+#include 
+#include 
+
+struct papr_location_code {
+   /*
+* PAPR+ 12.3.2.4 Converged Location Code Rules - Length
+* Restrictions. 79 characters plus nul.
+*/
+   char str[80];
+};
+
+/*
+ * ioctl for /dev/papr-vpd. Returns a VPD handle fd corresponding to
+ * the location code.
+ */
+#define PAPR_VPD_IOC_CREATE_HANDLE _IO

[PATCH v2 5/7] powerpc/pseries/papr-sysparm: expose chardev API to user space

2023-10-13 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Until now the papr_sysparm APIs have been kernel-internal. But user
space needs access to PAPR system parameters too. The only method
available to user space today to get or set system parameters is using
sys_rtas() and /dev/mem to pass RTAS-addressable buffers between user
space and firmware. This is incompatible with lockdown and should be
deprecated.

So provide an alternative ABI to user space in the form of a
/dev/papr-sysparm character device with just two ioctl commands (get
and set). The data payloads involved are small enough to fit in the
ioctl argument buffer, making the code relatively simple.

Exposing the system parameters through sysfs has been considered but
it would be too awkward:

* The kernel currently does not have to contain an exhaustive list of
  defined system parameters. This is a convenient property to maintain
  because we don't have to update the kernel whenever a new parameter
  is added to PAPR. Exporting a named attribute in sysfs for each
  parameter would negate this.

* Some system parameters are text-based and some are not.

* Retrieval of at least one system parameter requires input data,
  which a simple read-oriented interface can't support.

Signed-off-by: Nathan Lynch 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   2 +
 arch/powerpc/include/asm/papr-sysparm.h|  17 ++-
 arch/powerpc/include/uapi/asm/papr-sysparm.h   |  58 
 arch/powerpc/platforms/pseries/papr-sysparm.c  | 154 -
 4 files changed, 223 insertions(+), 8 deletions(-)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index a950545bf7cd..d8b6cb1a3636 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -351,6 +351,8 @@ Code  Seq#Include File  
 Comments
  

 0xB2  00 arch/powerpc/include/uapi/asm/papr-vpd.h
powerpc/pseries VPD API
  

+0xB2  01-02  arch/powerpc/include/uapi/asm/papr-sysparm.h
powerpc/pseries system parameter API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/asm/papr-sysparm.h 
b/arch/powerpc/include/asm/papr-sysparm.h
index f5fdbd8ae9db..0dbbff59101d 100644
--- a/arch/powerpc/include/asm/papr-sysparm.h
+++ b/arch/powerpc/include/asm/papr-sysparm.h
@@ -2,8 +2,10 @@
 #ifndef _ASM_POWERPC_PAPR_SYSPARM_H
 #define _ASM_POWERPC_PAPR_SYSPARM_H
 
+#include 
+
 typedef struct {
-   const u32 token;
+   u32 token;
 } papr_sysparm_t;
 
 #define mk_papr_sysparm(x_) ((papr_sysparm_t){ .token = x_, })
@@ -20,11 +22,14 @@ typedef struct {
 #define PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRSmk_papr_sysparm(50)
 #define PAPR_SYSPARM_LPAR_NAME mk_papr_sysparm(55)
 
-enum {
-   PAPR_SYSPARM_MAX_INPUT  = 1024,
-   PAPR_SYSPARM_MAX_OUTPUT = 4000,
-};
-
+/**
+ * struct papr_sysparm_buf - RTAS work area layout for system parameter 
functions.
+ *
+ * This is the memory layout of the buffers passed to/from
+ * ibm,get-system-parameter and ibm,set-system-parameter. It is
+ * distinct from the papr_sysparm_io_block structure that is passed
+ * between user space and the kernel.
+ */
 struct papr_sysparm_buf {
__be16 len;
char val[PAPR_SYSPARM_MAX_OUTPUT];
diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h 
b/arch/powerpc/include/uapi/asm/papr-sysparm.h
new file mode 100644
index ..9f9a0f267ea5
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_SYSPARM_H_
+#define _UAPI_PAPR_SYSPARM_H_
+
+#include 
+#include 
+#include 
+
+enum {
+   PAPR_SYSPARM_MAX_INPUT  = 1024,
+   PAPR_SYSPARM_MAX_OUTPUT = 4000,
+};
+
+struct papr_sysparm_io_block {
+   __u32 parameter;
+   __u16 length;
+   char data[PAPR_SYSPARM_MAX_OUTPUT];
+};
+
+/**
+ * PAPR_SYSPARM_IOC_GET - Retrieve the value of a PAPR system parameter.
+ *
+ * Uses _IOWR because of one corner case: Retrieving the value of the
+ * "OS Service Entitlement Status" parameter (60) requires the caller
+ * to supply input data (a date string) in the buffer passed to
+ * firmware. So the @length and @data of the incoming
+ * papr_sysparm_io_block are always used to initialize the work area
+ * supplied to ibm,get-system-parameter. No other parameters are known
+ * to pa

[PATCH v2 4/7] powerpc/pseries/papr-sysparm: validate buffer object lengths

2023-10-13 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The ability to get and set system parameters will be exposed to user
space, so let's get a little more strict about malformed
papr_sysparm_buf objects.

* Create accessors for the length field of struct papr_sysparm_buf.
  The length is always stored in MSB order and this is better than
  spreading the necessary conversions all over.

* Reject attempts to submit invalid buffers to RTAS.

* Warn if RTAS returns a buffer with an invalid length, clamping the
  returned length to a safe value that won't overrun the buffer.

These are meant as precautionary measures to mitigate both firmware
and kernel bugs in this area, should they arise, but I am not aware of
any.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/papr-sysparm.c | 47 +++
 1 file changed, 47 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c 
b/arch/powerpc/platforms/pseries/papr-sysparm.c
index fedc61599e6c..a1e7aeac7416 100644
--- a/arch/powerpc/platforms/pseries/papr-sysparm.c
+++ b/arch/powerpc/platforms/pseries/papr-sysparm.c
@@ -23,6 +23,46 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf)
kfree(buf);
 }
 
+static size_t papr_sysparm_buf_get_length(const struct papr_sysparm_buf *buf)
+{
+   return be16_to_cpu(buf->len);
+}
+
+static void papr_sysparm_buf_set_length(struct papr_sysparm_buf *buf, size_t 
length)
+{
+   WARN_ONCE(length > sizeof(buf->val),
+ "bogus length %zu, clamping to safe value", length);
+   length = min(sizeof(buf->val), length);
+   buf->len = cpu_to_be16(length);
+}
+
+/*
+ * For use on buffers returned from ibm,get-system-parameter before
+ * returning them to callers. Ensures the encoded length of valid data
+ * cannot overrun buf->val[].
+ */
+static void papr_sysparm_buf_clamp_length(struct papr_sysparm_buf *buf)
+{
+   papr_sysparm_buf_set_length(buf, papr_sysparm_buf_get_length(buf));
+}
+
+/*
+ * Perform some basic diligence on the system parameter buffer before
+ * submitting it to RTAS.
+ */
+static bool papr_sysparm_buf_can_submit(const struct papr_sysparm_buf *buf)
+{
+   /*
+* Firmware ought to reject buffer lengths that exceed the
+* maximum specified in PAPR, but there's no reason for the
+* kernel to allow them either.
+*/
+   if (papr_sysparm_buf_get_length(buf) > sizeof(buf->val))
+   return false;
+
+   return true;
+}
+
 /**
  * papr_sysparm_get() - Retrieve the value of a PAPR system parameter.
  * @param: PAPR system parameter token as described in
@@ -63,6 +103,9 @@ int papr_sysparm_get(papr_sysparm_t param, struct 
papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
 
+   if (!papr_sysparm_buf_can_submit(buf))
+   return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
 
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));
@@ -77,6 +120,7 @@ int papr_sysparm_get(papr_sysparm_t param, struct 
papr_sysparm_buf *buf)
case 0:
ret = 0;
memcpy(buf, rtas_work_area_raw_buf(work_area), sizeof(*buf));
+   papr_sysparm_buf_clamp_length(buf);
break;
case -3: /* parameter not implemented */
ret = -EOPNOTSUPP;
@@ -115,6 +159,9 @@ int papr_sysparm_set(papr_sysparm_t param, const struct 
papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
 
+   if (!papr_sysparm_buf_can_submit(buf))
+   return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
 
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));

-- 
2.41.0



[PATCH v2 1/7] powerpc/uapi: export papr-miscdev.h header

2023-10-13 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Allocate one identifying code (the first column of the ioctl-number
table) for the collection of PAPR miscdev drivers to share.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/uapi/asm/papr-miscdev.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/include/uapi/asm/papr-miscdev.h 
b/arch/powerpc/include/uapi/asm/papr-miscdev.h
new file mode 100644
index ..49a2a270b7f3
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-miscdev.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_MISCDEV_H_
+#define _UAPI_PAPR_MISCDEV_H_
+
+enum {
+   PAPR_MISCDEV_IOC_ID = 0xb2,
+};
+
+#endif /* _UAPI_PAPR_MISCDEV_H_ */

-- 
2.41.0



[PATCH v2 6/7] powerpc/selftests: add test for papr-vpd

2023-10-13 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Add selftests for /dev/papr-vpd, exercising the common expected use
cases:

* Retrieve all VPD by passing an empty location code.
* Retrieve the "system VPD" by passing a location code derived from DT
  root node properties, as done by the vpdupdate command.

The tests also verify that certain intended properties of the driver
hold:

* Passing an unterminated location code to PAPR_VPD_IOC_CREATE_HANDLE
  gets EINVAL.
* Passing a NULL location code pointer to PAPR_VPD_IOC_CREATE_HANDLE
  gets EFAULT.
* Closing the device node without first issuing a
  PAPR_VPD_IOC_CREATE_HANDLE command to it succeeds.
* Releasing a handle without first consuming any data from it
  succeeds.
* Re-reading the contents of a handle returns the same data as the
  first time.

Some minimal validation of the returned data is performed.

The tests are skipped on systems where the papr-vpd driver does not
initialize, making this useful only on PowerVM LPARs at this point.

Signed-off-by: Nathan Lynch 
---
 tools/testing/selftests/powerpc/Makefile   |   1 +
 .../testing/selftests/powerpc/papr_vpd/.gitignore  |   1 +
 tools/testing/selftests/powerpc/papr_vpd/Makefile  |  12 +
 .../testing/selftests/powerpc/papr_vpd/papr_vpd.c  | 352 +
 4 files changed, 366 insertions(+)

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 49f2ad1793fd..7de972612786 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -32,6 +32,7 @@ SUB_DIRS = alignment  \
   vphn \
   math \
   papr_attributes  \
+  papr_vpd \
   ptrace   \
   security \
   mce
diff --git a/tools/testing/selftests/powerpc/papr_vpd/.gitignore 
b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
new file mode 100644
index ..49285031a656
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
@@ -0,0 +1 @@
+/papr_vpd
diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile 
b/tools/testing/selftests/powerpc/papr_vpd/Makefile
new file mode 100644
index ..06b719703bfd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+   $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_vpd
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_vpd: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c 
b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
new file mode 100644
index ..98cbb9109ee6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-vpd"
+
+static int dev_papr_vpd_open_close(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int dev_papr_vpd_get_handle_all(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+   struct papr_location_code lc = { .str = "", };
+   off_t size;
+   int fd;
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   errno = 0;
+   fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+   FAIL_IF(errno != 0);
+   FAIL_IF(fd < 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   size = lseek(fd, 0, SEEK_END);
+   FAIL_IF(size <= 0);
+
+   void *buf = malloc((size_t)size);
+   FAIL_IF(!buf);
+
+   ssize_t consumed = pread(fd, buf, size, 0);
+   FAIL_IF(consumed != size);
+
+   /* Ensure EOF */
+   FAIL_IF(read(fd, buf, size) != 0);
+   FAIL_IF(close(fd));
+
+   /* Verify that the buffer looks like VPD */
+   static const char needle[] = "System VPD";
+   FAIL_IF(!memmem(buf, size, needle, strlen(needle)));
+
+   return 0;
+}
+
+static int dev_papr_vpd_get_handle_byte_at_a_time(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+   struct papr_location_code lc = { .str = "", };
+   int fd;
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   errno = 0;
+   fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+   FAIL_IF(errno != 0);
+   FAIL_IF(fd < 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   size_t consumed = 0;
+   while (1) {
+   ssize_t res;
+   char c;
+
+   errno = 0;
+   res = read(fd, &c, sizeof(c));
+   FAIL_IF(res > sizeof(c))

[PATCH v2 3/7] powerpc/rtas: serialize ibm,get-vpd service with papr-vpd sequences

2023-10-13 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Take the papr-vpd driver's internal mutex when sys_rtas performs
ibm,get-vpd calls. This prevents sys_rtas(ibm,get-vpd) calls from
interleaving with sequences performed by the driver, ensuring that
such sequences are not disrupted.

However, it cannot prevent the driver from interleaving with sequences
that are initiated via sys_rtas, since control returns to user space
with each sys_rtas(ibm,get-vpd) call.

Emit a notice on first use of sys_rtas(ibm,get-vpd) encouraging users
to migrate to /dev/papr-vpd.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index eddc031c4b95..70ae118d2a13 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1861,6 +1862,28 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
goto copy_return;
}
 
+   if (token == rtas_function_token(RTAS_FN_IBM_GET_VPD)) {
+   /*
+* ibm,get-vpd potentially needs to be invoked
+* multiple times to obtain complete results.
+* Interleaved ibm,get-vpd sequences disrupt each
+* other.
+*
+* /dev/papr-vpd doesn't have this problem and users
+* do not need to be aware of each other to use it
+* safely.
+*
+* We can prevent this call from disrupting a
+* /dev/papr-vpd-initiated sequence in progress by
+* reaching into the driver to take its internal
+* lock. Unfortunately there is no way to prevent
+* interference in the other direction without
+* resorting to even worse hacks.
+*/
+   pr_notice_once("Calling ibm,get-vpd via sys_rtas is allowed but 
deprecated. Use /dev/papr-vpd instead.\n");
+   papr_vpd_mutex_lock();
+   }
+
buff_copy = get_errorlog_buffer();
 
raw_spin_lock_irqsave(&rtas_lock, flags);
@@ -1870,6 +1893,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
do_enter_rtas(&rtas_args);
args = rtas_args;
 
+   if (token == rtas_function_token(RTAS_FN_IBM_GET_VPD))
+   papr_vpd_mutex_unlock();
+
/* A -1 return code indicates that the last command couldn't
   be completed due to a hardware error. */
if (be32_to_cpu(args.rets[0]) == -1)

-- 
2.41.0



[PATCH v2 0/7] powerpc/pseries: new character devices for system parameters and VPD

2023-10-13 Thread Nathan Lynch via B4 Relay
Add character devices that expose PAPR-specific system parameters and
VPD to user space.

The problem: important platform features are enabled on Linux VMs
through the powerpc-specific rtas() syscall in combination with
writeable mappings of /dev/mem. In typical usage, this is encapsulated
behind APIs provided by the librtas library. This paradigm is
incompatible with lockdown, which prohibits /dev/mem access. It also
is too low-level in many cases: a single logical operation may require
multiple sys_rtas() calls in succession to complete. This carries the
risk that a process may exit while leaving an operation unfinished. It
also means that callers must coordinate their use of the syscall for
functions that cannot tolerate multiple concurrent clients, such as
ibm,get-vpd.

The solution presented here is to add a pair of small pseries-specific
"drivers," one for VPD and one for system parameters. The new drivers
expose these facilities to user space in ways that are compatible with
lockdown and require no coordination between their clients.

Both drivers could potentially support poll() methods to notify
clients of changes to parameters or VPD that happen due to partition
migration and other events. But that should be safe to leave for
later, assuming there's any interest.

I have made changes to librtas to prefer the new interfaces and
verified that existing clients work correctly with the new code. A
draft PR for that work is here:

https://github.com/ibm-power-utilities/librtas/pull/36

I expect to propose at least one more small driver in this style for
platform dump retrieval in a separate submission in the future.

---
Changes in v2:
- Fix unused-but-set variable warning in papr-sysparm code.
- Rebase on powerpc/next branch.
- Link to v1: 
https://lore.kernel.org/r/20231006-papr-sys_rtas-vs-lockdown-v1-0-3a36bfb66...@linux.ibm.com

Changes in v1 vs initial RFC:
- Add papr-sysparm driver and tests.
- Add a papr-miscdev.h uapi header.
- Prevent sys_rtas() from interfering with papr-vpd call sequences.
- Handle -4 ("VPD changed") status in papr-vpd.
- Include string_helpers.h in papr-vpd.c, per Michal Suchánek
- Link to RFC: 
https://lore.kernel.org/r/20230822-papr-sys_rtas-vs-lockdown-v1-0-932623cf3...@linux.ibm.com

---
Nathan Lynch (7):
  powerpc/uapi: export papr-miscdev.h header
  powerpc/pseries: papr-vpd char driver for VPD retrieval
  powerpc/rtas: serialize ibm,get-vpd service with papr-vpd sequences
  powerpc/pseries/papr-sysparm: validate buffer object lengths
  powerpc/pseries/papr-sysparm: expose chardev API to user space
  powerpc/selftests: add test for papr-vpd
  powerpc/selftests: add test for papr-sysparm

 Documentation/userspace-api/ioctl/ioctl-number.rst |   4 +
 arch/powerpc/include/asm/papr-sysparm.h|  17 +-
 arch/powerpc/include/asm/papr-vpd.h|  18 +
 arch/powerpc/include/uapi/asm/papr-miscdev.h   |   9 +
 arch/powerpc/include/uapi/asm/papr-sysparm.h   |  58 +++
 arch/powerpc/include/uapi/asm/papr-vpd.h   |  22 +
 arch/powerpc/kernel/rtas.c |  26 +
 arch/powerpc/platforms/pseries/Makefile|   1 +
 arch/powerpc/platforms/pseries/papr-sysparm.c  | 201 +++-
 arch/powerpc/platforms/pseries/papr-vpd.c  | 542 +
 tools/testing/selftests/powerpc/Makefile   |   2 +
 .../selftests/powerpc/papr_sysparm/.gitignore  |   1 +
 .../selftests/powerpc/papr_sysparm/Makefile|  12 +
 .../selftests/powerpc/papr_sysparm/papr_sysparm.c  | 164 +++
 .../testing/selftests/powerpc/papr_vpd/.gitignore  |   1 +
 tools/testing/selftests/powerpc/papr_vpd/Makefile  |  12 +
 .../testing/selftests/powerpc/papr_vpd/papr_vpd.c  | 352 +
 17 files changed, 1434 insertions(+), 8 deletions(-)
---
base-commit: 0ebc7feae79ac07772a20382eebd8c3503313714
change-id: 20230817-papr-sys_rtas-vs-lockdown-5c54505db792

Best regards,
-- 
Nathan Lynch 



[PATCH 3/7] powerpc/rtas: serialize ibm,get-vpd service with papr-vpd sequences

2023-10-06 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Take the papr-vpd driver's internal mutex when sys_rtas performs
ibm,get-vpd calls. This prevents sys_rtas(ibm,get-vpd) calls from
interleaving with sequences performed by the driver, ensuring that
such sequences are not disrupted.

However, it cannot prevent the driver from interleaving with sequences
that are initiated via sys_rtas, since control returns to user space
with each sys_rtas(ibm,get-vpd) call.

Emit a notice on first use of sys_rtas(ibm,get-vpd) encouraging users
to migrate to /dev/papr-vpd.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index eddc031c4b95..70ae118d2a13 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1861,6 +1862,28 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
goto copy_return;
}
 
+   if (token == rtas_function_token(RTAS_FN_IBM_GET_VPD)) {
+   /*
+* ibm,get-vpd potentially needs to be invoked
+* multiple times to obtain complete results.
+* Interleaved ibm,get-vpd sequences disrupt each
+* other.
+*
+* /dev/papr-vpd doesn't have this problem and users
+* do not need to be aware of each other to use it
+* safely.
+*
+* We can prevent this call from disrupting a
+* /dev/papr-vpd-initiated sequence in progress by
+* reaching into the driver to take its internal
+* lock. Unfortunately there is no way to prevent
+* interference in the other direction without
+* resorting to even worse hacks.
+*/
+   pr_notice_once("Calling ibm,get-vpd via sys_rtas is allowed but deprecated. Use /dev/papr-vpd instead.\n");
+   papr_vpd_mutex_lock();
+   }
+
buff_copy = get_errorlog_buffer();
 
raw_spin_lock_irqsave(&rtas_lock, flags);
@@ -1870,6 +1893,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
do_enter_rtas(&rtas_args);
args = rtas_args;
 
+   if (token == rtas_function_token(RTAS_FN_IBM_GET_VPD))
+   papr_vpd_mutex_unlock();
+
/* A -1 return code indicates that the last command couldn't
   be completed due to a hardware error. */
if (be32_to_cpu(args.rets[0]) == -1)

-- 
2.41.0



[PATCH 5/7] powerpc/pseries/papr-sysparm: expose chardev API to user space

2023-10-06 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Until now the papr_sysparm APIs have been kernel-internal. But user
space needs access to PAPR system parameters too. The only method
available to user space today to get or set system parameters is using
sys_rtas() and /dev/mem to pass RTAS-addressable buffers between user
space and firmware. This is incompatible with lockdown and should be
deprecated.

So provide an alternative ABI to user space in the form of a
/dev/papr-sysparm character device with just two ioctl commands (get
and set). The data payloads involved are small enough to fit in the
ioctl argument buffer, making the code relatively simple.

Exposing the system parameters through sysfs has been considered but
it would be too awkward:

* The kernel currently does not have to contain an exhaustive list of
  defined system parameters. This is a convenient property to maintain
  because we don't have to update the kernel whenever a new parameter
  is added to PAPR. Exporting a named attribute in sysfs for each
  parameter would negate this.

* Some system parameters are text-based and some are not.

* Retrieval of at least one system parameter requires input data,
  which a simple read-oriented interface can't support.

Signed-off-by: Nathan Lynch 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   2 +
 arch/powerpc/include/asm/papr-sysparm.h|  17 ++-
 arch/powerpc/include/uapi/asm/papr-sysparm.h   |  58 
 arch/powerpc/platforms/pseries/papr-sysparm.c  | 160 -
 4 files changed, 229 insertions(+), 8 deletions(-)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index a950545bf7cd..d8b6cb1a3636 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -351,6 +351,8 @@ Code  Seq#Include File  
 Comments
  

 0xB2  00 arch/powerpc/include/uapi/asm/papr-vpd.h
powerpc/pseries VPD API
  

+0xB2  01-02  arch/powerpc/include/uapi/asm/papr-sysparm.h
powerpc/pseries system parameter API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/asm/papr-sysparm.h b/arch/powerpc/include/asm/papr-sysparm.h
index f5fdbd8ae9db..0dbbff59101d 100644
--- a/arch/powerpc/include/asm/papr-sysparm.h
+++ b/arch/powerpc/include/asm/papr-sysparm.h
@@ -2,8 +2,10 @@
 #ifndef _ASM_POWERPC_PAPR_SYSPARM_H
 #define _ASM_POWERPC_PAPR_SYSPARM_H
 
+#include 
+
 typedef struct {
-   const u32 token;
+   u32 token;
 } papr_sysparm_t;
 
 #define mk_papr_sysparm(x_) ((papr_sysparm_t){ .token = x_, })
@@ -20,11 +22,14 @@ typedef struct {
 #define PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRSmk_papr_sysparm(50)
 #define PAPR_SYSPARM_LPAR_NAME mk_papr_sysparm(55)
 
-enum {
-   PAPR_SYSPARM_MAX_INPUT  = 1024,
-   PAPR_SYSPARM_MAX_OUTPUT = 4000,
-};
-
+/**
+ * struct papr_sysparm_buf - RTAS work area layout for system parameter functions.
+ *
+ * This is the memory layout of the buffers passed to/from
+ * ibm,get-system-parameter and ibm,set-system-parameter. It is
+ * distinct from the papr_sysparm_io_block structure that is passed
+ * between user space and the kernel.
+ */
 struct papr_sysparm_buf {
__be16 len;
char val[PAPR_SYSPARM_MAX_OUTPUT];
diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h b/arch/powerpc/include/uapi/asm/papr-sysparm.h
new file mode 100644
index 000000000000..9f9a0f267ea5
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_SYSPARM_H_
+#define _UAPI_PAPR_SYSPARM_H_
+
+#include 
+#include 
+#include 
+
+enum {
+   PAPR_SYSPARM_MAX_INPUT  = 1024,
+   PAPR_SYSPARM_MAX_OUTPUT = 4000,
+};
+
+struct papr_sysparm_io_block {
+   __u32 parameter;
+   __u16 length;
+   char data[PAPR_SYSPARM_MAX_OUTPUT];
+};
+
+/**
+ * PAPR_SYSPARM_IOC_GET - Retrieve the value of a PAPR system parameter.
+ *
+ * Uses _IOWR because of one corner case: Retrieving the value of the
+ * "OS Service Entitlement Status" parameter (60) requires the caller
+ * to supply input data (a date string) in the buffer passed to
+ * firmware. So the @length and @data of the incoming
+ * papr_sysparm_io_block are always used to initialize the work area
+ * supplied to ibm,get-system-parameter. No other parameters are known
+ * to pa

  1   2   >