Re: [PATCH v4 3/4] xen/ppc: Implement early serial printk on pseries

2023-07-24 Thread Jan Beulich
On 21.07.2023 18:53, Shawn Anastasio wrote:
> On 7/19/23 9:05 AM, Jan Beulich wrote:
>> On 18.07.2023 22:20, Shawn Anastasio wrote:
>>> +void __init boot_of_init(unsigned long vec)
>>> +{
>>> +int bof_chosen;
>>> +
>>> +of_vec = vec;
>>> +
>>> +/* Get a handle to the default console */
>>> +bof_chosen = of_finddevice("/chosen");
>>> +of_getprop(bof_chosen, "stdout", &of_out, sizeof(of_out));
>>> +of_out = be32_to_cpu(of_out);
>>
>> Can any of these fail, and hence lead to ...
> 
> These calls are allowed to fail, but their return value in those cases
> is well-defined (an invalid handle), so...
> 
>>
>>> +early_printk_init(of_putchar);
>>
>> ... this better not getting invoked?
> 
> this being invoked is fine even in those cases. It will just result in
> the invalid handle being passed to of_write and the firmware will refuse
> to service the writes.

I assumed all this to be the case; the question was more towards "Wouldn't
it make sense to avoid installing the function pointer in such a case?"

Jan



Re: [PATCH v4 3/4] xen/ppc: Implement early serial printk on pseries

2023-07-21 Thread Shawn Anastasio
On 7/19/23 9:05 AM, Jan Beulich wrote:
> On 18.07.2023 22:20, Shawn Anastasio wrote:
>> +void __init boot_of_init(unsigned long vec)
>> +{
>> +int bof_chosen;
>> +
>> +of_vec = vec;
>> +
>> +/* Get a handle to the default console */
>> +bof_chosen = of_finddevice("/chosen");
>> +of_getprop(bof_chosen, "stdout", &of_out, sizeof(of_out));
>> +of_out = be32_to_cpu(of_out);
> 
> Can any of these fail, and hence lead to ...

These calls are allowed to fail, but their return value in those cases
is well-defined (an invalid handle), so...

> 
>> +early_printk_init(of_putchar);
> 
> ... this better not getting invoked?

this being invoked is fine even in those cases. It will just result in
the invalid handle being passed to of_write and the firmware will refuse
to service the writes.

>> --- a/xen/arch/ppc/ppc64/asm-offsets.c
>> +++ b/xen/arch/ppc/ppc64/asm-offsets.c
>> @@ -0,0 +1,59 @@
>> +/*
>> + * Generate definitions needed by assembly language modules.
>> + * This code generates raw asm output which is post-processed
>> + * to extract and format the required data.
>> + */
>> +
>> +#include 
>> +
>> +#define DEFINE(_sym, _val) \
>> +asm volatile ("\n.ascii\"==>#define " #_sym " %0 /* " #_val " */<==\"" \
>> +  : : "i" (_val) )
> 
> Nit: There's a blank missing after the opening paren, which will then want
> the 2nd line to be indented by one more character. (Instead, as a matter of
> your taste, you may omit the blank between the two colons.)

Will fix.

>> +#define BLANK()\
>> +asm volatile ( "\n.ascii\"==><==\"" : : )
>> +#define OFFSET(_sym, _str, _mem)   \
>> +DEFINE(_sym, offsetof(_str, _mem));
>> +
>> +/* base-2 logarithm */
>> +#define __L2(_x)  (((_x) & 0x0002) ?   1 : 0)
>> +#define __L4(_x)  (((_x) & 0x000c) ? ( 2 + __L2( (_x)>> 2)) : __L2( _x))
>> +#define __L8(_x)  (((_x) & 0x00f0) ? ( 4 + __L4( (_x)>> 4)) : __L4( _x))
>> +#define __L16(_x) (((_x) & 0xff00) ? ( 8 + __L8( (_x)>> 8)) : __L8( _x))
>> +#define LOG_2(_x) (((_x) & 0x) ? (16 + __L16((_x)>>16)) : __L16(_x))
>> +
>> +void __dummy__(void)
>> +{
>> +DEFINE(GPR_WIDTH, sizeof(unsigned long));
>> +DEFINE(FPR_WIDTH, sizeof(double));
>> +
>> +OFFSET(UREGS_gprs, struct cpu_user_regs, gprs);
>> +OFFSET(UREGS_r0, struct cpu_user_regs, gprs[0]);
>> +OFFSET(UREGS_r1, struct cpu_user_regs, gprs[1]);
>> +OFFSET(UREGS_r13, struct cpu_user_regs, gprs[13]);
>> +OFFSET(UREGS_srr0, struct cpu_user_regs, srr0);
>> +OFFSET(UREGS_srr1, struct cpu_user_regs, srr1);
>> +OFFSET(UREGS_pc, struct cpu_user_regs, pc);
>> +OFFSET(UREGS_msr, struct cpu_user_regs, msr);
>> +OFFSET(UREGS_lr, struct cpu_user_regs, lr);
>> +OFFSET(UREGS_ctr, struct cpu_user_regs, ctr);
>> +OFFSET(UREGS_xer, struct cpu_user_regs, xer);
>> +OFFSET(UREGS_hid4, struct cpu_user_regs, hid4);
>> +OFFSET(UREGS_dar, struct cpu_user_regs, dar);
>> +OFFSET(UREGS_dsisr, struct cpu_user_regs, dsisr);
>> +OFFSET(UREGS_cr, struct cpu_user_regs, cr);
>> +OFFSET(UREGS_fpscr, struct cpu_user_regs, fpscr);
>> +DEFINE(UREGS_sizeof, sizeof(struct cpu_user_regs));
>> +}
>> +
>> +/* TODO: Replace with BUILD_BUG_ON + IS_ALIGNED once we can use  
>> */
>> +_Static_assert(sizeof(struct cpu_user_regs) % STACK_ALIGN == 0,
>> +   "struct cpu_user_regs not stack aligned!");
> 
> But patch 1 makes BUILD_BUG_ON() available now.

Good point, will fix.

>> --- /dev/null
>> +++ b/xen/arch/ppc/ppc64/of-call.S
>> @@ -0,0 +1,83 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +/*
>> + * Adapted from Linux's arch/powerpc/kernel/entry_64.S, with the
>> + * following copyright notice:
>> + *
>> + *  PowerPC version
>> + *Copyright (C) 1995-1996 Gary Thomas (g...@linuxppc.org)
>> + *  Rewritten by Cort Dougan (c...@cs.nmt.edu) for PReP
>> + *Copyright (C) 1996 Cort Dougan 
>> + *  Adapted for Power Macintosh by Paul Mackerras.
>> + *  Low-level exception handlers and MMU support
>> + *  rewritten by Paul Mackerras.
>> + *Copyright (C) 1996 Paul Mackerras.
>> + *  MPC8xx modifications Copyright (C) 1997 Dan Malek (dma...@jlc.net).
>> + */
>> +
>> +#include 
>> +#include 
>> +#include 
>> +
>> +/* size of minimum stack frame that can hold an entire cpu_user_regs struct 
>> */
>> +#define STACK_SWITCH_FRAME_SIZE UREGS_sizeof
>> +
>> +.section .init.text, "ax", @progbits
>> +
>> +ENTRY(enter_of)
>> +mflr %r0
>> +std %r0, 16(%r1)
>> +stdu %r1,-STACK_SWITCH_FRAME_SIZE(%r1) /* Save SP and create stack 
>> space */
> 
> Nit: A blank after the comma would again be nice.

Will fix.

>> +/*
>> + * Because PROM is running in 32b mode, it clobbers the high order half
>> + * of all registers that it saves.  We therefore save those registers
>> + * PROM might touch to the stack.  (%r0,

Re: [PATCH v4 3/4] xen/ppc: Implement early serial printk on pseries

2023-07-20 Thread Jan Beulich
On 20.07.2023 23:01, Shawn Anastasio wrote:
> On 7/19/23 9:05 AM, Jan Beulich wrote:
>> Before you/we grow more assembly code, may I re-raise a request regarding
>> readability: I think it would be nice if operands started at a fixed column,
>> unless the insn mnemonic is unusually long. Where exactly to draw the line
>> is up to each archtecture; on x86 we use 8 positions from the start of the
>> mnemonic.
> 
> There is quite a large variance in mnemonic length on ppc -- many common
> mnemonics only use 2 characters (e.g. ld, mr) while other common ones
> use 6+ (e.g. rldicr, the mtspr family, etc.). Enforcing a column size
> that's too short would make the longer mnemonics look misaligned and out
> of place, but using a longer column length (like 8) that can accommodate
> most common mnemonics adds too much space between short mnemonics and
> their arguments.

Common length is 3 on x86, and as said we use 8.

> That said if you still feel strongly about this then I am not strongly
> opposed to adding an 8-space column alignment.

I certainly think it helps readability a lot. 8 also matches the common
use (fair parts of gas'es testsuite, Linux) of hard tabs.

Jan



Re: [PATCH v4 3/4] xen/ppc: Implement early serial printk on pseries

2023-07-20 Thread Shawn Anastasio
On 7/19/23 9:05 AM, Jan Beulich wrote:
> Before you/we grow more assembly code, may I re-raise a request regarding
> readability: I think it would be nice if operands started at a fixed column,
> unless the insn mnemonic is unusually long. Where exactly to draw the line
> is up to each archtecture; on x86 we use 8 positions from the start of the
> mnemonic.

There is quite a large variance in mnemonic length on ppc -- many common
mnemonics only use 2 characters (e.g. ld, mr) while other common ones
use 6+ (e.g. rldicr, the mtspr family, etc.). Enforcing a column size
that's too short would make the longer mnemonics look misaligned and out
of place, but using a longer column length (like 8) that can accommodate
most common mnemonics adds too much space between short mnemonics and
their arguments.

That said if you still feel strongly about this then I am not strongly
opposed to adding an 8-space column alignment.

> Jan

Thanks,
Shawn



Re: [PATCH v4 3/4] xen/ppc: Implement early serial printk on pseries

2023-07-19 Thread Jan Beulich
On 18.07.2023 22:20, Shawn Anastasio wrote:
> +void __init boot_of_init(unsigned long vec)
> +{
> +int bof_chosen;
> +
> +of_vec = vec;
> +
> +/* Get a handle to the default console */
> +bof_chosen = of_finddevice("/chosen");
> +of_getprop(bof_chosen, "stdout", &of_out, sizeof(of_out));
> +of_out = be32_to_cpu(of_out);

Can any of these fail, and hence lead to ...

> +early_printk_init(of_putchar);

... this better not getting invoked?

> --- a/xen/arch/ppc/ppc64/asm-offsets.c
> +++ b/xen/arch/ppc/ppc64/asm-offsets.c
> @@ -0,0 +1,59 @@
> +/*
> + * Generate definitions needed by assembly language modules.
> + * This code generates raw asm output which is post-processed
> + * to extract and format the required data.
> + */
> +
> +#include 
> +
> +#define DEFINE(_sym, _val) \
> +asm volatile ("\n.ascii\"==>#define " #_sym " %0 /* " #_val " */<==\"" \
> +  : : "i" (_val) )

Nit: There's a blank missing after the opening paren, which will then want
the 2nd line to be indented by one more character. (Instead, as a matter of
your taste, you may omit the blank between the two colons.)

> +#define BLANK()\
> +asm volatile ( "\n.ascii\"==><==\"" : : )
> +#define OFFSET(_sym, _str, _mem)   \
> +DEFINE(_sym, offsetof(_str, _mem));
> +
> +/* base-2 logarithm */
> +#define __L2(_x)  (((_x) & 0x0002) ?   1 : 0)
> +#define __L4(_x)  (((_x) & 0x000c) ? ( 2 + __L2( (_x)>> 2)) : __L2( _x))
> +#define __L8(_x)  (((_x) & 0x00f0) ? ( 4 + __L4( (_x)>> 4)) : __L4( _x))
> +#define __L16(_x) (((_x) & 0xff00) ? ( 8 + __L8( (_x)>> 8)) : __L8( _x))
> +#define LOG_2(_x) (((_x) & 0x) ? (16 + __L16((_x)>>16)) : __L16(_x))
> +
> +void __dummy__(void)
> +{
> +DEFINE(GPR_WIDTH, sizeof(unsigned long));
> +DEFINE(FPR_WIDTH, sizeof(double));
> +
> +OFFSET(UREGS_gprs, struct cpu_user_regs, gprs);
> +OFFSET(UREGS_r0, struct cpu_user_regs, gprs[0]);
> +OFFSET(UREGS_r1, struct cpu_user_regs, gprs[1]);
> +OFFSET(UREGS_r13, struct cpu_user_regs, gprs[13]);
> +OFFSET(UREGS_srr0, struct cpu_user_regs, srr0);
> +OFFSET(UREGS_srr1, struct cpu_user_regs, srr1);
> +OFFSET(UREGS_pc, struct cpu_user_regs, pc);
> +OFFSET(UREGS_msr, struct cpu_user_regs, msr);
> +OFFSET(UREGS_lr, struct cpu_user_regs, lr);
> +OFFSET(UREGS_ctr, struct cpu_user_regs, ctr);
> +OFFSET(UREGS_xer, struct cpu_user_regs, xer);
> +OFFSET(UREGS_hid4, struct cpu_user_regs, hid4);
> +OFFSET(UREGS_dar, struct cpu_user_regs, dar);
> +OFFSET(UREGS_dsisr, struct cpu_user_regs, dsisr);
> +OFFSET(UREGS_cr, struct cpu_user_regs, cr);
> +OFFSET(UREGS_fpscr, struct cpu_user_regs, fpscr);
> +DEFINE(UREGS_sizeof, sizeof(struct cpu_user_regs));
> +}
> +
> +/* TODO: Replace with BUILD_BUG_ON + IS_ALIGNED once we can use  
> */
> +_Static_assert(sizeof(struct cpu_user_regs) % STACK_ALIGN == 0,
> +   "struct cpu_user_regs not stack aligned!");

But patch 1 makes BUILD_BUG_ON() available now.

> --- /dev/null
> +++ b/xen/arch/ppc/ppc64/of-call.S
> @@ -0,0 +1,83 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * Adapted from Linux's arch/powerpc/kernel/entry_64.S, with the
> + * following copyright notice:
> + *
> + *  PowerPC version
> + *Copyright (C) 1995-1996 Gary Thomas (g...@linuxppc.org)
> + *  Rewritten by Cort Dougan (c...@cs.nmt.edu) for PReP
> + *Copyright (C) 1996 Cort Dougan 
> + *  Adapted for Power Macintosh by Paul Mackerras.
> + *  Low-level exception handlers and MMU support
> + *  rewritten by Paul Mackerras.
> + *Copyright (C) 1996 Paul Mackerras.
> + *  MPC8xx modifications Copyright (C) 1997 Dan Malek (dma...@jlc.net).
> + */
> +
> +#include 
> +#include 
> +#include 
> +
> +/* size of minimum stack frame that can hold an entire cpu_user_regs struct 
> */
> +#define STACK_SWITCH_FRAME_SIZE UREGS_sizeof
> +
> +.section .init.text, "ax", @progbits
> +
> +ENTRY(enter_of)
> +mflr %r0
> +std %r0, 16(%r1)
> +stdu %r1,-STACK_SWITCH_FRAME_SIZE(%r1) /* Save SP and create stack space 
> */

Nit: A blank after the comma would again be nice.

> +/*
> + * Because PROM is running in 32b mode, it clobbers the high order half
> + * of all registers that it saves.  We therefore save those registers
> + * PROM might touch to the stack.  (%r0, %r3-%r13 are caller saved)
> + */
> +SAVE_GPR(2, %r1)
> +SAVE_GPR(13, %r1)
> +SAVE_NVGPRS(%r1)
> +mfcr %r10
> +mfmsr %r11
> +std %r10, UREGS_cr(%r1)
> +std %r11, UREGS_msr(%r1)
> +
> +/* Put PROM address in SRR0 */
> +mtsrr0 %r4
> +
> +/* Setup our trampoline return addr in LR */
> +bcl 20, 31, .+4
> +0:  mflr %r4
> +addi %r4, %r4, 1f - 0b
> +mtlr %r4
> +
> +/* Prepare a 32-bit mode big endian MSR */
> +LOAD_IMM64(%r12, MSR_SF | M

[PATCH v4 3/4] xen/ppc: Implement early serial printk on pseries

2023-07-18 Thread Shawn Anastasio
On typical Power VMs (e.g. QEMU's -M pseries), a variety of services
including an early serial console are provided by Open Firmware.
Implement the required interfaces to call into Open Firmware and write
to the serial console.

Since Open Firmware runs in 32-bit Big Endian mode and Xen runs in
64-bit Little Endian mode, a thunk is required to save/restore
any potentially-clobbered registers as well as to perform the
required endianness switch. Thankfully, linux already has such
a routine, which was imported into ppc64/of-call.S.

Support for bare metal (PowerNV) will be implemented in a future
patch.

Signed-off-by: Shawn Anastasio 
---
 xen/arch/ppc/Kconfig.debug  |   5 +
 xen/arch/ppc/Makefile   |   2 +
 xen/arch/ppc/boot-of.c  | 113 +++
 xen/arch/ppc/configs/ppc64_defconfig|   1 +
 xen/arch/ppc/early_printk.c |  28 +
 xen/arch/ppc/include/asm/asm-defns.h|  17 +++
 xen/arch/ppc/include/asm/boot.h |  23 
 xen/arch/ppc/include/asm/byteorder.h|  12 +++
 xen/arch/ppc/include/asm/config.h   |   3 +
 xen/arch/ppc/include/asm/early_printk.h |  15 +++
 xen/arch/ppc/include/asm/msr.h  |  51 +
 xen/arch/ppc/include/asm/processor.h| 138 
 xen/arch/ppc/include/asm/types.h|  21 
 xen/arch/ppc/ppc64/Makefile |   1 +
 xen/arch/ppc/ppc64/asm-offsets.c|  59 ++
 xen/arch/ppc/ppc64/head.S   |   9 ++
 xen/arch/ppc/ppc64/of-call.S|  83 ++
 xen/arch/ppc/setup.c|  19 +++-
 18 files changed, 597 insertions(+), 3 deletions(-)
 create mode 100644 xen/arch/ppc/boot-of.c
 create mode 100644 xen/arch/ppc/early_printk.c
 create mode 100644 xen/arch/ppc/include/asm/boot.h
 create mode 100644 xen/arch/ppc/include/asm/byteorder.h
 create mode 100644 xen/arch/ppc/include/asm/early_printk.h
 create mode 100644 xen/arch/ppc/include/asm/msr.h
 create mode 100644 xen/arch/ppc/include/asm/processor.h
 create mode 100644 xen/arch/ppc/include/asm/types.h
 create mode 100644 xen/arch/ppc/ppc64/of-call.S

diff --git a/xen/arch/ppc/Kconfig.debug b/xen/arch/ppc/Kconfig.debug
index e69de29bb2..608c9ff832 100644
--- a/xen/arch/ppc/Kconfig.debug
+++ b/xen/arch/ppc/Kconfig.debug
@@ -0,0 +1,5 @@
+config EARLY_PRINTK
+bool "Enable early printk"
+default DEBUG
+help
+  Enables early printk debug messages
diff --git a/xen/arch/ppc/Makefile b/xen/arch/ppc/Makefile
index 530fba2121..098a4dd0a9 100644
--- a/xen/arch/ppc/Makefile
+++ b/xen/arch/ppc/Makefile
@@ -1,5 +1,7 @@
 obj-$(CONFIG_PPC64) += ppc64/
 
+obj-y += boot-of.init.o
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.init.o
 obj-y += setup.o
 
 $(TARGET): $(TARGET)-syms
diff --git a/xen/arch/ppc/boot-of.c b/xen/arch/ppc/boot-of.c
new file mode 100644
index 00..a06546871e
--- /dev/null
+++ b/xen/arch/ppc/boot-of.c
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file was derived from Xen 3.2's xen/arch/powerpc/boot_of.c,
+ * originally licensed under GPL version 2 or later.
+ *
+ * Copyright IBM Corp. 2005, 2006, 2007
+ * Copyright Raptor Engineering, LLC
+ *
+ * Authors: Jimi Xenidis 
+ *  Hollis Blanchard 
+ *  Shawn Anastasio 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * The Open Firmware client interface is called in 32-bit mode with the MMU 
off,
+ * so any addresses passed to it must be physical addresses under 4GB.
+ *
+ * Since the client interface is only used during early boot before the MMU is 
on
+ * and Xen itself was loaded by Open Firmware (and therefore resides below 
4GB),
+ * we can achieve the desired result with a simple cast to uint32_t.
+ */
+#define ADDR(x) ((uint32_t)(unsigned long)(x))
+
+/* OF entrypoint*/
+static unsigned long __initdata of_vec;
+
+static int __initdata of_out;
+
+static int __init of_call(const char *service, uint32_t nargs, uint32_t nrets,
+  int32_t rets[], ...)
+{
+int rc;
+va_list args;
+unsigned int i;
+struct of_service s = { 0 };
+
+s.ofs_service = cpu_to_be32(ADDR(service));
+s.ofs_nargs = cpu_to_be32(nargs);
+s.ofs_nrets = cpu_to_be32(nrets);
+
+/* Copy all the params into the args array */
+va_start(args, rets);
+
+for ( i = 0; i < nargs; i++ )
+s.ofs_args[i] = cpu_to_be32(va_arg(args, uint32_t));
+
+va_end(args);
+
+rc = enter_of(&s, of_vec);
+
+/* Copy all return values to the output rets array */
+for ( i = 0; i < nrets; i++ )
+rets[i] = be32_to_cpu(s.ofs_args[i + nargs]);
+
+return rc;
+}
+
+static int __init of_finddevice(const char *devspec)
+{
+int32_t rets[1] = { OF_FAILURE };
+
+of_call("finddevice", 1, ARRAY_SIZE(rets), rets, ADDR(devspec));
+return rets[0];
+}
+
+static int __init of_getprop(int ph, const char *name, void *buf, uint32_t 
buflen)
+{
+int32_t rets[1] = {