Re: Fix kernel stack alignment on riscv64

2022-02-23 Thread Mark Kettenis
> Date: Wed, 23 Feb 2022 14:18:40 +
> From: Visa Hankala 
> 
> So far this patch has worked fine on my test system.

Cool.  I'm a little bit bothered by that 0x10 in the calculation, and
maybe that can go now.  But that is cleanup for later.  Making the
allocation explicit is progress.

ok kettenis@

> On Tue, Feb 22, 2022 at 06:41:31PM +, Visa Hankala wrote:
> > Index: arch/riscv64/include/frame.h
> > ===
> > RCS file: src/sys/arch/riscv64/include/frame.h,v
> > retrieving revision 1.2
> > diff -u -p -r1.2 frame.h
> > --- arch/riscv64/include/frame.h12 May 2021 01:20:52 -  1.2
> > +++ arch/riscv64/include/frame.h22 Feb 2022 18:20:46 -
> > @@ -64,6 +64,7 @@ typedef struct trapframe {
> > register_t tf_sstatus;
> > register_t tf_stval;
> > register_t tf_scause;
> > +   register_t tf_pad;
> >  } trapframe_t;
> >  
> >  /*
> > @@ -85,6 +86,7 @@ struct sigframe {
> >  struct switchframe {
> > register_t sf_s[12];
> > register_t sf_ra;
> > +   register_t sf_pad;
> >  };
> >  
> >  struct callframe {
> > Index: arch/riscv64/riscv64/vm_machdep.c
> > ===
> > RCS file: src/sys/arch/riscv64/riscv64/vm_machdep.c,v
> > retrieving revision 1.8
> > diff -u -p -r1.8 vm_machdep.c
> > --- arch/riscv64/riscv64/vm_machdep.c   22 Feb 2022 07:47:46 -  
> > 1.8
> > +++ arch/riscv64/riscv64/vm_machdep.c   22 Feb 2022 18:20:46 -
> > @@ -62,6 +62,10 @@ cpu_fork(struct proc *p1, struct proc *p
> > struct trapframe *tf;
> > struct switchframe *sf;
> >  
> > +   /* Ensure proper stack alignment. */
> > +   CTASSERT((sizeof(struct trapframe) & STACKALIGNBYTES) == 0);
> > +   CTASSERT((sizeof(struct switchframe) & STACKALIGNBYTES) == 0);
> > +
> > /* Save FPU state to PCB if necessary. */
> > if (pcb1->pcb_flags & PCB_FPU)
> > fpu_save(p1, pcb1->pcb_tf);
> > @@ -74,6 +78,7 @@ cpu_fork(struct proc *p1, struct proc *p
> > tf = (struct trapframe *)((u_long)p2->p_addr
> > + USPACE
> > - sizeof(struct trapframe)
> > +   - sizeof(register_t)/* for holding curcpu */
> > - 0x10);
> >  
> > tf = (struct trapframe *)STACKALIGN(tf);
> > 
> 



Re: Fix kernel stack alignment on riscv64

2022-02-23 Thread Visa Hankala
So far this patch has worked fine on my test system.

On Tue, Feb 22, 2022 at 06:41:31PM +, Visa Hankala wrote:
> Index: arch/riscv64/include/frame.h
> ===
> RCS file: src/sys/arch/riscv64/include/frame.h,v
> retrieving revision 1.2
> diff -u -p -r1.2 frame.h
> --- arch/riscv64/include/frame.h  12 May 2021 01:20:52 -  1.2
> +++ arch/riscv64/include/frame.h  22 Feb 2022 18:20:46 -
> @@ -64,6 +64,7 @@ typedef struct trapframe {
>   register_t tf_sstatus;
>   register_t tf_stval;
>   register_t tf_scause;
> + register_t tf_pad;
>  } trapframe_t;
>  
>  /*
> @@ -85,6 +86,7 @@ struct sigframe {
>  struct switchframe {
>   register_t sf_s[12];
>   register_t sf_ra;
> + register_t sf_pad;
>  };
>  
>  struct callframe {
> Index: arch/riscv64/riscv64/vm_machdep.c
> ===
> RCS file: src/sys/arch/riscv64/riscv64/vm_machdep.c,v
> retrieving revision 1.8
> diff -u -p -r1.8 vm_machdep.c
> --- arch/riscv64/riscv64/vm_machdep.c 22 Feb 2022 07:47:46 -  1.8
> +++ arch/riscv64/riscv64/vm_machdep.c 22 Feb 2022 18:20:46 -
> @@ -62,6 +62,10 @@ cpu_fork(struct proc *p1, struct proc *p
>   struct trapframe *tf;
>   struct switchframe *sf;
>  
> + /* Ensure proper stack alignment. */
> + CTASSERT((sizeof(struct trapframe) & STACKALIGNBYTES) == 0);
> + CTASSERT((sizeof(struct switchframe) & STACKALIGNBYTES) == 0);
> +
>   /* Save FPU state to PCB if necessary. */
>   if (pcb1->pcb_flags & PCB_FPU)
>   fpu_save(p1, pcb1->pcb_tf);
> @@ -74,6 +78,7 @@ cpu_fork(struct proc *p1, struct proc *p
>   tf = (struct trapframe *)((u_long)p2->p_addr
>   + USPACE
>   - sizeof(struct trapframe)
> + - sizeof(register_t)/* for holding curcpu */
>   - 0x10);
>  
>   tf = (struct trapframe *)STACKALIGN(tf);
> 



Re: Fix kernel stack alignment on riscv64

2022-02-22 Thread Visa Hankala
On Tue, Feb 22, 2022 at 07:04:23PM +0100, Mark Kettenis wrote:
> > Date: Tue, 22 Feb 2022 17:45:26 +
> > From: Visa Hankala 
> > 
> > On Tue, Feb 22, 2022 at 06:13:54PM +0100, Mark Kettenis wrote:
> > > > Date: Tue, 22 Feb 2022 16:59:24 +
> > > > From: Visa Hankala 
> > > > 
> > > > On Tue, Feb 22, 2022 at 05:31:31PM +0100, Mark Kettenis wrote:
> > > > > > Date: Tue, 22 Feb 2022 15:58:31 +
> > > > > > From: Visa Hankala 
> > > > > > 
> > > > > > The standard RISC-V calling convention says that the stack pointer
> > > > > > should be 16-byte aligned.
> > > > > > 
> > > > > > The patch below corrects the alignment of the kernel stack in 
> > > > > > context
> > > > > > switching and exception handling.
> > > > > > 
> > > > > > OK?
> > > > > 
> > > > > Is there a reason why we don't simply extend struct switchframe to
> > > > > include another register_t such that it is a multiple of 16 bytes?
> > > > > 
> > > > > (and then add a compile-time assertion somewhere to make sure we don't
> > > > > mess that up again)
> > > > 
> > > > Well, that is another way to do it.
> > > > 
> > > > > > The diff reveals that curcpu is stored in an unnamed spot near the 
> > > > > > upper
> > > > > > end of u-area when running in user mode. Should struct trapframe 
> > > > > > contain
> > > > > > a field where curcpu is stored, so that the usage would become more
> > > > > > apparent? I guess the pointer could also be stored in one of the
> > > > > > existing fields with an appropriate comment in the struct 
> > > > > > definition.
> > > > > 
> > > > > Sorry, but I don't understand you; curpcu is kept in the tp 
> > > > > register...
> > > > 
> > > > In kernel, tp holds curcpu. In userland, tp holds TCB pointer. The 
> > > > exception entry and exit code has to switch the register's content.
> > > > curcpu is stored in kernel stack when the CPU runs userland.
> > > 
> > > Ah, right.  So RISC-V doesn't have a special register to hold this.  I
> > > don't think adding a field to struct trapframe would make this easier
> > > to understand.
> > 
> > Here is a version that shows how a curcpu field would look like,
> > now that a new field is being added anyway.
> > 
> > Still not tested...
> 
> The weird thing about that is that curcpu doesn't actually live in the
> trapframe.  And it would only live at that location for a userland ->
> kernel trapframe.  So I think this would make things more confusing...

All right, trapframe is not the right place for it. The existence of
the "variable" should be visible more clearly nevertheless.

So far the variable has occupied the padding slot after the trapframe.
With the new padding field, the slot is gone and the variable would
reside in the final 16 bytes of u-area.

Index: arch/riscv64/include/frame.h
===
RCS file: src/sys/arch/riscv64/include/frame.h,v
retrieving revision 1.2
diff -u -p -r1.2 frame.h
--- arch/riscv64/include/frame.h12 May 2021 01:20:52 -  1.2
+++ arch/riscv64/include/frame.h22 Feb 2022 18:20:46 -
@@ -64,6 +64,7 @@ typedef struct trapframe {
register_t tf_sstatus;
register_t tf_stval;
register_t tf_scause;
+   register_t tf_pad;
 } trapframe_t;
 
 /*
@@ -85,6 +86,7 @@ struct sigframe {
 struct switchframe {
register_t sf_s[12];
register_t sf_ra;
+   register_t sf_pad;
 };
 
 struct callframe {
Index: arch/riscv64/riscv64/vm_machdep.c
===
RCS file: src/sys/arch/riscv64/riscv64/vm_machdep.c,v
retrieving revision 1.8
diff -u -p -r1.8 vm_machdep.c
--- arch/riscv64/riscv64/vm_machdep.c   22 Feb 2022 07:47:46 -  1.8
+++ arch/riscv64/riscv64/vm_machdep.c   22 Feb 2022 18:20:46 -
@@ -62,6 +62,10 @@ cpu_fork(struct proc *p1, struct proc *p
struct trapframe *tf;
struct switchframe *sf;
 
+   /* Ensure proper stack alignment. */
+   CTASSERT((sizeof(struct trapframe) & STACKALIGNBYTES) == 0);
+   CTASSERT((sizeof(struct switchframe) & STACKALIGNBYTES) == 0);
+
/* Save FPU state to PCB if necessary. */
if (pcb1->pcb_flags & PCB_FPU)
fpu_save(p1, pcb1->pcb_tf);
@@ -74,6 +78,7 @@ cpu_fork(struct proc *p1, struct proc *p
tf = (struct trapframe *)((u_long)p2->p_addr
+ USPACE
- sizeof(struct trapframe)
+   - sizeof(register_t)/* for holding curcpu */
- 0x10);
 
tf = (struct trapframe *)STACKALIGN(tf);



Re: Fix kernel stack alignment on riscv64

2022-02-22 Thread Mark Kettenis
> Date: Tue, 22 Feb 2022 17:45:26 +
> From: Visa Hankala 
> 
> On Tue, Feb 22, 2022 at 06:13:54PM +0100, Mark Kettenis wrote:
> > > Date: Tue, 22 Feb 2022 16:59:24 +
> > > From: Visa Hankala 
> > > 
> > > On Tue, Feb 22, 2022 at 05:31:31PM +0100, Mark Kettenis wrote:
> > > > > Date: Tue, 22 Feb 2022 15:58:31 +
> > > > > From: Visa Hankala 
> > > > > 
> > > > > The standard RISC-V calling convention says that the stack pointer
> > > > > should be 16-byte aligned.
> > > > > 
> > > > > The patch below corrects the alignment of the kernel stack in context
> > > > > switching and exception handling.
> > > > > 
> > > > > OK?
> > > > 
> > > > Is there a reason why we don't simply extend struct switchframe to
> > > > include another register_t such that it is a multiple of 16 bytes?
> > > > 
> > > > (and then add a compile-time assertion somewhere to make sure we don't
> > > > mess that up again)
> > > 
> > > Well, that is another way to do it.
> > > 
> > > > > The diff reveals that curcpu is stored in an unnamed spot near the 
> > > > > upper
> > > > > end of u-area when running in user mode. Should struct trapframe 
> > > > > contain
> > > > > a field where curcpu is stored, so that the usage would become more
> > > > > apparent? I guess the pointer could also be stored in one of the
> > > > > existing fields with an appropriate comment in the struct definition.
> > > > 
> > > > Sorry, but I don't understand you; curpcu is kept in the tp register...
> > > 
> > > In kernel, tp holds curcpu. In userland, tp holds TCB pointer. The 
> > > exception entry and exit code has to switch the register's content.
> > > curcpu is stored in kernel stack when the CPU runs userland.
> > 
> > Ah, right.  So RISC-V doesn't have a special register to hold this.  I
> > don't think adding a field to struct trapframe would make this easier
> > to understand.
> 
> Here is a version that shows how a curcpu field would look like,
> now that a new field is being added anyway.
> 
> Still not tested...

The weird thing about that is that curcpu doesn't actually live in the
trapframe.  And it would only live at that location for a userland ->
kernel trapframe.  So I think this would make things more confusing...

> Index: arch/riscv64/include/frame.h
> ===
> RCS file: src/sys/arch/riscv64/include/frame.h,v
> retrieving revision 1.2
> diff -u -p -r1.2 frame.h
> --- arch/riscv64/include/frame.h  12 May 2021 01:20:52 -  1.2
> +++ arch/riscv64/include/frame.h  22 Feb 2022 17:40:43 -
> @@ -64,6 +64,8 @@ typedef struct trapframe {
>   register_t tf_sstatus;
>   register_t tf_stval;
>   register_t tf_scause;
> + /* Holds curcpu when the CPU runs in user mode. */
> + register_t tf_curcpu;
>  } trapframe_t;
>  
>  /*
> @@ -85,6 +87,7 @@ struct sigframe {
>  struct switchframe {
>   register_t sf_s[12];
>   register_t sf_ra;
> + register_t sf_pad;
>  };
>  
>  struct callframe {
> Index: arch/riscv64/riscv64/exception.S
> ===
> RCS file: src/sys/arch/riscv64/riscv64/exception.S,v
> retrieving revision 1.5
> diff -u -p -r1.5 exception.S
> --- arch/riscv64/riscv64/exception.S  20 May 2021 04:22:33 -  1.5
> +++ arch/riscv64/riscv64/exception.S  22 Feb 2022 17:40:43 -
> @@ -54,7 +54,7 @@
>  
>   /* Load our pcpu */
>   sd  tp, (TF_TP)(sp)
> - ld  tp, (TRAPFRAME_SIZEOF)(sp)
> + ld  tp, (TF_CURCPU)(sp)
>  .endif
>  
>   sd  t0, (TF_T + 0 * 8)(sp)
> @@ -142,7 +142,7 @@
>   csrwsscratch, t0
>  
>   /* Store our pcpu */
> - sd  tp, (TRAPFRAME_SIZEOF)(sp)
> + sd  tp, (TF_CURCPU)(sp)
>   ld  tp, (TF_TP)(sp)
>  
>   /* And restore the user's global pointer */
> Index: arch/riscv64/riscv64/genassym.cf
> ===
> RCS file: src/sys/arch/riscv64/riscv64/genassym.cf,v
> retrieving revision 1.6
> diff -u -p -r1.6 genassym.cf
> --- arch/riscv64/riscv64/genassym.cf  2 Jul 2021 10:42:22 -   1.6
> +++ arch/riscv64/riscv64/genassym.cf  22 Feb 2022 17:40:43 -
> @@ -36,6 +36,7 @@ member  tf_sepc
>  member   tf_sstatus
>  member   tf_stval
>  member   tf_scause
> +member   tf_curcpu
>  
>  struct   switchframe
>  member   sf_s
> Index: arch/riscv64/riscv64/vm_machdep.c
> ===
> RCS file: src/sys/arch/riscv64/riscv64/vm_machdep.c,v
> retrieving revision 1.8
> diff -u -p -r1.8 vm_machdep.c
> --- arch/riscv64/riscv64/vm_machdep.c 22 Feb 2022 07:47:46 -  1.8
> +++ arch/riscv64/riscv64/vm_machdep.c 22 Feb 2022 17:40:43 -
> @@ -62,6 +62,10 @@ cpu_fork(struct proc *p1, struct proc *p
>   struct trapframe *tf;
>   struct switchframe *sf;
>  
> + /* Ensure proper stack alignment. */
> + CTASSERT((sizeof(struct trapframe) 

Re: Fix kernel stack alignment on riscv64

2022-02-22 Thread Visa Hankala
On Tue, Feb 22, 2022 at 06:13:54PM +0100, Mark Kettenis wrote:
> > Date: Tue, 22 Feb 2022 16:59:24 +
> > From: Visa Hankala 
> > 
> > On Tue, Feb 22, 2022 at 05:31:31PM +0100, Mark Kettenis wrote:
> > > > Date: Tue, 22 Feb 2022 15:58:31 +
> > > > From: Visa Hankala 
> > > > 
> > > > The standard RISC-V calling convention says that the stack pointer
> > > > should be 16-byte aligned.
> > > > 
> > > > The patch below corrects the alignment of the kernel stack in context
> > > > switching and exception handling.
> > > > 
> > > > OK?
> > > 
> > > Is there a reason why we don't simply extend struct switchframe to
> > > include another register_t such that it is a multiple of 16 bytes?
> > > 
> > > (and then add a compile-time assertion somewhere to make sure we don't
> > > mess that up again)
> > 
> > Well, that is another way to do it.
> > 
> > > > The diff reveals that curcpu is stored in an unnamed spot near the upper
> > > > end of u-area when running in user mode. Should struct trapframe contain
> > > > a field where curcpu is stored, so that the usage would become more
> > > > apparent? I guess the pointer could also be stored in one of the
> > > > existing fields with an appropriate comment in the struct definition.
> > > 
> > > Sorry, but I don't understand you; curpcu is kept in the tp register...
> > 
> > In kernel, tp holds curcpu. In userland, tp holds TCB pointer. The 
> > exception entry and exit code has to switch the register's content.
> > curcpu is stored in kernel stack when the CPU runs userland.
> 
> Ah, right.  So RISC-V doesn't have a special register to hold this.  I
> don't think adding a field to struct trapframe would make this easier
> to understand.

Here is a version that shows how a curcpu field would look like,
now that a new field is being added anyway.

Still not tested...

Index: arch/riscv64/include/frame.h
===
RCS file: src/sys/arch/riscv64/include/frame.h,v
retrieving revision 1.2
diff -u -p -r1.2 frame.h
--- arch/riscv64/include/frame.h12 May 2021 01:20:52 -  1.2
+++ arch/riscv64/include/frame.h22 Feb 2022 17:40:43 -
@@ -64,6 +64,8 @@ typedef struct trapframe {
register_t tf_sstatus;
register_t tf_stval;
register_t tf_scause;
+   /* Holds curcpu when the CPU runs in user mode. */
+   register_t tf_curcpu;
 } trapframe_t;
 
 /*
@@ -85,6 +87,7 @@ struct sigframe {
 struct switchframe {
register_t sf_s[12];
register_t sf_ra;
+   register_t sf_pad;
 };
 
 struct callframe {
Index: arch/riscv64/riscv64/exception.S
===
RCS file: src/sys/arch/riscv64/riscv64/exception.S,v
retrieving revision 1.5
diff -u -p -r1.5 exception.S
--- arch/riscv64/riscv64/exception.S20 May 2021 04:22:33 -  1.5
+++ arch/riscv64/riscv64/exception.S22 Feb 2022 17:40:43 -
@@ -54,7 +54,7 @@
 
/* Load our pcpu */
sd  tp, (TF_TP)(sp)
-   ld  tp, (TRAPFRAME_SIZEOF)(sp)
+   ld  tp, (TF_CURCPU)(sp)
 .endif
 
sd  t0, (TF_T + 0 * 8)(sp)
@@ -142,7 +142,7 @@
csrwsscratch, t0
 
/* Store our pcpu */
-   sd  tp, (TRAPFRAME_SIZEOF)(sp)
+   sd  tp, (TF_CURCPU)(sp)
ld  tp, (TF_TP)(sp)
 
/* And restore the user's global pointer */
Index: arch/riscv64/riscv64/genassym.cf
===
RCS file: src/sys/arch/riscv64/riscv64/genassym.cf,v
retrieving revision 1.6
diff -u -p -r1.6 genassym.cf
--- arch/riscv64/riscv64/genassym.cf2 Jul 2021 10:42:22 -   1.6
+++ arch/riscv64/riscv64/genassym.cf22 Feb 2022 17:40:43 -
@@ -36,6 +36,7 @@ membertf_sepc
 member tf_sstatus
 member tf_stval
 member tf_scause
+member tf_curcpu
 
 struct switchframe
 member sf_s
Index: arch/riscv64/riscv64/vm_machdep.c
===
RCS file: src/sys/arch/riscv64/riscv64/vm_machdep.c,v
retrieving revision 1.8
diff -u -p -r1.8 vm_machdep.c
--- arch/riscv64/riscv64/vm_machdep.c   22 Feb 2022 07:47:46 -  1.8
+++ arch/riscv64/riscv64/vm_machdep.c   22 Feb 2022 17:40:43 -
@@ -62,6 +62,10 @@ cpu_fork(struct proc *p1, struct proc *p
struct trapframe *tf;
struct switchframe *sf;
 
+   /* Ensure proper stack alignment. */
+   CTASSERT((sizeof(struct trapframe) & STACKALIGNBYTES) == 0);
+   CTASSERT((sizeof(struct switchframe) & STACKALIGNBYTES) == 0);
+
/* Save FPU state to PCB if necessary. */
if (pcb1->pcb_flags & PCB_FPU)
fpu_save(p1, pcb1->pcb_tf);



Re: Fix kernel stack alignment on riscv64

2022-02-22 Thread Mark Kettenis
> Date: Tue, 22 Feb 2022 16:59:24 +
> From: Visa Hankala 
> 
> On Tue, Feb 22, 2022 at 05:31:31PM +0100, Mark Kettenis wrote:
> > > Date: Tue, 22 Feb 2022 15:58:31 +
> > > From: Visa Hankala 
> > > 
> > > The standard RISC-V calling convention says that the stack pointer
> > > should be 16-byte aligned.
> > > 
> > > The patch below corrects the alignment of the kernel stack in context
> > > switching and exception handling.
> > > 
> > > OK?
> > 
> > Is there a reason why we don't simply extend struct switchframe to
> > include another register_t such that it is a multiple of 16 bytes?
> > 
> > (and then add a compile-time assertion somewhere to make sure we don't
> > mess that up again)
> 
> Well, that is another way to do it.
> 
> > > The diff reveals that curcpu is stored in an unnamed spot near the upper
> > > end of u-area when running in user mode. Should struct trapframe contain
> > > a field where curcpu is stored, so that the usage would become more
> > > apparent? I guess the pointer could also be stored in one of the
> > > existing fields with an appropriate comment in the struct definition.
> > 
> > Sorry, but I don't understand you; curpcu is kept in the tp register...
> 
> In kernel, tp holds curcpu. In userland, tp holds TCB pointer. The 
> exception entry and exit code has to switch the register's content.
> curcpu is stored in kernel stack when the CPU runs userland.

Ah, right.  So RISC-V doesn't have a special register to hold this.  I
don't think adding a field to struct trapframe would make this easier
to understand.

> 
> I have not tested the following patch yet. It takes a while
> to compile...

I think this looks much better.

> Index: arch/riscv64/include/frame.h
> ===
> RCS file: src/sys/arch/riscv64/include/frame.h,v
> retrieving revision 1.2
> diff -u -p -r1.2 frame.h
> --- arch/riscv64/include/frame.h  12 May 2021 01:20:52 -  1.2
> +++ arch/riscv64/include/frame.h  22 Feb 2022 16:50:53 -
> @@ -64,6 +64,7 @@ typedef struct trapframe {
>   register_t tf_sstatus;
>   register_t tf_stval;
>   register_t tf_scause;
> + register_t tf_pad;
>  } trapframe_t;
>  
>  /*
> @@ -85,6 +86,7 @@ struct sigframe {
>  struct switchframe {
>   register_t sf_s[12];
>   register_t sf_ra;
> + register_t sf_pad;
>  };
>  
>  struct callframe {
> Index: arch/riscv64/riscv64/vm_machdep.c
> ===
> RCS file: src/sys/arch/riscv64/riscv64/vm_machdep.c,v
> retrieving revision 1.8
> diff -u -p -r1.8 vm_machdep.c
> --- arch/riscv64/riscv64/vm_machdep.c 22 Feb 2022 07:47:46 -  1.8
> +++ arch/riscv64/riscv64/vm_machdep.c 22 Feb 2022 16:50:53 -
> @@ -62,6 +62,10 @@ cpu_fork(struct proc *p1, struct proc *p
>   struct trapframe *tf;
>   struct switchframe *sf;
>  
> + /* Ensure proper stack alignment. */
> + CTASSERT((sizeof(struct trapframe) & STACKALIGNBYTES) == 0);
> + CTASSERT((sizeof(struct switchframe) & STACKALIGNBYTES) == 0);
> +
>   /* Save FPU state to PCB if necessary. */
>   if (pcb1->pcb_flags & PCB_FPU)
>   fpu_save(p1, pcb1->pcb_tf);
> 



Re: Fix kernel stack alignment on riscv64

2022-02-22 Thread Visa Hankala
On Tue, Feb 22, 2022 at 05:31:31PM +0100, Mark Kettenis wrote:
> > Date: Tue, 22 Feb 2022 15:58:31 +
> > From: Visa Hankala 
> > 
> > The standard RISC-V calling convention says that the stack pointer
> > should be 16-byte aligned.
> > 
> > The patch below corrects the alignment of the kernel stack in context
> > switching and exception handling.
> > 
> > OK?
> 
> Is there a reason why we don't simply extend struct switchframe to
> include another register_t such that it is a multiple of 16 bytes?
> 
> (and then add a compile-time assertion somewhere to make sure we don't
> mess that up again)

Well, that is another way to do it.

> > The diff reveals that curcpu is stored in an unnamed spot near the upper
> > end of u-area when running in user mode. Should struct trapframe contain
> > a field where curcpu is stored, so that the usage would become more
> > apparent? I guess the pointer could also be stored in one of the
> > existing fields with an appropriate comment in the struct definition.
> 
> Sorry, but I don't understand you; curpcu is kept in the tp register...

In kernel, tp holds curcpu. In userland, tp holds TCB pointer. The 
exception entry and exit code has to switch the register's content.
curcpu is stored in kernel stack when the CPU runs userland.

I have not tested the following patch yet. It takes a while
to compile...

Index: arch/riscv64/include/frame.h
===
RCS file: src/sys/arch/riscv64/include/frame.h,v
retrieving revision 1.2
diff -u -p -r1.2 frame.h
--- arch/riscv64/include/frame.h12 May 2021 01:20:52 -  1.2
+++ arch/riscv64/include/frame.h22 Feb 2022 16:50:53 -
@@ -64,6 +64,7 @@ typedef struct trapframe {
register_t tf_sstatus;
register_t tf_stval;
register_t tf_scause;
+   register_t tf_pad;
 } trapframe_t;
 
 /*
@@ -85,6 +86,7 @@ struct sigframe {
 struct switchframe {
register_t sf_s[12];
register_t sf_ra;
+   register_t sf_pad;
 };
 
 struct callframe {
Index: arch/riscv64/riscv64/vm_machdep.c
===
RCS file: src/sys/arch/riscv64/riscv64/vm_machdep.c,v
retrieving revision 1.8
diff -u -p -r1.8 vm_machdep.c
--- arch/riscv64/riscv64/vm_machdep.c   22 Feb 2022 07:47:46 -  1.8
+++ arch/riscv64/riscv64/vm_machdep.c   22 Feb 2022 16:50:53 -
@@ -62,6 +62,10 @@ cpu_fork(struct proc *p1, struct proc *p
struct trapframe *tf;
struct switchframe *sf;
 
+   /* Ensure proper stack alignment. */
+   CTASSERT((sizeof(struct trapframe) & STACKALIGNBYTES) == 0);
+   CTASSERT((sizeof(struct switchframe) & STACKALIGNBYTES) == 0);
+
/* Save FPU state to PCB if necessary. */
if (pcb1->pcb_flags & PCB_FPU)
fpu_save(p1, pcb1->pcb_tf);



Re: Fix kernel stack alignment on riscv64

2022-02-22 Thread Mark Kettenis
> Date: Tue, 22 Feb 2022 15:58:31 +
> From: Visa Hankala 
> 
> The standard RISC-V calling convention says that the stack pointer
> should be 16-byte aligned.
> 
> The patch below corrects the alignment of the kernel stack in context
> switching and exception handling.
> 
> OK?

Is there a reason why we don't simply extend struct switchframe to
include another register_t such that it is a multiple of 16 bytes?

(and then add a compile-time assertion somewhere to make sure we don't
mess that up again)

> The diff reveals that curcpu is stored in an unnamed spot near the upper
> end of u-area when running in user mode. Should struct trapframe contain
> a field where curcpu is stored, so that the usage would become more
> apparent? I guess the pointer could also be stored in one of the
> existing fields with an appropriate comment in the struct definition.

Sorry, but I don't understand you; curpcu is kept in the tp register...

> Index: arch/riscv64/include/param.h
> ===
> RCS file: src/sys/arch/riscv64/include/param.h,v
> retrieving revision 1.4
> diff -u -p -r1.4 param.h
> --- arch/riscv64/include/param.h  16 Jun 2021 12:00:15 -  1.4
> +++ arch/riscv64/include/param.h  22 Feb 2022 15:29:05 -
> @@ -75,6 +75,7 @@
>  
>  #define  STACKALIGNBYTES (16 - 1)
>  #define  STACKALIGN(p)   ((u_long)(p) &~ STACKALIGNBYTES)
> +#define  FRAMESZ(sz) (((sz) + STACKALIGNBYTES) & 
> ~STACKALIGNBYTES)
>  
>  #define __HAVE_FDT
>  
> Index: arch/riscv64/riscv64/cpuswitch.S
> ===
> RCS file: src/sys/arch/riscv64/riscv64/cpuswitch.S,v
> retrieving revision 1.6
> diff -u -p -r1.6 cpuswitch.S
> --- arch/riscv64/riscv64/cpuswitch.S  22 Feb 2022 07:47:46 -  1.6
> +++ arch/riscv64/riscv64/cpuswitch.S  22 Feb 2022 15:29:05 -
> @@ -17,8 +17,9 @@
>   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
>   */
>  
> -#include "machine/asm.h"
>  #include "assym.h"
> +#include 
> +#include 
>  
>  /*
>   * cpu_switchto(struct proc *oldproc, struct proc *newproc)
> @@ -30,7 +31,7 @@ ENTRY(cpu_switchto_asm)
>   beqza0, 1f
>  
>   // create switchframe
> - addisp, sp, -SWITCHFRAME_SIZEOF
> + addisp, sp, -FRAMESZ(SWITCHFRAME_SIZEOF)
>   sd  s0, (SF_S + 0 * 8)(sp)
>   sd  s1, (SF_S + 1 * 8)(sp)
>   sd  s2, (SF_S + 2 * 8)(sp)
> @@ -86,7 +87,7 @@ ENTRY(cpu_switchto_asm)
>   ld  ra, SF_RA(sp)
>  
>   RETGUARD_CALC_COOKIE(a7)
> - addisp, sp, SWITCHFRAME_SIZEOF
> + addisp, sp, FRAMESZ(SWITCHFRAME_SIZEOF)
>   RETGUARD_CHECK(cpu_switchto, a7)
>   ret
>  END(cpu_switch_asm)
> Index: arch/riscv64/riscv64/exception.S
> ===
> RCS file: src/sys/arch/riscv64/riscv64/exception.S,v
> retrieving revision 1.5
> diff -u -p -r1.5 exception.S
> --- arch/riscv64/riscv64/exception.S  20 May 2021 04:22:33 -  1.5
> +++ arch/riscv64/riscv64/exception.S  22 Feb 2022 15:29:05 -
> @@ -36,11 +36,12 @@
>  
>  #include "assym.h"
>  #include 
> +#include 
>  #include 
>  #include 
>  
>  .macro save_registers mode
> - addisp, sp, -(TRAPFRAME_SIZEOF)
> + addisp, sp, -FRAMESZ(TRAPFRAME_SIZEOF)
>  
>   sd  ra, (TF_RA)(sp)
>  
> @@ -54,7 +55,7 @@
>  
>   /* Load our pcpu */
>   sd  tp, (TF_TP)(sp)
> - ld  tp, (TRAPFRAME_SIZEOF)(sp)
> + ld  tp, (FRAMESZ(TRAPFRAME_SIZEOF))(sp)
>  .endif
>  
>   sd  t0, (TF_T + 0 * 8)(sp)
> @@ -89,7 +90,7 @@
>  
>  .if \mode == 1
>   /* Store kernel sp */
> - li  t1, TRAPFRAME_SIZEOF
> + li  t1, FRAMESZ(TRAPFRAME_SIZEOF)
>   add t0, sp, t1
>   sd  t0, (TF_SP)(sp)
>  .else
> @@ -142,7 +143,7 @@
>   csrwsscratch, t0
>  
>   /* Store our pcpu */
> - sd  tp, (TRAPFRAME_SIZEOF)(sp)
> + sd  tp, (FRAMESZ(TRAPFRAME_SIZEOF))(sp)
>   ld  tp, (TF_TP)(sp)
>  
>   /* And restore the user's global pointer */
> @@ -181,7 +182,7 @@
>   ld  a6, (TF_A + 6 * 8)(sp)
>   ld  a7, (TF_A + 7 * 8)(sp)
>  
> - addisp, sp, (TRAPFRAME_SIZEOF)
> + addisp, sp, FRAMESZ(TRAPFRAME_SIZEOF)
>  .endm
>  
>  .macro   do_ast
> Index: arch/riscv64/riscv64/vm_machdep.c
> ===
> RCS file: src/sys/arch/riscv64/riscv64/vm_machdep.c,v
> retrieving revision 1.8
> diff -u -p -r1.8 vm_machdep.c
> --- arch/riscv64/riscv64/vm_machdep.c 22 Feb 2022 07:47:46 -  1.8
> +++ arch/riscv64/riscv64/vm_machdep.c 22 Feb 2022 15:29:05 -
> @@ -91,7 +91,8 @@ cpu_fork(struct proc *p1, struct proc *p
>   tf->tf_sstatus |= (SSTATUS_SPIE); /* Enable interrupts. */
>   tf->tf_sstatus &= ~(SSTATUS_SPP); /* Enter user mode. */
>  
> - sf = (struct switchframe *)tf - 1;
> + sf = (struct 

Fix kernel stack alignment on riscv64

2022-02-22 Thread Visa Hankala
The standard RISC-V calling convention says that the stack pointer
should be 16-byte aligned.

The patch below corrects the alignment of the kernel stack in context
switching and exception handling.

OK?

The diff reveals that curcpu is stored in an unnamed spot near the upper
end of u-area when running in user mode. Should struct trapframe contain
a field where curcpu is stored, so that the usage would become more
apparent? I guess the pointer could also be stored in one of the
existing fields with an appropriate comment in the struct definition.

Index: arch/riscv64/include/param.h
===
RCS file: src/sys/arch/riscv64/include/param.h,v
retrieving revision 1.4
diff -u -p -r1.4 param.h
--- arch/riscv64/include/param.h16 Jun 2021 12:00:15 -  1.4
+++ arch/riscv64/include/param.h22 Feb 2022 15:29:05 -
@@ -75,6 +75,7 @@
 
 #defineSTACKALIGNBYTES (16 - 1)
 #defineSTACKALIGN(p)   ((u_long)(p) &~ STACKALIGNBYTES)
+#defineFRAMESZ(sz) (((sz) + STACKALIGNBYTES) & 
~STACKALIGNBYTES)
 
 #define __HAVE_FDT
 
Index: arch/riscv64/riscv64/cpuswitch.S
===
RCS file: src/sys/arch/riscv64/riscv64/cpuswitch.S,v
retrieving revision 1.6
diff -u -p -r1.6 cpuswitch.S
--- arch/riscv64/riscv64/cpuswitch.S22 Feb 2022 07:47:46 -  1.6
+++ arch/riscv64/riscv64/cpuswitch.S22 Feb 2022 15:29:05 -
@@ -17,8 +17,9 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
-#include "machine/asm.h"
 #include "assym.h"
+#include 
+#include 
 
 /*
  * cpu_switchto(struct proc *oldproc, struct proc *newproc)
@@ -30,7 +31,7 @@ ENTRY(cpu_switchto_asm)
beqza0, 1f
 
// create switchframe
-   addisp, sp, -SWITCHFRAME_SIZEOF
+   addisp, sp, -FRAMESZ(SWITCHFRAME_SIZEOF)
sd  s0, (SF_S + 0 * 8)(sp)
sd  s1, (SF_S + 1 * 8)(sp)
sd  s2, (SF_S + 2 * 8)(sp)
@@ -86,7 +87,7 @@ ENTRY(cpu_switchto_asm)
ld  ra, SF_RA(sp)
 
RETGUARD_CALC_COOKIE(a7)
-   addisp, sp, SWITCHFRAME_SIZEOF
+   addisp, sp, FRAMESZ(SWITCHFRAME_SIZEOF)
RETGUARD_CHECK(cpu_switchto, a7)
ret
 END(cpu_switch_asm)
Index: arch/riscv64/riscv64/exception.S
===
RCS file: src/sys/arch/riscv64/riscv64/exception.S,v
retrieving revision 1.5
diff -u -p -r1.5 exception.S
--- arch/riscv64/riscv64/exception.S20 May 2021 04:22:33 -  1.5
+++ arch/riscv64/riscv64/exception.S22 Feb 2022 15:29:05 -
@@ -36,11 +36,12 @@
 
 #include "assym.h"
 #include 
+#include 
 #include 
 #include 
 
 .macro save_registers mode
-   addisp, sp, -(TRAPFRAME_SIZEOF)
+   addisp, sp, -FRAMESZ(TRAPFRAME_SIZEOF)
 
sd  ra, (TF_RA)(sp)
 
@@ -54,7 +55,7 @@
 
/* Load our pcpu */
sd  tp, (TF_TP)(sp)
-   ld  tp, (TRAPFRAME_SIZEOF)(sp)
+   ld  tp, (FRAMESZ(TRAPFRAME_SIZEOF))(sp)
 .endif
 
sd  t0, (TF_T + 0 * 8)(sp)
@@ -89,7 +90,7 @@
 
 .if \mode == 1
/* Store kernel sp */
-   li  t1, TRAPFRAME_SIZEOF
+   li  t1, FRAMESZ(TRAPFRAME_SIZEOF)
add t0, sp, t1
sd  t0, (TF_SP)(sp)
 .else
@@ -142,7 +143,7 @@
csrwsscratch, t0
 
/* Store our pcpu */
-   sd  tp, (TRAPFRAME_SIZEOF)(sp)
+   sd  tp, (FRAMESZ(TRAPFRAME_SIZEOF))(sp)
ld  tp, (TF_TP)(sp)
 
/* And restore the user's global pointer */
@@ -181,7 +182,7 @@
ld  a6, (TF_A + 6 * 8)(sp)
ld  a7, (TF_A + 7 * 8)(sp)
 
-   addisp, sp, (TRAPFRAME_SIZEOF)
+   addisp, sp, FRAMESZ(TRAPFRAME_SIZEOF)
 .endm
 
 .macro do_ast
Index: arch/riscv64/riscv64/vm_machdep.c
===
RCS file: src/sys/arch/riscv64/riscv64/vm_machdep.c,v
retrieving revision 1.8
diff -u -p -r1.8 vm_machdep.c
--- arch/riscv64/riscv64/vm_machdep.c   22 Feb 2022 07:47:46 -  1.8
+++ arch/riscv64/riscv64/vm_machdep.c   22 Feb 2022 15:29:05 -
@@ -91,7 +91,8 @@ cpu_fork(struct proc *p1, struct proc *p
tf->tf_sstatus |= (SSTATUS_SPIE); /* Enable interrupts. */
tf->tf_sstatus &= ~(SSTATUS_SPP); /* Enter user mode. */
 
-   sf = (struct switchframe *)tf - 1;
+   sf = (struct switchframe *)STACKALIGN((u_long)tf
+   - sizeof(struct switchframe));
sf->sf_s[0] = 0;/* Terminate chain of call frames. */
sf->sf_s[1] = (uint64_t)func;
sf->sf_s[2] = (uint64_t)arg;