On Sat, Apr 02, 2016 at 02:19:17PM +0200, Patrick Wildt wrote:
> Hi,
>
> the Procedure Call Standard used in EABI requires the stack pointer to
> be 8-byte aligned by
>
> * exception handlers, before calling AAPCS-conforming code.
> * the OS, before giving control to an application.
>
> This diff makes sure our kernel interfaces adhere to that requirement.
>
> Can someone make sure this doesn't break armish/zaurus?
>
> Patrick
>
> diff --git sys/arch/arm/arm/cpuswitch.S sys/arch/arm/arm/cpuswitch.S
> index 98e2dbe..0c3d0af 100644
> --- sys/arch/arm/arm/cpuswitch.S
> +++ sys/arch/arm/arm/cpuswitch.S
> @@ -171,6 +171,7 @@ ENTRY(cpu_idle_leave)
>
> ENTRY(cpu_switchto)
> stmfd sp!, {r4-r7, lr}
> + sub sp, sp, #4
>
> #ifdef MULTIPROCESSOR
> /* XXX use curcpu() */
> @@ -451,6 +452,7 @@ ENTRY(cpu_switchto)
> * Pull the registers that got pushed when either savectx() or
> * cpu_switch() was called and return.
> */
> + add sp, sp, #4
> ldmfd sp!, {r4-r7, pc}
>
> /* LINTSTUB: Func: void savectx(struct pcb *pcb) */
> @@ -461,6 +463,7 @@ ENTRY(savectx)
>
> /* Push registers.*/
> stmfd sp!, {r4-r7, lr}
> + sub sp, sp, #4
>
> /* Store all the registers in the process's pcb */
> #ifndef __XSCALE__
> @@ -473,6 +476,7 @@ ENTRY(savectx)
> #endif
>
> /* Pull the regs of the stack */
> + add sp, sp, #4
> ldmfd sp!, {r4-r7, pc}
>
> ENTRY(proc_trampoline)
> diff --git sys/arch/arm/arm/cpuswitch7.S sys/arch/arm/arm/cpuswitch7.S
> index 126b41a..4db9a86 100644
> --- sys/arch/arm/arm/cpuswitch7.S
> +++ sys/arch/arm/arm/cpuswitch7.S
> @@ -165,6 +165,7 @@ ENTRY(cpu_idle_leave)
>
> ENTRY(cpu_switchto)
> stmfd sp!, {r4-r7, lr}
> + sub sp, sp, #4
>
> #ifdef MULTIPROCESSOR
> /* XXX use curcpu() */
> @@ -396,6 +397,7 @@ ENTRY(cpu_switchto)
> * Pull the registers that got pushed when either savectx() or
> * cpu_switch() was called and return.
> */
> + add sp, sp, #4
> ldmfd sp!, {r4-r7, pc}
>
> /* LINTSTUB: Func: void savectx(struct pcb *pcb) */
> @@ -406,6 +408,7 @@ ENTRY(savectx)
>
> /* Push registers.*/
> stmfd sp!, {r4-r7, lr}
> + sub sp, sp, #4
>
> /* Store all the registers in the process's pcb */
> #ifndef __XSCALE__
> @@ -418,6 +421,7 @@ ENTRY(savectx)
> #endif
>
> /* Pull the regs of the stack */
> + add sp, sp, #4
> ldmfd sp!, {r4-r7, pc}
>
> ENTRY(proc_trampoline)
> diff --git sys/arch/arm/arm/vm_machdep.c sys/arch/arm/arm/vm_machdep.c
> index 06f217b..84dfb68 100644
> --- sys/arch/arm/arm/vm_machdep.c
> +++ sys/arch/arm/arm/vm_machdep.c
> @@ -140,10 +140,11 @@ cpu_fork(p1, p2, stack, stacksize, func, arg)
> *tf = *p1->p_addr->u_pcb.pcb_tf;
>
> /*
> - * If specified, give the child a different stack.
> + * If specified, give the child a different stack (make sure
> + * it's 8-byte aligned).
> */
> if (stack != NULL)
> - tf->tf_usr_sp = (u_int)stack + stacksize;
> + tf->tf_usr_sp = ((vaddr_t)(stack) + stacksize) & -8;
>
> sf = (struct switchframe *)tf - 1;
> sf->sf_r4 = (u_int)func;
> diff --git sys/arch/arm/include/frame.h sys/arch/arm/include/frame.h
> index 31b2936..56e1368 100644
> --- sys/arch/arm/include/frame.h
> +++ sys/arch/arm/include/frame.h
> @@ -75,6 +75,7 @@ typedef struct trapframe {
> register_t tf_svc_sp;
> register_t tf_svc_lr;
> register_t tf_pc;
> + register_t tf_pad;
> } trapframe_t;
>
> /* Register numbers */
> @@ -137,6 +138,7 @@ typedef struct irqframe {
> unsigned int if_svc_sp;
> unsigned int if_svc_lr;
> unsigned int if_pc;
> + unsigned int if_pad;
> } irqframe_t;
>
> #define clockframe irqframe
> @@ -146,6 +148,7 @@ typedef struct irqframe {
> */
>
> struct switchframe {
> + u_int sf_pad;
> u_int sf_r4;
> u_int sf_r5;
> u_int sf_r6;
> @@ -203,6 +206,7 @@ struct frame {
> */
>
> #define PUSHFRAME \
> + sub sp, sp, #4; /* Align the stack */ \
> str lr, [sp, #-4]!; /* Push the return address */ \
> sub sp, sp, #(4*17); /* Adjust the stack pointer */ \
> stmia sp, {r0-r14}^; /* Push the user mode registers */ \
> @@ -221,7 +225,8 @@ struct frame {
> ldmia sp, {r0-r14}^; /* Restore registers (usr mode) */ \
> mov r0, r0; /* NOP for previous instruction */ \
> add sp, sp, #(4*17); /* Adjust the stack pointer */ \
> - ldr lr, [sp], #0x0004 /* Pull the return address */
> + ldr lr, [sp], #0x0004; /* Pull the return address */ \
> + add sp, sp, #4 /* Align the stack */
>
> /*
> * PUSHFRAMEINSVC - macro to push a trap frame on the stack in SVC32 mode
> @@ -241,6 +246,8 @@ struct frame {
> orr r2, r2, #(PSR_SVC32_MODE); \
> msr cpsr_c, r2; /* Punch into SVC mode */ \
> mov r2, sp; /* Save SVC sp */ \
> + bic sp, sp, #7; /* Align sp to an 8-byte address */ \
> + sub sp, sp, #4; /* Pad trapframe to keep alignment */ \
> str r0, [sp, #-4]!; /* Push return address */ \
> str lr, [sp, #-4]!; /* Push SVC lr */ \
> str r2, [sp, #-4]!; /* Push SVC sp */ \
>
Any other concerns or questions regarding this diff?
ok?