The branch main has been updated by jhibbits:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=077e30e61d7e1c90af7df31989bb976a3ace0c69

commit 077e30e61d7e1c90af7df31989bb976a3ace0c69
Author:     Timothy Pearson <tpear...@raptorengineering.com>
AuthorDate: 2025-07-06 19:28:30 +0000
Commit:     Justin Hibbits <jhibb...@freebsd.org>
CommitDate: 2025-07-13 18:00:56 +0000

    powerpc: Fix multiple issues with FP/VSX save/restore
    
    Multiple issues existed within the powerpc FP/VSX save/restore 
functionality,
    leading to register corruption and loss of register contents in specific
    scenarios involving high signal load and use of both floating point and VSX
    instructions.
    
    Issue #1
    
    On little endian systems the PCB used the wrong location for the shadowed
    FP register within the larger VSX register.  This appears to have been an
    attempt to correct issue #2 without understanding how the vector load/store
    instructions actually operate.
    
    Issue #2
    
    On little endian systems, the VSX state save/restore routines swapped 32-bit
    words within the 64-bit aliased double word for the associated floating 
point
    register.  This was due to the use of a word-oriented load/store vs. 
doubleword
    oriented load/store.
    
    Issue #3
    
    The FPU was turned off in the PCB but not in hardware, leading to a 
potential
    race condition if the same thread was scheduled immediately after sigreturn.
    
    The triggering codebase for this is Go, which makes heavy use of signals and
    and generates an unusual mix of floating point and VSX assembler.  As a 
result,
    when combined with th powerpc lazy FPU restore, a condition was repeatedly 
hit
    whereby the thread was interrupted in FP+VSX mode, then restored in FP only
    mode, thus reliably triggering the issues above.
    
    Also clean up the associated asm() style issue flagged by GitHub Actions.
    
    Signed-off-by: Timothy Pearson <tpear...@raptorengineering.com>
    
    MFC after:      1 week
    Pull Request:   https://github.com/freebsd/freebsd-src/pull/1756
---
 sys/powerpc/include/pcb.h          | 10 +---------
 sys/powerpc/include/ucontext.h     |  2 ++
 sys/powerpc/powerpc/exec_machdep.c | 33 ++++++++++++++++++++++++---------
 sys/powerpc/powerpc/fpu.c          | 30 ++++++++++++++++++++++++++----
 4 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/sys/powerpc/include/pcb.h b/sys/powerpc/include/pcb.h
index 050ada6b0f64..0230cf78aba7 100644
--- a/sys/powerpc/include/pcb.h
+++ b/sys/powerpc/include/pcb.h
@@ -66,16 +66,8 @@ struct pcb {
 #define        PCB_VECREGS     0x200   /* Process had Altivec registers 
initialized */
        struct fpu {
                union {
-#if _BYTE_ORDER == _BIG_ENDIAN
-                       double fpr;
-                       uint32_t vsr[4];
-#else
                        uint32_t vsr[4];
-                       struct {
-                               double padding;
-                               double fpr;
-                       };
-#endif
+                       double fpr;
                } fpr[32];
                double  fpscr;  /* FPSCR stored as double for easier access */
        } pcb_fpu;              /* Floating point processor */
diff --git a/sys/powerpc/include/ucontext.h b/sys/powerpc/include/ucontext.h
index d35c6c773fe0..dc87edd578bc 100644
--- a/sys/powerpc/include/ucontext.h
+++ b/sys/powerpc/include/ucontext.h
@@ -41,6 +41,7 @@ typedef struct __mcontext {
        int             mc_flags;
 #define _MC_FP_VALID   0x01
 #define _MC_AV_VALID   0x02
+#define _MC_VS_VALID   0x04
        int             mc_onstack;             /* saved onstack flag */
        int             mc_len;                 /* sizeof(__mcontext) */
        __uint64_t      mc_avec[32*2];          /* vector register file */
@@ -56,6 +57,7 @@ typedef struct __mcontext32 {
        int             mc_flags;
 #define _MC_FP_VALID   0x01
 #define _MC_AV_VALID   0x02
+#define _MC_VS_VALID   0x04
        int             mc_onstack;             /* saved onstack flag */
        int             mc_len;                 /* sizeof(__mcontext) */
        uint64_t        mc_avec[32*2];          /* vector register file */
diff --git a/sys/powerpc/powerpc/exec_machdep.c 
b/sys/powerpc/powerpc/exec_machdep.c
index c6b2b01d9d30..8a33d0f589a7 100644
--- a/sys/powerpc/powerpc/exec_machdep.c
+++ b/sys/powerpc/powerpc/exec_machdep.c
@@ -349,13 +349,6 @@ sys_sigreturn(struct thread *td, struct sigreturn_args 
*uap)
        if (error != 0)
                return (error);
 
-       /*
-        * Save FPU state if needed. User may have changed it on
-        * signal handler
-        */
-       if (uc.uc_mcontext.mc_srr1 & PSL_FP)
-               save_fpu(td);
-
        kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 
        CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x",
@@ -432,6 +425,7 @@ grab_mcontext(struct thread *td, mcontext_t *mcp, int flags)
        }
 
        if (pcb->pcb_flags & PCB_VSX) {
+               mcp->mc_flags |= _MC_VS_VALID;
                for (i = 0; i < 32; i++)
                        memcpy(&mcp->mc_vsxfpreg[i],
                            &pcb->pcb_fpu.fpr[i].vsr[2], sizeof(double));
@@ -481,6 +475,7 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
        struct pcb *pcb;
        struct trapframe *tf;
        register_t tls;
+       register_t msr;
        int i;
 
        pcb = td->td_pcb;
@@ -531,6 +526,22 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
        tf->srr1 &= ~(PSL_FP | PSL_VSX | PSL_VEC);
        pcb->pcb_flags &= ~(PCB_FPU | PCB_VSX | PCB_VEC);
 
+       /*
+        * Ensure the FPU is also disabled in hardware.
+        *
+        * Without this, it's possible for the register reload to fail if we
+        * don't switch to a FPU disabled context before resuming the original
+        * thread.  Specifically, if the FPU/VSX unavailable exception is never
+        * hit, then whatever data is still in the FP/VSX registers when
+        * sigresume is callled will used by the resumed thread, instead of the
+        * previously saved data from the mcontext.
+        */
+       critical_enter();
+       msr = mfmsr() & ~(PSL_FP | PSL_VSX | PSL_VEC);
+       isync();
+       mtmsr(msr);
+       critical_exit();
+
        if (mcp->mc_flags & _MC_FP_VALID) {
                /* enable_fpu() will happen lazily on a fault */
                pcb->pcb_flags |= PCB_FPREGS;
@@ -539,8 +550,12 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
                for (i = 0; i < 32; i++) {
                        memcpy(&pcb->pcb_fpu.fpr[i].fpr, &mcp->mc_fpreg[i],
                            sizeof(double));
-                       memcpy(&pcb->pcb_fpu.fpr[i].vsr[2],
-                           &mcp->mc_vsxfpreg[i], sizeof(double));
+               }
+               if (mcp->mc_flags & _MC_VS_VALID) {
+                       for (i = 0; i < 32; i++) {
+                               memcpy(&pcb->pcb_fpu.fpr[i].vsr[2],
+                                   &mcp->mc_vsxfpreg[i], sizeof(double));
+                       }
                }
        }
 
diff --git a/sys/powerpc/powerpc/fpu.c b/sys/powerpc/powerpc/fpu.c
index 0eaff2ea4932..cc8f22f7dda3 100644
--- a/sys/powerpc/powerpc/fpu.c
+++ b/sys/powerpc/powerpc/fpu.c
@@ -64,8 +64,19 @@ save_fpu_int(struct thread *td)
         * Save the floating-point registers and FPSCR to the PCB
         */
        if (pcb->pcb_flags & PCB_VSX) {
-       #define SFP(n)   __asm ("stxvw4x " #n ", 0,%0" \
+#if _BYTE_ORDER == _BIG_ENDIAN
+       #define SFP(n)   __asm("stxvw4x " #n ", 0,%0" \
                        :: "b"(&pcb->pcb_fpu.fpr[n]));
+#else
+       /*
+        * stxvw2x will swap words within the FP double word on LE systems,
+        * leading to corruption if VSX is used to store state and FP is
+        * subsequently used to restore state.
+        * Use stxvd2x instead.
+        */
+       #define SFP(n)   __asm("stxvd2x " #n ", 0,%0" \
+                       :: "b"(&pcb->pcb_fpu.fpr[n]));
+#endif
                SFP(0);         SFP(1);         SFP(2);         SFP(3);
                SFP(4);         SFP(5);         SFP(6);         SFP(7);
                SFP(8);         SFP(9);         SFP(10);        SFP(11);
@@ -76,7 +87,7 @@ save_fpu_int(struct thread *td)
                SFP(28);        SFP(29);        SFP(30);        SFP(31);
        #undef SFP
        } else {
-       #define SFP(n)   __asm ("stfd " #n ", 0(%0)" \
+       #define SFP(n)   __asm("stfd " #n ", 0(%0)" \
                        :: "b"(&pcb->pcb_fpu.fpr[n].fpr));
                SFP(0);         SFP(1);         SFP(2);         SFP(3);
                SFP(4);         SFP(5);         SFP(6);         SFP(7);
@@ -149,8 +160,19 @@ enable_fpu(struct thread *td)
                          :: "b"(&pcb->pcb_fpu.fpscr));
 
        if (pcb->pcb_flags & PCB_VSX) {
-       #define LFP(n)   __asm ("lxvw4x " #n ", 0,%0" \
+#if _BYTE_ORDER == _BIG_ENDIAN
+       #define LFP(n)   __asm("lxvw4x " #n ", 0,%0" \
+                       :: "b"(&pcb->pcb_fpu.fpr[n]));
+#else
+       /*
+        * lxvw4x will swap words within the FP double word on LE systems,
+        * leading to corruption if FP is used to store state and VSX is
+        * subsequently used to restore state.
+        * Use lxvd2x instead.
+        */
+       #define LFP(n)   __asm("lxvd2x " #n ", 0,%0" \
                        :: "b"(&pcb->pcb_fpu.fpr[n]));
+#endif
                LFP(0);         LFP(1);         LFP(2);         LFP(3);
                LFP(4);         LFP(5);         LFP(6);         LFP(7);
                LFP(8);         LFP(9);         LFP(10);        LFP(11);
@@ -161,7 +183,7 @@ enable_fpu(struct thread *td)
                LFP(28);        LFP(29);        LFP(30);        LFP(31);
        #undef LFP
        } else {
-       #define LFP(n)   __asm ("lfd " #n ", 0(%0)" \
+       #define LFP(n)   __asm("lfd " #n ", 0(%0)" \
                        :: "b"(&pcb->pcb_fpu.fpr[n].fpr));
                LFP(0);         LFP(1);         LFP(2);         LFP(3);
                LFP(4);         LFP(5);         LFP(6);         LFP(7);

Reply via email to