On Tue, Nov 21, 2023 at 04:48:29PM +0100, Mark Kettenis wrote:
> > Date: Tue, 21 Nov 2023 16:08:13 +0100
> > From: Tobias Heider <tobias.hei...@stusta.de>
> > 
> > On Tue, Nov 21, 2023 at 12:23:08PM +0100, Mark Kettenis wrote:
> > > > Date: Tue, 21 Nov 2023 12:04:42 +0100
> > > > From: Tobias Heider <tobias.hei...@stusta.de>
> > > > 
> > > > On Tue, Nov 21, 2023 at 11:56:18AM +0100, Mark Kettenis wrote:
> > > > > > Date: Tue, 21 Nov 2023 00:16:40 +0100
> > > > > > From: Tobias Heider <tobias.hei...@stusta.de>
> > > > > > 
> > > > > > Diff below fixes make regress for libffi with arm64 BTI enabled.
> > > > > > The tricky part were two jump tables in ffi.c and sysV.S.
> > > > > > 
> > > > > > ok?
> > > > > 
> > > > > I think you missed the "computed goto" in ffi_closure_SYSV.
> > > > > 
> > > > > Maybe we shouldn't add a "bti j" for the unused slots?
> > > > 
> > > > Functionally yes, that should also work and makes more sense.
> > > > But then the brks would be a bit redundant. I wasn't sure if there
> > > > are tests or something that expects those breaks to be reachable.
> > > 
> > > The brks still need to be there for those poor CPUs that don't
> > > implement BTI.
> > > 
> > 
> > Here is an updated diff where all reserved and unused entries
> > don't get bti j.  make regress still passes.
> 
> Looks like you missed my comment about ffi_closure_SYSV.

urgh yes, missed that. fixed in diff below

> Is the closures stuff not tested as part of make regress?

it seems like a lot of tests are skipped in our port. I'll see
if I can force it to run a few more.

# of expected passes            5822
# of unsupported tests          209 

Index: Makefile
===================================================================
RCS file: /cvs/ports/devel/libffi/Makefile,v
retrieving revision 1.48
diff -u -p -r1.48 Makefile
--- Makefile    21 Sep 2023 09:49:57 -0000      1.48
+++ Makefile    21 Nov 2023 16:07:43 -0000
@@ -1,6 +1,7 @@
 COMMENT=               Foreign Function Interface
 
 V=                     3.4.4
+REVISION=              0
 DISTNAME=              libffi-$V
 SHARED_LIBS +=  ffi                  2.0      # 9.2
 CATEGORIES=            devel
Index: patches/patch-src_aarch64_ffi_c
===================================================================
RCS file: patches/patch-src_aarch64_ffi_c
diff -N patches/patch-src_aarch64_ffi_c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-src_aarch64_ffi_c     21 Nov 2023 16:07:43 -0000
@@ -0,0 +1,76 @@
+Index: src/aarch64/ffi.c
+--- src/aarch64/ffi.c.orig
++++ src/aarch64/ffi.c
+@@ -390,47 +390,59 @@ extend_hfa_type (void *dest, void *src, int h)
+       "adr    %0, 0f\n"
+ "     add     %0, %0, %1\n"
+ "     br      %0\n"
+-"0:   ldp     s16, s17, [%3]\n"       /* S4 */
++"0:   bti     j\n"                    /* S4 */
++"     ldp     s16, s17, [%3]\n"
+ "     ldp     s18, s19, [%3, #8]\n"
+ "     b       4f\n"
+-"     ldp     s16, s17, [%3]\n"       /* S3 */
++"     bti     j\n"                    /* S3 */
++"     ldp     s16, s17, [%3]\n"
+ "     ldr     s18, [%3, #8]\n"
+ "     b       3f\n"
+-"     ldp     s16, s17, [%3]\n"       /* S2 */
++"     bti     j\n"                    /* S2 */
++"     ldp     s16, s17, [%3]\n"
+ "     b       2f\n"
+ "     nop\n"
+-"     ldr     s16, [%3]\n"            /* S1 */
++"     bti     j\n"                    /* S1 */
++"     ldr     s16, [%3]\n"
+ "     b       1f\n"
+ "     nop\n"
+-"     ldp     d16, d17, [%3]\n"       /* D4 */
++"     bti     j\n"                    /* D4 */
++"     ldp     d16, d17, [%3]\n"
+ "     ldp     d18, d19, [%3, #16]\n"
+ "     b       4f\n"
+-"     ldp     d16, d17, [%3]\n"       /* D3 */
++"     bti     j\n"                    /* D3 */
++"     ldp     d16, d17, [%3]\n"
+ "     ldr     d18, [%3, #16]\n"
+ "     b       3f\n"
+-"     ldp     d16, d17, [%3]\n"       /* D2 */
++"     bti     j\n"                    /* D2 */
++"     ldp     d16, d17, [%3]\n"
+ "     b       2f\n"
+ "     nop\n"
+-"     ldr     d16, [%3]\n"            /* D1 */
++"     bti     j\n"                    /* D1 */
++"     ldr     d16, [%3]\n"
+ "     b       1f\n"
+ "     nop\n"
+-"     ldp     q16, q17, [%3]\n"       /* Q4 */
++"     bti     j\n"                    /* Q4 */
++"     ldp     q16, q17, [%3]\n"
+ "     ldp     q18, q19, [%3, #32]\n"
+ "     b       4f\n"
+-"     ldp     q16, q17, [%3]\n"       /* Q3 */
++"     bti     j\n"                    /* Q3 */
++"     ldp     q16, q17, [%3]\n"
+ "     ldr     q18, [%3, #32]\n"
+ "     b       3f\n"
+-"     ldp     q16, q17, [%3]\n"       /* Q2 */
++"     bti     j\n"                    /* Q2 */
++"     ldp     q16, q17, [%3]\n"
+ "     b       2f\n"
+ "     nop\n"
+-"     ldr     q16, [%3]\n"            /* Q1 */
++"     bti     j\n"                    /* Q1 */
++"     ldr     q16, [%3]\n"
+ "     b       1f\n"
+ "4:   str     q19, [%2, #48]\n"
+ "3:   str     q18, [%2, #32]\n"
+ "2:   str     q17, [%2, #16]\n"
+ "1:   str     q16, [%2]"
+     : "=&r"(x0)
+-    : "r"(f * 12), "r"(dest), "r"(src)
++    : "r"(f * 16), "r"(dest), "r"(src)
+     : "memory", "v16", "v17", "v18", "v19");
+ }
+ #endif
Index: patches/patch-src_aarch64_sysv_S
===================================================================
RCS file: patches/patch-src_aarch64_sysv_S
diff -N patches/patch-src_aarch64_sysv_S
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-src_aarch64_sysv_S    21 Nov 2023 16:07:43 -0000
@@ -0,0 +1,369 @@
+Index: src/aarch64/sysv.S
+--- src/aarch64/sysv.S.orig
++++ src/aarch64/sysv.S
+@@ -78,6 +78,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+       cfi_startproc
+ CNAME(ffi_call_SYSV):
++      bti     c
+       /* Sign the lr with x1 since that is where it will be stored */
+       SIGN_LR_WITH_REG(x1)
+ 
+@@ -138,78 +139,142 @@ CNAME(ffi_call_SYSV):
+       /* Save the return value as directed.  */
+       adr     x5, 0f
+       and     w4, w4, #AARCH64_RET_MASK
+-      add     x5, x5, x4, lsl #3
++      add     x5, x5, x4, lsl #4
+       br      x5
+ 
+-      /* Note that each table entry is 2 insns, and thus 8 bytes.
++      /* Note that each table entry is 4 insns, and thus 16 bytes.
+          For integer data, note that we're storing into ffi_arg
+          and therefore we want to extend to 64 bits; these types
+          have two consecutive entries allocated for them.  */
+       .align  4
+-0:    b 99f                           /* VOID */
++0:    bti     j                       /* VOID */
++      b 99f
++      nop 
+       nop
+-1:    str     x0, [x3]                /* INT64 */
++1:    bti     j                       /* INT64 */
++      str     x0, [x3]
+       b 99f
+-2:    stp     x0, x1, [x3]            /* INT128 */
++      nop
++2:    bti     j                       /* INT128 */
++      stp     x0, x1, [x3]
+       b 99f
++      nop
+ 3:    brk     #1000                   /* UNUSED */
+       b 99f
++      nop
++      nop
+ 4:    brk     #1000                   /* UNUSED */
+       b 99f
++      nop
++      nop
+ 5:    brk     #1000                   /* UNUSED */
+       b 99f
++      nop
++      nop
+ 6:    brk     #1000                   /* UNUSED */
+       b 99f
++      nop
++      nop
+ 7:    brk     #1000                   /* UNUSED */
+       b 99f
+-8:    st4     { v0.s, v1.s, v2.s, v3.s }[0], [x3]     /* S4 */
++      nop
++      nop
++8:    bti     j                       /* S4 */
++      st4     { v0.s, v1.s, v2.s, v3.s }[0], [x3]
+       b 99f
+-9:    st3     { v0.s, v1.s, v2.s }[0], [x3]   /* S3 */
++      nop
++9:    bti     j                       /* S3 */
++      st3     { v0.s, v1.s, v2.s }[0], [x3]
+       b 99f
+-10:   stp     s0, s1, [x3]            /* S2 */
++      nop
++10:   bti     j                       /* S2 */
++      stp     s0, s1, [x3]
+       b 99f
+-11:   str     s0, [x3]                /* S1 */
++      nop
++11:   bti     j
++      str     s0, [x3]                /* S1 */
+       b 99f
+-12:   st4     { v0.d, v1.d, v2.d, v3.d }[0], [x3]     /* D4 */
++      nop
++12:   bti     j                       /* D4 */
++      st4     { v0.d, v1.d, v2.d, v3.d }[0], [x3]
+       b 99f
+-13:   st3     { v0.d, v1.d, v2.d }[0], [x3]   /* D3 */
++      nop
++13:   bti     j                       /* D3 */
++      st3     { v0.d, v1.d, v2.d }[0], [x3]
+       b 99f
+-14:   stp     d0, d1, [x3]            /* D2 */
++      nop
++14:   bti     j                       /* D2 */
++      stp     d0, d1, [x3]
+       b 99f
+-15:   str     d0, [x3]                /* D1 */
++      nop
++15:   bti     j                       /* D1 */
++      str     d0, [x3]
+       b 99f
+-16:   str     q3, [x3, #48]           /* Q4 */
+       nop
+-17:   str     q2, [x3, #32]           /* Q3 */
++16:   bti     j                       /* Q4 */
++      str     q3, [x3, #48]
+       nop
+-18:   stp     q0, q1, [x3]            /* Q2 */
++      nop
++17:   bti     j                       /* Q3 */
++      str     q2, [x3, #32]
++      nop
++      nop
++18:   bti     j                       /* Q2 */
++      stp     q0, q1, [x3]
+       b 99f
+-19:   str     q0, [x3]                /* Q1 */
++      nop
++19:   bti     j                       /* Q1 */
++      str     q0, [x3]
+       b 99f
+-20:   uxtb    w0, w0                  /* UINT8 */
++      nop
++20:   bti     j                       /* UINT8 */
++      uxtb    w0, w0
+       str     x0, [x3]
++      nop
+ 21:   b 99f                           /* reserved */
+       nop
+-22:   uxth    w0, w0                  /* UINT16 */
++      nop
++      nop
++22:   bti     j                       /* UINT16 */
++      uxth    w0, w0
+       str     x0, [x3]
++      nop
+ 23:   b 99f                           /* reserved */
+       nop
+-24:   mov     w0, w0                  /* UINT32 */
++      nop
++      nop
++24:   bti     j                       /* UINT32 */
++      mov     w0, w0
+       str     x0, [x3]
++      nop
+ 25:   b 99f                           /* reserved */
+       nop
+-26:   sxtb    x0, w0                  /* SINT8 */
++      nop
++      nop
++26:   bti     j                       /* SINT8 */
++      sxtb    x0, w0
+       str     x0, [x3]
++      nop
+ 27:   b 99f                           /* reserved */
+       nop
+-28:   sxth    x0, w0                  /* SINT16 */
++      nop
++      nop
++28:   bti     j                       /* SINT16 */
++      sxth    x0, w0
+       str     x0, [x3]
++      nop
+ 29:   b 99f                           /* reserved */
+       nop
+-30:   sxtw    x0, w0                  /* SINT32 */
++      nop
++      nop
++30:   bti     j                       /* SINT32 */
++      sxtw    x0, w0
+       str     x0, [x3]
++      nop
+ 31:   b 99f                           /* reserved */
+       nop
++      nop
++      nop
+ 
+       /* Return now that result has been populated. */
+ 99:
+@@ -246,6 +311,7 @@ CNAME(ffi_call_SYSV):
+       .align 4
+ CNAME(ffi_closure_SYSV_V):
+       cfi_startproc
++      bti     c
+       SIGN_LR
+       stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+       cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+@@ -270,6 +336,7 @@ CNAME(ffi_closure_SYSV_V):
+       .align  4
+       cfi_startproc
+ CNAME(ffi_closure_SYSV):
++      bti     c
+       SIGN_LR
+       stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+       cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+@@ -299,74 +366,136 @@ CNAME(ffi_closure_SYSV):
+       /* Load the return value as directed.  */
+       adr     x1, 0f
+       and     w0, w0, #AARCH64_RET_MASK
+-      add     x1, x1, x0, lsl #3
++      add     x1, x1, x0, lsl #4
+       add     x3, sp, #16+CALL_CONTEXT_SIZE
+       br      x1
+ 
+-      /* Note that each table entry is 2 insns, and thus 8 bytes.  */
++      /* Note that each table entry is 4 insns, and thus 16 bytes.  */
+       .align  4
+-0:    b       99f                     /* VOID */
++0:    bti     j                       /* VOID */
++      b       99f
+       nop
+-1:    ldr     x0, [x3]                /* INT64 */
++      nop
++1:    bti     j                       /* INT64 */
++      ldr     x0, [x3]
+       b       99f
+-2:    ldp     x0, x1, [x3]            /* INT128 */
++      nop
++2:    bti     j                       /* INT128 */
++      ldp     x0, x1, [x3]
+       b       99f
++      nop
+ 3:    brk     #1000                   /* UNUSED */
+       nop
++      nop
++      nop
+ 4:    brk     #1000                   /* UNUSED */
+       nop
++      nop
++      nop
+ 5:    brk     #1000                   /* UNUSED */
+       nop
++      nop
++      nop
+ 6:    brk     #1000                   /* UNUSED */
+       nop
++      nop
++      nop
+ 7:    brk     #1000                   /* UNUSED */
+       nop
+-8:    ldr     s3, [x3, #12]           /* S4 */
+       nop
+-9:    ldr     s2, [x3, #8]            /* S3 */
+       nop
+-10:   ldp     s0, s1, [x3]            /* S2 */
++8:    bti     j                       /* S4 */
++      ldr     s3, [x3, #12]
++      nop
++      nop
++9:    bti     j                       /* S3 */
++      ldr     s2, [x3, #8]
++      nop
++      nop
++10:   bti     j                       /* S2 */
++      ldp     s0, s1, [x3]
+       b       99f
+-11:   ldr     s0, [x3]                /* S1 */
++      nop
++11:   bti     j                       /* S1 */
++      ldr     s0, [x3]
+       b       99f
+-12:   ldr     d3, [x3, #24]           /* D4 */
+       nop
+-13:   ldr     d2, [x3, #16]           /* D3 */
++12:   bti     j                       /* D4 */
++      ldr     d3, [x3, #24]
+       nop
+-14:   ldp     d0, d1, [x3]            /* D2 */
++      nop
++13:   bti     j                       /* D3 */
++      ldr     d2, [x3, #16]
++      nop
++      nop
++14:   bti     j                       /* D2 */
++      ldp     d0, d1, [x3]
+       b       99f
+-15:   ldr     d0, [x3]                /* D1 */
++      nop
++15:   bti     j                       /* D1 */
++      ldr     d0, [x3]
+       b       99f
+-16:   ldr     q3, [x3, #48]           /* Q4 */
+       nop
+-17:   ldr     q2, [x3, #32]           /* Q3 */
++16:   bti     j                       /* Q4 */
++      ldr     q3, [x3, #48]
+       nop
+-18:   ldp     q0, q1, [x3]            /* Q2 */
++      nop
++17:   bti     j                       /* Q3 */
++      ldr     q2, [x3, #32]
++      nop
++      nop
++18:   bti     j                       /* Q2 */
++      ldp     q0, q1, [x3]
+       b       99f
+-19:   ldr     q0, [x3]                /* Q1 */
++      nop
++19:   bti     j                       /* Q1 */
++      ldr     q0, [x3]
+       b       99f
+-20:   ldrb    w0, [x3, #BE(7)]        /* UINT8 */
++      nop
++20:   bti     j                       /* UINT8 */
++      ldrb    w0, [x3, #BE(7)]
+       b       99f
++      nop
+ 21:   brk     #1000                   /* reserved */
+       nop
+-22:   ldrh    w0, [x3, #BE(6)]        /* UINT16 */
++      nop
++      nop
++22:   bti     j                       /* UINT16 */
++      ldrh    w0, [x3, #BE(6)]
+       b       99f
++      nop
+ 23:   brk     #1000                   /* reserved */
+       nop
+-24:   ldr     w0, [x3, #BE(4)]        /* UINT32 */
++      nop
++      nop
++24:   bti     j                       /* UINT32 */
++      ldr     w0, [x3, #BE(4)]
+       b       99f
++      nop
+ 25:   brk     #1000                   /* reserved */
+       nop
+-26:   ldrsb   x0, [x3, #BE(7)]        /* SINT8 */
++      nop
++      nop
++26:   bti     j                       /* SINT8 */
++      ldrsb   x0, [x3, #BE(7)]
+       b       99f
++      nop
+ 27:   brk     #1000                   /* reserved */
+       nop
+-28:   ldrsh   x0, [x3, #BE(6)]        /* SINT16 */
++      nop
++      nop
++28:   bti     j                       /* SINT16 */
++      ldrsh   x0, [x3, #BE(6)]
+       b       99f
++      nop
+ 29:   brk     #1000                   /* reserved */
+       nop
+-30:   ldrsw   x0, [x3, #BE(4)]        /* SINT32 */
+       nop
++      nop
++30:   bti     j                       /* SINT32 */
++      ldrsw   x0, [x3, #BE(4)]
++      nop
++      nop
+ 31:                                   /* reserved */
+ 99:   ldp     x29, x30, [sp], #ffi_closure_SYSV_FS
+       cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
+@@ -479,6 +608,7 @@ CNAME(ffi_closure_trampoline_table_page):
+       .align 4
+ CNAME(ffi_go_closure_SYSV_V):
+       cfi_startproc
++      bti     c
+       stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+       cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+       cfi_rel_offset (x29, 0)
+@@ -502,6 +632,7 @@ CNAME(ffi_go_closure_SYSV_V):
+       .align  4
+       cfi_startproc
+ CNAME(ffi_go_closure_SYSV):
++      bti     c
+       stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+       cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+       cfi_rel_offset (x29, 0)

Reply via email to