> Date: Tue, 21 Nov 2023 00:16:40 +0100 > From: Tobias Heider <tobias.hei...@stusta.de> > > Diff below fixes make regress for libffi with arm64 BTI enabled. > The tricky parts were two jump tables in ffi.c and sysv.S. > > ok?
I think you missed the "computed goto" in ffi_closure_SYSV. Maybe we shouldn't add a "bti j" for the unused slots? > Index: Makefile > =================================================================== > RCS file: /cvs/ports/devel/libffi/Makefile,v > retrieving revision 1.48 > diff -u -p -r1.48 Makefile > --- Makefile 21 Sep 2023 09:49:57 -0000 1.48 > +++ Makefile 20 Nov 2023 23:14:17 -0000 > @@ -1,6 +1,7 @@ > COMMENT= Foreign Function Interface > > V= 3.4.4 > +REVISION= 0 > DISTNAME= libffi-$V > SHARED_LIBS += ffi 2.0 # 9.2 > CATEGORIES= devel > Index: patches/patch-src_aarch64_ffi_c > =================================================================== > RCS file: patches/patch-src_aarch64_ffi_c > diff -N patches/patch-src_aarch64_ffi_c > --- /dev/null 1 Jan 1970 00:00:00 -0000 > +++ patches/patch-src_aarch64_ffi_c 20 Nov 2023 23:14:17 -0000 > @@ -0,0 +1,76 @@ > +Index: src/aarch64/ffi.c > +--- src/aarch64/ffi.c.orig > ++++ src/aarch64/ffi.c > +@@ -390,47 +390,59 @@ extend_hfa_type (void *dest, void *src, int h) > + "adr %0, 0f\n" > + " add %0, %0, %1\n" > + " br %0\n" > +-"0: ldp s16, s17, [%3]\n" /* S4 */ > ++"0: bti j\n" /* S4 */ > ++" ldp s16, s17, [%3]\n" > + " ldp s18, s19, [%3, #8]\n" > + " b 4f\n" > +-" ldp s16, s17, [%3]\n" /* S3 */ > ++" bti j\n" /* S3 */ > ++" ldp s16, s17, [%3]\n" > + " ldr s18, [%3, #8]\n" > + " b 3f\n" > +-" ldp s16, s17, [%3]\n" /* S2 */ > ++" bti j\n" /* S2 */ > ++" ldp s16, s17, [%3]\n" > + " b 2f\n" > + " nop\n" > +-" ldr s16, [%3]\n" /* S1 */ > ++" bti j\n" /* S1 */ > ++" ldr s16, [%3]\n" > + " b 1f\n" > + " nop\n" > +-" ldp d16, d17, [%3]\n" /* D4 */ > ++" bti j\n" /* D4 */ > ++" ldp d16, d17, [%3]\n" > + " ldp d18, d19, [%3, #16]\n" > + " b 4f\n" > +-" ldp d16, d17, [%3]\n" /* D3 */ > ++" bti j\n" /* D3 */ > ++" ldp d16, d17, [%3]\n" > + " ldr d18, [%3, #16]\n" > + " b 3f\n" > +-" ldp d16, d17, [%3]\n" /* D2 */ > ++" bti j\n" /* D2 */ > ++" ldp d16, d17, [%3]\n" > + " b 2f\n" > + " nop\n" > +-" ldr d16, [%3]\n" /* D1 */ > ++" 
bti j\n" /* D1 */ > ++" ldr d16, [%3]\n" > + " b 1f\n" > + " nop\n" > +-" ldp q16, q17, [%3]\n" /* Q4 */ > ++" bti j\n" /* Q4 */ > ++" ldp q16, q17, [%3]\n" > + " ldp q18, q19, [%3, #32]\n" > + " b 4f\n" > +-" ldp q16, q17, [%3]\n" /* Q3 */ > ++" bti j\n" /* Q3 */ > ++" ldp q16, q17, [%3]\n" > + " ldr q18, [%3, #32]\n" > + " b 3f\n" > +-" ldp q16, q17, [%3]\n" /* Q2 */ > ++" bti j\n" /* Q2 */ > ++" ldp q16, q17, [%3]\n" > + " b 2f\n" > + " nop\n" > +-" ldr q16, [%3]\n" /* Q1 */ > ++" bti j\n" /* Q1 */ > ++" ldr q16, [%3]\n" > + " b 1f\n" > + "4: str q19, [%2, #48]\n" > + "3: str q18, [%2, #32]\n" > + "2: str q17, [%2, #16]\n" > + "1: str q16, [%2]" > + : "=&r"(x0) > +- : "r"(f * 12), "r"(dest), "r"(src) > ++ : "r"(f * 16), "r"(dest), "r"(src) > + : "memory", "v16", "v17", "v18", "v19"); > + } > + #endif > Index: patches/patch-src_aarch64_sysv_S > =================================================================== > RCS file: patches/patch-src_aarch64_sysv_S > diff -N patches/patch-src_aarch64_sysv_S > --- /dev/null 1 Jan 1970 00:00:00 -0000 > +++ patches/patch-src_aarch64_sysv_S 20 Nov 2023 23:14:17 -0000 > @@ -0,0 +1,220 @@ > +Index: src/aarch64/sysv.S > +--- src/aarch64/sysv.S.orig > ++++ src/aarch64/sysv.S > +@@ -78,6 +78,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. > + > + cfi_startproc > + CNAME(ffi_call_SYSV): > ++ bti c > + /* Sign the lr with x1 since that is where it will be stored */ > + SIGN_LR_WITH_REG(x1) > + > +@@ -138,78 +139,142 @@ CNAME(ffi_call_SYSV): > + /* Save the return value as directed. */ > + adr x5, 0f > + and w4, w4, #AARCH64_RET_MASK > +- add x5, x5, x4, lsl #3 > ++ add x5, x5, x4, lsl #4 > + br x5 > + > +- /* Note that each table entry is 2 insns, and thus 8 bytes. > ++ /* Note that each table entry is 4 insns, and thus 16 bytes. > + For integer data, note that we're storing into ffi_arg > + and therefore we want to extend to 64 bits; these types > + have two consecutive entries allocated for them. 
*/ > + .align 4 > +-0: b 99f /* VOID */ > ++0: bti j > ++ b 99f /* VOID */ > ++ nop > + nop > +-1: str x0, [x3] /* INT64 */ > ++1: bti j > ++ str x0, [x3] /* INT64 */ > + b 99f > +-2: stp x0, x1, [x3] /* INT128 */ > ++ nop > ++2: bti j > ++ stp x0, x1, [x3] /* INT128 */ > + b 99f > +-3: brk #1000 /* UNUSED */ > ++ nop > ++3: bti j > ++ brk #1000 /* UNUSED */ > + b 99f > +-4: brk #1000 /* UNUSED */ > ++ nop > ++4: bti j > ++ brk #1000 /* UNUSED */ > + b 99f > +-5: brk #1000 /* UNUSED */ > ++ nop > ++5: bti j > ++ brk #1000 /* UNUSED */ > + b 99f > +-6: brk #1000 /* UNUSED */ > ++ nop > ++6: bti j > ++ brk #1000 /* UNUSED */ > + b 99f > +-7: brk #1000 /* UNUSED */ > ++ nop > ++7: bti j > ++ brk #1000 /* UNUSED */ > + b 99f > +-8: st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */ > ++ nop > ++8: bti j > ++ st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */ > + b 99f > +-9: st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */ > ++ nop > ++9: bti j > ++ st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */ > + b 99f > +-10: stp s0, s1, [x3] /* S2 */ > ++ nop > ++10: bti j > ++ stp s0, s1, [x3] /* S2 */ > + b 99f > +-11: str s0, [x3] /* S1 */ > ++ nop > ++11: bti j > ++ str s0, [x3] /* S1 */ > + b 99f > +-12: st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */ > ++ nop > ++12: bti j > ++ st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */ > + b 99f > +-13: st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */ > ++ nop > ++13: bti j > ++ st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */ > + b 99f > +-14: stp d0, d1, [x3] /* D2 */ > ++ nop > ++14: bti j > ++ stp d0, d1, [x3] /* D2 */ > + b 99f > +-15: str d0, [x3] /* D1 */ > ++ nop > ++15: bti j > ++ str d0, [x3] /* D1 */ > + b 99f > +-16: str q3, [x3, #48] /* Q4 */ > + nop > +-17: str q2, [x3, #32] /* Q3 */ > ++16: bti j > ++ str q3, [x3, #48] /* Q4 */ > + nop > +-18: stp q0, q1, [x3] /* Q2 */ > ++ nop > ++17: bti j > ++ str q2, [x3, #32] /* Q3 */ > ++ nop > ++ nop > ++18: bti j > ++ stp q0, q1, [x3] /* Q2 */ > + b 99f > +-19: str q0, [x3] /* Q1 */ > ++ nop > ++19: bti 
j > ++ str q0, [x3] /* Q1 */ > + b 99f > +-20: uxtb w0, w0 /* UINT8 */ > ++ nop > ++20: bti j > ++ uxtb w0, w0 /* UINT8 */ > + str x0, [x3] > +-21: b 99f /* reserved */ > + nop > +-22: uxth w0, w0 /* UINT16 */ > ++21: bti j > ++ b 99f /* reserved */ > ++ nop > ++ nop > ++22: bti j > ++ uxth w0, w0 /* UINT16 */ > + str x0, [x3] > +-23: b 99f /* reserved */ > + nop > +-24: mov w0, w0 /* UINT32 */ > ++23: bti j > ++ b 99f /* reserved */ > ++ nop > ++ nop > ++24: bti j > ++ mov w0, w0 /* UINT32 */ > + str x0, [x3] > +-25: b 99f /* reserved */ > + nop > +-26: sxtb x0, w0 /* SINT8 */ > ++25: bti j > ++ b 99f /* reserved */ > ++ nop > ++ nop > ++26: bti j > ++ sxtb x0, w0 /* SINT8 */ > + str x0, [x3] > +-27: b 99f /* reserved */ > + nop > +-28: sxth x0, w0 /* SINT16 */ > ++27: bti j > ++ b 99f /* reserved */ > ++ nop > ++ nop > ++28: bti j > ++ sxth x0, w0 /* SINT16 */ > + str x0, [x3] > +-29: b 99f /* reserved */ > + nop > +-30: sxtw x0, w0 /* SINT32 */ > ++29: bti j > ++ b 99f /* reserved */ > ++ nop > ++ nop > ++30: bti j > ++ sxtw x0, w0 /* SINT32 */ > + str x0, [x3] > +-31: b 99f /* reserved */ > + nop > ++31: bti j > ++ b 99f /* reserved */ > ++ nop > ++ nop > + > + /* Return now that result has been populated. */ > + 99: > +@@ -246,6 +311,7 @@ CNAME(ffi_call_SYSV): > + .align 4 > + CNAME(ffi_closure_SYSV_V): > + cfi_startproc > ++ bti c > + SIGN_LR > + stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! > + cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) > +@@ -270,6 +336,7 @@ CNAME(ffi_closure_SYSV_V): > + .align 4 > + cfi_startproc > + CNAME(ffi_closure_SYSV): > ++ bti c > + SIGN_LR > + stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! > + cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) > +@@ -479,6 +546,7 @@ CNAME(ffi_closure_trampoline_table_page): > + .align 4 > + CNAME(ffi_go_closure_SYSV_V): > + cfi_startproc > ++ bti c > + stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! 
> + cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) > + cfi_rel_offset (x29, 0) > +@@ -502,6 +570,7 @@ CNAME(ffi_go_closure_SYSV_V): > + .align 4 > + cfi_startproc > + CNAME(ffi_go_closure_SYSV): > ++ bti c > + stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! > + cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) > + cfi_rel_offset (x29, 0) >