> Date: Tue, 21 Nov 2023 00:16:40 +0100
> From: Tobias Heider <tobias.hei...@stusta.de>
> 
> Diff below fixes make regress for libffi with arm64 BTI enabled.
> The tricky part was the two jump tables in ffi.c and sysv.S.
> 
> ok?

I think you missed the "computed goto" in ffi_closure_SYSV.

Maybe we shouldn't add a "bti j" for the unused slots?

> Index: Makefile
> ===================================================================
> RCS file: /cvs/ports/devel/libffi/Makefile,v
> retrieving revision 1.48
> diff -u -p -r1.48 Makefile
> --- Makefile  21 Sep 2023 09:49:57 -0000      1.48
> +++ Makefile  20 Nov 2023 23:14:17 -0000
> @@ -1,6 +1,7 @@
>  COMMENT=             Foreign Function Interface
>  
>  V=                   3.4.4
> +REVISION=            0
>  DISTNAME=            libffi-$V
>  SHARED_LIBS +=  ffi                  2.0      # 9.2
>  CATEGORIES=          devel
> Index: patches/patch-src_aarch64_ffi_c
> ===================================================================
> RCS file: patches/patch-src_aarch64_ffi_c
> diff -N patches/patch-src_aarch64_ffi_c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ patches/patch-src_aarch64_ffi_c   20 Nov 2023 23:14:17 -0000
> @@ -0,0 +1,76 @@
> +Index: src/aarch64/ffi.c
> +--- src/aarch64/ffi.c.orig
> ++++ src/aarch64/ffi.c
> +@@ -390,47 +390,59 @@ extend_hfa_type (void *dest, void *src, int h)
> +     "adr    %0, 0f\n"
> + "   add     %0, %0, %1\n"
> + "   br      %0\n"
> +-"0: ldp     s16, s17, [%3]\n"       /* S4 */
> ++"0: bti     j\n"                    /* S4 */
> ++"   ldp     s16, s17, [%3]\n"
> + "   ldp     s18, s19, [%3, #8]\n"
> + "   b       4f\n"
> +-"   ldp     s16, s17, [%3]\n"       /* S3 */
> ++"   bti     j\n"                    /* S3 */
> ++"   ldp     s16, s17, [%3]\n"
> + "   ldr     s18, [%3, #8]\n"
> + "   b       3f\n"
> +-"   ldp     s16, s17, [%3]\n"       /* S2 */
> ++"   bti     j\n"                    /* S2 */
> ++"   ldp     s16, s17, [%3]\n"
> + "   b       2f\n"
> + "   nop\n"
> +-"   ldr     s16, [%3]\n"            /* S1 */
> ++"   bti     j\n"                    /* S1 */
> ++"   ldr     s16, [%3]\n"
> + "   b       1f\n"
> + "   nop\n"
> +-"   ldp     d16, d17, [%3]\n"       /* D4 */
> ++"   bti     j\n"                    /* D4 */
> ++"   ldp     d16, d17, [%3]\n"
> + "   ldp     d18, d19, [%3, #16]\n"
> + "   b       4f\n"
> +-"   ldp     d16, d17, [%3]\n"       /* D3 */
> ++"   bti     j\n"                    /* D3 */
> ++"   ldp     d16, d17, [%3]\n"
> + "   ldr     d18, [%3, #16]\n"
> + "   b       3f\n"
> +-"   ldp     d16, d17, [%3]\n"       /* D2 */
> ++"   bti     j\n"                    /* D2 */
> ++"   ldp     d16, d17, [%3]\n"
> + "   b       2f\n"
> + "   nop\n"
> +-"   ldr     d16, [%3]\n"            /* D1 */
> ++"   bti     j\n"                    /* D1 */
> ++"   ldr     d16, [%3]\n"
> + "   b       1f\n"
> + "   nop\n"
> +-"   ldp     q16, q17, [%3]\n"       /* Q4 */
> ++"   bti     j\n"                    /* Q4 */
> ++"   ldp     q16, q17, [%3]\n"
> + "   ldp     q18, q19, [%3, #32]\n"
> + "   b       4f\n"
> +-"   ldp     q16, q17, [%3]\n"       /* Q3 */
> ++"   bti     j\n"                    /* Q3 */
> ++"   ldp     q16, q17, [%3]\n"
> + "   ldr     q18, [%3, #32]\n"
> + "   b       3f\n"
> +-"   ldp     q16, q17, [%3]\n"       /* Q2 */
> ++"   bti     j\n"                    /* Q2 */
> ++"   ldp     q16, q17, [%3]\n"
> + "   b       2f\n"
> + "   nop\n"
> +-"   ldr     q16, [%3]\n"            /* Q1 */
> ++"   bti     j\n"                    /* Q1 */
> ++"   ldr     q16, [%3]\n"
> + "   b       1f\n"
> + "4: str     q19, [%2, #48]\n"
> + "3: str     q18, [%2, #32]\n"
> + "2: str     q17, [%2, #16]\n"
> + "1: str     q16, [%2]"
> +     : "=&r"(x0)
> +-    : "r"(f * 12), "r"(dest), "r"(src)
> ++    : "r"(f * 16), "r"(dest), "r"(src)
> +     : "memory", "v16", "v17", "v18", "v19");
> + }
> + #endif
> Index: patches/patch-src_aarch64_sysv_S
> ===================================================================
> RCS file: patches/patch-src_aarch64_sysv_S
> diff -N patches/patch-src_aarch64_sysv_S
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ patches/patch-src_aarch64_sysv_S  20 Nov 2023 23:14:17 -0000
> @@ -0,0 +1,220 @@
> +Index: src/aarch64/sysv.S
> +--- src/aarch64/sysv.S.orig
> ++++ src/aarch64/sysv.S
> +@@ -78,6 +78,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> + 
> +     cfi_startproc
> + CNAME(ffi_call_SYSV):
> ++    bti     c
> +     /* Sign the lr with x1 since that is where it will be stored */
> +     SIGN_LR_WITH_REG(x1)
> + 
> +@@ -138,78 +139,142 @@ CNAME(ffi_call_SYSV):
> +     /* Save the return value as directed.  */
> +     adr     x5, 0f
> +     and     w4, w4, #AARCH64_RET_MASK
> +-    add     x5, x5, x4, lsl #3
> ++    add     x5, x5, x4, lsl #4
> +     br      x5
> + 
> +-    /* Note that each table entry is 2 insns, and thus 8 bytes.
> ++    /* Note that each table entry is 4 insns, and thus 16 bytes.
> +        For integer data, note that we're storing into ffi_arg
> +        and therefore we want to extend to 64 bits; these types
> +        have two consecutive entries allocated for them.  */
> +     .align  4
> +-0:  b 99f                           /* VOID */
> ++0:  bti     j
> ++    b 99f                           /* VOID */
> ++    nop 
> +     nop
> +-1:  str     x0, [x3]                /* INT64 */
> ++1:  bti     j
> ++    str     x0, [x3]                /* INT64 */
> +     b 99f
> +-2:  stp     x0, x1, [x3]            /* INT128 */
> ++    nop
> ++2:  bti     j
> ++    stp     x0, x1, [x3]            /* INT128 */
> +     b 99f
> +-3:  brk     #1000                   /* UNUSED */
> ++    nop
> ++3:  bti     j
> ++    brk     #1000                   /* UNUSED */
> +     b 99f
> +-4:  brk     #1000                   /* UNUSED */
> ++    nop
> ++4:  bti     j
> ++    brk     #1000                   /* UNUSED */
> +     b 99f
> +-5:  brk     #1000                   /* UNUSED */
> ++    nop
> ++5:  bti     j
> ++    brk     #1000                   /* UNUSED */
> +     b 99f
> +-6:  brk     #1000                   /* UNUSED */
> ++    nop
> ++6:  bti     j
> ++    brk     #1000                   /* UNUSED */
> +     b 99f
> +-7:  brk     #1000                   /* UNUSED */
> ++    nop
> ++7:  bti     j
> ++    brk     #1000                   /* UNUSED */
> +     b 99f
> +-8:  st4     { v0.s, v1.s, v2.s, v3.s }[0], [x3]     /* S4 */
> ++    nop
> ++8:  bti     j
> ++    st4     { v0.s, v1.s, v2.s, v3.s }[0], [x3]     /* S4 */
> +     b 99f
> +-9:  st3     { v0.s, v1.s, v2.s }[0], [x3]   /* S3 */
> ++    nop
> ++9:  bti     j
> ++    st3     { v0.s, v1.s, v2.s }[0], [x3]   /* S3 */
> +     b 99f
> +-10: stp     s0, s1, [x3]            /* S2 */
> ++    nop
> ++10: bti     j
> ++    stp     s0, s1, [x3]            /* S2 */
> +     b 99f
> +-11: str     s0, [x3]                /* S1 */
> ++    nop
> ++11: bti     j
> ++    str     s0, [x3]                /* S1 */
> +     b 99f
> +-12: st4     { v0.d, v1.d, v2.d, v3.d }[0], [x3]     /* D4 */
> ++    nop
> ++12: bti     j
> ++    st4     { v0.d, v1.d, v2.d, v3.d }[0], [x3]     /* D4 */
> +     b 99f
> +-13: st3     { v0.d, v1.d, v2.d }[0], [x3]   /* D3 */
> ++    nop
> ++13: bti     j
> ++    st3     { v0.d, v1.d, v2.d }[0], [x3]   /* D3 */
> +     b 99f
> +-14: stp     d0, d1, [x3]            /* D2 */
> ++    nop
> ++14: bti     j
> ++    stp     d0, d1, [x3]            /* D2 */
> +     b 99f
> +-15: str     d0, [x3]                /* D1 */
> ++    nop
> ++15: bti     j
> ++    str     d0, [x3]                /* D1 */
> +     b 99f
> +-16: str     q3, [x3, #48]           /* Q4 */
> +     nop
> +-17: str     q2, [x3, #32]           /* Q3 */
> ++16: bti     j
> ++    str     q3, [x3, #48]           /* Q4 */
> +     nop
> +-18: stp     q0, q1, [x3]            /* Q2 */
> ++    nop
> ++17: bti     j
> ++    str     q2, [x3, #32]           /* Q3 */
> ++    nop
> ++    nop
> ++18: bti     j
> ++    stp     q0, q1, [x3]            /* Q2 */
> +     b 99f
> +-19: str     q0, [x3]                /* Q1 */
> ++    nop
> ++19: bti     j
> ++    str     q0, [x3]                /* Q1 */
> +     b 99f
> +-20: uxtb    w0, w0                  /* UINT8 */
> ++    nop
> ++20: bti     j
> ++    uxtb    w0, w0                  /* UINT8 */
> +     str     x0, [x3]
> +-21: b 99f                           /* reserved */
> +     nop
> +-22: uxth    w0, w0                  /* UINT16 */
> ++21: bti     j
> ++    b 99f                           /* reserved */
> ++    nop
> ++    nop
> ++22: bti     j
> ++    uxth    w0, w0                  /* UINT16 */
> +     str     x0, [x3]
> +-23: b 99f                           /* reserved */
> +     nop
> +-24: mov     w0, w0                  /* UINT32 */
> ++23: bti     j
> ++    b 99f                           /* reserved */
> ++    nop
> ++    nop
> ++24: bti     j
> ++    mov     w0, w0                  /* UINT32 */
> +     str     x0, [x3]
> +-25: b 99f                           /* reserved */
> +     nop
> +-26: sxtb    x0, w0                  /* SINT8 */
> ++25: bti     j
> ++    b 99f                           /* reserved */
> ++    nop
> ++    nop
> ++26: bti     j
> ++    sxtb    x0, w0                  /* SINT8 */
> +     str     x0, [x3]
> +-27: b 99f                           /* reserved */
> +     nop
> +-28: sxth    x0, w0                  /* SINT16 */
> ++27: bti     j
> ++    b 99f                           /* reserved */
> ++    nop
> ++    nop
> ++28: bti     j
> ++    sxth    x0, w0                  /* SINT16 */
> +     str     x0, [x3]
> +-29: b 99f                           /* reserved */
> +     nop
> +-30: sxtw    x0, w0                  /* SINT32 */
> ++29: bti     j
> ++    b 99f                           /* reserved */
> ++    nop
> ++    nop
> ++30: bti     j
> ++    sxtw    x0, w0                  /* SINT32 */
> +     str     x0, [x3]
> +-31: b 99f                           /* reserved */
> +     nop
> ++31: bti     j
> ++    b 99f                           /* reserved */
> ++    nop
> ++    nop
> + 
> +     /* Return now that result has been populated. */
> + 99:
> +@@ -246,6 +311,7 @@ CNAME(ffi_call_SYSV):
> +     .align 4
> + CNAME(ffi_closure_SYSV_V):
> +     cfi_startproc
> ++    bti     c
> +     SIGN_LR
> +     stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
> +     cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
> +@@ -270,6 +336,7 @@ CNAME(ffi_closure_SYSV_V):
> +     .align  4
> +     cfi_startproc
> + CNAME(ffi_closure_SYSV):
> ++    bti     c
> +     SIGN_LR
> +     stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
> +     cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
> +@@ -479,6 +546,7 @@ CNAME(ffi_closure_trampoline_table_page):
> +     .align 4
> + CNAME(ffi_go_closure_SYSV_V):
> +     cfi_startproc
> ++    bti     c
> +     stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
> +     cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
> +     cfi_rel_offset (x29, 0)
> +@@ -502,6 +570,7 @@ CNAME(ffi_go_closure_SYSV_V):
> +     .align  4
> +     cfi_startproc
> + CNAME(ffi_go_closure_SYSV):
> ++    bti     c
> +     stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
> +     cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
> +     cfi_rel_offset (x29, 0)
> 

Reply via email to