Re: [PATCHv2 2/2] aarch64: Add support for _BitInt

Richard Sandiford Thu, 28 Mar 2024 08:01:09 -0700

"Andre Vieira (lists)" <andre.simoesdiasvie...@arm.com> writes:
> This patch adds support for C23's _BitInt for the AArch64 port when 
> compiling for little endianness.  Big Endianness requires further 
> target-agnostic support and we therefor disable it for now.
>
> The tests expose some suboptimal codegen for which I'll create PR's for 
> optimizations after this goes in.
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64.cc (TARGET_C_BITINT_TYPE_INFO): Declare MACRO.
>       (aarch64_bitint_type_info): New function.
>       (aarch64_return_in_memory_1): Return large _BitInt's in memory.
>       (aarch64_function_arg_alignment): Adapt to correctly return the ABI
>       mandated alignment of _BitInt(N) where N > 128 as the alignment of
>       TImode.
>       (aarch64_composite_type_p): Return true for _BitInt(N), where N > 128.
>
> libgcc/ChangeLog:
>
>       * config/aarch64/t-softfp (softfp_extras): Add floatbitinthf,
>       floatbitintbf, floatbitinttf and fixtfbitint.
>       * config/aarch64/libgcc-softfp.ver (GCC_14.0.0): Add __floatbitinthf,
>       __floatbitintbf, __floatbitinttf and __fixtfbitint.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/bitint-alignments.c: New test.
>       * gcc.target/aarch64/bitint-args.c: New test.
>       * gcc.target/aarch64/bitint-sizes.c: New test.
>       * gcc.target/aarch64/bitfield-bitint-abi.h: New header.
>       * gcc.target/aarch64/bitfield-bitint-abi-align16.c: New test.
>       * gcc.target/aarch64/bitfield-bitint-abi-align8.c: New test.


Since we don't support big-endian yet, I assume the tests should be
conditional on aarch64_little_endian.

> [...]
> diff --git a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c 
> b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..048d04e4c1bf90215892aa0173f22226246a097d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
> @@ -0,0 +1,378 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-stack-protector -save-temps -fno-schedule-insns 
> -fno-schedule-insns2" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#define ALIGN 16
> +#include "bitfield-bitint-abi.h"
> +
> +// f1-f16 are all the same
> +
> +/*
> +** f1:
> +**   and     x0, x2, 1
> +**   ret
> +*/
> +/*
> +** f8:
> +**   and     x0, x2, 1
> +**   ret
> +*/
> +/*
> +** f16:
> +**   and     x0, x2, 1
> +**   ret
> +*/
> +
> +/* fp seems to be unable to optimize away stack-usage, TODO: to fix.  */
> +
> +/*
> +** fp:
> +**...
> +**   and     x0, x1, 1
> +**...
> +**   ret
> +*/
> +
> +// all other f1p-f8p generate the same code, for f16p the value comes from x2
> +/*
> +** f1p:
> +**   and     x0, x1, 1
> +**   ret
> +*/
> +/*
> +** f8p:
> +**   and     x0, x1, 1
> +**   ret
> +*/
> +/*
> +** f16p:
> +**   and     x0, x2, 1
> +**   ret
> +*/
> +
> +// g1-g16 are all the same
> +/*
> +** g1:
> +**   mov     (x[0-9]+), x0
> +**   mov     w0, w1
> +**   and     x4, \1, 9223372036854775807
> +**   and     x2, \1, 1
> +**   mov     x3, 0
> +**   b       f1
> +*/
> +
> +/*
> +** g8:
> +**   mov     (x[0-9]+), x0
> +**   mov     w0, w1
> +**   and     x4, \1, 9223372036854775807
> +**   and     x2, \1, 1
> +**   mov     x3, 0
> +**   b       f8
> +*/
> +/*
> +** g16:
> +**   mov     (x[0-9]+), x0
> +**   mov     w0, w1
> +**   and     x4, \1, 9223372036854775807
> +**   and     x2, \1, 1
> +**   mov     x3, 0
> +**   b       f16
> +*/
> +
> +// again gp different from the rest
> +
> +/*
> +** gp:
> +**   sub     sp, sp, #16
> +**   mov     (x[0-9]+), x0
> +**   mov     w0, w1
> +**   sbfx    x([0-9]+), \1, 0, 63
> +**   mov     (w[0-9]+), 0
> +**   bfi     \3, w\2, 0, 1
> +**   and     x3, x\2, 9223372036854775807
> +**   mov     x2, 0
> +**   str     xzr, \[sp\]
> +**   strb    \3, \[sp\]
> +**   ldr     x1, \[sp\]
> +**   add     sp, sp, 16
> +**   b       fp
> +*/
> +
> +// g1p-g8p are all the same, g16p uses x2 to pass parameter to f16p
> +
> +/*
> +** g1p:
> +**   mov     (w[0-9]+), w1
> +**   and     x3, x0, 9223372036854775807
> +**   and     x1, x0, 1
> +**   mov     x2, 0
> +**   mov     w0, \1
> +**   b       f1p
> +*/
> +/*
> +** g8p:
> +**   mov     (w[0-9]+), w1
> +**   and     x3, x0, 9223372036854775807
> +**   and     x1, x0, 1
> +**   mov     x2, 0
> +**   mov     w0, \1
> +**   b       f8p
> +*/
> +/*
> +** g16p:
> +**   mov     (x[0-9]+), x0
> +**   mov     w0, w1
> +**   and     x4, \1, 9223372036854775807
> +**   and     x2, \1, 1
> +**   mov     x3, 0
> +**   b       f16p
> +*/
> +
> +// f*_stack are all the same
> +/*
> +** f1_stack:
> +**   ldr     (x[0-9]+), \[sp, 16\]
> +**   and     x0, \1, 1
> +**   ret
> +*/
> +/*
> +** f8_stack:
> +**   ldr     (x[0-9]+), \[sp, 16\]
> +**   and     x0, \1, 1
> +**   ret
> +*/
> +/*
> +** f16_stack:
> +**   ldr     (x[0-9]+), \[sp, 16\]
> +**   and     x0, \1, 1
> +**   ret
> +*/
> +
> +// fp{,1,8}_stack are all the same but fp16_stack loads from sp+16
> +/*
> +** fp_stack:
> +**   ldr     (x[0-9]+), \[sp, 8\]
> +**   and     x0, \1, 1
> +**   ret
> +*/
> +/*
> +** f1p_stack:
> +**   ldr     (x[0-9]+), \[sp, 8\]
> +**   and     x0, \1, 1
> +**   ret
> +*/
> +/*
> +** f8p_stack:
> +**   ldr     (x[0-9]+), \[sp, 8\]
> +**   and     x0, \1, 1
> +**   ret
> +*/
> +
> +/*
> +** f16p_stack:
> +**   ldr     (x[0-9]+), \[sp, 16\]
> +**   and     x0, \1, 1
> +**   ret
> +*/
> +
> +/*
> +** gp_stack:
> +**...
> +**   mov     x([0-9]+), x0
> +**   sxtw    (x[0-9]+), w1
> +**   mov     x0, \2
> +**   and     (x[0-9]+), \2, 9223372036854775807

I assume this is x7, is that right?  x7 doesn't seem to be set elsewhere.
If so, I think this is one case that we need to match as x7 rather than
capture.  Similarly for the other stack tests, including in *-align8.c.

> +**   mov     (w[0-9]+), 0
> +**   bfi     \4, w\1, 0, 1
> +**   strb    wzr, \[sp, 16\]
> +**   mov     x6, \3
> +**   mov     x5, \3
> +**   mov     x4, \3
> +**   mov     x3, \3
> +**   mov     x2, \3
> +**   str     xzr, \[sp, 48\]
> +**   strb    \4, \[sp, 48\]
> +**   ldr     (x[0-9]+), \[sp, 48\]
> +**   stp     \3, \5, \[sp\]
> +**   mov     x1, \3
> +**   bl      fp_stack
> +**   sbfx    x0, x0, 0, 63
> +**...
> +**   ret
> +*/
> [...]
> diff --git a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c 
> b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..11f0580fd60c3d619126c5b41d646e22374c3593
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c
> @@ -0,0 +1,380 @@
> [...]
> +/*
> +** g16:
> +**   mov     x2, x0

This set and...

> +**   mov     w0, w1
> +**   and     x4, x2, 9223372036854775807
> +**   and     x2, x2, 1

...these two reads should be captured.  (The destination of the last
instruction must be x2, of course.)

> +**   mov     x3, 0
> +**   b       f16
> +*/
> +
> +// again gp different from the rest
> +
> +/*
> +** gp:
> +**   sub     sp, sp, #16
> +**   mov     (x[0-9]+), x0
> +**   mov     w0, w1
> +**   sbfx    x([0-9]+), \1, 0, 63
> +**   mov     w1, 0
> +**   bfi     w1, w\2, 0, 1

The use of w1 in the last two instructions should be captured
similarly to the align16.c test.

> diff --git a/gcc/testsuite/gcc.target/aarch64/bitint-alignments.c 
> b/gcc/testsuite/gcc.target/aarch64/bitint-alignments.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..4de31fe7ebd933247911c48ace01ab520fe194a3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bitint-alignments.c
> @@ -0,0 +1,58 @@
> +/* { dg-do run } */
> +/* { dg-options "-std=c23" } */
> +
> +static long unsigned int
> +calc_size (int n)

Would be more natural as calc_align(of).

> +{
> +  if (n > 64)
> +    return alignof(__int128_t);
> +  if (n > 32)
> +    return alignof(long long);
> +  if (n > 16)
> +    return alignof(int);
> +  if (n > 8)
> +    return alignof(short);
> +  else
> +    return alignof(char);
> +}
> +
> +#define CHECK_ALIGNMENT(N) \
> +  if (alignof(_BitInt(N)) != calc_size(N)) \
> +    __builtin_abort ();

I'm relying on Jakub's previous LGTM for the libgcc changes :)

OK with those changes.  Thanks a lot for doing this.

Richard

Re: [PATCHv2 2/2] aarch64: Add support for _BitInt

Reply via email to