[gcc r14-10289] arm: Fix CASE_VECTOR_SHORTEN_MODE for thumb2.
https://gcc.gnu.org/g:ca1924947b5bed8105ae020bef6950bddda448f3 commit r14-10289-gca1924947b5bed8105ae020bef6950bddda448f3 Author: Richard Ball Date: Thu Jun 6 16:10:14 2024 +0100 arm: Fix CASE_VECTOR_SHORTEN_MODE for thumb2. The CASE_VECTOR_SHORTEN_MODE query is missing some equals signs which causes suboptimal codegen due to missed optimisation opportunities. This patch also adds a test for thumb2 switch statements as none exist currently. gcc/ChangeLog: PR target/115353 * config/arm/arm.h (enum arm_auto_incmodes): Correct CASE_VECTOR_SHORTEN_MODE query. gcc/testsuite/ChangeLog: * gcc.target/arm/thumb2-switchstatement.c: New test. (cherry picked from commit 2963c76e8e24d4ebaf2b1b4ac4d7ca44eb0a9025) Diff: --- gcc/config/arm/arm.h | 4 +- .../gcc.target/arm/thumb2-switchstatement.c| 144 + 2 files changed, 146 insertions(+), 2 deletions(-) diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 449e6935b32..0cd5d733952 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -2111,8 +2111,8 @@ enum arm_auto_incmodes ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \ : SImode) \ : (TARGET_THUMB2\ - ? ((min > 0 && max < 0x200) ? QImode \ - : (min > 0 && max <= 0x20000) ? HImode \ + ? ((min >= 0 && max < 0x200) ? QImode\ + : (min >= 0 && max < 0x20000) ? HImode \ : SImode) \ : ((min >= 0 && max < 1024) \ ? 
(ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, QImode) \ diff --git a/gcc/testsuite/gcc.target/arm/thumb2-switchstatement.c b/gcc/testsuite/gcc.target/arm/thumb2-switchstatement.c new file mode 100644 index 000..8badf318e62 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/thumb2-switchstatement.c @@ -0,0 +1,144 @@ +/* { dg-do compile } */ +/* { dg-options "-mthumb --param case-values-threshold=1 -fno-reorder-blocks -fno-tree-dce -O2" } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define NOP "nop;" +#define NOP2 NOP NOP +#define NOP4 NOP2 NOP2 +#define NOP8 NOP4 NOP4 +#define NOP16 NOP8 NOP8 +#define NOP32 NOP16 NOP16 +#define NOP64 NOP32 NOP32 +#define NOP128 NOP64 NOP64 +#define NOP256 NOP128 NOP128 +#define NOP512 NOP256 NOP256 +#define NOP1024 NOP512 NOP512 +#define NOP2048 NOP1024 NOP1024 +#define NOP4096 NOP2048 NOP2048 +#define NOP8192 NOP4096 NOP4096 +#define NOP16384 NOP8192 NOP8192 +#define NOP32768 NOP16384 NOP16384 +#define NOP65536 NOP32768 NOP32768 +#define NOP131072 NOP65536 NOP65536 + +enum z +{ + a = 1, + b, + c, + d, + e, + f = 7, +}; + +inline void QIFunction (const char* flag) +{ + asm volatile (NOP32); + return; +} + +inline void HIFunction (const char* flag) +{ + asm volatile (NOP512); + return; +} + +inline void SIFunction (const char* flag) +{ + asm volatile (NOP131072); + return; +} + +/* +**QImode_test: +** ... +** tbb \[pc, r[0-9]+\] +** ... +*/ +__attribute__ ((noinline)) __attribute__ ((noclone)) const char* QImode_test(enum z x) +{ + switch (x) +{ + case d: +QIFunction("QItest"); +return "InlineASM"; + case f: +return "TEST"; + default: +return "Default"; +} +} + +/* { dg-final { scan-assembler ".byte" } } */ + +/* +**HImode_test: +** ... +** tbh \[pc, r[0-9]+, lsl #1\] +** ... 
+*/ +__attribute__ ((noinline)) __attribute__ ((noclone)) const char* HImode_test(enum z x) +{ + switch (x) + { +case d: + HIFunction("HItest"); + return "InlineASM"; +case f: + return "TEST"; +default: + return "Default"; + } +} + +/* { dg-final { scan-assembler ".2byte" } } */ + +/* +**SImode_test: +** ... +** adr (r[0-9]+), .L[0-9]+ +** ldr pc, \[\1, r[0-9]+, lsl #2\] +** ... +*/ +__attribute__ ((noinline)) __attribute__ ((noclone)) const char* SImode_test(enum z x) +{ + switch (x) + { +case d: + SIFunction("SItest"); + return "InlineASM"; +case f: + return "TEST"; +default: + return "Default"; + } +} + +/* { dg-final { scan-assembler ".word" } } */ + +/* +**backwards_branch_test: +** ... +** adr (r[0-9]+), .L[0-9]+ +** ldr pc, \[\1, r[0-9]+, lsl #2\] +** ... +*/ +__attribute__ ((noinline)) __attribute__ ((noclone)) const char* backwards_branch_test(enum z x, int flag) +{ + if (flag == 5) + { +backwards: + asm volatile (NOP512); + return "ASM"; + } + switch (x) + { +case d: + goto backwards; +
[gcc r14-10285] aarch64: Add missing ACLE macro for NEON-SVE Bridge
https://gcc.gnu.org/g:35ed54f136fe63bd04d48ada6efb305457bbd824 commit r14-10285-g35ed54f136fe63bd04d48ada6efb305457bbd824 Author: Richard Ball Date: Thu Jun 6 16:28:00 2024 +0100 aarch64: Add missing ACLE macro for NEON-SVE Bridge __ARM_NEON_SVE_BRIDGE was missed in the original patch and is added by this patch. gcc/ChangeLog: * config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros): Add missing __ARM_NEON_SVE_BRIDGE. (cherry picked from commit 43530bc40b1d0465911e493e56a6631202ce85b1) Diff: --- gcc/config/aarch64/aarch64-c.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index fe1a20e4e54..d042e5fbd8c 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -75,6 +75,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) builtin_define ("__ARM_STATE_ZA"); builtin_define ("__ARM_STATE_ZT0"); + builtin_define ("__ARM_NEON_SVE_BRIDGE"); /* Define keyword attributes like __arm_streaming as macros that expand to the associated [[...]] attribute. Use __extension__ in the attribute
[gcc r15-1075] aarch64: Add missing ACLE macro for NEON-SVE Bridge
https://gcc.gnu.org/g:43530bc40b1d0465911e493e56a6631202ce85b1 commit r15-1075-g43530bc40b1d0465911e493e56a6631202ce85b1 Author: Richard Ball Date: Thu Jun 6 16:28:00 2024 +0100 aarch64: Add missing ACLE macro for NEON-SVE Bridge __ARM_NEON_SVE_BRIDGE was missed in the original patch and is added by this patch. gcc/ChangeLog: * config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros): Add missing __ARM_NEON_SVE_BRIDGE. Diff: --- gcc/config/aarch64/aarch64-c.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index fe1a20e4e54..d042e5fbd8c 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -75,6 +75,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) builtin_define ("__ARM_STATE_ZA"); builtin_define ("__ARM_STATE_ZT0"); + builtin_define ("__ARM_NEON_SVE_BRIDGE"); /* Define keyword attributes like __arm_streaming as macros that expand to the associated [[...]] attribute. Use __extension__ in the attribute
[gcc r15-1074] arm: Fix CASE_VECTOR_SHORTEN_MODE for thumb2.
https://gcc.gnu.org/g:2963c76e8e24d4ebaf2b1b4ac4d7ca44eb0a9025 commit r15-1074-g2963c76e8e24d4ebaf2b1b4ac4d7ca44eb0a9025 Author: Richard Ball Date: Thu Jun 6 16:10:14 2024 +0100 arm: Fix CASE_VECTOR_SHORTEN_MODE for thumb2. The CASE_VECTOR_SHORTEN_MODE query is missing some equals signs which causes suboptimal codegen due to missed optimisation opportunities. This patch also adds a test for thumb2 switch statements as none exist currently. gcc/ChangeLog: PR target/115353 * config/arm/arm.h (enum arm_auto_incmodes): Correct CASE_VECTOR_SHORTEN_MODE query. gcc/testsuite/ChangeLog: * gcc.target/arm/thumb2-switchstatement.c: New test. Diff: --- gcc/config/arm/arm.h | 4 +- .../gcc.target/arm/thumb2-switchstatement.c| 144 + 2 files changed, 146 insertions(+), 2 deletions(-) diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 449e6935b32..0cd5d733952 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -2111,8 +2111,8 @@ enum arm_auto_incmodes ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \ : SImode) \ : (TARGET_THUMB2\ - ? ((min > 0 && max < 0x200) ? QImode \ - : (min > 0 && max <= 0x20000) ? HImode \ + ? ((min >= 0 && max < 0x200) ? QImode\ + : (min >= 0 && max < 0x20000) ? HImode \ : SImode) \ : ((min >= 0 && max < 1024) \ ? 
(ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, QImode) \ diff --git a/gcc/testsuite/gcc.target/arm/thumb2-switchstatement.c b/gcc/testsuite/gcc.target/arm/thumb2-switchstatement.c new file mode 100644 index 000..8badf318e62 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/thumb2-switchstatement.c @@ -0,0 +1,144 @@ +/* { dg-do compile } */ +/* { dg-options "-mthumb --param case-values-threshold=1 -fno-reorder-blocks -fno-tree-dce -O2" } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define NOP "nop;" +#define NOP2 NOP NOP +#define NOP4 NOP2 NOP2 +#define NOP8 NOP4 NOP4 +#define NOP16 NOP8 NOP8 +#define NOP32 NOP16 NOP16 +#define NOP64 NOP32 NOP32 +#define NOP128 NOP64 NOP64 +#define NOP256 NOP128 NOP128 +#define NOP512 NOP256 NOP256 +#define NOP1024 NOP512 NOP512 +#define NOP2048 NOP1024 NOP1024 +#define NOP4096 NOP2048 NOP2048 +#define NOP8192 NOP4096 NOP4096 +#define NOP16384 NOP8192 NOP8192 +#define NOP32768 NOP16384 NOP16384 +#define NOP65536 NOP32768 NOP32768 +#define NOP131072 NOP65536 NOP65536 + +enum z +{ + a = 1, + b, + c, + d, + e, + f = 7, +}; + +inline void QIFunction (const char* flag) +{ + asm volatile (NOP32); + return; +} + +inline void HIFunction (const char* flag) +{ + asm volatile (NOP512); + return; +} + +inline void SIFunction (const char* flag) +{ + asm volatile (NOP131072); + return; +} + +/* +**QImode_test: +** ... +** tbb \[pc, r[0-9]+\] +** ... +*/ +__attribute__ ((noinline)) __attribute__ ((noclone)) const char* QImode_test(enum z x) +{ + switch (x) +{ + case d: +QIFunction("QItest"); +return "InlineASM"; + case f: +return "TEST"; + default: +return "Default"; +} +} + +/* { dg-final { scan-assembler ".byte" } } */ + +/* +**HImode_test: +** ... +** tbh \[pc, r[0-9]+, lsl #1\] +** ... 
+*/ +__attribute__ ((noinline)) __attribute__ ((noclone)) const char* HImode_test(enum z x) +{ + switch (x) + { +case d: + HIFunction("HItest"); + return "InlineASM"; +case f: + return "TEST"; +default: + return "Default"; + } +} + +/* { dg-final { scan-assembler ".2byte" } } */ + +/* +**SImode_test: +** ... +** adr (r[0-9]+), .L[0-9]+ +** ldr pc, \[\1, r[0-9]+, lsl #2\] +** ... +*/ +__attribute__ ((noinline)) __attribute__ ((noclone)) const char* SImode_test(enum z x) +{ + switch (x) + { +case d: + SIFunction("SItest"); + return "InlineASM"; +case f: + return "TEST"; +default: + return "Default"; + } +} + +/* { dg-final { scan-assembler ".word" } } */ + +/* +**backwards_branch_test: +** ... +** adr (r[0-9]+), .L[0-9]+ +** ldr pc, \[\1, r[0-9]+, lsl #2\] +** ... +*/ +__attribute__ ((noinline)) __attribute__ ((noclone)) const char* backwards_branch_test(enum z x, int flag) +{ + if (flag == 5) + { +backwards: + asm volatile (NOP512); + return "ASM"; + } + switch (x) + { +case d: + goto backwards; +case f: + return "TEST"; +default: + return "Default"; + }
[gcc r13-8678] ifcvt: Don't lower bitfields with non-constant offsets [PR 111882]
https://gcc.gnu.org/g:4950f6bcd3cce9deb630b76af42cd6d6968ba03f commit r13-8678-g4950f6bcd3cce9deb630b76af42cd6d6968ba03f Author: Andre Vieira Date: Fri Oct 20 17:02:32 2023 +0100 ifcvt: Don't lower bitfields with non-constant offsets [PR 111882] This patch stops lowering of bitfields by ifcvt when they have non-constant offsets as we are not likely to be able to do anything useful with those during vectorization. That also fixes the issue reported in PR 111882, which was being caused by an offset with a side-effect being lowered, but constants have no side-effects so we will no longer run into that problem. gcc/ChangeLog: PR tree-optimization/111882 * tree-if-conv.cc (get_bitfield_rep): Return NULL_TREE for bitfields with non-constant offsets. gcc/testsuite/ChangeLog: * gcc.dg/vect/pr111882.c: New test. (cherry picked from commit 24cf1f600b8ad34c68a51f48884e72d01f729893) Diff: --- gcc/testsuite/gcc.dg/vect/pr111882.c | 15 +++ gcc/tree-if-conv.cc | 12 +++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr111882.c b/gcc/testsuite/gcc.dg/vect/pr111882.c new file mode 100644 index 00000000000..024ad57b693 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr111882.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-additional-options { -fdump-tree-ifcvt-all } } */ + +static void __attribute__((noipa)) f(int n) { + int i, j; + struct S { char d[n]; int a; int b : 17; int c : 12; }; + struct S A[100][1111]; + for (i = 0; i < 100; i++) { +asm volatile("" : : "g"(&A[0][0]) : "memory"); +for (j = 0; j < 1111; j++) A[i][j].b = 2; + } +} +void g(void) { f(1); } + +/* { dg-final { scan-tree-dump-not "Bitfield OK to lower" "ifcvt" } } */ diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index a19450f533d..fddc4a890c6 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -3330,6 +3330,7 @@ get_bitfield_rep (gassign *stmt, bool write, tree *bitpos, : gimple_assign_rhs1 (stmt); tree field_decl = TREE_OPERAND (comp_ref, 1); + tree ref_offset = 
component_ref_field_offset (comp_ref); tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl); /* Bail out if the representative is not a suitable type for a scalar @@ -3344,6 +3345,15 @@ get_bitfield_rep (gassign *stmt, bool write, tree *bitpos, if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0) return NULL_TREE; + if (TREE_CODE (DECL_FIELD_OFFSET (rep_decl)) != INTEGER_CST + || TREE_CODE (ref_offset) != INTEGER_CST) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\t Bitfield NOT OK to lower," + " offset is non-constant.\n"); + return NULL_TREE; +} + if (struct_expr) *struct_expr = TREE_OPERAND (comp_ref, 0); @@ -3364,7 +3374,7 @@ get_bitfield_rep (gassign *stmt, bool write, tree *bitpos, the structure and the container from the number of bits from the start of the structure and the actual bitfield member. */ tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype, -DECL_FIELD_OFFSET (field_decl), +ref_offset, build_int_cst (bitsizetype, BITS_PER_UNIT)); bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos, DECL_FIELD_BIT_OFFSET (field_decl));
[gcc r12-10410] tree-optimization/114672 - WIDEN_MULT_PLUS_EXPR type mismatch
https://gcc.gnu.org/g:87e37c72cfb153d65ac8b26d6f2d1fe155818318 commit r12-10410-g87e37c72cfb153d65ac8b26d6f2d1fe155818318 Author: Richard Biener Date: Wed Apr 10 10:33:40 2024 +0200 tree-optimization/114672 - WIDEN_MULT_PLUS_EXPR type mismatch The following makes sure to restrict WIDEN_MULT*_EXPR to a mode precision final compute type as the mode is used to find the optab and type checking chokes when seeing bit-precisions later which would likely also not properly expanded to RTL. PR tree-optimization/114672 * tree-ssa-math-opts.cc (convert_plusminus_to_widen): Only allow mode-precision results. * gcc.dg/torture/pr114672.c: New testcase. (cherry picked from commit 912753cc5f18d786e334dd425469fa7f93155661) Diff: --- gcc/testsuite/gcc.dg/torture/pr114672.c | 14 ++ gcc/tree-ssa-math-opts.cc | 5 +++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.dg/torture/pr114672.c b/gcc/testsuite/gcc.dg/torture/pr114672.c new file mode 100644 index 000..b69511fe8db --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr114672.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ + +struct { + __INT64_TYPE__ m : 60; +} s; + +short a; +short b; + +void +foo () +{ + s.m += a * b; +} diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index bab0cc5aef4..ffd8eebd2a9 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -2802,8 +2802,9 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt, lhs = gimple_assign_lhs (stmt); type = TREE_TYPE (lhs); - if (TREE_CODE (type) != INTEGER_TYPE - && TREE_CODE (type) != FIXED_POINT_TYPE) + if ((TREE_CODE (type) != INTEGER_TYPE + && TREE_CODE (type) != FIXED_POINT_TYPE) + || !type_has_mode_precision_p (type)) return false; if (code == MINUS_EXPR)
[gcc r13-8676] tree-optimization/114672 - WIDEN_MULT_PLUS_EXPR type mismatch
https://gcc.gnu.org/g:0d625dc1bffd885b04eb90ff48a6d34acacc3e0b commit r13-8676-g0d625dc1bffd885b04eb90ff48a6d34acacc3e0b Author: Richard Biener Date: Wed Apr 10 10:33:40 2024 +0200 tree-optimization/114672 - WIDEN_MULT_PLUS_EXPR type mismatch The following makes sure to restrict WIDEN_MULT*_EXPR to a mode precision final compute type as the mode is used to find the optab and type checking chokes when seeing bit-precisions later which would likely also not properly expanded to RTL. PR tree-optimization/114672 * tree-ssa-math-opts.cc (convert_plusminus_to_widen): Only allow mode-precision results. * gcc.dg/torture/pr114672.c: New testcase. (cherry picked from commit 912753cc5f18d786e334dd425469fa7f93155661) Diff: --- gcc/testsuite/gcc.dg/torture/pr114672.c | 14 ++ gcc/tree-ssa-math-opts.cc | 5 +++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.dg/torture/pr114672.c b/gcc/testsuite/gcc.dg/torture/pr114672.c new file mode 100644 index 000..b69511fe8db --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr114672.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ + +struct { + __INT64_TYPE__ m : 60; +} s; + +short a; +short b; + +void +foo () +{ + s.m += a * b; +} diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index ff949e4fec9..08cf33214ad 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -2875,8 +2875,9 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt, lhs = gimple_assign_lhs (stmt); type = TREE_TYPE (lhs); - if (TREE_CODE (type) != INTEGER_TYPE - && TREE_CODE (type) != FIXED_POINT_TYPE) + if ((TREE_CODE (type) != INTEGER_TYPE + && TREE_CODE (type) != FIXED_POINT_TYPE) + || !type_has_mode_precision_p (type)) return false; if (code == MINUS_EXPR)
[gcc r12-10397] aarch64: Fix SCHEDULER_IDENT for Cortex-A510
https://gcc.gnu.org/g:751a0f54345b7e037db7f0389c19c1f87e0ae4de commit r12-10397-g751a0f54345b7e037db7f0389c19c1f87e0ae4de Author: Richard Ball Date: Fri Apr 26 18:21:07 2024 +0100 aarch64: Fix SCHEDULER_IDENT for Cortex-A510 The SCHEDULER_IDENT for this CPU was incorrectly set to cortexa55. This can cause sub-optimal asm to be generated. gcc/ChangeLog: PR target/114272 * config/aarch64/aarch64-cores.def (AARCH64_CORE): Change SCHEDULER_IDENT from cortexa55 to cortexa53 for Cortex-A510. Diff: --- gcc/config/aarch64/aarch64-cores.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 30b5591f1a3..956afa70714 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -164,7 +164,7 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_ARCH8_R, cor /* Armv9.0-A Architecture Processors. */ /* Arm ('A') cores. */ -AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG +AARCH64_CORE("cortex-a510", cortexa510, cortexa53, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
[gcc r13-8654] aarch64: Fix SCHEDULER_IDENT for Cortex-A510
https://gcc.gnu.org/g:28b3b8a2fe55521a43f3710cf6ced1ab63f1ea03 commit r13-8654-g28b3b8a2fe55521a43f3710cf6ced1ab63f1ea03 Author: Richard Ball Date: Fri Apr 26 18:15:23 2024 +0100 aarch64: Fix SCHEDULER_IDENT for Cortex-A510 The SCHEDULER_IDENT for this CPU was incorrectly set to cortexa55. This can cause sub-optimal asm to be generated. gcc/ChangeLog: PR target/114272 * config/aarch64/aarch64-cores.def (AARCH64_CORE): Change SCHEDULER_IDENT from cortexa55 to cortexa53 for Cortex-A510. Diff: --- gcc/config/aarch64/aarch64-cores.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 8d9eb817d6d..fdda0697b88 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -168,7 +168,7 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), cortexa53, 0x41, 0xd15 /* Armv9.0-A Architecture Processors. */ /* Arm ('A') cores. */ -AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1) +AARCH64_CORE("cortex-a510", cortexa510, cortexa53, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1) AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd47, -1)
[gcc r11-11364] arm: Zero/Sign extends for CMSE security
https://gcc.gnu.org/g:dabd742cc25f8992c24e639510df0965dbf14f21 commit r11-11364-gdabd742cc25f8992c24e639510df0965dbf14f21 Author: Richard Ball Date: Thu Apr 25 15:30:42 2024 +0100 arm: Zero/Sign extends for CMSE security Co-Authored by: Andre Simoes Dias Vieira This patch makes the following changes: 1) When calling a secure function from non-secure code then any arguments smaller than 32-bits that are passed in registers are zero- or sign-extended. 2) After a non-secure function returns into secure code then any return value smaller than 32-bits that is passed in a register is zero- or sign-extended. This patch addresses the following CVE-2024-0151. gcc/ChangeLog: PR target/114837 * config/arm/arm.c (cmse_nonsecure_call_inline_register_clear): Add zero/sign extend. (arm_expand_prologue): Add zero/sign extend. gcc/testsuite/ChangeLog: * gcc.target/arm/cmse/extend-param.c: New test. * gcc.target/arm/cmse/extend-return.c: New test. (cherry picked from commit ad45086178d833254d66fab518b14234418f002b) Diff: --- gcc/config/arm/arm.c | 69 gcc/testsuite/gcc.target/arm/cmse/extend-param.c | 96 +++ gcc/testsuite/gcc.target/arm/cmse/extend-return.c | 92 ++ 3 files changed, 257 insertions(+) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 96d62b2164e..e386186db6f 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -18864,6 +18864,30 @@ cmse_nonsecure_call_inline_register_clear (void) end_sequence (); emit_insn_before (seq, insn); + /* The AAPCS requires the callee to widen integral types narrower +than 32 bits to the full width of the register; but when handling +calls to non-secure space, we cannot trust the callee to have +correctly done so. So forcibly re-widen the result here. 
*/ + tree ret_type = TREE_TYPE (fntype); + if ((TREE_CODE (ret_type) == INTEGER_TYPE + || TREE_CODE (ret_type) == ENUMERAL_TYPE + || TREE_CODE (ret_type) == BOOLEAN_TYPE) + && known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 4)) + { + machine_mode ret_mode = TYPE_MODE (ret_type); + rtx extend; + if (TYPE_UNSIGNED (ret_type)) + extend = gen_rtx_ZERO_EXTEND (SImode, + gen_rtx_REG (ret_mode, R0_REGNUM)); + else + extend = gen_rtx_SIGN_EXTEND (SImode, + gen_rtx_REG (ret_mode, R0_REGNUM)); + emit_insn_after (gen_rtx_SET (gen_rtx_REG (SImode, R0_REGNUM), +extend), insn); + + } + + if (TARGET_HAVE_FPCXT_CMSE) { rtx_insn *last, *pop_insn, *after = insn; @@ -23272,6 +23296,51 @@ arm_expand_prologue (void) ip_rtx = gen_rtx_REG (SImode, IP_REGNUM); + /* The AAPCS requires the callee to widen integral types narrower + than 32 bits to the full width of the register; but when handling + calls to non-secure space, we cannot trust the callee to have + correctly done so. So forcibly re-widen the result here. */ + if (IS_CMSE_ENTRY (func_type)) +{ + function_args_iterator args_iter; + CUMULATIVE_ARGS args_so_far_v; + cumulative_args_t args_so_far; + bool first_param = true; + tree arg_type; + tree fndecl = current_function_decl; + tree fntype = TREE_TYPE (fndecl); + arm_init_cumulative_args (_so_far_v, fntype, NULL_RTX, fndecl); + args_so_far = pack_cumulative_args (_so_far_v); + FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter) + { + rtx arg_rtx; + + if (VOID_TYPE_P (arg_type)) + break; + + function_arg_info arg (arg_type, /*named=*/true); + if (!first_param) + /* We should advance after processing the argument and pass + the argument we're advancing past. 
*/ + arm_function_arg_advance (args_so_far, arg); + first_param = false; + arg_rtx = arm_function_arg (args_so_far, arg); + gcc_assert (REG_P (arg_rtx)); + if ((TREE_CODE (arg_type) == INTEGER_TYPE + || TREE_CODE (arg_type) == ENUMERAL_TYPE + || TREE_CODE (arg_type) == BOOLEAN_TYPE) + && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 4)) + { + if (TYPE_UNSIGNED (arg_type)) + emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)), + gen_rtx_ZERO_EXTEND (SImode, arg_rtx)); + else + emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)), + gen_rtx_SIGN_EXTEND (SImode, arg_rtx)); + } +
[gcc r12-10394] arm: Zero/Sign extends for CMSE security
https://gcc.gnu.org/g:441e194abcf3211de647d74c892f90879ae9ca8c commit r12-10394-g441e194abcf3211de647d74c892f90879ae9ca8c Author: Richard Ball Date: Thu Apr 25 15:30:42 2024 +0100 arm: Zero/Sign extends for CMSE security Co-Authored by: Andre Simoes Dias Vieira This patch makes the following changes: 1) When calling a secure function from non-secure code then any arguments smaller than 32-bits that are passed in registers are zero- or sign-extended. 2) After a non-secure function returns into secure code then any return value smaller than 32-bits that is passed in a register is zero- or sign-extended. This patch addresses the following CVE-2024-0151. gcc/ChangeLog: PR target/114837 * config/arm/arm.cc (cmse_nonsecure_call_inline_register_clear): Add zero/sign extend. (arm_expand_prologue): Add zero/sign extend. gcc/testsuite/ChangeLog: * gcc.target/arm/cmse/extend-param.c: New test. * gcc.target/arm/cmse/extend-return.c: New test. (cherry picked from commit ad45086178d833254d66fab518b14234418f002b) Diff: --- gcc/config/arm/arm.cc | 69 gcc/testsuite/gcc.target/arm/cmse/extend-param.c | 96 +++ gcc/testsuite/gcc.target/arm/cmse/extend-return.c | 92 ++ 3 files changed, 257 insertions(+) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index b700c23b866..f3064b4e270 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -18989,6 +18989,30 @@ cmse_nonsecure_call_inline_register_clear (void) end_sequence (); emit_insn_before (seq, insn); + /* The AAPCS requires the callee to widen integral types narrower +than 32 bits to the full width of the register; but when handling +calls to non-secure space, we cannot trust the callee to have +correctly done so. So forcibly re-widen the result here. 
*/ + tree ret_type = TREE_TYPE (fntype); + if ((TREE_CODE (ret_type) == INTEGER_TYPE + || TREE_CODE (ret_type) == ENUMERAL_TYPE + || TREE_CODE (ret_type) == BOOLEAN_TYPE) + && known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 4)) + { + machine_mode ret_mode = TYPE_MODE (ret_type); + rtx extend; + if (TYPE_UNSIGNED (ret_type)) + extend = gen_rtx_ZERO_EXTEND (SImode, + gen_rtx_REG (ret_mode, R0_REGNUM)); + else + extend = gen_rtx_SIGN_EXTEND (SImode, + gen_rtx_REG (ret_mode, R0_REGNUM)); + emit_insn_after (gen_rtx_SET (gen_rtx_REG (SImode, R0_REGNUM), +extend), insn); + + } + + if (TARGET_HAVE_FPCXT_CMSE) { rtx_insn *last, *pop_insn, *after = insn; @@ -23397,6 +23421,51 @@ arm_expand_prologue (void) ip_rtx = gen_rtx_REG (SImode, IP_REGNUM); + /* The AAPCS requires the callee to widen integral types narrower + than 32 bits to the full width of the register; but when handling + calls to non-secure space, we cannot trust the callee to have + correctly done so. So forcibly re-widen the result here. */ + if (IS_CMSE_ENTRY (func_type)) +{ + function_args_iterator args_iter; + CUMULATIVE_ARGS args_so_far_v; + cumulative_args_t args_so_far; + bool first_param = true; + tree arg_type; + tree fndecl = current_function_decl; + tree fntype = TREE_TYPE (fndecl); + arm_init_cumulative_args (_so_far_v, fntype, NULL_RTX, fndecl); + args_so_far = pack_cumulative_args (_so_far_v); + FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter) + { + rtx arg_rtx; + + if (VOID_TYPE_P (arg_type)) + break; + + function_arg_info arg (arg_type, /*named=*/true); + if (!first_param) + /* We should advance after processing the argument and pass + the argument we're advancing past. 
*/ + arm_function_arg_advance (args_so_far, arg); + first_param = false; + arg_rtx = arm_function_arg (args_so_far, arg); + gcc_assert (REG_P (arg_rtx)); + if ((TREE_CODE (arg_type) == INTEGER_TYPE + || TREE_CODE (arg_type) == ENUMERAL_TYPE + || TREE_CODE (arg_type) == BOOLEAN_TYPE) + && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 4)) + { + if (TYPE_UNSIGNED (arg_type)) + emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)), + gen_rtx_ZERO_EXTEND (SImode, arg_rtx)); + else + emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)), + gen_rtx_SIGN_EXTEND (SImode, arg_rtx)); +
[gcc r13-8647] arm: Zero/Sign extends for CMSE security
https://gcc.gnu.org/g:5550214b58e95320b54e42ef0e37c6479e04b27b commit r13-8647-g5550214b58e95320b54e42ef0e37c6479e04b27b Author: Richard Ball Date: Thu Apr 25 15:30:42 2024 +0100 arm: Zero/Sign extends for CMSE security Co-Authored by: Andre Simoes Dias Vieira This patch makes the following changes: 1) When calling a secure function from non-secure code then any arguments smaller than 32-bits that are passed in registers are zero- or sign-extended. 2) After a non-secure function returns into secure code then any return value smaller than 32-bits that is passed in a register is zero- or sign-extended. This patch addresses the following CVE-2024-0151. gcc/ChangeLog: PR target/114837 * config/arm/arm.cc (cmse_nonsecure_call_inline_register_clear): Add zero/sign extend. (arm_expand_prologue): Add zero/sign extend. gcc/testsuite/ChangeLog: * gcc.target/arm/cmse/extend-param.c: New test. * gcc.target/arm/cmse/extend-return.c: New test. (cherry picked from commit ad45086178d833254d66fab518b14234418f002b) Diff: --- gcc/config/arm/arm.cc | 69 gcc/testsuite/gcc.target/arm/cmse/extend-param.c | 96 +++ gcc/testsuite/gcc.target/arm/cmse/extend-return.c | 92 ++ 3 files changed, 257 insertions(+) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index bf7ff9a9704..cd82728ae60 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -19133,6 +19133,30 @@ cmse_nonsecure_call_inline_register_clear (void) end_sequence (); emit_insn_before (seq, insn); + /* The AAPCS requires the callee to widen integral types narrower +than 32 bits to the full width of the register; but when handling +calls to non-secure space, we cannot trust the callee to have +correctly done so. So forcibly re-widen the result here. 
*/ + tree ret_type = TREE_TYPE (fntype); + if ((TREE_CODE (ret_type) == INTEGER_TYPE + || TREE_CODE (ret_type) == ENUMERAL_TYPE + || TREE_CODE (ret_type) == BOOLEAN_TYPE) + && known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 4)) + { + machine_mode ret_mode = TYPE_MODE (ret_type); + rtx extend; + if (TYPE_UNSIGNED (ret_type)) + extend = gen_rtx_ZERO_EXTEND (SImode, + gen_rtx_REG (ret_mode, R0_REGNUM)); + else + extend = gen_rtx_SIGN_EXTEND (SImode, + gen_rtx_REG (ret_mode, R0_REGNUM)); + emit_insn_after (gen_rtx_SET (gen_rtx_REG (SImode, R0_REGNUM), +extend), insn); + + } + + if (TARGET_HAVE_FPCXT_CMSE) { rtx_insn *last, *pop_insn, *after = insn; @@ -23575,6 +23599,51 @@ arm_expand_prologue (void) ip_rtx = gen_rtx_REG (SImode, IP_REGNUM); + /* The AAPCS requires the callee to widen integral types narrower + than 32 bits to the full width of the register; but when handling + calls to non-secure space, we cannot trust the callee to have + correctly done so. So forcibly re-widen the result here. */ + if (IS_CMSE_ENTRY (func_type)) +{ + function_args_iterator args_iter; + CUMULATIVE_ARGS args_so_far_v; + cumulative_args_t args_so_far; + bool first_param = true; + tree arg_type; + tree fndecl = current_function_decl; + tree fntype = TREE_TYPE (fndecl); + arm_init_cumulative_args (_so_far_v, fntype, NULL_RTX, fndecl); + args_so_far = pack_cumulative_args (_so_far_v); + FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter) + { + rtx arg_rtx; + + if (VOID_TYPE_P (arg_type)) + break; + + function_arg_info arg (arg_type, /*named=*/true); + if (!first_param) + /* We should advance after processing the argument and pass + the argument we're advancing past. 
*/ + arm_function_arg_advance (args_so_far, arg); + first_param = false; + arg_rtx = arm_function_arg (args_so_far, arg); + gcc_assert (REG_P (arg_rtx)); + if ((TREE_CODE (arg_type) == INTEGER_TYPE + || TREE_CODE (arg_type) == ENUMERAL_TYPE + || TREE_CODE (arg_type) == BOOLEAN_TYPE) + && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 4)) + { + if (TYPE_UNSIGNED (arg_type)) + emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)), + gen_rtx_ZERO_EXTEND (SImode, arg_rtx)); + else + emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)), + gen_rtx_SIGN_EXTEND (SImode, arg_rtx)); +
[gcc r14-10122] arm: Zero/Sign extends for CMSE security
https://gcc.gnu.org/g:ad45086178d833254d66fab518b14234418f002b commit r14-10122-gad45086178d833254d66fab518b14234418f002b Author: Richard Ball Date: Thu Apr 25 15:30:42 2024 +0100 arm: Zero/Sign extends for CMSE security Co-Authored by: Andre Simoes Dias Vieira This patch makes the following changes: 1) When calling a secure function from non-secure code then any arguments smaller than 32-bits that are passed in registers are zero- or sign-extended. 2) After a non-secure function returns into secure code then any return value smaller than 32-bits that is passed in a register is zero- or sign-extended. This patch addresses the following CVE-2024-0151. gcc/ChangeLog: PR target/114837 * config/arm/arm.cc (cmse_nonsecure_call_inline_register_clear): Add zero/sign extend. (arm_expand_prologue): Add zero/sign extend. gcc/testsuite/ChangeLog: * gcc.target/arm/cmse/extend-param.c: New test. * gcc.target/arm/cmse/extend-return.c: New test. Diff: --- gcc/config/arm/arm.cc | 69 gcc/testsuite/gcc.target/arm/cmse/extend-param.c | 96 +++ gcc/testsuite/gcc.target/arm/cmse/extend-return.c | 92 ++ 3 files changed, 257 insertions(+) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 0217abc218d..ea0c963a4d6 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -19210,6 +19210,30 @@ cmse_nonsecure_call_inline_register_clear (void) end_sequence (); emit_insn_before (seq, insn); + /* The AAPCS requires the callee to widen integral types narrower +than 32 bits to the full width of the register; but when handling +calls to non-secure space, we cannot trust the callee to have +correctly done so. So forcibly re-widen the result here. 
*/ + tree ret_type = TREE_TYPE (fntype); + if ((TREE_CODE (ret_type) == INTEGER_TYPE + || TREE_CODE (ret_type) == ENUMERAL_TYPE + || TREE_CODE (ret_type) == BOOLEAN_TYPE) + && known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 4)) + { + machine_mode ret_mode = TYPE_MODE (ret_type); + rtx extend; + if (TYPE_UNSIGNED (ret_type)) + extend = gen_rtx_ZERO_EXTEND (SImode, + gen_rtx_REG (ret_mode, R0_REGNUM)); + else + extend = gen_rtx_SIGN_EXTEND (SImode, + gen_rtx_REG (ret_mode, R0_REGNUM)); + emit_insn_after (gen_rtx_SET (gen_rtx_REG (SImode, R0_REGNUM), +extend), insn); + + } + + if (TARGET_HAVE_FPCXT_CMSE) { rtx_insn *last, *pop_insn, *after = insn; @@ -23652,6 +23676,51 @@ arm_expand_prologue (void) ip_rtx = gen_rtx_REG (SImode, IP_REGNUM); + /* The AAPCS requires the callee to widen integral types narrower + than 32 bits to the full width of the register; but when handling + calls to non-secure space, we cannot trust the callee to have + correctly done so. So forcibly re-widen the result here. */ + if (IS_CMSE_ENTRY (func_type)) +{ + function_args_iterator args_iter; + CUMULATIVE_ARGS args_so_far_v; + cumulative_args_t args_so_far; + bool first_param = true; + tree arg_type; + tree fndecl = current_function_decl; + tree fntype = TREE_TYPE (fndecl); + arm_init_cumulative_args (_so_far_v, fntype, NULL_RTX, fndecl); + args_so_far = pack_cumulative_args (_so_far_v); + FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter) + { + rtx arg_rtx; + + if (VOID_TYPE_P (arg_type)) + break; + + function_arg_info arg (arg_type, /*named=*/true); + if (!first_param) + /* We should advance after processing the argument and pass + the argument we're advancing past. 
*/ + arm_function_arg_advance (args_so_far, arg); + first_param = false; + arg_rtx = arm_function_arg (args_so_far, arg); + gcc_assert (REG_P (arg_rtx)); + if ((TREE_CODE (arg_type) == INTEGER_TYPE + || TREE_CODE (arg_type) == ENUMERAL_TYPE + || TREE_CODE (arg_type) == BOOLEAN_TYPE) + && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 4)) + { + if (TYPE_UNSIGNED (arg_type)) + emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)), + gen_rtx_ZERO_EXTEND (SImode, arg_rtx)); + else + emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)), + gen_rtx_SIGN_EXTEND (SImode, arg_rtx)); + } + } +} + if (IS_STACKALIGN (func_type)) { rtx r0,
[gcc r14-9875] aarch64: Fix ACLE SME streaming mode error in neon-sve-bridge
https://gcc.gnu.org/g:685d822e524cc8b2726ad6c44c2ccaabe55a198c commit r14-9875-g685d822e524cc8b2726ad6c44c2ccaabe55a198c Author: Richard Ball Date: Tue Apr 9 16:31:35 2024 +0100 aarch64: Fix ACLE SME streaming mode error in neon-sve-bridge When using LTO, handling the pragma for sme before the pragma for the neon-sve-bridge caused the following error on svset_neonq, in the neon-sve-bridge.c test. error: ACLE function '0' can only be called when SME streaming mode is enabled. This has been resolved by changing the pragma handlers to accept two modes. One where they add functions normally and a second in which registered_functions is filled with a placeholder value. By using this, the ordering of the functions can be maintained. gcc/ChangeLog: * config/aarch64/aarch64-c.cc (aarch64_pragma_aarch64): Add functions_nulls parameter to pragma_handlers. * config/aarch64/aarch64-protos.h: Likewise. * config/aarch64/aarch64-sve-builtins.h (enum handle_pragma_index): Add enum to count number of pragmas to be handled. * config/aarch64/aarch64-sve-builtins.cc (GTY): Add global variable for initial indexes and change overload_names to an array. (function_builder::function_builder): Add pragma handler information. (function_builder::add_function): Add code for overwriting previous registered_functions entries. (add_unique_function): Use an array to register overload_names for both pragma handler modes. (add_overloaded_function): Likewise. (init_builtins): Add functions_nulls parameter to pragma_handlers. (handle_arm_sve_h): Initialize pragma handler information. (handle_arm_neon_sve_bridge_h): Likewise. (handle_arm_sme_h): Likewise. 
Diff: --- gcc/config/aarch64/aarch64-c.cc| 6 +- gcc/config/aarch64/aarch64-protos.h| 6 +- gcc/config/aarch64/aarch64-sve-builtins.cc | 88 ++ gcc/config/aarch64/aarch64-sve-builtins.h | 17 +- 4 files changed, 75 insertions(+), 42 deletions(-) diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index b5a6917d06d..fe1a20e4e54 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -344,15 +344,15 @@ aarch64_pragma_aarch64 (cpp_reader *) const char *name = TREE_STRING_POINTER (x); if (strcmp (name, "arm_sve.h") == 0) -aarch64_sve::handle_arm_sve_h (); +aarch64_sve::handle_arm_sve_h (false); else if (strcmp (name, "arm_sme.h") == 0) -aarch64_sve::handle_arm_sme_h (); +aarch64_sve::handle_arm_sme_h (false); else if (strcmp (name, "arm_neon.h") == 0) handle_arm_neon_h (); else if (strcmp (name, "arm_acle.h") == 0) handle_arm_acle_h (); else if (strcmp (name, "arm_neon_sve_bridge.h") == 0) -aarch64_sve::handle_arm_neon_sve_bridge_h (); +aarch64_sve::handle_arm_neon_sve_bridge_h (false); else error ("unknown %<#pragma GCC aarch64%> option %qs", name); } diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index bd719b992a5..42639e9efcf 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1014,9 +1014,9 @@ bool aarch64_general_check_builtin_call (location_t, vec, namespace aarch64_sve { void init_builtins (); - void handle_arm_sve_h (); - void handle_arm_sme_h (); - void handle_arm_neon_sve_bridge_h (); + void handle_arm_sve_h (bool); + void handle_arm_sme_h (bool); + void handle_arm_neon_sve_bridge_h (bool); tree builtin_decl (unsigned, bool); bool builtin_type_p (const_tree); bool builtin_type_p (const_tree, unsigned int *, unsigned int *); diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index e124d1f90a5..f3983a123e3 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ 
b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -933,14 +933,19 @@ tree acle_svprfop; /* The list of all registered function decls, indexed by code. */ static GTY(()) vec *registered_functions; +/* Stores the starting function index for each pragma handler. */ +static unsigned int initial_indexes[NUM_PRAGMA_HANDLERS]; + /* All registered function decls, hashed on the function_instance that they implement. This is used for looking up implementations of overloaded functions. */ static hash_table *function_table; -/* Maps all overloaded function names that we've registered so far to - their associated function_instances. The map keys are IDENTIFIER_NODEs. */ -static GTY(()) hash_map *overload_names; +/* Index 0 maps all overloaded function names that we've registered so far to + their associated function_instances.
gcc-wwwdocs branch master updated. 7cd7e13e443da8e2aae389fa30eb547530c6e2c8
This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "gcc-wwwdocs". The branch, master has been updated via 7cd7e13e443da8e2aae389fa30eb547530c6e2c8 (commit) from 8765e9c73ae14cfad592b8a3885fe1bcc3ff96cd (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log - commit 7cd7e13e443da8e2aae389fa30eb547530c6e2c8 Author: Richard Ball Date: Mon Apr 8 16:52:00 2024 +0100 [PATCH][wwwdocs] Add NEON-SVE bridge intrinsics to changes.html Adding the NEON-SVE bridge intrinsics that were missed in the last patch. diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html index 2d8968cf..1b345b32 100644 --- a/htdocs/gcc-14/changes.html +++ b/htdocs/gcc-14/changes.html @@ -503,6 +503,12 @@ a work-in-progress. -march=armv8.2-a or higher to be specified. Likewise, the intrinsics enabled by +memtag no longer require -march=armv8.5-a. + Support for the + https://github.com/ARM-software/acle/blob/main/main/acle.md#neon-sve-bridge;> + NEON-SVE Bridge intrinsics. + These are intrinsics that allow conversions between NEON and SVE vectors, + enabled through the inclusion of the arm_neon_sve_bridge.h header. + The option -mtp= is now supported for changing the TPIDR register used for TLS accesses. For more details please refer to the --- Summary of changes: htdocs/gcc-14/changes.html | 6 ++ 1 file changed, 6 insertions(+) hooks/post-receive -- gcc-wwwdocs
[gcc r14-9672] aarch64: Fix SCHEDULER_IDENT for Cortex-A510 and Cortex-A520
https://gcc.gnu.org/g:cab53aae43cf94171b01320c08302e47a5daa391 commit r14-9672-gcab53aae43cf94171b01320c08302e47a5daa391 Author: Richard Ball Date: Tue Mar 26 13:54:31 2024 + aarch64: Fix SCHEDULER_IDENT for Cortex-A510 and Cortex-A520 The SCHEDULER_IDENT for these two CPUs was incorrectly set to cortexa55. This can cause sub-optimal asm to be generated. gcc/ChangeLog: PR target/114272 * config/aarch64/aarch64-cores.def (AARCH64_CORE): Change SCHEDULER_IDENT from cortexa55 to cortexa53 for Cortex-A510 and Cortex-A520. Diff: --- gcc/config/aarch64/aarch64-cores.def | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 7ebefcf543b..f69fc212d56 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -169,9 +169,9 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), cortexa53, 0x41, 0xd15 /* Armv9.0-A Architecture Processors. */ /* Arm ('A') cores. */ -AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1) +AARCH64_CORE("cortex-a510", cortexa510, cortexa53, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1) -AARCH64_CORE("cortex-a520", cortexa520, cortexa55, V9_2A, (SVE2_BITPERM, MEMTAG), cortexa53, 0x41, 0xd80, -1) +AARCH64_CORE("cortex-a520", cortexa520, cortexa53, V9_2A, (SVE2_BITPERM, MEMTAG), cortexa53, 0x41, 0xd80, -1) AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd47, -1)