Re: [PATCH V2] aarch64: Add missing ACLE macro for NEON-SVE Bridge

2024-06-06 Thread Richard Ball
v2: Change macro definition following internal discussion.

__ARM_NEON_SVE_BRIDGE was missed in the original patch and is
added by this patch.

Ok for trunk and a backport into gcc-14?

gcc/ChangeLog:

* config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros):
Add missing __ARM_NEON_SVE_BRIDGE.

On 6/6/24 13:20, Richard Sandiford wrote:
> Richard Ball  writes:
>> __ARM_NEON_SVE_BRIDGE was missed in the original patch and is
>> added by this patch.
>>
>> Ok for trunk and a backport into gcc-14?
>>
>> gcc/ChangeLog:
>>
>>  * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins):
>>  Add missing __ARM_NEON_SVE_BRIDGE.
> 
> After this patch was posted, there was some internal discussion
> involving LLVM & GNU devs about what this kind of macro means, now that
> we have FMV.  The feeling was that __ARM_NEON_SVE_BRIDGE should just
> indicate whether the compiler provides the file, not whether AdvSIMD
> & SVE are enabled.  I think we should therefore add this to
> aarch64_define_unconditional_macros instead.
> 
> Sorry for the slow review.  I was waiting for the outcome of that
> discussion before replying.
> 
> Thanks,
> Richard
> 
>> diff --git a/gcc/config/aarch64/aarch64-c.cc 
>> b/gcc/config/aarch64/aarch64-c.cc
>> index 
>> fe1a20e4e546a68e5f7eddff3bbb0d3e831fbd9b..1121be118cf8d05e3736ad4ee75568ff7cb92bfd
>>  100644
>> --- a/gcc/config/aarch64/aarch64-c.cc
>> +++ b/gcc/config/aarch64/aarch64-c.cc
>> @@ -260,6 +260,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
>>aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", 
>> pfile);
>>aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", 
>> pfile);
>>aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);
>> +  aarch64_def_or_undef (TARGET_SVE, "__ARM_NEON_SVE_BRIDGE", pfile);
>>  
>>/* Not for ACLE, but required to keep "float.h" correct if we switch
>>   target between implementations that do or do not support ARMv8.2-A
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index fe1a20e4e546a68e5f7eddff3bbb0d3e831fbd9b..d042e5fbd8c562df2e4538b51b960c194d2ca2c9 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -75,6 +75,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
 
   builtin_define ("__ARM_STATE_ZA");
   builtin_define ("__ARM_STATE_ZT0");
+  builtin_define ("__ARM_NEON_SVE_BRIDGE");
 
   /* Define keyword attributes like __arm_streaming as macros that expand
  to the associated [[...]] attribute.  Use __extension__ in the attribute


[PATCH] arm: Fix CASE_VECTOR_SHORTEN_MODE for thumb2.

2024-06-06 Thread Richard Ball
The CASE_VECTOR_SHORTEN_MODE query is missing some equals signs
which causes suboptimal codegen due to missed optimisation
opportunities. This patch also adds a test for thumb2
switch statements as none exist currently.

gcc/ChangeLog:
PR target/115353
* config/arm/arm.h (enum arm_auto_incmodes):
Correct CASE_VECTOR_SHORTEN_MODE query.

gcc/testsuite/ChangeLog:

* gcc.target/arm/thumb2-switchstatement.c: New test.
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 449e6935b32f8f272df709ba43aa2ba7de37e6b3..0cd5d733952d7620f452d9d90cec9103b3fb5300 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -2111,8 +2111,8 @@ enum arm_auto_incmodes
   ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode)	\
   : SImode)\
: (TARGET_THUMB2			\
-  ? ((min > 0 && max < 0x200) ? QImode\
-  : (min > 0 && max <= 0x2) ? HImode\
+  ? ((min >= 0 && max < 0x200) ? QImode\
+  : (min >= 0 && max < 0x2) ? HImode\
   : SImode)\
: ((min >= 0 && max < 1024)		\
   ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, QImode)	\
diff --git a/gcc/testsuite/gcc.target/arm/thumb2-switchstatement.c b/gcc/testsuite/gcc.target/arm/thumb2-switchstatement.c
new file mode 100644
index ..8badf318e626de1911e297bff8e93ac72160224f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb2-switchstatement.c
@@ -0,0 +1,144 @@
+/* { dg-do compile } */
+/* { dg-options "-mthumb --param case-values-threshold=1 -fno-reorder-blocks -fno-tree-dce -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define NOP "nop;"
+#define NOP2 NOP NOP
+#define NOP4 NOP2 NOP2
+#define NOP8 NOP4 NOP4
+#define NOP16 NOP8 NOP8
+#define NOP32 NOP16 NOP16
+#define NOP64 NOP32 NOP32
+#define NOP128 NOP64 NOP64
+#define NOP256 NOP128 NOP128
+#define NOP512 NOP256 NOP256
+#define NOP1024 NOP512 NOP512
+#define NOP2048 NOP1024 NOP1024
+#define NOP4096 NOP2048 NOP2048
+#define NOP8192 NOP4096 NOP4096
+#define NOP16384 NOP8192 NOP8192
+#define NOP32768 NOP16384 NOP16384
+#define NOP65536 NOP32768 NOP32768
+#define NOP131072 NOP65536 NOP65536
+
+enum z
+{
+  a = 1,
+  b,
+  c,
+  d,
+  e,
+  f = 7,
+};
+
+inline void QIFunction (const char* flag)
+{
+  asm volatile (NOP32);
+  return;
+}
+
+inline void HIFunction (const char* flag)
+{
+  asm volatile (NOP512);
+  return;
+}
+
+inline void SIFunction (const char* flag)
+{
+  asm volatile (NOP131072);
+  return;
+}
+
+/*
+**QImode_test:
+**	...
+**	tbb	\[pc, r[0-9]+\]
+**	...
+*/
+__attribute__ ((noinline)) __attribute__ ((noclone)) const char* QImode_test(enum z x)
+{
+  switch (x)
+{
+  case d:
+QIFunction("QItest");
+return "InlineASM";
+  case f:
+return "TEST";
+  default:
+return "Default";
+}
+}
+
+/* { dg-final { scan-assembler ".byte" } } */
+
+/*
+**HImode_test:
+**	...
+**	tbh	\[pc, r[0-9]+, lsl #1\]
+**	...
+*/
+__attribute__ ((noinline)) __attribute__ ((noclone)) const char* HImode_test(enum z x)
+{
+  switch (x)
+  {
+case d:
+  HIFunction("HItest");
+  return "InlineASM";
+case f:
+  return "TEST";
+default:
+  return "Default";
+  }
+}
+
+/* { dg-final { scan-assembler ".2byte" } } */
+
+/*
+**SImode_test:
+**	...
+**	adr	(r[0-9]+), .L[0-9]+
+**	ldr	pc, \[\1, r[0-9]+, lsl #2\]
+**	...
+*/
+__attribute__ ((noinline)) __attribute__ ((noclone)) const char* SImode_test(enum z x)
+{
+  switch (x)
+  {
+case d:
+  SIFunction("SItest");
+  return "InlineASM";
+case f:
+  return "TEST";
+default:
+  return "Default";
+  }
+}
+
+/* { dg-final { scan-assembler ".word" } } */
+
+/*
+**backwards_branch_test:
+**	...
+**	adr	(r[0-9]+), .L[0-9]+
+**	ldr	pc, \[\1, r[0-9]+, lsl #2\]
+**	...
+*/
+__attribute__ ((noinline)) __attribute__ ((noclone)) const char* backwards_branch_test(enum z x, int flag)
+{
+  if (flag == 5)
+  {
+backwards:
+  asm volatile (NOP512);
+  return "ASM";
+  }
+  switch (x)
+  {
+case d:
+  goto backwards;
+case f:
+  return "TEST";
+default:
+  return "Default";
+  }
+}
\ No newline at end of file


[PATCH] aarch64: Add missing ACLE macro for NEON-SVE Bridge

2024-05-31 Thread Richard Ball
__ARM_NEON_SVE_BRIDGE was missed in the original patch and is
added by this patch.

Ok for trunk and a backport into gcc-14?

gcc/ChangeLog:

* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins):
Add missing __ARM_NEON_SVE_BRIDGE.
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 
fe1a20e4e546a68e5f7eddff3bbb0d3e831fbd9b..1121be118cf8d05e3736ad4ee75568ff7cb92bfd
 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -260,6 +260,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
   aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile);
   aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile);
   aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);
+  aarch64_def_or_undef (TARGET_SVE, "__ARM_NEON_SVE_BRIDGE", pfile);
 
   /* Not for ACLE, but required to keep "float.h" correct if we switch
  target between implementations that do or do not support ARMv8.2-A


[Backport] ifcvt: Don't lower bitfields with non-constant offsets [PR 111882]

2024-05-02 Thread Richard Ball
Hi,

Requesting permission to backport:
https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=24cf1f600b8ad34c68a51f48884e72d01f729893
to gcc-13 in order to fix:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111882

Applies cleanly and with no regressions.

Thanks,
Richard


[Backport] tree-optimization/114672 - WIDEN_MULT_PLUS_EXPR type mismatch

2024-05-02 Thread Richard Ball
Hi,

Requesting permission to backport: 
https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=912753cc5f18d786e334dd425469fa7f93155661
to fix the issue listed here:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114672
in gcc-12 and gcc-13.

Thanks,
Richard


Re: [PATCH][GCC] aarch64: Fix SCHEDULER_IDENT for Cortex-A510

2024-04-25 Thread Richard Ball
Hi Richard,

I committed this combined patch (with Cortex-A520) for trunk 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=cab53aae43cf94171b01320c08302e47a5daa391

Am I ok to commit just the Cortex-A510 half into gcc-12 and gcc-13?

Thanks,
Richard Ball

From: Richard Ball
Sent: 12 March 2024 14:08
To: gcc-patches@gcc.gnu.org ; Richard Earnshaw 
; Richard Sandiford ; 
Marcus Shawcroft 
Subject: [PATCH][GCC] aarch64: Fix SCHEDULER_IDENT for Cortex-A510

The SCHEDULER_IDENT for this CPU was incorrectly set to cortexa55.
This can cause sub-optimal asm to be generated.

Ok for trunk?

Can I also backport this to gcc-12 and gcc-13?

gcc/ChangeLog:
PR target/114272
* config/aarch64/aarch64-cores.def (AARCH64_CORE):
Change SCHEDULER_IDENT from cortexa55 to cortexa53
for Cortex-A510.


Re: [PATCH] arm: Zero/Sign extends for CMSE security

2024-04-25 Thread Richard Ball
Hi Torbjorn,

Thanks very much for the comments.
Given that the code that handles this is within a FOREACH_FUNCTION_ARGS loop,
I think it is a fairly safe assumption that if the code works for one argument
it will work for all of them.
Going back to add extra tests seems a little overkill to me.

Kind Regards,
Richard Ball

From: Torbjorn SVENSSON 
Sent: 25 April 2024 12:47
To: Richard Ball ; gcc-patches@gcc.gnu.org 
; Richard Earnshaw ; Richard 
Sandiford ; Marcus Shawcroft 
; Kyrylo Tkachov 
Subject: Re: [PATCH] arm: Zero/Sign extends for CMSE security

Hi,

On 2024-04-24 17:55, Richard Ball wrote:
> This patch makes the following changes:
>
> 1) When calling a secure function from non-secure code then any arguments
> smaller than 32-bits that are passed in registers are zero- or 
> sign-extended.
> 2) After a non-secure function returns into secure code then any return value
> smaller than 32-bits that is passed in a register is  zero- or 
> sign-extended.
>
> This patch addresses the following CVE-2024-0151.
>
> gcc/ChangeLog:
>  PR target/114837
>  * config/arm/arm.cc (cmse_nonsecure_call_inline_register_clear):
>Add zero/sign extend.
>  (arm_expand_prologue): Add zero/sign extend.
>
> gcc/testsuite/ChangeLog:
>
>  * gcc.target/arm/cmse/extend-param.c: New test.
>  * gcc.target/arm/cmse/extend-return.c: New test.

I think it would make sense that there is at least one test case that
takes 2 or more arguments to ensure that not only the first argument is
extended. WDYT?


Kind regards,
Torbjörn


[PATCH] arm: Zero/Sign extends for CMSE security

2024-04-24 Thread Richard Ball
This patch makes the following changes:

1) When calling a secure function from non-secure code then any arguments
   smaller than 32-bits that are passed in registers are zero- or sign-extended.
2) After a non-secure function returns into secure code then any return value
   smaller than 32-bits that is passed in a register is  zero- or sign-extended.

This patch addresses CVE-2024-0151.

gcc/ChangeLog:
PR target/114837
* config/arm/arm.cc (cmse_nonsecure_call_inline_register_clear):
  Add zero/sign extend.
(arm_expand_prologue): Add zero/sign extend.

gcc/testsuite/ChangeLog:

* gcc.target/arm/cmse/extend-param.c: New test.
* gcc.target/arm/cmse/extend-return.c: New test.
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 0217abc218d60956ce727e6d008d46b9176dddc5..ea0c963a4d67ecd70e1571624e84dfe46d757df9 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -19210,6 +19210,30 @@ cmse_nonsecure_call_inline_register_clear (void)
 	  end_sequence ();
 	  emit_insn_before (seq, insn);
 
+	  /* The AAPCS requires the callee to widen integral types narrower
+	 than 32 bits to the full width of the register; but when handling
+	 calls to non-secure space, we cannot trust the callee to have
+	 correctly done so.  So forcibly re-widen the result here.  */
+	  tree ret_type = TREE_TYPE (fntype);
+	  if ((TREE_CODE (ret_type) == INTEGER_TYPE
+	  || TREE_CODE (ret_type) == ENUMERAL_TYPE
+	  || TREE_CODE (ret_type) == BOOLEAN_TYPE)
+	  && known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 4))
+	{
+	  machine_mode ret_mode = TYPE_MODE (ret_type);
+	  rtx extend;
+	  if (TYPE_UNSIGNED (ret_type))
+		extend = gen_rtx_ZERO_EXTEND (SImode,
+	  gen_rtx_REG (ret_mode, R0_REGNUM));
+	  else
+		extend = gen_rtx_SIGN_EXTEND (SImode,
+	  gen_rtx_REG (ret_mode, R0_REGNUM));
+	  emit_insn_after (gen_rtx_SET (gen_rtx_REG (SImode, R0_REGNUM),
+	 extend), insn);
+
+	}
+
+
 	  if (TARGET_HAVE_FPCXT_CMSE)
 	{
 	  rtx_insn *last, *pop_insn, *after = insn;
@@ -23652,6 +23676,51 @@ arm_expand_prologue (void)
 
   ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
 
+  /* The AAPCS requires the callee to widen integral types narrower
+ than 32 bits to the full width of the register; but when handling
+ calls to non-secure space, we cannot trust the callee to have
+ correctly done so.  So forcibly re-widen the result here.  */
+  if (IS_CMSE_ENTRY (func_type))
+{
+  function_args_iterator args_iter;
+  CUMULATIVE_ARGS args_so_far_v;
+  cumulative_args_t args_so_far;
+  bool first_param = true;
+  tree arg_type;
+  tree fndecl = current_function_decl;
+  tree fntype = TREE_TYPE (fndecl);
+  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
+  args_so_far = pack_cumulative_args (&args_so_far_v);
+  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
+	{
+	  rtx arg_rtx;
+
+	  if (VOID_TYPE_P (arg_type))
+	break;
+
+	  function_arg_info arg (arg_type, /*named=*/true);
+	  if (!first_param)
+	/* We should advance after processing the argument and pass
+	   the argument we're advancing past.  */
+	arm_function_arg_advance (args_so_far, arg);
+	  first_param = false;
+	  arg_rtx = arm_function_arg (args_so_far, arg);
+	  gcc_assert (REG_P (arg_rtx));
+	  if ((TREE_CODE (arg_type) == INTEGER_TYPE
+	  || TREE_CODE (arg_type) == ENUMERAL_TYPE
+	  || TREE_CODE (arg_type) == BOOLEAN_TYPE)
+	  && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 4))
+	{
+	  if (TYPE_UNSIGNED (arg_type))
+		emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)),
+			   gen_rtx_ZERO_EXTEND (SImode, arg_rtx));
+	  else
+		emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)),
+			   gen_rtx_SIGN_EXTEND (SImode, arg_rtx));
+	}
+	}
+}
+
   if (IS_STACKALIGN (func_type))
 {
   rtx r0, r1;
diff --git a/gcc/testsuite/gcc.target/arm/cmse/extend-param.c b/gcc/testsuite/gcc.target/arm/cmse/extend-param.c
new file mode 100644
index ..01fac7862385f871f3ecc246ede95eea180be025
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/extend-param.c
@@ -0,0 +1,96 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include 
+#include 
+
+#define ARRAY_SIZE (256)
+char array[ARRAY_SIZE];
+
+enum offset
+{
+zero = 0,
+one = 1,
+two = 2
+};
+
+/*
+**__acle_se_unsignSecureFunc:
+**	...
+**	uxtb	r0, r0
+**	...
+*/
+__attribute__((cmse_nonsecure_entry)) char unsignSecureFunc (unsigned char index) {
+if (index >= ARRAY_SIZE)
+  return 0;
+return array[index];
+}
+
+/*
+**__acle_se_signSecureFunc:
+**	...
+**	sxtb	r0, r0
+**	...
+*/
+__attribute__((cmse_nonsecure_entry)) char signSecureFunc (signed char index) {
+if (index >= ARRAY_SIZE)
+  return 0;
+return array[index]

[PATCH v2] aarch64: Fix ACLE SME streaming mode error in neon-sve-bridge

2024-04-09 Thread Richard Ball
When using LTO, handling the pragma for sme before the pragma
for the neon-sve-bridge caused the following error on svset_neonq,
in the neon-sve-bridge.c test.

error: ACLE function '0' can only be called when SME streaming mode is enabled.

This has been resolved by changing the pragma handlers to accept two modes.
One where they add functions normally and a second in which registered_functions
is filled with a placeholder value.

By using this, the ordering of the functions can be maintained.

No regressions on aarch64-none-elf.

gcc/ChangeLog:

* config/aarch64/aarch64-c.cc (aarch64_pragma_aarch64):
Add functions_nulls parameter to pragma_handlers.
* config/aarch64/aarch64-protos.h: Likewise.
* config/aarch64/aarch64-sve-builtins.h
(enum handle_pragma_index): Add enum to count
number of pragmas to be handled.
* config/aarch64/aarch64-sve-builtins.cc
(GTY): Add global variable for initial indexes
and change overload_names to an array.
(function_builder::function_builder):
Add pragma handler information.
(function_builder::add_function):
Add code for overwriting previous
registered_functions entries.
(add_unique_function):
Use an array to register overload_names
for both pragma handler modes.
(add_overloaded_function): Likewise.
(init_builtins):
Add functions_nulls parameter to pragma_handlers.
(handle_arm_sve_h):
Initialize pragma handler information.
(handle_arm_neon_sve_bridge_h): Likewise.
(handle_arm_sme_h): Likewise.
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index b5a6917d06db41a845681ed6f13f2800c70ede10..fe1a20e4e546a68e5f7eddff3bbb0d3e831fbd9b 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -344,15 +344,15 @@ aarch64_pragma_aarch64 (cpp_reader *)
 
   const char *name = TREE_STRING_POINTER (x);
   if (strcmp (name, "arm_sve.h") == 0)
-aarch64_sve::handle_arm_sve_h ();
+aarch64_sve::handle_arm_sve_h (false);
   else if (strcmp (name, "arm_sme.h") == 0)
-aarch64_sve::handle_arm_sme_h ();
+aarch64_sve::handle_arm_sme_h (false);
   else if (strcmp (name, "arm_neon.h") == 0)
 handle_arm_neon_h ();
   else if (strcmp (name, "arm_acle.h") == 0)
 handle_arm_acle_h ();
   else if (strcmp (name, "arm_neon_sve_bridge.h") == 0)
-aarch64_sve::handle_arm_neon_sve_bridge_h ();
+aarch64_sve::handle_arm_neon_sve_bridge_h (false);
   else
 error ("unknown %<#pragma GCC aarch64%> option %qs", name);
 }
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index bd719b992a545630f9ad6a73753fad55c6ac5d7e..42639e9efcf1e0f9362f759ae63a31b8eeb0d581 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1014,9 +1014,9 @@ bool aarch64_general_check_builtin_call (location_t, vec,
 
 namespace aarch64_sve {
   void init_builtins ();
-  void handle_arm_sve_h ();
-  void handle_arm_sme_h ();
-  void handle_arm_neon_sve_bridge_h ();
+  void handle_arm_sve_h (bool);
+  void handle_arm_sme_h (bool);
+  void handle_arm_neon_sve_bridge_h (bool);
   tree builtin_decl (unsigned, bool);
   bool builtin_type_p (const_tree);
   bool builtin_type_p (const_tree, unsigned int *, unsigned int *);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index e66729ed63532811b3b16ab57ae11cb10518caca..3e0d2b8560c538d201c97fba3591ddf9893d664a 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -123,6 +123,15 @@ enum units_index
   UNITS_vectors
 };
 
+/* Enumerates the pragma handlers.  */
+enum handle_pragma_index
+{
+  arm_sve_handle,
+  arm_sme_handle,
+  arm_neon_sve_handle,
+  NUM_PRAGMA_HANDLERS
+};
+
 /* Describes the various uses of a governing predicate.  */
 enum predication_index
 {
@@ -419,7 +428,7 @@ class registered_function;
 class function_builder
 {
 public:
-  function_builder ();
+  function_builder (handle_pragma_index, bool);
   ~function_builder ();
 
   void add_unique_function (const function_instance &, tree,
@@ -453,6 +462,12 @@ private:
 
   /* Used for building up function names.  */
   obstack m_string_obstack;
+
+  /* Used to store the index for the current function.  */
+  unsigned int m_function_index;
+
+  /* Stores the mode of the current pragma handler.  */
+  bool m_function_nulls;
 };
 
 /* A base class for handling calls to built-in functions.  */
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 11f5c5c500c8331094933cb1c1205a1360eca79b..924bfeb3e23bb1bbed79d36adf33286b87b52f85 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -933,14 +933,19 @@ tree acle_svprfop;
 /* The list of all registered function decls, indexed by code.  */
 

[PATCH][wwwdocs] Add NEON-SVE bridge intrinsics to changes.html

2024-04-08 Thread Richard Ball
Hi all,

Adding the NEON-SVE bridge intrinsics that were missed
in the last patch.

Thanks,
Richard
diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 
9fd224c1df3f05eadcedaaa41c0859e712b93b78..df63af48298564de9c35bab1dd35891c2581e3d6
 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -420,6 +420,12 @@ a work-in-progress.
   -march=armv8.2-a or higher to be specified.  Likewise, the
   intrinsics enabled by +memtag no longer require
   -march=armv8.5-a.
+  Support for the
+  https://github.com/ARM-software/acle/blob/main/main/acle.md#neon-sve-bridge";>
+  NEON-SVE Bridge intrinsics.
+  These are intrinsics that allow conversions between NEON and SVE vectors,
+  enabled through the inclusion of the arm_neon_sve_bridge.h 
header.
+  
 
   The option -mtp= is now supported for changing the TPIDR
register used for TLS accesses.  For more details please refer to the


[PATCH][GCC] aarch64: Fix SCHEDULER_IDENT for Cortex-A520

2024-03-12 Thread Richard Ball
The SCHEDULER_IDENT for this CPU was incorrectly set to cortexa55.
This can cause sub-optimal asm to be generated.

Ok for trunk?

gcc/ChangeLog:
PR target/114272
* config/aarch64/aarch64-cores.def (AARCH64_CORE):
Change SCHEDULER_IDENT from cortexa55 to cortexa53
for Cortex-A520.
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 
ea84dcb11a2c11fb7d8ada3b66b3caaa39f08daf..f69fc212d56418887e29615f8acdeb02e39750c6
 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -171,7 +171,7 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), 
cortexa53, 0x41, 0xd15
 /* Arm ('A') cores. */
 AARCH64_CORE("cortex-a510",  cortexa510, cortexa53, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1)
 
-AARCH64_CORE("cortex-a520",  cortexa520, cortexa55, V9_2A,  (SVE2_BITPERM, 
MEMTAG), cortexa53, 0x41, 0xd80, -1)
+AARCH64_CORE("cortex-a520",  cortexa520, cortexa53, V9_2A,  (SVE2_BITPERM, 
MEMTAG), cortexa53, 0x41, 0xd80, -1)
 
 AARCH64_CORE("cortex-a710",  cortexa710, cortexa57, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd47, -1)
 


[PATCH][GCC] aarch64: Fix SCHEDULER_IDENT for Cortex-A510

2024-03-12 Thread Richard Ball
The SCHEDULER_IDENT for this CPU was incorrectly set to cortexa55.
This can cause sub-optimal asm to be generated.

Ok for trunk?

Can I also backport this to gcc-12 and gcc-13?

gcc/ChangeLog:
PR target/114272
* config/aarch64/aarch64-cores.def (AARCH64_CORE):
Change SCHEDULER_IDENT from cortexa55 to cortexa53
for Cortex-A510.
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 
7ebefcf543b6f84b3df22ab836728111b56fa76f..ea84dcb11a2c11fb7d8ada3b66b3caaa39f08daf
 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -169,7 +169,7 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), 
cortexa53, 0x41, 0xd15
 /* Armv9.0-A Architecture Processors.  */
 
 /* Arm ('A') cores. */
-AARCH64_CORE("cortex-a510",  cortexa510, cortexa55, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1)
+AARCH64_CORE("cortex-a510",  cortexa510, cortexa53, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1)
 
 AARCH64_CORE("cortex-a520",  cortexa520, cortexa55, V9_2A,  (SVE2_BITPERM, 
MEMTAG), cortexa53, 0x41, 0xd80, -1)
 


[PATCH] aarch64: Fix ACLE SME streaming mode error in neon-sve-bridge

2024-02-01 Thread Richard Ball
When using LTO, handling the pragma for sme before the pragma
for the neon-sve-bridge caused the following error on svset_neonq,
in the neon-sve-bridge.c test.

error: ACLE function '0' can only be called when SME streaming mode is enabled.

Handling the pragmas the other way around fixes this.

No regressions on aarch64-none-elf.


gcc/ChangeLog:

* config/aarch64/aarch64-sve-builtins.cc (init_builtins):
Re-order handle pragma functions for lto.
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index c2f1486315f02c3e38f808b3124a9b4cf49a4708..10f1df233ca9ae4e4a639a45a25db1f055a7d7c3 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -4511,8 +4511,8 @@ init_builtins ()
   if (in_lto_p)
 {
   handle_arm_sve_h ();
-  handle_arm_sme_h ();
   handle_arm_neon_sve_bridge_h ();
+  handle_arm_sme_h ();
 }
 }
 


[PATCH v2] middle-end: Fix ICE in poly-int.h due to SLP.

2024-02-01 Thread Richard Ball
Adds a check to ensure that the input vector arguments
to a function are not variable length. Previously, only the
output vector of a function was checked.

The ICE in question is within the neon-sve-bridge.c test,
and is related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111268

gcc/ChangeLog:

* tree-vect-slp.cc (vectorizable_slp_permutation_1):
Add variable-length check for vector input arguments
to a function.
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 086377a9ac08528880c90bee3b1d1e08341a6288..7cf9504398c98cccfdba3b91c3e995f73f8b5d50 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8987,7 +8987,8 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi,
 {
   /* Calculate every element of every permute mask vector explicitly,
 	 instead of relying on the pattern described above.  */
-  if (!nunits.is_constant (&npatterns))
+  if (!nunits.is_constant (&npatterns)
+	  || !TYPE_VECTOR_SUBPARTS (op_vectype).is_constant ())
 	return -1;
   nelts_per_pattern = ncopies = 1;
   if (loop_vec_info linfo = dyn_cast <loop_vec_info> (vinfo))


Re: [PATCH] aarch64: Fix ICE in poly-int.h due to SLP.

2024-01-31 Thread Richard Ball
Hi Prathamesh,

Thanks for the review, I missed that code up above.
I've looked into this and it seems, to me at least, that what you have
suggested is equivalent.
I'll make the change and repost.

Thanks,
Richard

From: Prathamesh Kulkarni 
Sent: 30 January 2024 17:36
To: Richard Ball 
Cc: gcc-patches@gcc.gnu.org ; Richard Sandiford 
; Kyrylo Tkachov ; Richard 
Earnshaw ; Marcus Shawcroft 
Subject: Re: [PATCH] aarch64: Fix ICE in poly-int.h due to SLP.

On Tue, 30 Jan 2024 at 20:13, Richard Ball  wrote:
>
> Adds a check to ensure that the input vector arguments
> to a function are not variable length. Previously, only the
> output vector of a function was checked.
Hi,
Quoting from patch:
@@ -8989,6 +8989,14 @@ vectorizable_slp_permutation_1 (vec_info
*vinfo, gimple_stmt_iterator *gsi,
   instead of relying on the pattern described above.  */
   if (!nunits.is_constant (&npatterns))
  return -1;
+  FOR_EACH_VEC_ELT (children, i, child)
+ if (SLP_TREE_VECTYPE (child))
+   {
+ tree child_vectype = SLP_TREE_VECTYPE (child);
+ poly_uint64 child_nunits = TYPE_VECTOR_SUBPARTS (child_vectype);
+ if (!child_nunits.is_constant ())
+   return -1;
+   }

Just wondering if that'd be equivalent to checking:
if (!TYPE_VECTOR_SUBPARTS (op_vectype).is_constant ())
  return -1;
Instead of (again) iterating over children since we bail out in the
function above,
if SLP_TREE_VECTYPE (child) and op_vectype are not compatible types ?

Also, could you please include the offending test-case in the patch ?

Thanks,
Prathamesh

>
> gcc/ChangeLog:
>
> * tree-vect-slp.cc (vectorizable_slp_permutation_1):
> Add variable-length check for vector input arguments
> to a function.


[PATCH] aarch64: Fix ICE in poly-int.h due to SLP.

2024-01-30 Thread Richard Ball
Adds a check to ensure that the input vector arguments
to a function are not variable length. Previously, only the
output vector of a function was checked.

gcc/ChangeLog:

* tree-vect-slp.cc (vectorizable_slp_permutation_1):
Add variable-length check for vector input arguments
to a function.
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 086377a9ac08528880c90bee3b1d1e08341a6288..bbd2c51d60baa31ea4169081442e2e9d55055e80 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8989,6 +8989,14 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi,
 	 instead of relying on the pattern described above.  */
   if (!nunits.is_constant (&npatterns))
 	return -1;
+  FOR_EACH_VEC_ELT (children, i, child)
+	if (SLP_TREE_VECTYPE (child))
+	  {
+	tree child_vectype = SLP_TREE_VECTYPE (child);
+	poly_uint64 child_nunits = TYPE_VECTOR_SUBPARTS (child_vectype);
+	if (!child_nunits.is_constant ())
+	  return -1;
+	  }
   nelts_per_pattern = ncopies = 1;
   if (loop_vec_info linfo = dyn_cast <loop_vec_info> (vinfo))
 	if (!LOOP_VINFO_VECT_FACTOR (linfo).is_constant (&ncopies))


[PATCH v2] arm: Fix missing bti instruction for virtual thunks

2024-01-26 Thread Richard Ball
v2: Formatting and test options fix.

Adds missing bti instruction at the beginning of a virtual
thunk, when bti is enabled.

gcc/ChangeLog:

* config/arm/arm.cc (arm_output_mi_thunk): Emit
insn for bti_c when bti is enabled.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Add v8_1_m_main_pacbti.
* g++.target/arm/bti_thunk.C: New test.
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index e5a944486d7bd583627b0e22dfe8f95862e975bb..c44047c377a802d0c1dc1406df1b88a6b079607b 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -29257,6 +29257,8 @@ arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
 
   assemble_start_function (thunk, fnname);
+  if (aarch_bti_enabled ())
+emit_insn (aarch_gen_bti_c ());
   if (TARGET_32BIT)
 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
   else
diff --git a/gcc/testsuite/g++.target/arm/bti_thunk.C b/gcc/testsuite/g++.target/arm/bti_thunk.C
new file mode 100644
index ..23c1acc66629ba5907830ae27de0b29a47032c65
--- /dev/null
+++ b/gcc/testsuite/g++.target/arm/bti_thunk.C
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_pacbti_ok } */
+/* { dg-add-options arm_arch_v8_1m_main_pacbti } */
+/* { dg-additional-options "-mbranch-protection=bti" }*/
+
+#include <stdio.h>
+
+struct C18 {
+  virtual void f7();
+};
+
+struct C19 : virtual C18 {
+  virtual void f7();
+};
+
+void C19::f7() {
+  printf("foo\n");
+}
+
+/* { dg-final { scan-assembler-times "\tbti" 2 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 73360cd3a0d5553ff7586b48e0bb17298e6612f0..3070b51b9b86e1a2bad630a765a075f8b911d2ad 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5524,6 +5524,8 @@ foreach { armfunc armflag armdefs } {
 		__ARM_ARCH_8M_BASE__
 	v8m_main "-march=armv8-m.main+fp -mthumb" __ARM_ARCH_8M_MAIN__
 	v8_1m_main "-march=armv8.1-m.main+fp -mthumb" __ARM_ARCH_8M_MAIN__
+	v8_1m_main_pacbti "-march=armv8.1-m.main+pacbti+fp -mthumb"
+		"__ARM_ARCH_8M_MAIN__ && __ARM_FEATURE_BTI"
 	v9a "-march=armv9-a+simd" __ARM_ARCH_9A__ } {
 eval [string map [list FUNC $armfunc FLAG $armflag DEFS $armdefs ] {
 	proc check_effective_target_arm_arch_FUNC_ok { } {


[PATCH] arm: Fix missing bti instruction for virtual thunks

2024-01-23 Thread Richard Ball
Adds missing bti instruction at the beginning of a virtual
thunk, when bti is enabled.

gcc/ChangeLog:

* config/arm/arm.cc (arm_output_mi_thunk): Emit
insn for bti_c when bti is enabled.

gcc/testsuite/ChangeLog:

* g++.target/arm/bti_thunk.C: New test.
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index e5a944486d7bd583627b0e22dfe8f95862e975bb..91eee8be7c1a59118fbf443557561fb3e0689d61 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -29257,6 +29257,8 @@ arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
 
   assemble_start_function (thunk, fnname);
+  if (aarch_bti_enabled ())
+emit_insn (aarch_gen_bti_c());
   if (TARGET_32BIT)
 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
   else
diff --git a/gcc/testsuite/g++.target/arm/bti_thunk.C b/gcc/testsuite/g++.target/arm/bti_thunk.C
new file mode 100644
index ..5c4a8e5a8d74581eca2b877c000a5b34ddca0e9b
--- /dev/null
+++ b/gcc/testsuite/g++.target/arm/bti_thunk.C
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.1-m.main+pacbti -O1 -mbranch-protection=bti --save-temps" } */
+
+#include <stdio.h>
+
+struct C18 {
+  virtual void f7();
+};
+
+struct C19 : virtual C18 {
+  virtual void f7();
+};
+
+void C19::f7() {
+  printf("foo\n");
+}
+
+/* { dg-final { scan-assembler-times "\tbti" 2 } } */
\ No newline at end of file


[PATCH v4] aarch64: SVE/NEON Bridging intrinsics

2023-12-11 Thread Richard Ball
ACLE has added intrinsics to bridge between SVE and Neon.

The NEON_SVE Bridge adds intrinsics that allow conversions between NEON and
SVE vectors.

This patch adds support to GCC for the following 3 intrinsics:
svset_neonq, svget_neonq and svdup_neonq

gcc/ChangeLog:

* config.gcc: Adds new header to config.
* config/aarch64/aarch64-builtins.cc (enum aarch64_type_qualifiers):
Moved to header file.
(ENTRY): Likewise.
(enum aarch64_simd_type): Likewise.
(struct aarch64_simd_type_info): Remove static.
(GTY): Likewise.
* config/aarch64/aarch64-c.cc (aarch64_pragma_aarch64):
Defines pragma for arm_neon_sve_bridge.h.
* config/aarch64/aarch64-sve-builtins-base.h: New intrinsics.
* config/aarch64/aarch64-sve-builtins-base.cc
(class svget_neonq_impl): New intrinsic implementation.
(class svset_neonq_impl): Likewise.
(class svdup_neonq_impl): Likewise.
(NEON_SVE_BRIDGE_FUNCTION): New intrinsics.
* config/aarch64/aarch64-sve-builtins-functions.h
(NEON_SVE_BRIDGE_FUNCTION): Defines macro for NEON_SVE_BRIDGE
functions.
* config/aarch64/aarch64-sve-builtins-shapes.h: New shapes.
* config/aarch64/aarch64-sve-builtins-shapes.cc
(parse_element_type): Add NEON element types.
(parse_type): Likewise.
(struct get_neonq_def): Defines function shape for get_neonq.
(struct set_neonq_def): Defines function shape for set_neonq.
(struct dup_neonq_def): Defines function shape for dup_neonq.
* config/aarch64/aarch64-sve-builtins.cc 
(DEF_SVE_TYPE_SUFFIX): Changed to be called through
SVE_NEON macro.
(DEF_SVE_NEON_TYPE_SUFFIX): Defines 
macro for NEON_SVE_BRIDGE type suffixes.
(DEF_NEON_SVE_FUNCTION): Defines 
macro for NEON_SVE_BRIDGE functions.
(function_resolver::infer_neon128_vector_type): Infers type suffix
for overloaded functions.
(init_neon_sve_builtins): Initialise neon_sve_bridge_builtins for LTO.
(handle_arm_neon_sve_bridge_h): Handles #pragma arm_neon_sve_bridge.h.
* config/aarch64/aarch64-sve-builtins.def
(DEF_SVE_NEON_TYPE_SUFFIX): Macro for handling neon_sve type suffixes.
(bf16): Replace entry with neon-sve entry.
(f16): Likewise.
(f32): Likewise.
(f64): Likewise.
(s8): Likewise.
(s16): Likewise.
(s32): Likewise.
(s64): Likewise.
(u8): Likewise.
(u16): Likewise.
(u32): Likewise.
(u64): Likewise.
* config/aarch64/aarch64-sve-builtins.h
(GCC_AARCH64_SVE_BUILTINS_H): Include aarch64-builtins.h.
(ENTRY): Add aarch64_simd_type definition.
(enum aarch64_simd_type): Add neon information to type_suffix_info.
(struct type_suffix_info): New function.
* config/aarch64/aarch64-sve.md
(@aarch64_sve_get_neonq_): New intrinsic insn for big endian.
(@aarch64_sve_set_neonq_): Likewise.
* config/aarch64/aarch64.cc 
(aarch64_init_builtins): Add call to init_neon_sve_builtins.
* config/aarch64/iterators.md: Add UNSPEC_SET_NEONQ.
* config/aarch64/aarch64-builtins.h: New file.
* config/aarch64/aarch64-neon-sve-bridge-builtins.def: New file.
* config/aarch64/arm_neon_sve_bridge.h: New file.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Add include 
arm_neon_sve_bridge header file
* gcc.dg/torture/neon-sve-bridge.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_bf16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_f16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_f32.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_f64.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s32.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s64.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s8.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u32.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u64.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u8.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_bf16.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_f16.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_f32.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_f64.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_s16.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_s32.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_s64.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_s8.c: New test.

[Ping][PATCH v3] aarch64: SVE/NEON Bridging intrinsics

2023-11-21 Thread Richard Ball
Gentle Ping for the patch below:

On 11/9/23 16:14, Richard Ball wrote:
> ACLE has added intrinsics to bridge between SVE and Neon.
> 
> The NEON_SVE Bridge adds intrinsics that allow conversions between NEON and
> SVE vectors.
> 
> This patch adds support to GCC for the following 3 intrinsics:
> svset_neonq, svget_neonq and svdup_neonq
> 
> gcc/ChangeLog:
> 
>   * config.gcc: Adds new header to config.
>   * config/aarch64/aarch64-builtins.cc (enum aarch64_type_qualifiers):
>   Moved to header file.
>   (ENTRY): Likewise.
>   (enum aarch64_simd_type): Likewise.
>   (struct aarch64_simd_type_info): Make extern.
>   (GTY): Likewise.
>   * config/aarch64/aarch64-c.cc (aarch64_pragma_aarch64):
>   Defines pragma for arm_neon_sve_bridge.h.
>   * config/aarch64/aarch64-protos.h: New function.
>   * config/aarch64/aarch64-sve-builtins-base.h: New intrinsics.
>   * config/aarch64/aarch64-sve-builtins-base.cc
>   (class svget_neonq_impl): New intrinsic implementation.
>   (class svset_neonq_impl): Likewise.
>   (class svdup_neonq_impl): Likewise.
>   (NEON_SVE_BRIDGE_FUNCTION): New intrinsics.
>   * config/aarch64/aarch64-sve-builtins-functions.h
>   (NEON_SVE_BRIDGE_FUNCTION): Defines macro for NEON_SVE_BRIDGE
>   functions.
>   * config/aarch64/aarch64-sve-builtins-shapes.h: New shapes.
>   * config/aarch64/aarch64-sve-builtins-shapes.cc
>   (parse_element_type): Add NEON element types.
>   (parse_type): Likewise.
>   (struct get_neonq_def): Defines function shape for get_neonq.
>   (struct set_neonq_def): Defines function shape for set_neonq.
>   (struct dup_neonq_def): Defines function shape for dup_neonq.
>   * config/aarch64/aarch64-sve-builtins.cc (DEF_SVE_TYPE_SUFFIX):
>   (DEF_SVE_NEON_TYPE_SUFFIX): Defines 
> macro for NEON_SVE_BRIDGE type suffixes.
>   (DEF_NEON_SVE_FUNCTION): Defines 
> macro for NEON_SVE_BRIDGE functions.
>   (function_resolver::infer_neon128_vector_type): Infers type suffix
>   for overloaded functions.
>   (init_neon_sve_builtins): Initialise neon_sve_bridge_builtins for LTO.
>   (handle_arm_neon_sve_bridge_h): Handles #pragma arm_neon_sve_bridge.h.
>   * config/aarch64/aarch64-sve-builtins.def
>   (DEF_SVE_NEON_TYPE_SUFFIX): Macro for handling neon_sve type suffixes.
>   (bf16): Replace entry with neon-sve entry.
>   (f16): Likewise.
>   (f32): Likewise.
>   (f64): Likewise.
>   (s8): Likewise.
>   (s16): Likewise.
>   (s32): Likewise.
>   (s64): Likewise.
>   (u8): Likewise.
>   (u16): Likewise.
>   (u32): Likewise.
>   (u64): Likewise.
>   * config/aarch64/aarch64-sve-builtins.h
>   (GCC_AARCH64_SVE_BUILTINS_H): Include aarch64-builtins.h.
>   (ENTRY): Add aarch64_simd_type definition.
>   (enum aarch64_simd_type): Add neon information to type_suffix_info.
>   (struct type_suffix_info): New function.
>   * config/aarch64/aarch64-sve.md
>   (@aarch64_sve_get_neonq_): New intrinsic insn for big endian.
>   (@aarch64_sve_set_neonq_): Likewise.
>   (@aarch64_sve_dup_neonq_): Likewise.
>   * config/aarch64/aarch64.cc 
>   (aarch64_init_builtins): Add call to init_neon_sve_builtins.
> (aarch64_output_sve_set_neonq): asm output for Big Endian set_neonq.
>   * config/aarch64/iterators.md: Add UNSPEC_SET_NEONQ.
>   * config/aarch64/aarch64-builtins.h: New file.
>   * config/aarch64/aarch64-neon-sve-bridge-builtins.def: New file.
>   * config/aarch64/arm_neon_sve_bridge.h: New file.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Add include 
>   arm_neon_sve_bridge header file
>   * gcc.dg/torture/neon-sve-bridge.c: New test.
>   * gcc.target/aarch64/sve/acle/asm/dup_neonq_bf16.c: New test.
>   * gcc.target/aarch64/sve/acle/asm/dup_neonq_f16.c: New test.
>   * gcc.target/aarch64/sve/acle/asm/dup_neonq_f32.c: New test.
>   * gcc.target/aarch64/sve/acle/asm/dup_neonq_f64.c: New test.
>   * gcc.target/aarch64/sve/acle/asm/dup_neonq_s16.c: New test.
>   * gcc.target/aarch64/sve/acle/asm/dup_neonq_s32.c: New test.
>   * gcc.target/aarch64/sve/acle/asm/dup_neonq_s64.c: New test.
>   * gcc.target/aarch64/sve/acle/asm/dup_neonq_s8.c: New test.
>   * gcc.target/aarch64/sve/acle/asm/dup_neonq_u16.c: New test.
>   * gcc.target/aarch64/sve/acle/asm/dup_neonq_u32.c: New test.
>   * gcc.target/aarch64/sve/acle/asm/dup_neonq_u64.c: New test.
>   * gcc.target/aarch64/sve/acle/asm/dup_neonq_u8.c: New test.
>   * gcc.target/aarch64/sve/acle/asm/get_neonq_bf16.

[PATCH v3] aarch64: SVE/NEON Bridging intrinsics

2023-11-09 Thread Richard Ball
ACLE has added intrinsics to bridge between SVE and Neon.

The NEON_SVE Bridge adds intrinsics that allow conversions between NEON and
SVE vectors.

This patch adds support to GCC for the following 3 intrinsics:
svset_neonq, svget_neonq and svdup_neonq

gcc/ChangeLog:

* config.gcc: Adds new header to config.
* config/aarch64/aarch64-builtins.cc (enum aarch64_type_qualifiers):
Moved to header file.
(ENTRY): Likewise.
(enum aarch64_simd_type): Likewise.
(struct aarch64_simd_type_info): Make extern.
(GTY): Likewise.
* config/aarch64/aarch64-c.cc (aarch64_pragma_aarch64):
Defines pragma for arm_neon_sve_bridge.h.
* config/aarch64/aarch64-protos.h: New function.
* config/aarch64/aarch64-sve-builtins-base.h: New intrinsics.
* config/aarch64/aarch64-sve-builtins-base.cc
(class svget_neonq_impl): New intrinsic implementation.
(class svset_neonq_impl): Likewise.
(class svdup_neonq_impl): Likewise.
(NEON_SVE_BRIDGE_FUNCTION): New intrinsics.
* config/aarch64/aarch64-sve-builtins-functions.h
(NEON_SVE_BRIDGE_FUNCTION): Defines macro for NEON_SVE_BRIDGE
functions.
* config/aarch64/aarch64-sve-builtins-shapes.h: New shapes.
* config/aarch64/aarch64-sve-builtins-shapes.cc
(parse_element_type): Add NEON element types.
(parse_type): Likewise.
(struct get_neonq_def): Defines function shape for get_neonq.
(struct set_neonq_def): Defines function shape for set_neonq.
(struct dup_neonq_def): Defines function shape for dup_neonq.
* config/aarch64/aarch64-sve-builtins.cc (DEF_SVE_TYPE_SUFFIX):
(DEF_SVE_NEON_TYPE_SUFFIX): Defines 
macro for NEON_SVE_BRIDGE type suffixes.
(DEF_NEON_SVE_FUNCTION): Defines 
macro for NEON_SVE_BRIDGE functions.
(function_resolver::infer_neon128_vector_type): Infers type suffix
for overloaded functions.
(init_neon_sve_builtins): Initialise neon_sve_bridge_builtins for LTO.
(handle_arm_neon_sve_bridge_h): Handles #pragma arm_neon_sve_bridge.h.
* config/aarch64/aarch64-sve-builtins.def
(DEF_SVE_NEON_TYPE_SUFFIX): Macro for handling neon_sve type suffixes.
(bf16): Replace entry with neon-sve entry.
(f16): Likewise.
(f32): Likewise.
(f64): Likewise.
(s8): Likewise.
(s16): Likewise.
(s32): Likewise.
(s64): Likewise.
(u8): Likewise.
(u16): Likewise.
(u32): Likewise.
(u64): Likewise.
* config/aarch64/aarch64-sve-builtins.h
(GCC_AARCH64_SVE_BUILTINS_H): Include aarch64-builtins.h.
(ENTRY): Add aarch64_simd_type definition.
(enum aarch64_simd_type): Add neon information to type_suffix_info.
(struct type_suffix_info): New function.
* config/aarch64/aarch64-sve.md
(@aarch64_sve_get_neonq_): New intrinsic insn for big endian.
(@aarch64_sve_set_neonq_): Likewise.
(@aarch64_sve_dup_neonq_): Likewise.
* config/aarch64/aarch64.cc 
(aarch64_init_builtins): Add call to init_neon_sve_builtins.
(aarch64_output_sve_set_neonq): asm output for Big Endian set_neonq.
* config/aarch64/iterators.md: Add UNSPEC_SET_NEONQ.
* config/aarch64/aarch64-builtins.h: New file.
* config/aarch64/aarch64-neon-sve-bridge-builtins.def: New file.
* config/aarch64/arm_neon_sve_bridge.h: New file.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Add include 
arm_neon_sve_bridge header file
* gcc.dg/torture/neon-sve-bridge.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_bf16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_f16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_f32.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_f64.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s32.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s64.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s8.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u32.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u64.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u8.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_bf16.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_f16.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_f32.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_f64.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_s16.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_s32.c: New test.
* gcc.target/aarch64/s

RE: [PATCH v2] aarch64: SVE/NEON Bridging intrinsics

2023-10-28 Thread Richard Ball
Hi all,

Please disregard patch v2, some necessary changes have become apparent after 
submission.
I will make these changes and submit as patch v3.

Apologies,
Richard Ball

-----Original Message-----
From: Richard Ball  
Sent: Friday, October 27, 2023 6:18 PM
To: gcc-patches@gcc.gnu.org; Richard Earnshaw ; 
Richard Sandiford ; Kyrylo Tkachov 
; Marcus Shawcroft 
Subject: [PATCH v2] aarch64: SVE/NEON Bridging intrinsics

ACLE has added intrinsics to bridge between SVE and Neon.

The NEON_SVE Bridge adds intrinsics that allow conversions between NEON and SVE 
vectors.

This patch adds support to GCC for the following 3 intrinsics:
svset_neonq, svget_neonq and svdup_neonq

gcc/ChangeLog:

* config.gcc: Adds new header to config.
* config/aarch64/aarch64-builtins.cc (enum aarch64_type_qualifiers):
Moved to header file.
(ENTRY): Likewise.
(enum aarch64_simd_type): Likewise.
(struct aarch64_simd_type_info): Make extern.
(GTY): Likewise.
* config/aarch64/aarch64-c.cc (aarch64_pragma_aarch64):
Defines pragma for arm_neon_sve_bridge.h.
* config/aarch64/aarch64-protos.h: New function.
* config/aarch64/aarch64-sve-builtins-base.h: New intrinsics.
* config/aarch64/aarch64-sve-builtins-base.cc
(class svget_neonq_impl): New intrinsic implementation.
(class svset_neonq_impl): Likewise.
(class svdup_neonq_impl): Likewise.
(NEON_SVE_BRIDGE_FUNCTION): New intrinsics.
* config/aarch64/aarch64-sve-builtins-functions.h
(NEON_SVE_BRIDGE_FUNCTION): Defines macro for NEON_SVE_BRIDGE
functions.
* config/aarch64/aarch64-sve-builtins-shapes.h: New shapes.
* config/aarch64/aarch64-sve-builtins-shapes.cc
(parse_element_type): Add NEON element types.
(parse_type): Likewise.
(struct get_neonq_def): Defines function shape for get_neonq.
(struct set_neonq_def): Defines function shape for set_neonq.
(struct dup_neonq_def): Defines function shape for dup_neonq.
* config/aarch64/aarch64-sve-builtins.cc (DEF_SVE_TYPE_SUFFIX):
(DEF_SVE_NEON_TYPE_SUFFIX): Defines 
macro for NEON_SVE_BRIDGE type suffixes.
(DEF_NEON_SVE_FUNCTION): Defines 
macro for NEON_SVE_BRIDGE functions.
(function_resolver::infer_neon128_vector_type): Infers type suffix
for overloaded functions.
(init_neon_sve_builtins): Initialise neon_sve_bridge_builtins for LTO.
(handle_arm_neon_sve_bridge_h): Handles #pragma arm_neon_sve_bridge.h.
* config/aarch64/aarch64-sve-builtins.def
(DEF_SVE_NEON_TYPE_SUFFIX): Macro for handling neon_sve type suffixes.
(bf16): Replace entry with neon-sve entry.
(f16): Likewise.
(f32): Likewise.
(f64): Likewise.
(s8): Likewise.
(s16): Likewise.
(s32): Likewise.
(s64): Likewise.
(u8): Likewise.
(u16): Likewise.
(u32): Likewise.
(u64): Likewise.
* config/aarch64/aarch64-sve-builtins.h
(GCC_AARCH64_SVE_BUILTINS_H): Include aarch64-builtins.h.
(ENTRY): Add aarch64_simd_type definition.
(enum aarch64_simd_type): Add neon information to type_suffix_info.
(struct type_suffix_info): New function.
* config/aarch64/aarch64-sve.md
(@aarch64_sve_get_neonq_): New intrinsic insn for big endian.
(@aarch64_sve_set_neonq_): Likewise.
(@aarch64_sve_dup_neonq_): Likewise.
* config/aarch64/aarch64.cc (aarch64_init_builtins):
Add call to init_neon_sve_builtins.
* config/aarch64/iterators.md: Add UNSPEC_SET_NEONQ.
* config/aarch64/aarch64-builtins.h: New file.
* config/aarch64/aarch64-neon-sve-bridge-builtins.def: New file.
* config/aarch64/arm_neon_sve_bridge.h: New file.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Add include 
arm_neon_sve_bridge header file
* gcc.dg/torture/neon-sve-bridge.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_bf16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_f16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_f32.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_f64.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s32.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s64.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s8.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u32.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u64.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u8.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_bf16.c: New test.
* gcc.target

[PATCH v2] aarch64: SVE/NEON Bridging intrinsics

2023-10-27 Thread Richard Ball
ACLE has added intrinsics to bridge between SVE and Neon.

The NEON_SVE Bridge adds intrinsics that allow conversions between NEON and
SVE vectors.

This patch adds support to GCC for the following 3 intrinsics:
svset_neonq, svget_neonq and svdup_neonq

gcc/ChangeLog:

* config.gcc: Adds new header to config.
* config/aarch64/aarch64-builtins.cc (enum aarch64_type_qualifiers):
Moved to header file.
(ENTRY): Likewise.
(enum aarch64_simd_type): Likewise.
(struct aarch64_simd_type_info): Make extern.
(GTY): Likewise.
* config/aarch64/aarch64-c.cc (aarch64_pragma_aarch64):
Defines pragma for arm_neon_sve_bridge.h.
* config/aarch64/aarch64-protos.h: New function.
* config/aarch64/aarch64-sve-builtins-base.h: New intrinsics.
* config/aarch64/aarch64-sve-builtins-base.cc
(class svget_neonq_impl): New intrinsic implementation.
(class svset_neonq_impl): Likewise.
(class svdup_neonq_impl): Likewise.
(NEON_SVE_BRIDGE_FUNCTION): New intrinsics.
* config/aarch64/aarch64-sve-builtins-functions.h
(NEON_SVE_BRIDGE_FUNCTION): Defines macro for NEON_SVE_BRIDGE
functions.
* config/aarch64/aarch64-sve-builtins-shapes.h: New shapes.
* config/aarch64/aarch64-sve-builtins-shapes.cc
(parse_element_type): Add NEON element types.
(parse_type): Likewise.
(struct get_neonq_def): Defines function shape for get_neonq.
(struct set_neonq_def): Defines function shape for set_neonq.
(struct dup_neonq_def): Defines function shape for dup_neonq.
* config/aarch64/aarch64-sve-builtins.cc (DEF_SVE_TYPE_SUFFIX):
(DEF_SVE_NEON_TYPE_SUFFIX): Defines 
macro for NEON_SVE_BRIDGE type suffixes.
(DEF_NEON_SVE_FUNCTION): Defines 
macro for NEON_SVE_BRIDGE functions.
(function_resolver::infer_neon128_vector_type): Infers type suffix
for overloaded functions.
(init_neon_sve_builtins): Initialise neon_sve_bridge_builtins for LTO.
(handle_arm_neon_sve_bridge_h): Handles #pragma arm_neon_sve_bridge.h.
* config/aarch64/aarch64-sve-builtins.def
(DEF_SVE_NEON_TYPE_SUFFIX): Macro for handling neon_sve type suffixes.
(bf16): Replace entry with neon-sve entry.
(f16): Likewise.
(f32): Likewise.
(f64): Likewise.
(s8): Likewise.
(s16): Likewise.
(s32): Likewise.
(s64): Likewise.
(u8): Likewise.
(u16): Likewise.
(u32): Likewise.
(u64): Likewise.
* config/aarch64/aarch64-sve-builtins.h
(GCC_AARCH64_SVE_BUILTINS_H): Include aarch64-builtins.h.
(ENTRY): Add aarch64_simd_type definition.
(enum aarch64_simd_type): Add neon information to type_suffix_info.
(struct type_suffix_info): New function.
* config/aarch64/aarch64-sve.md
(@aarch64_sve_get_neonq_): New intrinsic insn for big endian.
(@aarch64_sve_set_neonq_): Likewise.
(@aarch64_sve_dup_neonq_): Likewise.
* config/aarch64/aarch64.cc (aarch64_init_builtins):
Add call to init_neon_sve_builtins.
* config/aarch64/iterators.md: Add UNSPEC_SET_NEONQ.
* config/aarch64/aarch64-builtins.h: New file.
* config/aarch64/aarch64-neon-sve-bridge-builtins.def: New file.
* config/aarch64/arm_neon_sve_bridge.h: New file.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Add include 
arm_neon_sve_bridge header file
* gcc.dg/torture/neon-sve-bridge.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_bf16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_f16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_f32.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_f64.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s32.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s64.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_s8.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u16.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u32.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u64.c: New test.
* gcc.target/aarch64/sve/acle/asm/dup_neonq_u8.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_bf16.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_f16.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_f32.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_f64.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_s16.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_s32.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_s64.c: New test.
* gcc.target/aarch64/sve/acle/a

[PATCH 2/2 v2] arm: move the switch tables for Arm to the RO data section

2023-10-27 Thread Richard Ball
v2: Formatting and nits fixed.

Follow up patch to arm: Use deltas for Arm switch tables
This patch moves the switch tables for Arm from the .text section
into the .rodata section.

gcc/ChangeLog:

* config/arm/aout.h: Change to use the Lrtx label.
* config/arm/arm.h (CASE_VECTOR_PC_RELATIVE): Remove arm targets
from (!target_pure_code) condition.
(ADDR_VEC_ALIGN): Add align for tables in rodata section.
* config/arm/arm.cc (arm_output_casesi): Alter the function to include
.Lrtx label and remove adr instructions.
* config/arm/arm.md
(arm_casesi_internal): Use force_reg to generate ldr instructions that
would otherwise be out of range, and change rtl to accommodate force 
reg.
Additionally remove unnecessary register temp.
(casesi): Remove pure code check for Arm.
* config/arm/elf.h (JUMP_TABLES_IN_TEXT_SECTION): Remove arm
targets from JUMP_TABLES_IN_TEXT_SECTION definition.

gcc/testsuite/ChangeLog:

* gcc.target/arm/arm-switchstatement.c: Alter the tests to
change adr instruction to ldr.

diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index 
6a4c8da5f6d5a1695518f42830b9d045888eeed6..49896bb962081a5ee4b5328029813c681c489a9e
 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -187,16 +187,16 @@
  switch (GET_MODE (body))  \
{   \
case E_QImode:  \
- asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d-4)/4\n",  \
+ asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LLrtx%d-4)/4\n",   \
   VALUE, REL); \
  break;\
case E_HImode:  \
- asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d-4)/4\n", \
+ asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LLrtx%d-4)/4\n",  \
   VALUE, REL); \
  break;\
case E_SImode:  \
  if (flag_pic) \
-   asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d-4\n",\
+   asm_fprintf (STREAM, "\t.word\t%LL%d-%LLrtx%d-4\n", \
 VALUE, REL);   \
  else  \
asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE);\
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 
3063e3489094f04ecf03a52952c185d4a75da645..a9c2752c0ea5ecd4597ded254e9426753ac0a098
 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -2092,10 +2092,11 @@ enum arm_auto_incmodes
for the index in the tablejump instruction.  */
 #define CASE_VECTOR_MODE Pmode
 
-#define CASE_VECTOR_PC_RELATIVE ((TARGET_ARM || TARGET_THUMB2  \
- || (TARGET_THUMB1 \
- && (optimize_size || flag_pic)))  \
-&& (!target_pure_code))
+#define CASE_VECTOR_PC_RELATIVE
\
+   (TARGET_ARM \
+|| (!target_pure_code  \
+   && (TARGET_THUMB2   \
+   || (TARGET_THUMB1 && (optimize_size || flag_pic)
 
 
 #define CASE_VECTOR_SHORTEN_MODE(min, max, body)   \
@@ -2301,8 +2302,14 @@ extern int making_const_table;
asm_fprintf (STREAM, "\tpop {%r}\n", REGNO);\
 } while (0)
 
-#define ADDR_VEC_ALIGN(JUMPTABLE)  \
-  ((TARGET_THUMB && GET_MODE (PATTERN (JUMPTABLE)) == SImode) ? 2 : 0)
+/* If the switch table is in the code segment, additional alignment is
+   needed for Thumb SImode tables.  Otherwise, tables in RO data have
+   natural alignment.  */
+#define ADDR_VEC_ALIGN(TABLE)  \
+  (JUMP_TABLES_IN_TEXT_SECTION \
+   ? ((TARGET_THUMB && GET_MODE (PATTERN (TABLE)) == SImode) ? 2 : 0)  \
+   : (exact_log2 (GET_MODE_ALIGNMENT (GET_MODE (PATTERN (TABLE)))  \
+ / BITS_PER_UNIT)))
 
 /* Alignment for case labels comes from ADDR_VEC_ALIGN; avoid the
default alignment from elfos.h.  */
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 
4e5e6997ed555372683e01b2aff5c25265f4e50c..620ef7bfb2f3af9b8de576359a6157190c439aad
 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -30469,44 +30469,55 @@ arm_output_iwmmxt_tinsr (rtx *operands)
 const char *
 arm_output_ca

[PATCH 2/2] arm: move the switch tables for Arm to the RO data section.

2023-09-28 Thread Richard Ball
Follow up patch to arm: Use deltas for Arm switch tables
This patch moves the switch tables for Arm from the .text section
into the .rodata section.

gcc/ChangeLog:

* config/arm/aout.h: Change to use the Lrtx label.
* config/arm/arm.h (CASE_VECTOR_PC_RELATIVE): Remove arm targets
from (!target_pure_code) condition.
(ADDR_VEC_ALIGN): Add align for tables in rodata section.
* config/arm/arm.cc (arm_output_casesi): Alter the function to include
.Lrtx label and remove adr instructions.
* config/arm/arm.md
(arm_casesi_internal): Use force_reg to generate ldr instructions that
would otherwise be out of range, and change rtl to accommodate force 
reg.
Additionally remove unnecessary register temp.
(casesi): Remove pure code check for Arm.
* config/arm/elf.h (JUMP_TABLES_IN_TEXT_SECTION): Remove arm
targets from JUMP_TABLES_IN_TEXT_SECTION definition.

gcc/testsuite/ChangeLog:

* gcc.target/arm/arm-switchstatement.c: Alter the tests to
change adr instruction to ldr.

diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index 
6a4c8da5f6d5a1695518f42830b9d045888eeed6..49896bb962081a5ee4b5328029813c681c489a9e
 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -187,16 +187,16 @@
  switch (GET_MODE (body))  \
{   \
case E_QImode:  \
- asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d-4)/4\n",  \
+ asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LLrtx%d-4)/4\n",   \
   VALUE, REL); \
  break;\
case E_HImode:  \
- asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d-4)/4\n", \
+ asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LLrtx%d-4)/4\n",  \
   VALUE, REL); \
  break;\
case E_SImode:  \
  if (flag_pic) \
-   asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d-4\n",\
+   asm_fprintf (STREAM, "\t.word\t%LL%d-%LLrtx%d-4\n", \
 VALUE, REL);   \
  else  \
asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE);\
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 
3063e3489094f04ecf03a52952c185d4a75da645..ba61cf6fb9e4969776b49b499ce2205a940385d0
 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -2092,10 +2092,10 @@ enum arm_auto_incmodes
for the index in the tablejump instruction.  */
 #define CASE_VECTOR_MODE Pmode
 
-#define CASE_VECTOR_PC_RELATIVE ((TARGET_ARM || TARGET_THUMB2  \
+#define CASE_VECTOR_PC_RELATIVE (TARGET_ARM || ((TARGET_THUMB2 \
  || (TARGET_THUMB1 \
  && (optimize_size || flag_pic)))  \
-&& (!target_pure_code))
+&& (!target_pure_code)))
 
 
 #define CASE_VECTOR_SHORTEN_MODE(min, max, body)   \
@@ -2301,8 +2301,14 @@ extern int making_const_table;
asm_fprintf (STREAM, "\tpop {%r}\n", REGNO);\
 } while (0)
 
-#define ADDR_VEC_ALIGN(JUMPTABLE)  \
-  ((TARGET_THUMB && GET_MODE (PATTERN (JUMPTABLE)) == SImode) ? 2 : 0)
+/* If the switch table is in the code segment, additional alignment is
+   needed for Thumb SImode tables.  Otherwise, tables in RO data have
+   natural alignment.  */
+#define ADDR_VEC_ALIGN(TABLE)  \
+  (JUMP_TABLES_IN_TEXT_SECTION \
+   ? ((TARGET_THUMB && GET_MODE (PATTERN (TABLE)) == SImode) ? 2 : 0)  \
+   : (exact_log2 (GET_MODE_ALIGNMENT (GET_MODE (PATTERN (TABLE)))  \
+ / BITS_PER_UNIT)))
 
 /* Alignment for case labels comes from ADDR_VEC_ALIGN; avoid the
default alignment from elfos.h.  */
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 
4e5e6997ed555372683e01b2aff5c25265f4e50c..c3a5ef274276cdef1b41690eb0ad7fd4f4218ecf
 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -30469,44 +30469,58 @@ arm_output_iwmmxt_tinsr (rtx *operands)
 const char *
 arm_output_casesi (rtx *operands)
 {
+  char buf[100];
+  char label[100];
   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
-
   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
-
   output_asm_insn ("cmp\t%0, %1", operands);
   output_asm_insn ("bhi\t%l3", operands);

[PATCH 1/2] arm: Use deltas for Arm switch tables

2023-09-28 Thread Richard Ball
For normal optimization for the Arm state in gcc we get an uncompressed
table of jump targets. This table sits in the middle of the text segment
and is far larger than necessary, especially at -Os.
This patch compresses the table to use deltas in a similar manner to
Thumb code generation.
Similar code is also used for -fpic where we currently generate a jump
to a jump. In this format the jumps are too dense for the hardware branch
predictor to handle accurately, so execution is likely to be very expensive.

Changes to switch statements for arm include a new function to handle the
assembly generation for different machine modes. This allows for more
optimisation to be performed in aout.h where arm has switched from using
ASM_OUTPUT_ADDR_VEC_ELT to using ASM_OUTPUT_ADDR_DIFF_ELT.
In ASM_OUTPUT_ADDR_DIFF_ELT new assembly generation options have been
added to utilise the different machine modes. Additional changes
made to the casesi expand and insn, CASE_VECTOR_PC_RELATIVE,
CASE_VECTOR_SHORTEN_MODE and LABEL_ALIGN_AFTER_BARRIER are all
to accommodate this new approach to switch statement generation.

New tests have been added and no regressions on arm-none-eabi.

gcc/ChangeLog:

* config/arm/aout.h (ASM_OUTPUT_ADDR_DIFF_ELT): Add table output
for different machine modes for arm.
* config/arm/arm-protos.h (arm_output_casesi): New prototype.
* config/arm/arm.h (CASE_VECTOR_PC_RELATIVE): Make arm use
ASM_OUTPUT_ADDR_DIFF_ELT.
(CASE_VECTOR_SHORTEN_MODE): Change table size calculation for
TARGET_ARM.
(LABEL_ALIGN_AFTER_BARRIER): Change to accommodate .p2align 2
for TARGET_ARM.
* config/arm/arm.cc (arm_output_casesi): New function.
* config/arm/arm.md (arm_casesi_internal): Change casesi expand
and insn for arm to use new function arm_output_casesi.

gcc/testsuite/ChangeLog:

* gcc.target/arm/arm-switchstatement.c: New test.

diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index 
57c3b9b7b8b02f15e191ffcb9446f0edf27bbce6..6a4c8da5f6d5a1695518f42830b9d045888eeed6
 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -183,7 +183,28 @@
   do   \
 {  \
   if (TARGET_ARM)  \
-   asm_fprintf (STREAM, "\tb\t%LL%d\n", VALUE);\
+   {   \
+ switch (GET_MODE (body))  \
+   {   \
+   case E_QImode:  \
+ asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d-4)/4\n",  \
+  VALUE, REL); \
+ break;\
+   case E_HImode:  \
+ asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d-4)/4\n", \
+  VALUE, REL); \
+ break;\
+   case E_SImode:  \
+ if (flag_pic) \
+   asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d-4\n",\
+VALUE, REL);   \
+ else  \
+   asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE);\
+ break;\
+   default:\
+ gcc_unreachable ();   \
+   }   \
+   }   \
   else if (TARGET_THUMB1)  \
{   \
  if (flag_pic || optimize_size)\
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 
77e76336e94096975093c0c7c72a005993a4c27d..2f5ca79ed8ddd647b212782a0454ee4fefc07257
 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -261,6 +261,7 @@ extern void thumb_expand_cpymemqi (rtx *);
 extern rtx arm_return_addr (int, rtx);
 extern void thumb_reload_out_hi (rtx *);
 extern void thumb_set_return_address (rtx, rtx);
+extern const char *arm_output_casesi (rtx *);
 extern const char *thumb1_output_casesi (rtx *);
 extern const char *thumb2_output_casesi (rtx *);
 #endif
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 
4f54530adcb616413bf210dff

[PATCH v2][GCC] aarch64: Add support for Cortex-A720 CPU

2023-08-15 Thread Richard Ball via Gcc-patches

v2: Add missing PROFILE feature flag.

This patch adds support for the Cortex-A720 CPU to GCC.

No regressions on aarch64-none-elf.

Ok for master?

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (AARCH64_CORE): Add Cortex-
A720 CPU.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi: Document Cortex-A720 CPU.

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 
dbac497ef3aab410eb81db185b2e9532186888bb..73976e9a4c5e4f0b5c04bc7974e2006ddfd02fff
 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -176,6 +176,8 @@ AARCH64_CORE("cortex-a710",  cortexa710, cortexa57, V9A,  
(SVE2_BITPERM, MEMTAG,
 
 AARCH64_CORE("cortex-a715",  cortexa715, cortexa57, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd4d, -1)
 
+AARCH64_CORE("cortex-a720",  cortexa720, cortexa57, V9_2A,  (SVE2_BITPERM, 
MEMTAG, PROFILE), neoversen2, 0x41, 0xd81, -1)
+
 AARCH64_CORE("cortex-x2",  cortexx2, cortexa57, V9A,  (SVE2_BITPERM, MEMTAG, 
I8MM, BF16), neoversen2, 0x41, 0xd48, -1)
 
 AARCH64_CORE("cortex-x3",  cortexx3, cortexa57, V9A,  (SVE2_BITPERM, MEMTAG, 
I8MM, BF16), neoversen2, 0x41, 0xd4e, -1)
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 
2170980dddb0d5d410a49631ad26ff2e346b39dd..12d610f0f6580096eed9cf3de8ad3239efde5e4b
 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexx2,cortexx3,neoversen2,demeter,neoversev2"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,neoversen2,demeter,neoversev2"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 
2c870d3c34b587ffc721b1f18f99ecd66d4217be..62537d9d09e25f864c27534b7ac2ec467ea24789
 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -20517,7 +20517,8 @@ performance of the code.  Permissible values for this 
option are:
 @samp{cortex-a75.cortex-a55}, @samp{cortex-a76.cortex-a55},
 @samp{cortex-r82}, @samp{cortex-x1}, @samp{cortex-x1c}, @samp{cortex-x2},
 @samp{cortex-x3}, @samp{cortex-a510}, @samp{cortex-a520}, @samp{cortex-a710},
-@samp{cortex-a715}, @samp{ampere1}, @samp{ampere1a}, and @samp{native}.
+@samp{cortex-a715}, @samp{cortex-a720}, @samp{ampere1}, @samp{ampere1a},
+and @samp{native}.
 
 The values @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53},
 @samp{cortex-a73.cortex-a35}, @samp{cortex-a73.cortex-a53},


[PATCH][GCC] aarch64: Add support for Cortex-A720 CPU

2023-08-14 Thread Richard Ball via Gcc-patches

This patch adds support for the Cortex-A720 CPU to GCC.

No regressions on aarch64-none-elf.

Ok for master?

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (AARCH64_CORE): Add Cortex-
A720 CPU.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi: Document Cortex-A720 CPU.
diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 
dbac497ef3aab410eb81db185b2e9532186888bb..5369dd3dd0fe695a371261547c76f034c29b9bcd
 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -176,6 +176,8 @@ AARCH64_CORE("cortex-a710",  cortexa710, cortexa57, V9A,  
(SVE2_BITPERM, MEMTAG,
 
 AARCH64_CORE("cortex-a715",  cortexa715, cortexa57, V9A,  (SVE2_BITPERM, 
MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd4d, -1)
 
+AARCH64_CORE("cortex-a720",  cortexa720, cortexa57, V9_2A,  (SVE2_BITPERM, 
MEMTAG), neoversen2, 0x41, 0xd81, -1)
+
 AARCH64_CORE("cortex-x2",  cortexx2, cortexa57, V9A,  (SVE2_BITPERM, MEMTAG, 
I8MM, BF16), neoversen2, 0x41, 0xd48, -1)
 
 AARCH64_CORE("cortex-x3",  cortexx3, cortexa57, V9A,  (SVE2_BITPERM, MEMTAG, 
I8MM, BF16), neoversen2, 0x41, 0xd4e, -1)
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 
2170980dddb0d5d410a49631ad26ff2e346b39dd..12d610f0f6580096eed9cf3de8ad3239efde5e4b
 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexx2,cortexx3,neoversen2,demeter,neoversev2"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,neoversen2,demeter,neoversev2"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 
2c870d3c34b587ffc721b1f18f99ecd66d4217be..62537d9d09e25f864c27534b7ac2ec467ea24789
 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -20517,7 +20517,8 @@ performance of the code.  Permissible values for this 
option are:
 @samp{cortex-a75.cortex-a55}, @samp{cortex-a76.cortex-a55},
 @samp{cortex-r82}, @samp{cortex-x1}, @samp{cortex-x1c}, @samp{cortex-x2},
 @samp{cortex-x3}, @samp{cortex-a510}, @samp{cortex-a520}, @samp{cortex-a710},
-@samp{cortex-a715}, @samp{ampere1}, @samp{ampere1a}, and @samp{native}.
+@samp{cortex-a715}, @samp{cortex-a720}, @samp{ampere1}, @samp{ampere1a},
+and @samp{native}.
 
 The values @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53},
 @samp{cortex-a73.cortex-a35}, @samp{cortex-a73.cortex-a53},


[PATCH][GCC] aarch64: Add support for Cortex-A520 CPU

2023-08-08 Thread Richard Ball via Gcc-patches

This patch adds support for the Cortex-A520 CPU to GCC.

No regressions on aarch64-none-elf.

Ok for master?


gcc/ChangeLog:

    * config/aarch64/aarch64-cores.def (AARCH64_CORE): Add 
Cortex-A520 CPU.

    * config/aarch64/aarch64-tune.md: Regenerate.
    * doc/invoke.texi: Document Cortex-A520 CPU.


###

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 
2ec88c98400d5a2d7bdb954baca9e2664d2885ac..dbac497ef3aab410eb81db185b2e9532186888bb 
100644

--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -170,6 +170,8 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 
V8R, (), cortexa53, 0x41, 0xd15

 /* Arm ('A') cores. */
 AARCH64_CORE("cortex-a510",  cortexa510, cortexa55, V9A, 
(SVE2_BITPERM, MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1)


+AARCH64_CORE("cortex-a520",  cortexa520, cortexa55, V9_2A, 
(SVE2_BITPERM, MEMTAG), cortexa53, 0x41, 0xd80, -1)

+
 AARCH64_CORE("cortex-a710",  cortexa710, cortexa57, V9A, 
(SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd47, -1)


 AARCH64_CORE("cortex-a715",  cortexa715, cortexa57, V9A, 
(SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd4d, -1)
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 
4fd35fa4884617b901b9ae6faea2f39975c4f4b2..2170980dddb0d5d410a49631ad26ff2e346b39dd 
100644

--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
- 
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexa715,cortexx2,cortexx3,neoversen2,demeter,neoversev2"
+ 
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexx2,cortexx3,neoversen2,demeter,neoversev2"

 (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 
104766f446d118d30e9e2bfd6cd485255f54ab5f..2c870d3c34b587ffc721b1f18f99ecd66d4217be 
100644

--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -20516,8 +20516,8 @@ performance of the code.  Permissible values for 
this option are:

 @samp{cortex-a73.cortex-a35}, @samp{cortex-a73.cortex-a53},
 @samp{cortex-a75.cortex-a55}, @samp{cortex-a76.cortex-a55},
 @samp{cortex-r82}, @samp{cortex-x1}, @samp{cortex-x1c}, @samp{cortex-x2},
-@samp{cortex-x3}, @samp{cortex-a510}, @samp{cortex-a710}, 
@samp{cortex-a715},

-@samp{ampere1}, @samp{ampere1a}, and @samp{native}.
+@samp{cortex-x3}, @samp{cortex-a510}, @samp{cortex-a520}, 
@samp{cortex-a710},

+@samp{cortex-a715}, @samp{ampere1}, @samp{ampere1a}, and @samp{native}.

 The values @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53},
 @samp{cortex-a73.cortex-a35}, @samp{cortex-a73.cortex-a53},



[PATCH] aarch64: SVE/NEON Bridging intrinsics

2023-08-02 Thread Richard Ball via Gcc-patches

ACLE has added intrinsics to bridge between SVE and Neon.

The NEON_SVE Bridge adds intrinsics that allow conversions between NEON and
SVE vectors.

This patch adds support to GCC for the following 3 intrinsics:
svset_neonq, svget_neonq and svdup_neonq

gcc/ChangeLog:

* config.gcc: Adds new header to config.
* config/aarch64/aarch64-builtins.cc (GTY): Externs aarch64_simd_types.
* config/aarch64/aarch64-c.cc (aarch64_pragma_aarch64):
 Defines pragma for arm_neon_sve_bridge.h.
* config/aarch64/aarch64-protos.h: New function.
* config/aarch64/aarch64-sve-builtins-base.h: New intrinsics.
* config/aarch64/aarch64-sve-builtins-base.cc
 (class svget_neonq_impl): New intrinsic implementation.
(class svset_neonq_impl): Likewise.
(class svdup_neonq_impl): Likewise.
(NEON_SVE_BRIDGE_FUNCTION): New intrinsics.
* config/aarch64/aarch64-sve-builtins-functions.h
 (NEON_SVE_BRIDGE_FUNCTION): Defines macro for NEON_SVE_BRIDGE 
functions.

* config/aarch64/aarch64-sve-builtins-shapes.h: New shapes.
* config/aarch64/aarch64-sve-builtins-shapes.cc
 (parse_neon_type): Parser for NEON types.
(parse_element_type): Add NEON element types.
(parse_type): Likewise.
(NEON_SVE_BRIDGE_SHAPE): Defines macro for NEON_SVE_BRIDGE shapes.
(struct get_neonq_def): Defines function shape for get_neonq.
(struct set_neonq_def): Defines function shape for set_neonq.
(struct dup_neonq_def): Defines function shape for dup_neonq.
* config/aarch64/aarch64-sve-builtins.cc (DEF_NEON_SVE_FUNCTION): 
Defines
 macro for NEON_SVE_BRIDGE functions.
(handle_arm_neon_sve_bridge_h): Handles #pragma arm_neon_sve_bridge.h.
* config/aarch64/aarch64-builtins.h: New header file to extern neon 
types.
* config/aarch64/aarch64-neon-sve-bridge-builtins.def: New intrinsics
 function def file.
* config/aarch64/arm_neon_sve_bridge.h: New header file.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/neon-sve-bridge.c: New test.

#

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 
d88071773c9e1280cc5f38e36e09573214323b48..ca55992200dbe58782c3dbf66906339de021ba6b 
100644

--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -334,7 +334,7 @@ m32c*-*-*)
  ;;
  aarch64*-*-*)
cpu_type=aarch64
-   extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h"
+	extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h 
arm_neon_sve_bridge.h"

c_target_objs="aarch64-c.o"
cxx_target_objs="aarch64-c.o"
d_target_objs="aarch64-d.o"
diff --git a/gcc/config/aarch64/aarch64-builtins.h 
b/gcc/config/aarch64/aarch64-builtins.h

new file mode 100644
index 
..eebde448f92c230c8f88b4da1ca8ebd9670b1536

--- /dev/null
+++ b/gcc/config/aarch64/aarch64-builtins.h
@@ -0,0 +1,86 @@
+/* Builtins' description for AArch64 SIMD architecture.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of GCC.
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+   GCC is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+#ifndef GCC_AARCH64_BUILTINS_H
+#define GCC_AARCH64_BUILTINS_H
+#include "tree.h"
+enum aarch64_type_qualifiers
+{
+  /* T foo.  */
+  qualifier_none = 0x0,
+  /* unsigned T foo.  */
+  qualifier_unsigned = 0x1, /* 1 << 0  */
+  /* const T foo.  */
+  qualifier_const = 0x2, /* 1 << 1  */
+  /* T *foo.  */
+  qualifier_pointer = 0x4, /* 1 << 2  */
+  /* Used when expanding arguments if an operand could
+ be an immediate.  */
+  qualifier_immediate = 0x8, /* 1 << 3  */
+  qualifier_maybe_immediate = 0x10, /* 1 << 4  */
+  /* void foo (...).  */
+  qualifier_void = 0x20, /* 1 << 5  */
+  /* 1 << 6 is now unused */
+  /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
+ rather than using the type of the operand.  */
+  qualifier_map_mode = 0x80, /* 1 << 7  */
+  /* qualifier_pointer | qualifier_map_mode  */
+  qualifier_pointer_map_mode = 0x84,
+  /* qualifier_const | qualifier_pointer | qualifier_map_mode  */
+  qualifier_const_pointer_map_mode = 0x86,
+  /* Polynomial types.  */
+  qualifier_poly = 0x100,
+  /* Lane indices - must be in range, and flipped for bigendian.  */
+  qualifier_lane_index = 0x200,
+  /* Lane 

Re: [PATCH] Add POLY_INT_CST support to fold_ctor_reference in gimple-fold.cc

2023-08-01 Thread Richard Ball via Gcc-patches

Thanks Richard,

I've gone through the write access process and committed this.

On 7/31/2023 10:56 AM, Richard Sandiford wrote:

Richard Ball  writes:

Add POLY_INT_CST support to code within
fold_ctor_reference. This code previously
only supported INTEGER_CST which caused a
bug when using VEC_PERM_EXPR with SVE vectors.


Just to add for others: this is a prerequisite for a follow-on patch,
so the change will be tested there.


gcc/ChangeLog:

  * gimple-fold.cc (fold_ctor_reference):
  Add support for Poly_int.


Nit: s/Poly_int/poly_int/

OK with that change, thanks.  Please follow https://gcc.gnu.org/gitwrite.html
to get write access (I'll sponsor).

Richard


#

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index
4027ff71e10337fe49c600fcd5a80026b260d54d..91e80b9aaa3b4797ce3a94129ca42c98d974cbd9
100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -8162,8 +8162,8 @@ fold_ctor_reference (tree type, tree ctor, const
poly_uint64 &poly_offset,
result.  */
 if (!AGGREGATE_TYPE_P (TREE_TYPE (ctor)) && !offset
 /* VIEW_CONVERT_EXPR is defined only for matching sizes.  */
-  && !compare_tree_int (TYPE_SIZE (type), size)
-  && !compare_tree_int (TYPE_SIZE (TREE_TYPE (ctor)), size))
+  && known_eq (wi::to_poly_widest (TYPE_SIZE (type)), size)
+  && known_eq (wi::to_poly_widest (TYPE_SIZE (TREE_TYPE (ctor))),
size))
   {
 ret = canonicalize_constructor_val (unshare_expr (ctor), from_decl);
 if (ret)


[committed] MAINTAINERS: Add myself to write after approval

2023-08-01 Thread Richard Ball via Gcc-patches

Sponsored by Richard Sandiford 

ChangeLog:

* MAINTAINERS: Add myself.

###

diff --git a/MAINTAINERS b/MAINTAINERS
index 49aa6bae73b..a8bb43f50c3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -332,6 +332,7 @@ David Ayers 


 Prakhar Bahuguna   
 Giovanni Bajo  
 Simon Baldwin  
+Richard Ball   
 Scott Bambrough 


 Wolfgang Bangerth  
 Gergö Barany   


[PATCH] Add POLY_INT_CST support to fold_ctor_reference in gimple-fold.cc

2023-07-31 Thread Richard Ball via Gcc-patches

Add POLY_INT_CST support to code within
fold_ctor_reference. This code previously
only supported INTEGER_CST which caused a
bug when using VEC_PERM_EXPR with SVE vectors.

gcc/ChangeLog:

* gimple-fold.cc (fold_ctor_reference):
Add support for Poly_int.


#

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 
4027ff71e10337fe49c600fcd5a80026b260d54d..91e80b9aaa3b4797ce3a94129ca42c98d974cbd9 
100644

--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -8162,8 +8162,8 @@ fold_ctor_reference (tree type, tree ctor, const 
poly_uint64 &poly_offset,

  result.  */
   if (!AGGREGATE_TYPE_P (TREE_TYPE (ctor)) && !offset
   /* VIEW_CONVERT_EXPR is defined only for matching sizes.  */
-  && !compare_tree_int (TYPE_SIZE (type), size)
-  && !compare_tree_int (TYPE_SIZE (TREE_TYPE (ctor)), size))
+  && known_eq (wi::to_poly_widest (TYPE_SIZE (type)), size)
+  && known_eq (wi::to_poly_widest (TYPE_SIZE (TREE_TYPE (ctor))), 
size))

 {
   ret = canonicalize_constructor_val (unshare_expr (ctor), from_decl);
   if (ret)


[PATCH] aarch64: Replace manual swapping idiom with std::swap in aarch64.cc

2022-07-15 Thread Richard Ball via Gcc-patches

Replace manual swapping idiom with std::swap in aarch64.cc

gcc/config/aarch64/aarch64.cc has a few manual swapping idioms of the form:

x = in0, in0 = in1, in1 = x;

The preferred way is using the standard:

std::swap (in0, in1);

We should just fix these to use std::swap.
This will also allow us to eliminate the x temporary rtx.

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_evpc_trn): Use std::swap.
(aarch64_evpc_uzp): Likewise.
(aarch64_evpc_zip): Likewise.

---


diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
d049f9a9819628a73bfd57114c3b89d848da7d9c..b75a032c2f2c55d71bcb6b4b6ef1bd7f42a97235 
100644

--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -23498,7 +23498,7 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d)
 {
   HOST_WIDE_INT odd;
   poly_uint64 nelt = d->perm.length ();
-  rtx out, in0, in1, x;
+  rtx out, in0, in1;
   machine_mode vmode = d->vmode;

   if (GET_MODE_UNIT_SIZE (vmode) > 8)
@@ -23522,7 +23522,7 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d)
  at the head of aarch64-sve.md for details.  */
   if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
 {
-  x = in0, in0 = in1, in1 = x;
+  std::swap (in0, in1);
   odd = !odd;
 }
   out = d->target;
@@ -23592,7 +23592,7 @@ static bool
 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
 {
   HOST_WIDE_INT odd;
-  rtx out, in0, in1, x;
+  rtx out, in0, in1;
   machine_mode vmode = d->vmode;

   if (GET_MODE_UNIT_SIZE (vmode) > 8)
@@ -23615,7 +23615,7 @@ aarch64_evpc_uzp (struct expand_vec_perm_d *d)
  at the head of aarch64-sve.md for details.  */
   if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
 {
-  x = in0, in0 = in1, in1 = x;
+  std::swap (in0, in1);
   odd = !odd;
 }
   out = d->target;
@@ -23631,7 +23631,7 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d)
 {
   unsigned int high;
   poly_uint64 nelt = d->perm.length ();
-  rtx out, in0, in1, x;
+  rtx out, in0, in1;
   machine_mode vmode = d->vmode;

   if (GET_MODE_UNIT_SIZE (vmode) > 8)
@@ -23656,7 +23656,7 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d)
  at the head of aarch64-sve.md for details.  */
   if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
 {
-  x = in0, in0 = in1, in1 = x;
+  std::swap (in0, in1);
   high = !high;
 }
   out = d->target;