On 10/18/23 21:39, Richard Sandiford wrote:
Victor Do Nascimento <victor.donascime...@arm.com> writes:
Implement the aarch64 intrinsics for reading and writing system
registers with the following signatures:

        uint32_t __arm_rsr(const char *special_register);
        uint64_t __arm_rsr64(const char *special_register);
        void* __arm_rsrp(const char *special_register);
        float __arm_rsrf(const char *special_register);
        double __arm_rsrf64(const char *special_register);
        void __arm_wsr(const char *special_register, uint32_t value);
        void __arm_wsr64(const char *special_register, uint64_t value);
        void __arm_wsrp(const char *special_register, const void *value);
        void __arm_wsrf(const char *special_register, float value);
        void __arm_wsrf64(const char *special_register, double value);

gcc/ChangeLog:

        * gcc/config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
        Add enums for new builtins.
        (aarch64_init_rwsr_builtins): New.
        (aarch64_general_init_builtins): Call aarch64_init_rwsr_builtins.
        (aarch64_expand_rwsr_builtin):  New.
        (aarch64_general_expand_builtin): Call aarch64_general_expand_builtin.
        * gcc/config/aarch64/aarch64.md (read_sysregdi): New insn_and_split.
        (write_sysregdi): Likewise.
        * gcc/config/aarch64/arm_acle.h (__arm_rsr): New.
        (__arm_rsrp): Likewise.
        (__arm_rsr64): Likewise.
        (__arm_rsrf): Likewise.
        (__arm_rsrf64): Likewise.
        (__arm_wsr): Likewise.
        (__arm_wsrp): Likewise.
        (__arm_wsr64): Likewise.
        (__arm_wsrf): Likewise.
        (__arm_wsrf64): Likewise.

gcc/testsuite/ChangeLog:

        * gcc/testsuite/gcc.target/aarch64/acle/rwsr.c: New.
        * gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c: Likewise.
---
  gcc/config/aarch64/aarch64-builtins.cc        | 200 ++++++++++++++++++
  gcc/config/aarch64/aarch64.md                 |  17 ++
  gcc/config/aarch64/arm_acle.h                 |  30 +++
  .../gcc.target/aarch64/acle/rwsr-1.c          |  20 ++
  gcc/testsuite/gcc.target/aarch64/acle/rwsr.c  | 144 +++++++++++++
  5 files changed, 411 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
  create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr.c

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 04f59fd9a54..d8bb2a989a5 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -808,6 +808,17 @@ enum aarch64_builtins
    AARCH64_RBIT,
    AARCH64_RBITL,
    AARCH64_RBITLL,
+  /* System register builtins.  */
+  AARCH64_RSR,
+  AARCH64_RSRP,
+  AARCH64_RSR64,
+  AARCH64_RSRF,
+  AARCH64_RSRF64,
+  AARCH64_WSR,
+  AARCH64_WSRP,
+  AARCH64_WSR64,
+  AARCH64_WSRF,
+  AARCH64_WSRF64,
    AARCH64_BUILTIN_MAX
  };
@@ -1798,6 +1809,65 @@ aarch64_init_rng_builtins (void)
                                   AARCH64_BUILTIN_RNG_RNDRRS);
  }
+/* Add builtins for reading system register. */
+static void
+aarch64_init_rwsr_builtins (void)
+{
+  tree fntype = NULL;
+  tree const_char_ptr_type
+    = build_pointer_type (build_type_variant (char_type_node, true, false));
+
+#define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
+  aarch64_builtin_decls[AARCH64_##F] \
+    = aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
+
+  fntype
+    = build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
+
+  fntype
+    = build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
+
+  fntype
+    = build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
+
+  fntype
+    = build_function_type_list (float_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
+
+  fntype
+    = build_function_type_list (double_type_node, const_char_ptr_type, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
+
+  fntype
+    = build_function_type_list (void_type_node, const_char_ptr_type,
+                               uint32_type_node, NULL);
+
+  AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
+
+  fntype
+    = build_function_type_list (void_type_node, const_char_ptr_type,
+                               const_ptr_type_node, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
+
+  fntype
+    = build_function_type_list (void_type_node, const_char_ptr_type,
+                               uint64_type_node, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);
+
+  fntype
+    = build_function_type_list (void_type_node, const_char_ptr_type,
+                               float_type_node, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF, wsrf, fntype);
+
+  fntype
+    = build_function_type_list (void_type_node, const_char_ptr_type,
+                               double_type_node, NULL);
+  AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF64, wsrf64, fntype);
+}
+
  /* Initialize the memory tagging extension (MTE) builtins.  */
  struct
  {
@@ -2019,6 +2089,8 @@ aarch64_general_init_builtins (void)
    aarch64_init_rng_builtins ();
    aarch64_init_data_intrinsics ();
+ aarch64_init_rwsr_builtins ();
+
    tree ftype_jcvt
      = build_function_type_list (intSI_type_node, double_type_node, NULL);
    aarch64_builtin_decls[AARCH64_JSCVT]
@@ -2599,6 +2671,123 @@ aarch64_expand_rng_builtin (tree exp, rtx target, int 
fcode, int ignore)
    return target;
  }
+/* Expand the read/write system register builtin EXPs. */
+rtx
+aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
+{
+  tree arg0, arg1;
+  rtx const_str, input_val, subreg;
+  enum machine_mode mode;
+  class expand_operand ops[2];
+
+  arg0 = CALL_EXPR_ARG (exp, 0);
+
+  bool write_op = (fcode == AARCH64_WSR
+                  || fcode == AARCH64_WSRP
+                  || fcode == AARCH64_WSR64
+                  || fcode == AARCH64_WSRF
+                  || fcode == AARCH64_WSRF64);
+  bool read_op = (fcode == AARCH64_RSR
+                 || fcode == AARCH64_RSRP
+                 || fcode == AARCH64_RSR64
+                 || fcode == AARCH64_RSRF
+                 || fcode == AARCH64_RSRF64);

Instead of this, how about using:

   if (call_expr_nargs (exp) == 1)

for the read path?


Personally, I don't like that. It's a `read_op' because of the fcode it has, the fact they share the same number of arguments is accidental. With this implementation, we outline very early on exactly what constitutes both a valid write and a valid read operation.

I'm happy to change things around if you feel strongly enough about it or see some material advantage to doing things your way that I've missed :).

+
+  /* Argument 0 (system register name) must be a string literal.  */
+  gcc_assert (!SSA_VAR_P (arg0)
+             && TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE
+             && TREE_CODE (TREE_OPERAND (arg0,0)) == STRING_CST);

It looks like this requires arg0 to be an ADDR_EXPR.  It would be good
to check that rather than !SSA_VAR_P.

Minor formatting nit: should be a space after the comma.

+
+  const char *name_input = TREE_STRING_POINTER (TREE_OPERAND (arg0, 0));
+  size_t len = strlen (name_input);

This can use TREE_STRING_LENGTH.  That's preferable since it copes
with embedded nuls, e.g. "foo\0bar".

+
+  if (len == 0)
+    {
+      error_at (EXPR_LOCATION (exp), "invalid system register name provided");
+      return const0_rtx;
+    }

Is this check necessary?  It looks like the following code would
correctly reject empty strings.


Not necessary. As the code evolved, some checks I added early on in development went on to become redundant, this being on example. Thanks for picking up on it.

+
+  char *sysreg_name = (char *) alloca (len + 1);
+  strcpy (sysreg_name, name_input);

We shouldn't use alloca for user input that hasn't been verified yet,
since it could be arbitrarily long.  Libiberty provides an xmemdup that
might be useful.  (xmemdup rather than xstrdup to cope with the embedded
nuls mentioned above.  Or perhaps we should just reject embedded nuls
immediately, so that later code doesn't need to worry about them.)

+
+  for (unsigned pos = 0; pos <= len; pos++)
+    sysreg_name[pos] = TOLOWER (sysreg_name[pos]);
+
+  const char* name_output = aarch64_retrieve_sysreg (sysreg_name, write_op);

Does this TOLOWERing make the checks for capital letters in
is_implem_def_reg redundant?


Potentially.

The quesion is, do we want this explicit dependence of `is_implem_def_reg' on this TOLOWERing?

My intention was to keep things fairly independent so they could be reused elsewhere as easily as possible, should this need arise. This meant I didn't want `is_implem_def_reg' to rely on the passed input having been pre-processed in any way, keeping it as general-purpose as possible.

Thanks,
V.

BTW, I forgot to say, but it'd be better to add an "aarch64_" prefix to
is_implem_def_reg, to avoid accidental clashes with target-independent code.

+  if (name_output == NULL)
+    {
+      error_at (EXPR_LOCATION (exp), "invalid system register name provided");
+      return const0_rtx;
+    }
+
+  /* Assign the string corresponding to the system register name to an RTX.  */
+  const_str = rtx_alloc (CONST_STRING);
+  PUT_CODE (const_str, CONST_STRING);
+  XSTR (const_str, 0) = xstrdup (name_output);

I think this needs to use ggc_strdup rather than xstrdup, since the
memory is managed by the garbage collector.

+
+  /* Set up expander operands and call instruction expansion.  */
+  if (read_op)
+    {
+

Nit: redundant blank line.


+      /* Emit the initial read_sysregdi rtx.  */
+      create_output_operand (&ops[0], target, DImode);
+      create_fixed_operand (&ops[1], const_str);
+      expand_insn (CODE_FOR_aarch64_read_sysregdi, 2, ops);
+      target = ops[0].value;
+
+      /* Do any necessary post-processing on the result.  */
+      switch (fcode)
+       {
+       case AARCH64_RSR:
+         return gen_lowpart_SUBREG (SImode, target);
+       case AARCH64_RSRP:
+       case AARCH64_RSR64:
+         return target;

RSRP would return a 32-bit value for -mabi=ilp32.  It might be easier
to do:

        case AARCH64_RSR:
        case AARCH64_RSRP:
        case AARCH64_RSR64:
        case AARCH64_RSRF64:
          return lowpart_subreg (TYPE_MODE (TREE_TYPE (exp)), target, DImode);

lowpart_subreg is a no-op if the new mode is the same as the old mode.

+       case AARCH64_RSRF:
+         subreg = gen_reg_rtx (SImode);

This line looks redundant.

+         subreg = gen_lowpart_SUBREG (SImode, target);
+         return gen_lowpart_SUBREG (SFmode, subreg);
+       case AARCH64_RSRF64:
+         return gen_lowpart_SUBREG (DFmode, target);
+       }

If we do go for my:

call_expr_nargs (exp) == 1

suggestion above, this switch should probably have a:

   default:
     gcc_unreachable ();

to catch unexpected ops.

+    }
+  if (write_op)
+    {
+
+      arg1 = CALL_EXPR_ARG (exp, 1);
+      mode = TYPE_MODE (TREE_TYPE (arg1));
+      input_val = copy_to_mode_reg (mode, expand_normal (arg1));
+
+      switch (fcode)
+       {
+       case AARCH64_WSR:
+         subreg = gen_lowpart_SUBREG (DImode, input_val);
+         break;
+       case AARCH64_WSRP:
+       case AARCH64_WSR64:
+         subreg = input_val;
+         break;
+       case AARCH64_WSRF:
+         subreg = gen_lowpart_SUBREG (SImode, input_val);
+         subreg = gen_lowpart_SUBREG (DImode, subreg);
+         break;
+       case AARCH64_WSRF64:
+         subreg = gen_lowpart_SUBREG (DImode, input_val);
+         break;
+       }

Similarly here, I think all cases except AARCH64_WSRF could use:

        input_val = lowpart_subreg (DImode, input_value, mode);

+
+      create_fixed_operand (&ops[0], const_str);
+      create_input_operand (&ops[1], subreg, DImode);
+      expand_insn (CODE_FOR_aarch64_write_sysregdi, 2, ops);
+
+      return target;
+    }
+
+  error_at (EXPR_LOCATION (exp),
+           "Malformed call to read/write system register builtin");
+  return target;
+}
+
  /* Expand an expression EXP that calls a MEMTAG built-in FCODE
     with result going to TARGET.  */
  static rtx
@@ -2832,6 +3021,17 @@ aarch64_general_expand_builtin (unsigned int fcode, tree 
exp, rtx target,
      case AARCH64_BUILTIN_RNG_RNDR:
      case AARCH64_BUILTIN_RNG_RNDRRS:
        return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
+    case AARCH64_RSR:
+    case AARCH64_RSRP:
+    case AARCH64_RSR64:
+    case AARCH64_RSRF:
+    case AARCH64_RSRF64:
+    case AARCH64_WSR:
+    case AARCH64_WSRP:
+    case AARCH64_WSR64:
+    case AARCH64_WSRF:
+    case AARCH64_WSRF64:
+      return aarch64_expand_rwsr_builtin (exp, target, fcode);
      }
if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 32c7adc8928..80da30bc30c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -281,6 +281,8 @@
      UNSPEC_UPDATE_FFRT
      UNSPEC_RDFFR
      UNSPEC_WRFFR
+    UNSPEC_SYSREG_RDI
+    UNSPEC_SYSREG_WDI
      ;; Represents an SVE-style lane index, in which the indexing applies
      ;; within the containing 128-bit block.
      UNSPEC_SVE_LANE_SELECT
@@ -476,6 +478,21 @@
  ;; Jumps and other miscellaneous insns
  ;; -------------------------------------------------------------------
+(define_insn "aarch64_read_sysregdi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+       (unspec_volatile:DI [(match_operand 1 "aarch64_sysreg_string" "")]
+                 UNSPEC_SYSREG_RDI))]
+ ""
+ "mrs\t%x0, %1"
+ )
+
+(define_insn "aarch64_write_sysregdi"
+ [(unspec_volatile:DI [(match_operand 0 "aarch64_sysreg_string" "")
+             (match_operand:DI 1 "register_operand" "r")] UNSPEC_SYSREG_WDI)]
+ ""
+ "msr\t%0, %x1"
+ )

I think the write pattern should accept rZ (x0-x30 + xzr) rather than just r.

Minor formatting nit, but other patterns are indented by 2 spaces
rather than 1, with the closing ")" not indented.

Thanks,
Richard

+
  (define_insn "indirect_jump"
    [(set (pc) (match_operand:DI 0 "register_operand" "r"))]
    ""
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index 7599a32301d..71ada878299 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -314,6 +314,36 @@ __rndrrs (uint64_t *__res)
#pragma GCC pop_options +#define __arm_rsr(__regname) \
+  __builtin_aarch64_rsr (__regname)
+
+#define __arm_rsrp(__regname) \
+  __builtin_aarch64_rsrp (__regname)
+
+#define __arm_rsr64(__regname) \
+  __builtin_aarch64_rsr64 (__regname)
+
+#define __arm_rsrf(__regname) \
+  __builtin_aarch64_rsrf (__regname)
+
+#define __arm_rsrf64(__regname) \
+  __builtin_aarch64_rsrf64 (__regname)
+
+#define __arm_wsr(__regname, __value) \
+  __builtin_aarch64_wsr (__regname, __value)
+
+#define __arm_wsrp(__regname, __value) \
+  __builtin_aarch64_wsrp (__regname, __value)
+
+#define __arm_wsr64(__regname, __value) \
+  __builtin_aarch64_wsr64 (__regname, __value)
+
+#define __arm_wsrf(__regname, __value) \
+  __builtin_aarch64_wsrf (__regname, __value)
+
+#define __arm_wsrf64(__regname, __value) \
+  __builtin_aarch64_wsrf64 (__regname, __value)
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c 
b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
new file mode 100644
index 00000000000..bc9db098f16
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
@@ -0,0 +1,20 @@
+/* Test the __arm_[r,w]sr ACLE intrinsics family.  */
+/* Ensure that illegal behavior is rejected by the compiler.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.4-a" } */
+
+#include <arm_acle.h>
+
+/* Ensure that read/write-only register attributes are respected by the 
compiler.  */
+void
+test_rwsr_read_write_only ()
+{
+  /* Attempt to write to read-only registers.  */
+  long long a = __arm_rsr64 ("aidr_el1");  /* Read ok.  */
+  __arm_wsr64 ("aidr_el1", a); /* { dg-error {invalid system register name 
provided} } */
+
+  /* Attempt to read from write-only registers.  */
+  __arm_wsr64("icc_asgi1r_el1", a);            /* Write ok.  */
+  long long b = __arm_rsr64("icc_asgi1r_el1"); /* { dg-error {invalid system 
register name provided} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c 
b/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
new file mode 100644
index 00000000000..3af4b960306
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
@@ -0,0 +1,144 @@
+/* Test the __arm_[r,w]sr ACLE intrinsics family.  */
+/* Check that function variants for different data types handle types 
correctly.  */
+/* { dg-do compile } */
+/* { dg-options "-O1 -march=armv8.4-a" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_acle.h>
+
+/*
+** get_rsr:
+** ...
+**     mrs     x([0-9]+), s2_1_c0_c7_4
+**     add     w\1, w\1, 1
+** ...
+*/
+int
+get_rsr ()
+{
+  int a =  __arm_rsr("trcseqstr");
+  return a+1;
+}
+
+/*
+** get_rsrf:
+**     mrs     x([0-9]+), s2_1_c0_c7_4
+**     fmov    s[0-9]+, w\1
+** ...
+*/
+float
+get_rsrf ()
+{
+  return __arm_rsrf("trcseqstr");
+}
+
+/*
+** get_rsrp:
+**     mrs     x0, s2_1_c0_c7_4
+**     ret
+*/
+void *
+get_rsrp ()
+{
+  return __arm_rsrp("trcseqstr");
+}
+
+/*
+** get_rsr64:
+**     mrs     x0, s2_1_c0_c7_4
+**     ret
+*/
+long long
+get_rsr64 ()
+{
+  return __arm_rsr64("trcseqstr");
+}
+
+/*
+** get_rsrf64:
+**     mrs     x([0-9]+), s2_1_c0_c7_4
+**     fmov    d[0-9]+, x\1
+** ...
+*/
+double
+get_rsrf64 ()
+{
+  return __arm_rsrf64("trcseqstr");
+}
+
+/*
+** set_wsr32:
+** ...
+**     add     w([0-9]+), w\1, 1
+**     msr     s2_1_c0_c7_4, x\1
+** ...
+*/
+void
+set_wsr32 (int a)
+{
+  __arm_wsr("trcseqstr", a+1);
+}
+
+/*
+** set_wsrp:
+** ...
+**     msr     s2_1_c0_c7_4, x[0-9]+
+** ...
+*/
+void
+set_wsrp (void *a)
+{
+  __arm_wsrp("trcseqstr", a);
+}
+
+/*
+** set_wsr64:
+** ...
+**     msr     s2_1_c0_c7_4, x[0-9]+
+** ...
+*/
+void
+set_wsr64 (long long a)
+{
+  __arm_wsr64("trcseqstr", a);
+}
+
+/*
+** set_wsrf32:
+** ...
+**     fmov    w([0-9]+), s[0-9]+
+**     msr     s2_1_c0_c7_4, x\1
+** ...
+*/
+void
+set_wsrf32 (float a)
+{
+  __arm_wsrf("trcseqstr", a);
+}
+
+/*
+** set_wsrf64:
+** ...
+**     fmov    x([0-9]+), d[0-9]+
+**     msr     s2_1_c0_c7_4, x\1
+** ...
+*/
+void
+set_wsrf64(double a)
+{
+  __arm_wsrf64("trcseqstr", a);
+}
+
+/*
+** set_custom:
+** ...
+**     mrs     x0, s1_2_c3_c4_5
+** ...
+**     msr     s1_2_c3_c4_5, x0
+** ...
+*/
+void set_custom()
+{
+  __uint64_t b = __arm_rsr64("S1_2_C3_C4_5");
+  __arm_wsr64("S1_2_C3_C4_5", b);
+}

Reply via email to