https://gcc.gnu.org/g:41f40585570f3995dc53713e86c996c8c535dd75

commit 41f40585570f3995dc53713e86c996c8c535dd75
Author: Arsen Arsenović <[email protected]>
Date:   Mon Mar 16 11:32:44 2026 +0100

    gcc/gcn: Use 'shared_base' register for LDS<->flat conversion
    
    This speeds up address space conversions by about 8x in
    micro-benchmarks.  Those may be slightly unrepresentative, because the
    RTL optimizer is free to do whatever it wants with the two registers
    address space conversion uses (a temporary register and shared_base).
    
    gcc/ChangeLog:
    
            * config/gcn/gcn.cc (gcn_addr_space_convert): Use
            SHARED_BASE_REG to find flat address space base of LDS.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/gcn/addr-space-convert-1.c: New test.
            * gcc.target/gcn/addr-space-convert-2.c: New test.
    
    (cherry picked from commit 85d0911203158baa1eda6f5191b1d5f6ad153b69)

Diff:
---
 gcc/config/gcn/gcn.cc                              | 34 +++++++++++++---------
 .../gcc.target/gcn/addr-space-convert-1.c          |  8 +++++
 .../gcc.target/gcn/addr-space-convert-2.c          | 13 +++++++++
 3 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 9e2c2003dc2a..c4bfc707f047 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -1899,21 +1899,27 @@ gcn_addr_space_convert (rtx op, tree from_type, tree to_type)
 
   if (AS_LDS_P (as_from) && AS_FLAT_P (as_to))
     {
-      /* The high bits of the QUEUE_PTR_ARG register are used by
-        GCN_BUILTIN_FIRST_CALL_THIS_THREAD_P, so mask them out.  */
-      rtx queue_reg = gen_rtx_REG (DImode,
-                                  cfun->machine->args.reg[QUEUE_PTR_ARG]);
-      rtx queue_ptr = gen_reg_rtx (DImode);
-      emit_insn (gen_anddi3 (queue_ptr, queue_reg, GEN_INT (0xffffffffffff)));
-      rtx group_seg_aperture_hi = gen_rtx_MEM (SImode,
-                                    gen_rtx_PLUS (DImode, queue_ptr,
-                                                  gen_int_mode (64, SImode)));
-      rtx tmp = gen_reg_rtx (DImode);
-
+      /* The LDS based pointer is held in SHARED_BASE.
+
+        Per:
+
+          For GFX9-GFX11 the aperture base addresses are directly available as
+          inline constant registers SRC_SHARED_BASE/LIMIT and
+          SRC_PRIVATE_BASE/LIMIT. In 64-bit address mode the aperture sizes
+          are 2^32 bytes and the base is aligned to 2^32 which makes it easier
+          to convert from flat to segment or segment to flat.
+          -- User Guide for AMDGPU Backend (LLVM)
+
+        ... we can safely assume that the SImode low-part of SHARED_BASE_REG
+        contains all zeroes.  As OP is an LDS address, it is 32-bit.  Ergo,
+        SHARED_BASE_REG+OP is equivalent to SHARED_BASE_REG|OP.  If
+        SHARED_BASE_REG is in r[N:N+1], then, writing OP to rN should suffice.
+        Ergo, this conversion can be implemented as two moves.  */
+      rtx group_seg_aperture = gen_rtx_REG (Pmode, SHARED_BASE_REG);
+      rtx tmp = gen_reg_rtx (Pmode);
+
+      emit_move_insn (tmp, group_seg_aperture);
       emit_move_insn (gen_lowpart (SImode, tmp), op);
-      emit_move_insn (gen_highpart_mode (SImode, DImode, tmp),
-                     group_seg_aperture_hi);
-
       return tmp;
     }
   else if (as_from == as_to)
diff --git a/gcc/testsuite/gcc.target/gcn/addr-space-convert-1.c b/gcc/testsuite/gcc.target/gcn/addr-space-convert-1.c
new file mode 100644
index 000000000000..3928842c3bb0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/addr-space-convert-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile }
+   { dg-options "-O -Wall" } */
+
+void __flat *
+convert_lds_addr (void __lds *x)
+{ return x; }  /* Implicit __lds -> __flat pointer conversion on return.  */
+
+/* { dg-final { scan-assembler "shared_base" } }  */
diff --git a/gcc/testsuite/gcc.target/gcn/addr-space-convert-2.c b/gcc/testsuite/gcc.target/gcn/addr-space-convert-2.c
new file mode 100644
index 000000000000..37f6c3771a96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/addr-space-convert-2.c
@@ -0,0 +1,13 @@
+/* { dg-do run }
+   { dg-options "-O -Wall" } */
+
+int
+main ()
+{
+  int __lds *testptr = (int __lds *)(__UINTPTR_TYPE__)8;  /* Integer 8 as an LDS pointer.  */
+  *testptr = 4;  /* Store through the __lds-qualified pointer.  */
+
+  int __flat *testptr_flat = testptr;  /* Implicit __lds -> __flat conversion.  */
+  if (*testptr_flat != 4)  /* The flat pointer must read back the value stored above.  */
+    return 1;
+}

Reply via email to