Memory aperture registers on GCN are special read-only registers that
hold 64-bit (i.e. pointer-sized) values. These registers are
accessible as the full value or by their lower half (but not their upper
half!), and their values are aligned to 2^32 (so the lower half is always
zero).
They come in two pairs: shared_base/shared_limit and
private_base/private_limit. The base registers contain the addresses at
which the LDS and scratch spaces, respectively, are mapped in the
flat memory space, and the limit registers contain the sizes of these
mappings.
In a subsequent patch, this will allow us to use a pair of moves to do
conversions from LDS to the flat address space, saving us a few
expensive reads at runtime.
gcc/ChangeLog:
* config/gcn/gcn.cc (gcn_regno_reg_class): Handle
MEMORY_APERTURE_REGS.
(gcn_hard_regno_rename_ok): Return false for memory aperture
regs.
(gcn_memory_move_cost): Handle memory aperture regs the same as
general registers.
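(gcn_class_max_nregs): Handle MEMORY_APERTURE_REGS.
(gcn_hard_regno_mode_ok): Only permit DImode for memory aperture
regs.
(gcn_operand_part): Assert that only the lowest part of a memory
aperture reg is requested.
(print_reg): Print memory aperture regs by their register name.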
* config/gcn/gcn.h (SHARED_BASE_REG): Define.
(SHARED_LIMT_REG): Ditto.
(PRIVATE_BASE_REG): Ditto.
(PRIVATE_LIMT_REG): Ditto.
(MEMORY_APERTURE_REG_P): New macro. Evaluates to non-zero if
parameter is one of the above registers.
(SSRC_REGNO_P): Also evaluate to non-zero for aperture regs, as
they can be sources to scalar instructions.
(enum reg_class): New class: MEMORY_APERTURE_REGS.
(REG_CLASS_NAMES): Provide name for that class.
(MEMORY_APERTURE_REGS_MASK): New helper macro. Value of
NAMED_REG_MASK2 for each of the memory aperture regs.
(REG_CLASS_CONTENTS): Add the above registers to
MEMORY_APERTURE_REGS and SGPR_SRC_REGS.
---
gcc/config/gcn/gcn.cc | 35 ++++++++++++++++++++++++++++++++---
gcc/config/gcn/gcn.h | 41 +++++++++++++++++++++++++++++++++++++----
2 files changed, 69 insertions(+), 7 deletions(-)
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 3822febc7211..8277fd15a78e 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -505,6 +505,11 @@ VnMODE (int n, machine_mode mode)
static unsigned char
gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
{
+ /* The aperture registers always hold a 64-bit value, though they can be
+ accessed as either full registers or their lower half. */
+ if (rclass == MEMORY_APERTURE_REGS)
+ return 1;
+
/* Scalar registers are 32bit, vector registers are in fact tuples of
64 lanes. */
if (rclass == VGPR_REGS || rclass == AVGPR_REGS
@@ -602,6 +607,14 @@ gcn_hard_regno_mode_ok (unsigned int regno, machine_mode
mode)
}
if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
return true;
+
+ if (MEMORY_APERTURE_REG_P (regno))
+ /* Memory aperture registers are accessible either by their lower half
+ (that is always zero, but accessible anyway), or by their whole
+ value, which is the actual base. We, for now, don't permit the former,
+ since there's no use to those values yet. */
+ return mode == DImode;
+
if (SGPR_REGNO_P (regno))
/* We restrict double register values to aligned registers. */
return (sgpr_1reg_mode_p (mode)
@@ -645,6 +658,8 @@ gcn_regno_reg_class (int regno)
case EXEC_HI_REG:
return EXEC_MASK_REG;
}
+ if (MEMORY_APERTURE_REG_P (regno))
+ return MEMORY_APERTURE_REGS;
if (VGPR_REGNO_P (regno))
return VGPR_REGS;
if (AVGPR_REGNO_P (regno))
@@ -765,6 +780,9 @@ gcn_operand_part (machine_mode mode, rtx op, int n)
if (REG_P (op))
{
gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
+ gcc_assert (!MEMORY_APERTURE_REG_P (REGNO (op))
+ /* We can't access the higher parts of aperture regs. */
+ || n == 0);
return gen_rtx_REG (vsimode, REGNO (op) + n);
}
if (GET_CODE (op) == CONST_VECTOR)
@@ -785,6 +803,9 @@ gcn_operand_part (machine_mode mode, rtx op, int n)
else if (GET_MODE_SIZE (mode) == 8 && REG_P (op))
{
gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
+ gcc_assert (!MEMORY_APERTURE_REG_P (REGNO (op))
+ /* We can't access the higher parts of aperture regs. */
+ || n == 0);
return gen_rtx_REG (SImode, REGNO (op) + n);
}
else
@@ -3713,7 +3734,8 @@ gcn_hard_regno_rename_ok (unsigned int from_reg, unsigned
int to_reg)
|| from_reg == EXEC_LO_REG || from_reg == EXEC_HI_REG
|| to_reg == SCC_REG
|| to_reg == VCC_LO_REG || to_reg == VCC_HI_REG
- || to_reg == EXEC_LO_REG || to_reg == EXEC_HI_REG)
+ || to_reg == EXEC_LO_REG || to_reg == EXEC_HI_REG
+ || MEMORY_APERTURE_REG_P (to_reg))
return false;
/* Allow the link register to be used if it was saved. */
@@ -4008,6 +4030,8 @@ gcn_memory_move_cost (machine_mode mode, reg_class_t
regclass, bool in)
case SGPR_DST_REGS:
case GENERAL_REGS:
case AFP_REGS:
+ /* This one can't be written to. */
+ case MEMORY_APERTURE_REGS:
if (!in)
return (STORE_COST + 2) * nregs;
return LOAD_COST * nregs;
@@ -7018,8 +7042,13 @@ print_reg (FILE *file, rtx x)
machine_mode mode = GET_MODE (x);
if (VECTOR_MODE_P (mode))
mode = GET_MODE_INNER (mode);
- if (mode == BImode || mode == QImode || mode == HImode || mode == SImode
- || mode == HFmode || mode == SFmode)
+ if (MEMORY_APERTURE_REG_P (REGNO (x)))
+ {
+ gcc_assert (mode == DImode);
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ }
+ else if (mode == BImode || mode == QImode || mode == HImode || mode == SImode
+ || mode == HFmode || mode == SFmode)
fprintf (file, "%s", reg_names[REGNO (x)]);
else if (mode == DImode || mode == DFmode)
{
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index 4d6049c128dd..f0c778372c59 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -169,7 +169,18 @@ extern const struct gcn_device_def {
#define EXECZ_REG 128
#define SCC_REG 129
-/* 132-159 are reserved to simplify masks. */
+/* Memory aperture registers. */
+#define SHARED_BASE_REG 130
+#define SHARED_LIMT_REG 131
+#define PRIVATE_BASE_REG 132
+#define PRIVATE_LIMT_REG 133
+#define MEMORY_APERTURE_REG_P(reg) \
+ ((reg) == SHARED_BASE_REG \
+ || (reg) == SHARED_LIMT_REG \
+ || (reg) == PRIVATE_BASE_REG \
+ || (reg) == PRIVATE_LIMT_REG)
+
+/* 134-159 are reserved to simplify masks. */
#define FIRST_VGPR_REG 160
#define VGPR_REGNO(N) ((N)+FIRST_VGPR_REG)
@@ -214,7 +225,7 @@ STATIC_ASSERT (LAST_AVGPR_REG + 1 - FIRST_AVGPR_REG == 256);
#define SGPR_REGNO_P(N) (/*(N) >= FIRST_SGPR_REG &&*/ (N) <=
LAST_SGPR_REG)
#define VGPR_REGNO_P(N) ((N) >= FIRST_VGPR_REG && (N) <=
LAST_VGPR_REG)
#define AVGPR_REGNO_P(N) ((N) >= FIRST_AVGPR_REG && (N) <=
LAST_AVGPR_REG)
-#define SSRC_REGNO_P(N) ((N) <= SCC_REG && (N) != VCCZ_REG)
+#define SSRC_REGNO_P(N) ((N) <= PRIVATE_LIMT_REG && (N) != VCCZ_REG)
#define SDST_REGNO_P(N) ((N) <= EXEC_HI_REG && (N) != VCCZ_REG)
#define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X)))
#define CC_REGNO_P(X) ((X) == SCC_REG || (X) == VCC_REG)
@@ -368,6 +379,12 @@ enum reg_class
/* EXEC */
EXEC_MASK_REG,
+ /* Memory aperture registers (SHARED_{BASE,LIMIT}, PRIVATE_{BASE,LIMIT}).
+ Though these are usable as sources to vector instructions (and so
+ nominally belong to Sv), they cannot be split into lower and upper half
+ accesses, and are therefore left out of that constraint. */
+ MEMORY_APERTURE_REGS,
+
/* SGPR0-101 */
SGPR_REGS,
@@ -411,6 +428,7 @@ enum reg_class
"EXECZ_CONDITIONAL_REG", \
"ALL_CONDITIONAL_REGS", \
"EXEC_MASK_REG", \
+ "MEMORY_APERTURE_REGS", \
"SGPR_REGS", \
"SGPR_EXEC_REGS", \
"SGPR_VOP3A_SRC_REGS", \
@@ -430,6 +448,13 @@ enum reg_class
#define NAMED_REG_MASK(N) (1<<((N)-3*32))
#define NAMED_REG_MASK2(N) (1<<((N)-4*32))
+#define MEMORY_APERTURE_REGS_MASK \
+ (NAMED_REG_MASK2 (SHARED_BASE_REG) \
+ | NAMED_REG_MASK2 (SHARED_LIMT_REG) \
+ | NAMED_REG_MASK2 (PRIVATE_BASE_REG) \
+ | NAMED_REG_MASK2 (PRIVATE_LIMT_REG))
+
+
#define REG_CLASS_CONTENTS { \
/* NO_REGS. */ \
{0, 0, 0, 0, \
@@ -473,6 +498,12 @@ enum reg_class
0, 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, 0, 0},
\
+ /* MEMORY_APERTURE_REGS. */ \
+ {0, 0, 0, 0, \
+ MEMORY_APERTURE_REGS_MASK, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0}, \
/* SGPR_REGS. */ \
{0xffffffff, 0xffffffff, 0xffffffff, 0xf1, \
0, 0, 0, 0, \
@@ -510,7 +541,8 @@ enum reg_class
0, 0, 0, 0, 0, 0},
\
/* SGPR_SRC_REGS. */ \
{0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
- NAMED_REG_MASK2 (EXECZ_REG) | NAMED_REG_MASK2 (SCC_REG), 0, 0, 0, \
+ MEMORY_APERTURE_REGS_MASK | NAMED_REG_MASK2 (EXECZ_REG) \
+ | NAMED_REG_MASK2 (SCC_REG), 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, 0, 0},
\
@@ -631,7 +663,8 @@ enum gcn_address_spaces
"ttmp0", "ttmp1", "ttmp2", "ttmp3", "ttmp4", "ttmp5", "ttmp6", "ttmp7", \
"ttmp8", "ttmp9", "ttmp10", "ttmp11", "m0", "exec_lo", "exec_hi", \
"execz", "scc", \
- "res130", "res131", "res132", "res133", "res134", "res135", "res136", \
+ "shared_base", "shared_limit", "private_base", "private_limit", \
+ "res134", "res135", "res136", \
"res137", "res138", "res139", "res140", "res141", "res142", "res143", \
"res144", "res145", "res146", "res147", "res148", "res149", "res150", \
"res151", "res152", "res153", "res154", "res155", "res156", "res157", \
--
2.53.0