Currently, on every RLC register read or write, the driver checks three
different conditions to decide which of the two register read/write
functions to call.

As these register operations are macros, which is required for register
name expansion to work, the checks are expanded at every call site. The
result is a significant explosion of generated code which the compiler
cannot optimise away.

However, we know that all three conditions are static at runtime, so the
decision can be moved to driver init time. All that we need to do is
define a new vfunc table for the SOC15 RLC read/write functions and call
through it instead.

Bloat-o-meter agrees the driver size savings are significant:

add/remove: 9/31 grow/shrink: 74/1012 up/down: 39784/-381724 (-341940)
...
Total: Before=9421643, After=9079703, chg -3.63%

Signed-off-by: Tvrtko Ursulin <[email protected]>
Cc: Alex Deucher <[email protected]>
Cc: Christian König <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c    | 39 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h    | 10 ++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c     |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c     |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c     |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c    |  2 ++
 drivers/gpu/drm/amd/amdgpu/soc15_common.h  |  8 ++---
 10 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e69ab8a923e3..0d5837ca9e26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4458,6 +4458,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        mutex_init(&adev->gfx.workload_profile_mutex);
        mutex_init(&adev->vcn.workload_profile_mutex);
 
+       amdgpu_early_init_rlc_reg_funcs(adev);
        amdgpu_device_init_apu_flags(adev);
 
        r = amdgpu_device_check_arguments(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index 572a60e1b3cb..002fae3c380e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -583,3 +583,42 @@ int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
                amdgpu_gfx_rlc_init_microcode_v2_5(adev);
        return 0;
 }
+
+static const struct amdgpu_rlc_reg_funcs amdgpu_sriov_rlc_reg_funcs = {
+       .rreg32 = amdgpu_sriov_rreg,
+       .wreg32 = amdgpu_sriov_wreg,
+};
+
+static u32
+amdgpu_rlc_rreg(struct amdgpu_device *adev, u32 reg, u32 acc_flags, u32 hwip,
+               u32 xcc_id)
+{
+       return amdgpu_device_rreg(adev, reg, 0);
+}
+
+static void
+amdgpu_rlc_wreg(struct amdgpu_device *adev, u32 reg, u32 value, u32 acc_flags,
+               u32 hwip, u32 xcc_id)
+{
+       amdgpu_device_wreg(adev, reg, value, 0);
+}
+
+static const struct amdgpu_rlc_reg_funcs amdgpu_rlc_reg_funcs = {
+       .rreg32 = amdgpu_rlc_rreg,
+       .wreg32 = amdgpu_rlc_wreg,
+};
+
+void amdgpu_early_init_rlc_reg_funcs(struct amdgpu_device *adev)
+{
+       adev->gfx.rlc.reg_funcs = &amdgpu_rlc_reg_funcs;
+}
+
+void amdgpu_init_rlc_reg_funcs(struct amdgpu_device *adev)
+{
+       if (amdgpu_sriov_vf(adev) &&
+           adev->gfx.rlc.funcs &&
+           adev->gfx.rlc.rlcg_reg_access_supported)
+               adev->gfx.rlc.reg_funcs = &amdgpu_sriov_rlc_reg_funcs;
+       else
+               adev->gfx.rlc.reg_funcs = &amdgpu_rlc_reg_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index e535534237a1..959d60c90dcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -262,6 +262,11 @@ struct amdgpu_rlc_funcs {
        bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
 };
 
+struct amdgpu_rlc_reg_funcs {
+       u32  (*rreg32)(struct amdgpu_device *adev, u32 reg, u32 acc_flags, u32 
hwip, u32 xcc_id);
+       void (*wreg32)(struct amdgpu_device *adev, u32 reg, u32 val, u32 
acc_flags, u32 hwip, u32 xcc_id);
+};
+
 struct amdgpu_rlcg_reg_access_ctrl {
        uint32_t scratch_reg0;
        uint32_t scratch_reg1;
@@ -303,6 +308,7 @@ struct amdgpu_rlc {
        /* safe mode for updating CG/PG state */
        bool in_safe_mode[AMDGPU_MAX_RLC_INSTANCES];
        const struct amdgpu_rlc_funcs *funcs;
+       const struct amdgpu_rlc_reg_funcs *reg_funcs;
 
        /* for firmware data */
        u32 save_and_restore_offset;
@@ -374,4 +380,8 @@ void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev);
 int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
                                  uint16_t version_major,
                                  uint16_t version_minor);
+
+void amdgpu_early_init_rlc_reg_funcs(struct amdgpu_device *adev);
+void amdgpu_init_rlc_reg_funcs(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 1893ceeeb26c..83710f08e70f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7829,6 +7829,8 @@ static int gfx_v10_0_early_init(struct amdgpu_ip_block *ip_block)
        /* init rlcg reg access ctrl */
        gfx_v10_0_init_rlcg_reg_access_ctrl(adev);
 
+       amdgpu_init_rlc_reg_funcs(adev);
+
        return gfx_v10_0_init_microcode(adev);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 427975b5a1d9..12a39259a2b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -5273,6 +5273,8 @@ static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
 
        gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
 
+       amdgpu_init_rlc_reg_funcs(adev);
+
        return gfx_v11_0_init_microcode(adev);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 79ea1af363a5..998a008ecc8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -3879,6 +3879,8 @@ static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block)
 
        gfx_v12_0_init_rlcg_reg_access_ctrl(adev);
 
+       amdgpu_init_rlc_reg_funcs(adev);
+
        return gfx_v12_0_init_microcode(adev);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index eb9725ae1607..e048c975537d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -2882,6 +2882,8 @@ static int gfx_v12_1_early_init(struct amdgpu_ip_block *ip_block)
 
        gfx_v12_1_init_rlcg_reg_access_ctrl(adev);
 
+       amdgpu_init_rlc_reg_funcs(adev);
+
        return gfx_v12_1_init_microcode(adev);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 7e9d753f4a80..26786f71467b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4816,6 +4816,8 @@ static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
        /* init rlcg reg access ctrl */
        gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
 
+       amdgpu_init_rlc_reg_funcs(adev);
+
        return gfx_v9_0_init_microcode(adev);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index ad4d442e7345..2e17fc1157fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -2525,6 +2525,8 @@ static int gfx_v9_4_3_early_init(struct amdgpu_ip_block *ip_block)
        /* init rlcg reg access ctrl */
        gfx_v9_4_3_init_rlcg_reg_access_ctrl(adev);
 
+       amdgpu_init_rlc_reg_funcs(adev);
+
        return gfx_v9_4_3_init_microcode(adev);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 242b24f73c17..b7f928521f39 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -38,14 +38,10 @@
        (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)+(offset))
 
 #define __WREG32_SOC15_RLC__(reg, value, flag, hwip, inst) \
-       ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && 
adev->gfx.rlc.rlcg_reg_access_supported) ? \
-        amdgpu_sriov_wreg(adev, reg, value, flag, hwip, inst) : \
-        WREG32(reg, value))
+       adev->gfx.rlc.reg_funcs->wreg32(adev, reg, value, flag, hwip, inst)
 
 #define __RREG32_SOC15_RLC__(reg, flag, hwip, inst) \
-       ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && 
adev->gfx.rlc.rlcg_reg_access_supported) ? \
-        amdgpu_sriov_rreg(adev, reg, flag, hwip, inst) : \
-        RREG32(reg))
+       adev->gfx.rlc.reg_funcs->rreg32(adev, reg, flag, hwip, inst)
 
 #define WREG32_FIELD15(ip, idx, reg, field, val)       \
         
__WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + 
mm##reg,   \
-- 
2.52.0

Reply via email to