[PATCH 3/6] amdgpu/pm: modify Powerplay API get_power_limit to use new pp_power enums

2021-05-28 Thread Darren Powell
 updated {amd_pm_funcs}->get_power_limit() signature
 rewrote pp_get_power_limit to use new enums
 pp_get_power_limit now returns -EOPNOTSUPP for unknown power limit
 updated calls to {amd_pm_funcs}->get_power_limit()
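
 For illustration only, a hedged sketch of a caller using the reworked hook
 (names taken from this series; surrounding variables assumed):

    uint32_t limit;
    int err;

    /* query the maximum limit over the default sample window */
    err = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
                                    PP_PWR_LIMIT_MAX, PP_PWR_WINDOW_DEFAULT);
    if (err == -EOPNOTSUPP)
        ; /* the backend cannot report this limit level */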

* Test Notes
* testing hardware was NAVI10 (tests SMU path)
** needs testing on VANGOGH
** needs testing on SMU < 11
** i.e., one of
 TOPAZ, FIJI, TONGA, POLARIS10, POLARIS11, POLARIS12, VEGAM, CARRIZO,
 STONEY, VEGA10, VEGA12, VEGA20, RAVEN, BONAIRE, HAWAII

* Test
 AMDGPU_PCI_ADDR=`lspci -nn | grep "VGA\|Display" | cut -d " " -f 1`
 AMDGPU_HWMON=`ls -la /sys/class/hwmon | grep $AMDGPU_PCI_ADDR | cut -d " " -f 10`
 HWMON_DIR=/sys/class/hwmon/${AMDGPU_HWMON}

 lspci -nn | grep "VGA\|Display" ; \
 echo "=== power1 cap ===" ; cat $HWMON_DIR/power1_cap ;   \
 echo "=== power1 cap max ===" ; cat $HWMON_DIR/power1_cap_max ;   \
 echo "=== power1 cap def ===" ; cat $HWMON_DIR/power1_cap_default

Signed-off-by: Darren Powell 
---
 .../gpu/drm/amd/include/kgd_pp_interface.h|  5 +--
 drivers/gpu/drm/amd/pm/amdgpu_pm.c| 27 ---
 .../gpu/drm/amd/pm/powerplay/amd_powerplay.c  | 33 ---
 3 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index ddbf802ea8ad..369a72f03e92 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -305,8 +305,9 @@ struct amd_pm_funcs {
uint32_t block_type, bool gate);
int (*set_clockgating_by_smu)(void *handle, uint32_t msg_id);
int (*set_power_limit)(void *handle, uint32_t n);
-   int (*get_power_limit)(void *handle, uint32_t *limit, uint32_t *max_limit,
-   bool default_limit);
+   int (*get_power_limit)(void *handle, uint32_t *limit,
+   enum pp_power_limit_level pp_limit_level,
+   enum pp_power_sample_window sample_window);
int (*get_power_profile_mode)(void *handle, char *buf);
int (*set_power_profile_mode)(void *handle, long *input, uint32_t size);
int (*set_fine_grain_clk_vol)(void *handle, uint32_t type, long *input, uint32_t size);
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index f7b45803431d..0098c8b55bb4 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2718,8 +2718,8 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
struct amdgpu_device *adev = dev_get_drvdata(dev);
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
enum pp_power_sample_window sample_window = to_sensor_dev_attr(attr)->index;
+   enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_MAX;
uint32_t limit;
-   uint32_t max_limit = 0;
ssize_t size;
int r;
 
@@ -2735,12 +2735,13 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
}
 
if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit, PP_PWR_LIMIT_MAX, sample_window);
+   smu_get_power_limit(&adev->smu, &limit,
+   pp_limit_level, sample_window);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else if (pp_funcs && pp_funcs->get_power_limit) {
-   pp_funcs->get_power_limit(adev->powerplay.pp_handle,
-   &limit, &max_limit, true);
-   size = snprintf(buf, PAGE_SIZE, "%u\n", max_limit * 100);
+   pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
+ pp_limit_level, sample_window);
+   size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else {
size = snprintf(buf, PAGE_SIZE, "\n");
}
@@ -2758,6 +2759,7 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
struct amdgpu_device *adev = dev_get_drvdata(dev);
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
enum pp_power_sample_window sample_window = to_sensor_dev_attr(attr)->index;
+   enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_CURRENT;
uint32_t limit;
ssize_t size;
int r;
@@ -2774,11 +2776,12 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
}
 
if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit, PP_PWR_LIMIT_CURRENT, sample_window);
+   smu_get_power_limit(&adev->smu, &limit,
+   pp_limit_level, sample_window);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else if (pp_funcs && pp_funcs->get_power_limit) {
-   pp_funcs->get_power_limit(adev->powerplay.pp_handle,
-   &limit, NULL, false);
+   pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,

[PATCH 6/6] amdgpu/pm: add kernel documentation for smu_get_power_limit

2021-05-28 Thread Darren Powell
 added doc tag "amdgpu_pp_power" with description
 added tags for enums pp_power_limit_level, pp_power_sample_window
 added tag for function smu_get_power_limit

Test:
* Temporary insertion into Documentation/gpu/amdgpu.rst
START
Power Limit
-----------
.. kernel-doc:: drivers/gpu/drm/amd/include/kgd_pp_interface.h
   :doc: amdgpu_pp_power

.. kernel-doc:: drivers/gpu/drm/amd/include/kgd_pp_interface.h
   :identifiers: pp_power_limit_level

.. kernel-doc:: drivers/gpu/drm/amd/include/kgd_pp_interface.h
   :identifiers: pp_power_sample_window

.. kernel-doc:: drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
   :identifiers: smu_get_power_limit
-END-
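
* Note: assuming a working Sphinx environment (see the cover letter Setup),
  the build can be restricted to the gpu book with the standard SPHINXDIRS
  knob for faster iteration:

 make SPHINXDIRS="gpu" htmldocs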

Signed-off-by: Darren Powell 
---
 .../gpu/drm/amd/include/kgd_pp_interface.h| 30 ++-
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 +++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 369a72f03e92..46d2fc434e24 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -192,6 +192,26 @@ enum pp_df_cstate {
DF_CSTATE_ALLOW,
 };
 
+/**
+ * DOC: amdgpu_pp_power
+ *
+ * APU power is managed to system-level requirements through the PPT
+ * (package power tracking) feature. PPT is intended to limit power to the
+ * requirements of the power source and could be dynamically updated to
+ * maximize APU performance within the system power budget.
+ *
+ * Two windows of power measurement can be requested, where supported, with
+ * :c:type:`enum pp_power_sample_window <pp_power_sample_window>`.
+ */
+
+/**
+ * enum pp_power_limit_level - Used to query the power limits
+ *
+ * @PP_PWR_LIMIT_MIN: Minimum Power Limit
+ * @PP_PWR_LIMIT_CURRENT: Current Power Limit
+ * @PP_PWR_LIMIT_DEFAULT: Default Power Limit
+ * @PP_PWR_LIMIT_MAX: Maximum Power Limit
+ */
 enum pp_power_limit_level
 {
PP_PWR_LIMIT_MIN = -1,
@@ -200,7 +220,15 @@ enum pp_power_limit_level
PP_PWR_LIMIT_MAX,
 };
 
- enum pp_power_sample_window
+/**
+ * enum pp_power_sample_window - Used to specify the window size of the requested power
+ *
+ * @PP_PWR_WINDOW_DEFAULT: manages the configurable, thermally significant
+ * moving average of APU power (default ~5000 ms).
+ * @PP_PWR_WINDOW_FAST: manages the ~10 ms moving average of APU power,
+ * where supported.
+ */
+enum pp_power_sample_window
 {
PP_PWR_WINDOW_DEFAULT,
PP_PWR_WINDOW_FAST,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 5671abd58bcf..b7a9037a2dbc 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2166,6 +2166,16 @@ static int smu_set_fan_speed_rpm(void *handle, uint32_t speed)
return ret;
 }
 
+/**
+ * smu_get_power_limit - Request one of the SMU Power Limits
+ *
+ * @handle: pointer to smu context
+ * @limit: requested limit is written back to this variable
+ * @pp_limit_level: &pp_power_limit_level which power limit to return
+ * @sample_window: &pp_power_sample_window measurement window
+ * Return: 0 on success, <0 on error
+ *
+ */
 int smu_get_power_limit(void *handle,
uint32_t *limit,
enum pp_power_limit_level pp_limit_level,
-- 
2.25.1



[PATCH 2/6] amdgpu/pm: clean up smu_get_power_limit function signature

2021-05-28 Thread Darren Powell
 add two new powerplay enums (limit_level, sample_window)
 add enums to smu_get_power_limit signature
 remove input bitfield stuffing of output variable limit
 update calls to smu_get_power_limit
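
 For illustration, a hedged before/after sketch of one call site (taken from
 the hunks below; the old hwmon code pre-seeded the output variable with the
 limit type):

    /* before: output variable doubles as input carrying the limit type */
    uint32_t limit = limit_type << 24;
    smu_get_power_limit(&adev->smu, &limit, SMU_PPT_LIMIT_CURRENT);

    /* after: plain output variable, selection via the two new enums */
    uint32_t limit;
    smu_get_power_limit(&adev->smu, &limit, PP_PWR_LIMIT_CURRENT,
                        sample_window);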

* Test
 AMDGPU_PCI_ADDR=`lspci -nn | grep "VGA\|Display" | cut -d " " -f 1`
 AMDGPU_HWMON=`ls -la /sys/class/hwmon | grep $AMDGPU_PCI_ADDR | cut -d " " -f 10`
 HWMON_DIR=/sys/class/hwmon/${AMDGPU_HWMON}

 lspci -nn | grep "VGA\|Display" ; \
 echo "=== power1 cap ===" ; cat $HWMON_DIR/power1_cap ;   \
 echo "=== power1 cap max ===" ; cat $HWMON_DIR/power1_cap_max ;   \
 echo "=== power1 cap def ===" ; cat $HWMON_DIR/power1_cap_default

Signed-off-by: Darren Powell 
---
 .../gpu/drm/amd/include/kgd_pp_interface.h| 14 
 drivers/gpu/drm/amd/pm/amdgpu_pm.c| 18 +-
 drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h   |  3 +-
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 34 +--
 4 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index b1cd52a9d684..ddbf802ea8ad 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -192,6 +192,20 @@ enum pp_df_cstate {
DF_CSTATE_ALLOW,
 };
 
+enum pp_power_limit_level
+{
+   PP_PWR_LIMIT_MIN = -1,
+   PP_PWR_LIMIT_CURRENT,
+   PP_PWR_LIMIT_DEFAULT,
+   PP_PWR_LIMIT_MAX,
+};
+
+ enum pp_power_sample_window
+{
+   PP_PWR_WINDOW_DEFAULT,
+   PP_PWR_WINDOW_FAST,
+};
+
 #define PP_GROUP_MASK0xF000
 #define PP_GROUP_SHIFT   28
 
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 13da377888d2..f7b45803431d 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2717,8 +2717,8 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
 {
struct amdgpu_device *adev = dev_get_drvdata(dev);
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-   int limit_type = to_sensor_dev_attr(attr)->index;
-   uint32_t limit = limit_type << 24;
+   enum pp_power_sample_window sample_window = to_sensor_dev_attr(attr)->index;
+   uint32_t limit;
uint32_t max_limit = 0;
ssize_t size;
int r;
@@ -2735,7 +2735,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
}
 
if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit, SMU_PPT_LIMIT_MAX);
+   smu_get_power_limit(&adev->smu, &limit, PP_PWR_LIMIT_MAX, sample_window);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else if (pp_funcs && pp_funcs->get_power_limit) {
pp_funcs->get_power_limit(adev->powerplay.pp_handle,
@@ -2757,8 +2757,8 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
 {
struct amdgpu_device *adev = dev_get_drvdata(dev);
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-   int limit_type = to_sensor_dev_attr(attr)->index;
-   uint32_t limit = limit_type << 24;
+   enum pp_power_sample_window sample_window = to_sensor_dev_attr(attr)->index;
+   uint32_t limit;
ssize_t size;
int r;
 
@@ -2774,7 +2774,7 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
}
 
if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit, SMU_PPT_LIMIT_CURRENT);
+   smu_get_power_limit(&adev->smu, &limit, PP_PWR_LIMIT_CURRENT, sample_window);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else if (pp_funcs && pp_funcs->get_power_limit) {
pp_funcs->get_power_limit(adev->powerplay.pp_handle,
@@ -2796,8 +2796,8 @@ static ssize_t amdgpu_hwmon_show_power_cap_default(struct device *dev,
 {
struct amdgpu_device *adev = dev_get_drvdata(dev);
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-   int limit_type = to_sensor_dev_attr(attr)->index;
-   uint32_t limit = limit_type << 24;
+   enum pp_power_sample_window sample_window = to_sensor_dev_attr(attr)->index;
+   uint32_t limit;
ssize_t size;
int r;
 
@@ -2813,7 +2813,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_default(struct device *dev,
}
 
if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit, SMU_PPT_LIMIT_DEFAULT);
+   smu_get_power_limit(&adev->smu, &limit, PP_PWR_LIMIT_DEFAULT, sample_window);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
} else if (pp_funcs && pp_funcs->get_power_limit) {
pp_funcs->get_power_limit(adev->powerplay.pp_handle,
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
index 523f9d2982e9..b97b960c2eac 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
+++ 

[PATCH 5/6] amdgpu/pm: handle return value for get_power_limit

2021-05-28 Thread Darren Powell
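
 The hwmon show functions now propagate the hook's return value instead of
 unconditionally printing a possibly stale value; a hedged condensation of
 the pattern applied below:

    if (pp_funcs && pp_funcs->get_power_limit)
        r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
                                      pp_limit_level, sample_window);
    else
        r = -ENODATA;

    size = !r ? snprintf(buf, PAGE_SIZE, "%u\n", limit * 100)
              : snprintf(buf, PAGE_SIZE, "\n");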
Signed-off-by: Darren Powell 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 39 ++
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 44715848705a..aa138abe6e1d 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2734,13 +2734,16 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
return r;
}
 
-   if (pp_funcs && pp_funcs->get_power_limit) {
-   pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
- pp_limit_level, sample_window);
+   if (pp_funcs && pp_funcs->get_power_limit)
+   r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
+ pp_limit_level, sample_window);
+   else
+   r = -ENODATA;
+
+   if (!r)
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
-   } else {
+   else
size = snprintf(buf, PAGE_SIZE, "\n");
-   }
 
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -2771,13 +2774,16 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
return r;
}
 
-   if (pp_funcs && pp_funcs->get_power_limit) {
-   pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
- pp_limit_level, sample_window);
+   if (pp_funcs && pp_funcs->get_power_limit)
+   r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
+ pp_limit_level, sample_window);
+   else
+   r = -ENODATA;
+
+   if (!r)
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
-   } else {
+   else
size = snprintf(buf, PAGE_SIZE, "\n");
-   }
 
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -2808,13 +2814,16 @@ static ssize_t amdgpu_hwmon_show_power_cap_default(struct device *dev,
return r;
}
 
-   if (pp_funcs && pp_funcs->get_power_limit) {
-   pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
- pp_limit_level, sample_window);
+   if (pp_funcs && pp_funcs->get_power_limit)
+   r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
+ pp_limit_level, sample_window);
+   else
+   r = -ENODATA;
+
+   if (!r)
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
-   } else {
+   else
size = snprintf(buf, PAGE_SIZE, "\n");
-   }
 
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-- 
2.25.1



[PATCH 1/6] amdgpu/pm: reorder definition of swsmu_pm_funcs for readability

2021-05-28 Thread Darren Powell
Match the order of the definitions to the structure's declaration to
help locate which functions of the API are implemented and which are missing.

Signed-off-by: Darren Powell 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 48 +++
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 285849cef9f2..8aff67a667fa 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2962,6 +2962,8 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
.get_fan_control_mode= smu_get_fan_control_mode,
.set_fan_speed_percent   = smu_set_fan_speed_percent,
.get_fan_speed_percent   = smu_get_fan_speed_percent,
+   .force_clock_level   = smu_force_ppclk_levels,
+   .print_clock_levels  = smu_print_ppclk_levels,
.force_performance_level = smu_force_performance_level,
.read_sensor = smu_read_sensor,
.get_performance_level   = smu_get_performance_level,
@@ -2974,38 +2976,36 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
.switch_power_profile= smu_switch_power_profile,
/* export to amdgpu */
.dispatch_tasks  = smu_handle_dpm_task,
+   .load_firmware   = smu_load_microcode,
.set_powergating_by_smu  = smu_dpm_set_power_gate,
.set_power_limit = smu_set_power_limit,
+   .get_power_profile_mode  = smu_get_power_profile_mode,
+   .set_power_profile_mode  = smu_set_power_profile_mode,
.odn_edit_dpm_table  = smu_od_edit_dpm_table,
.set_mp1_state   = smu_set_mp1_state,
+   .gfx_state_change_set= smu_gfx_state_change_set,
/* export to DC */
-   .get_sclk= smu_get_sclk,
-   .get_mclk= smu_get_mclk,
-   .enable_mgpu_fan_boost   = smu_enable_mgpu_fan_boost,
-   .get_asic_baco_capability = smu_get_baco_capability,
-   .set_asic_baco_state = smu_baco_set_state,
-   .get_ppfeature_status= smu_sys_get_pp_feature_mask,
-   .set_ppfeature_status= smu_sys_set_pp_feature_mask,
-   .asic_reset_mode_2   = smu_mode2_reset,
-   .set_df_cstate   = smu_set_df_cstate,
-   .set_xgmi_pstate = smu_set_xgmi_pstate,
-   .get_gpu_metrics = smu_sys_get_gpu_metrics,
-   .set_power_profile_mode  = smu_set_power_profile_mode,
-   .get_power_profile_mode  = smu_get_power_profile_mode,
-   .force_clock_level   = smu_force_ppclk_levels,
-   .print_clock_levels  = smu_print_ppclk_levels,
-   .get_uclk_dpm_states = smu_get_uclk_dpm_states,
-   .get_dpm_clock_table = smu_get_dpm_clock_table,
-   .display_configuration_change= smu_display_configuration_change,
-   .get_clock_by_type_with_latency  = smu_get_clock_by_type_with_latency,
-   .display_clock_voltage_request   = smu_display_clock_voltage_request,
-   .set_active_display_count= smu_set_display_count,
-   .set_min_deep_sleep_dcefclk  = smu_set_deep_sleep_dcefclk,
+   .get_sclk = smu_get_sclk,
+   .get_mclk = smu_get_mclk,
+   .display_configuration_change = smu_display_configuration_change,
+   .get_clock_by_type_with_latency   = smu_get_clock_by_type_with_latency,
+   .display_clock_voltage_request= smu_display_clock_voltage_request,
+   .enable_mgpu_fan_boost= smu_enable_mgpu_fan_boost,
+   .set_active_display_count = smu_set_display_count,
+   .set_min_deep_sleep_dcefclk   = smu_set_deep_sleep_dcefclk,
+   .get_asic_baco_capability = smu_get_baco_capability,
+   .set_asic_baco_state  = smu_baco_set_state,
+   .get_ppfeature_status = smu_sys_get_pp_feature_mask,
+   .set_ppfeature_status = smu_sys_set_pp_feature_mask,
+   .asic_reset_mode_2= smu_mode2_reset,
+   .set_df_cstate= smu_set_df_cstate,
+   .set_xgmi_pstate  = smu_set_xgmi_pstate,
+   .get_gpu_metrics  = smu_sys_get_gpu_metrics,
.set_watermarks_for_clock_ranges = smu_set_watermarks_for_clock_ranges,
.display_disable_memory_clock_switch = smu_display_disable_memory_clock_switch,
.get_max_sustainable_clocks_by_dc= smu_get_max_sustainable_clocks_by_dc,
-   .load_firmware   = smu_load_microcode,
-   .gfx_state_change_set= smu_gfx_state_change_set,
+   .get_uclk_dpm_states  = smu_get_uclk_dpm_states,
+   .get_dpm_clock_table  = smu_get_dpm_clock_table,
.get_smu_prv_buf_details = smu_get_prv_buffer_details,
 };
 
-- 
2.25.1


[PATCH 4/6] amdgpu/pm: modify and add smu_get_power_limit to Powerplay API

2021-05-28 Thread Darren Powell
 modify args of smu_get_power_limit to match Powerplay API .get_power_limit
 add smu_get_power_limit to Powerplay API swsmu_pm_funcs
 remove special handling of smu in amdgpu_hwmon_show_power_cap*
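
 A hedged sketch of the resulting indirection (condensed from the hunks
 below): the hook now receives an opaque handle, which the swsmu backend
 casts back to its own context, so every consumer can go through the common
 Powerplay function table:

    int smu_get_power_limit(void *handle, uint32_t *limit,
                            enum pp_power_limit_level pp_limit_level,
                            enum pp_power_sample_window sample_window)
    {
        struct smu_context *smu = handle;
        /* ... */
    }

    static const struct amd_pm_funcs swsmu_pm_funcs = {
        /* ... */
        .get_power_limit = smu_get_power_limit,
    };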

* Test
 AMDGPU_PCI_ADDR=`lspci -nn | grep "VGA\|Display" | cut -d " " -f 1`
 AMDGPU_HWMON=`ls -la /sys/class/hwmon | grep $AMDGPU_PCI_ADDR | cut -d " " -f 10`
 HWMON_DIR=/sys/class/hwmon/${AMDGPU_HWMON}

 lspci -nn | grep "VGA\|Display" ; \
 echo "=== power1 cap ===" ; cat $HWMON_DIR/power1_cap ;   \
 echo "=== power1 cap max ===" ; cat $HWMON_DIR/power1_cap_max ;   \
 echo "=== power1 cap def ===" ; cat $HWMON_DIR/power1_cap_default

Signed-off-by: Darren Powell 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c| 18 +++---
 drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h   |  2 +-
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  4 +++-
 3 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 0098c8b55bb4..44715848705a 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2734,11 +2734,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
return r;
}
 
-   if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit,
-   pp_limit_level, sample_window);
-   size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
-   } else if (pp_funcs && pp_funcs->get_power_limit) {
+   if (pp_funcs && pp_funcs->get_power_limit) {
pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
  pp_limit_level, sample_window);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
@@ -2775,11 +2771,7 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
return r;
}
 
-   if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit,
-   pp_limit_level, sample_window);
-   size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
-   } else if (pp_funcs && pp_funcs->get_power_limit) {
+   if (pp_funcs && pp_funcs->get_power_limit) {
pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
  pp_limit_level, sample_window);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
@@ -2816,11 +2808,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_default(struct device *dev,
return r;
}
 
-   if (is_support_sw_smu(adev)) {
-   smu_get_power_limit(&adev->smu, &limit,
-   pp_limit_level, sample_window);
-   size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
-   } else if (pp_funcs && pp_funcs->get_power_limit) {
+   if (pp_funcs && pp_funcs->get_power_limit) {
pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
  pp_limit_level, sample_window);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 100);
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
index b97b960c2eac..9636a023387f 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
@@ -1260,7 +1260,7 @@ enum smu_cmn2asic_mapping_type {
[profile] = {1, (workload)}
 
#if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4)
-int smu_get_power_limit(struct smu_context *smu,
+int smu_get_power_limit(void *handle,
uint32_t *limit,
enum pp_power_limit_level pp_limit_level,
enum pp_power_sample_window sample_window);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 44c1baa2748d..5671abd58bcf 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2166,11 +2166,12 @@ static int smu_set_fan_speed_rpm(void *handle, uint32_t speed)
return ret;
 }
 
-int smu_get_power_limit(struct smu_context *smu,
+int smu_get_power_limit(void *handle,
uint32_t *limit,
enum pp_power_limit_level pp_limit_level,
enum pp_power_sample_window sample_window)
 {
+   struct smu_context *smu = handle;
enum smu_ppt_limit_level limit_level;
uint32_t limit_type;
int ret = 0;
@@ -3009,6 +3010,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
.load_firmware   = smu_load_microcode,
.set_powergating_by_smu  = smu_dpm_set_power_gate,
.set_power_limit = smu_set_power_limit,
+   .get_power_limit = smu_get_power_limit,
.get_power_profile_mode  = smu_get_power_profile_mode,

[PATCH v2 1/6] Modify smu_get_power_limit to implement Powerplay API

2021-05-28 Thread Darren Powell
=== Description ===
modify smu_get_power_limit to implement Powerplay API

 v2: rewrote the patchset to use two enums as args to get_power

=== Test System ===
* DESKTOP(AMD FX-8350 + NAVI10(731F/ca), BIOS: F2)
 + ISO(Ubuntu 20.04.1 LTS)
 + Kernel(5.11.0-custom-fdoagd5f)

=== Patch Summary ===
   linux: (git@gitlab.freedesktop.org:agd5f) origin/amd-staging-drm-next @ 3ac16cf10525
+ 212a8ab5269d amdgpu/pm: reorder definition of swsmu_pm_funcs for readability
+ 50adb18c2670 amdgpu/pm: clean up smu_get_power_limit function signature
+ ab31cfcad254 amdgpu/pm: modify Powerplay API get_power_limit to use new pp_power enums
+ a5e2a4209a3c amdgpu/pm: modify and add smu_get_power_limit to Powerplay API
+ 6b732f665a9c amdgpu/pm: handle return value for get_power_limit
+ c1e3e0963996 amdgpu/pm: add kernel documentation for smu_get_power_limit

=== Tests ===
 get_power_limit Test 
* Test 
 AMDGPU_PCI_ADDR=`lspci -nn | grep "VGA\|Display" | cut -d " " -f 1`
 AMDGPU_HWMON=`ls -la /sys/class/hwmon | grep $AMDGPU_PCI_ADDR | cut -d " " -f 
10`
 HWMON_DIR=/sys/class/hwmon/${AMDGPU_HWMON}

 lspci -nn | grep "VGA\|Display" ; \
 echo "=== power1 cap ===" ; cat $HWMON_DIR/power1_cap ;   \
 echo "=== power1 cap max ===" ; cat $HWMON_DIR/power1_cap_max ;   \
 echo "=== power1 cap def ===" ; cat $HWMON_DIR/power1_cap_default

 Documentation Test 
* Insert temp documentation
** Documentation/gpu/amdgpu.rst
 vi Documentation/gpu/amdgpu.rst
** added text to start
START
Documentation Testing
=====================

Power Limit
-----------
.. kernel-doc:: drivers/gpu/drm/amd/include/kgd_pp_interface.h
   :doc: amdgpu_pp_power

.. kernel-doc:: drivers/gpu/drm/amd/include/kgd_pp_interface.h
   :identifiers: pp_power_limit_level

.. kernel-doc:: drivers/gpu/drm/amd/include/kgd_pp_interface.h
   :identifiers: pp_power_sample_window

.. kernel-doc:: drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
   :identifiers: smu_get_power_limit
-END-

* Setup
 cd ~/workspace/linux
 . sphinx_2.4.4/bin/activate

* Build
 export SPHINXDOCLOG=sphinx.build.log
 cp $SPHINXDOCLOG{,.old}
 time make -j 8 htmldocs |& tee $SPHINXDOCLOG

* View
 firefox file:///home/dapowell/workspace/linux/Documentation/output/gpu/amdgpu.html

Darren Powell (6):
  amdgpu/pm: reorder definition of swsmu_pm_funcs for readability
  amdgpu/pm: clean up smu_get_power_limit function signature
  amdgpu/pm: modify Powerplay API get_power_limit to use new pp_power
enums
  amdgpu/pm: modify and add smu_get_power_limit to Powerplay API
  amdgpu/pm: handle return value for get_power_limit
  amdgpu/pm: add kernel documentation for smu_get_power_limit

 .../gpu/drm/amd/include/kgd_pp_interface.h| 47 -
 drivers/gpu/drm/amd/pm/amdgpu_pm.c| 64 +++--
 drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h   |  5 +-
 .../gpu/drm/amd/pm/powerplay/amd_powerplay.c  | 33 ---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 96 +--
 5 files changed, 172 insertions(+), 73 deletions(-)


base-commit: 3ac16cf105253e17c4e63d4216bd4012cd5b3145
-- 
2.25.1



[PATCH 2/3] drm/amdgpu: fix a bug for flag table_freed

2021-05-28 Thread Eric Huang
table_freed will always be true when mapping memory larger than 2MB.
Checking whether a valid PDE entry is being turned into a PTE entry
resolves the issue.
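
A hedged illustration of the fix (condensed from the hunks below): rather
than flagging table_freed whenever the huge-page path walks above the PTB
level, the flag is now set only when an entry about to be overwritten was
already valid:

    vm->update_funcs->get_pt_entry(pt, pe_start + (i * 8), &value);
    if (value & AMDGPU_PTE_VALID)
        params->table_freed = true; /* a live PDE became a PTE */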

Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0dee2e8797c7..b1bdc89cb5d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1553,6 +1553,21 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
 */
nptes = max(nptes, 1u);
 
+   /* Fix a page fault in a corner case of
+* turning valid PDE entry to PTE entry
+* for huge page mapping
+*/
+   if (cursor.level < AMDGPU_VM_PTB) {
+   int i;
+   for (i = 0; i < nptes; i++) {
+   uint64_t value = 0;
+   vm->update_funcs->get_pt_entry(pt,
+   pe_start + (i * 8), &value);
+   if (value & AMDGPU_PTE_VALID)
+   params->table_freed = true;
+   }
+   }
+
trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
nptes, dst, incr, upd_flags,
vm->task_info.pid,
@@ -1584,7 +1599,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
while (cursor.pfn < frag_start) {
amdgpu_vm_free_pts(adev, params->vm, &cursor);
amdgpu_vm_pt_next(adev, &cursor);
-   params->table_freed = true;
}
 
} else if (frag >= shift) {
-- 
2.25.1



[PATCH 1/3] drm/amdgpu: add a function to get vm page table entry

2021-05-28 Thread Eric Huang
It is for large BAR/XGMI, which has a CPU update function;
for small BAR, which has an SDMA update function, it will be done
later.
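
A hedged note on the callback contract (as implemented for the CPU path
below): it waits for any pending move fence, then reads the entry through
the kernel mapping of the page-table BO, e.g.

    uint64_t value;
    int r = vm->update_funcs->get_pt_entry(bo, pe, &value);
    if (r)
        ; /* interrupted while waiting on the move fence; entry not read */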

Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  | 21 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 10 +-
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 67bba8462e7d..8a23de0e0abc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -247,6 +247,7 @@ struct amdgpu_vm_update_funcs {
  unsigned count, uint32_t incr, uint64_t flags);
int (*commit)(struct amdgpu_vm_update_params *p,
  struct dma_fence **fence);
+   int (*get_pt_entry)(struct amdgpu_bo *bo, uint64_t pe, uint64_t *value);
 };
 
 struct amdgpu_vm {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index aefb3d2927d5..8a68a5c6326c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -116,9 +116,28 @@ static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p,
return 0;
 }
 
+static int amdgpu_vm_get_pt_entry(struct amdgpu_bo *bo,
+   uint64_t pe, uint64_t *value)
+{
+   int r;
+
+   if (bo->tbo.moving) {
+   r = dma_fence_wait(bo->tbo.moving, true);
+   if (r)
+   return r;
+   }
+
+   pe += (unsigned long)amdgpu_bo_kptr(bo);
+
+   *value = *((uint64_t *)pe);
+
+   return 0;
+}
+
 const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs = {
.map_table = amdgpu_vm_cpu_map_table,
.prepare = amdgpu_vm_cpu_prepare,
.update = amdgpu_vm_cpu_update,
-   .commit = amdgpu_vm_cpu_commit
+   .commit = amdgpu_vm_cpu_commit,
+   .get_pt_entry = amdgpu_vm_get_pt_entry
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index a83a646759c5..d8871fbddc76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -274,9 +274,17 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
return 0;
 }
 
+static int amdgpu_vm_sdma_get_pt_entry(struct amdgpu_bo *bo,
+   uint64_t pe, uint64_t *value)
+{
+   /* TODO */
+   return 0;
+}
+
 const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = {
.map_table = amdgpu_vm_sdma_map_table,
.prepare = amdgpu_vm_sdma_prepare,
.update = amdgpu_vm_sdma_update,
-   .commit = amdgpu_vm_sdma_commit
+   .commit = amdgpu_vm_sdma_commit,
+   .get_pt_entry = amdgpu_vm_sdma_get_pt_entry
 };
-- 
2.25.1



[PATCH 3/3] drm/amdkfd: optimize memory mapping latency

2021-05-28 Thread Eric Huang
1. conditionally flush TLBs after map.
2. add heavy-weight TLB flushing after unmap.
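
A hedged summary of the new policy (condensed from the diff; flush types as
introduced by this series):

    /* map: flush only when page-table entries were actually freed */
    if (((struct kgd_mem *)mem)->table_freed)
        kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);

    /* unmap: always flush, heavy-weight */
    kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);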

Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  1 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 24 +++
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  6 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  4 ++--
 8 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 2560977760b3..997258c24ef2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -85,6 +85,7 @@ struct kgd_mem {
 
bool aql_queue;
bool is_imported;
+   bool table_freed;
 };
 
 /* KFD Memory Eviction */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 385c33675227..8ac0d849fd3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1132,6 +1132,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
}
 
+   mem->table_freed = bo_va->table_freed;
+
return amdgpu_sync_fence(sync, bo_va->last_pt_update);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 36e7f088d4ee..0e0f27f779cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -87,6 +87,7 @@ struct amdgpu_bo_va {
boolcleared;
 
boolis_xgmi;
+   booltable_freed;
 };
 
 struct amdgpu_bo {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b1bdc89cb5d1..57793483c8d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1897,7 +1897,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
resv, mapping->start,
mapping->last, update_flags,
mapping->offset, mem,
-   pages_addr, last_update, NULL,
+   pages_addr, last_update, &bo_va->table_freed,
vram_base_offset);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 960913a35ee4..c45ccd1d03c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1658,16 +1658,18 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
}
 
/* Flush TLBs after waiting for the page table updates to complete */
-   for (i = 0; i < args->n_devices; i++) {
-   peer = kfd_device_by_id(devices_arr[i]);
-   if (WARN_ON_ONCE(!peer))
-   continue;
-   peer_pdd = kfd_get_process_device_data(peer, p);
-   if (WARN_ON_ONCE(!peer_pdd))
-   continue;
-   if (!amdgpu_read_lock(peer->ddev, true)) {
-   kfd_flush_tlb(peer_pdd);
-   amdgpu_read_unlock(peer->ddev);
+   if (((struct kgd_mem *)mem)->table_freed) {
+   for (i = 0; i < args->n_devices; i++) {
+   peer = kfd_device_by_id(devices_arr[i]);
+   if (WARN_ON_ONCE(!peer))
+   continue;
+   peer_pdd = kfd_get_process_device_data(peer, p);
+   if (WARN_ON_ONCE(!peer_pdd))
+   continue;
+   if (!amdgpu_read_lock(peer->ddev, true)) {
+   kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
+   amdgpu_read_unlock(peer->ddev);
+   }
}
}
 
@@ -1766,6 +1768,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
amdgpu_read_unlock(peer->ddev);
goto unmap_memory_from_gpu_failed;
}
+   ((struct kgd_mem *)mem)->table_freed = false;
+   kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
amdgpu_read_unlock(peer->ddev);
args->n_success = i+1;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c1bea1f7627b..a4920bc5cfbc 

[PATCH v2 8/8] drm/amd/pm: Add aldebaran throttler translation

2021-05-28 Thread Graham Sider
Perform dependent-to-independent throttle status translation
for aldebaran.
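
A hedged sketch of consuming the new field (bit names from patch 2/8; the
indep_throttle_status field is added to gpu_metrics by this series):

    uint64_t indep = gpu_metrics->indep_throttle_status;

    if (indep & (1ULL << SMU_THROTTLER_PPT0_BIT))
        ; /* package power tracking limit 0 is throttling */
    if (indep & (1ULL << SMU_THROTTLER_TEMP_MEM_BIT))
        ; /* memory temperature limit is throttling */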

Signed-off-by: Graham Sider 
---
 .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c| 46 +--
 1 file changed, 41 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index 7a1abb3d6a7a..a83f1c4673aa 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -213,7 +213,7 @@ static int aldebaran_tables_init(struct smu_context *smu)
return -ENOMEM;
smu_table->metrics_time = 0;
 
-   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_2);
+   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_3);
smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
if (!smu_table->gpu_metrics_table) {
kfree(smu_table->metrics_table);
@@ -517,6 +517,40 @@ static int aldebaran_freqs_in_same_level(int32_t frequency1,
return (abs(frequency1 - frequency2) <= EPSILON);
 }
 
+static uint64_t aldebaran_get_indep_throttler_status(
+   uint32_t dep_status)
+{
+   if (dep_status == 0)
+   return 0;
+
+   uint64_t indep_status = 0;
+
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT0_BIT, THROTTLER_PPT0_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT1_BIT, THROTTLER_PPT1_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_GFX_BIT, THROTTLER_TDC_GFX_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_SOC_BIT, THROTTLER_TDC_SOC_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_MEM_BIT, THROTTLER_TDC_HBM_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_GPU_BIT, THROTTLER_TEMP_GPU_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_MEM_BIT, THROTTLER_TEMP_MEM_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_GFX_BIT, THROTTLER_TEMP_VR_GFX_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_SOC_BIT, THROTTLER_TEMP_VR_SOC_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_MEM_BIT, THROTTLER_TEMP_VR_MEM_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_APCC_BIT, THROTTLER_APCC_BIT);
+
+   return indep_status;
+}
+
 static int aldebaran_get_smu_metrics_data(struct smu_context *smu,
  MetricsMember_t member,
  uint32_t *value)
@@ -1713,8 +1747,8 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
 void **table)
 {
struct smu_table_context *smu_table = &smu->smu_table;
-   struct gpu_metrics_v1_2 *gpu_metrics =
-   (struct gpu_metrics_v1_2 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_3 *gpu_metrics =
+   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
SmuMetrics_t metrics;
int i, ret = 0;
 
@@ -1724,7 +1758,7 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
if (ret)
return ret;
 
-   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 2);
+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
 
gpu_metrics->temperature_edge = metrics.TemperatureEdge;
gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
@@ -1755,6 +1789,8 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
 
gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+   gpu_metrics->indep_throttle_status =
+   aldebaran_get_indep_throttler_status(metrics.ThrottlerStatus);
 
gpu_metrics->current_fan_speed = 0;
 
@@ -1776,7 +1812,7 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
 
*table = (void *)gpu_metrics;
 
-   return sizeof(struct gpu_metrics_v1_2);
+   return sizeof(struct gpu_metrics_v1_3);
 }
 
 static int aldebaran_mode2_reset(struct smu_context *smu)
-- 
2.17.1



[PATCH v2 2/8] drm/amd/pm: Add ASIC independent throttle bits

2021-05-28 Thread Graham Sider
Add new defines for thermal throttle status bits which are ASIC
independent. This bit field will be visible to userspace via
gpu_metrics alongside the previous ASIC-dependent bit fields. It is
separated into four 16-bit "types": power throttlers, current
throttlers, temperature, and other. Also added extra defines new to v2
applicable to renoir, namely the PROCHOT and EDC bits.

Defined the macro smu_u64_throttler_bit, used instead of the previous
revision's __assign_bit + test_bit because the field was upgraded to
64 bits; a worked example follows.
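
A hedged worked example of the macro (bit positions illustrative only): it
extracts dependent bit DEP_BIT from dep and re-emits it at INDEP_BIT of a
64-bit word. With dep_status = BIT(5) and a mapping of dependent bit 5 to
independent bit 40:

    smu_u64_throttler_bit(dep_status, 40, 5)
        == (1ULL & (dep_status >> 5)) << 40
        == 1ULL << 40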

Signed-off-by: Graham Sider 
---
 drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h | 42 +
 1 file changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
index 523f9d2982e9..86aa699f6c0c 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
@@ -35,6 +35,48 @@
 
 #define SMU_DPM_USER_PROFILE_RESTORE (1 << 0)
 
+// Power Throttlers
+#define SMU_THROTTLER_PPT0_BIT 0
+#define SMU_THROTTLER_PPT1_BIT 1
+#define SMU_THROTTLER_PPT2_BIT 2
+#define SMU_THROTTLER_PPT3_BIT 3
+#define SMU_THROTTLER_SPL_BIT  4
+#define SMU_THROTTLER_FPPT_BIT 5
+#define SMU_THROTTLER_SPPT_BIT 6
+#define SMU_THROTTLER_SPPT_APU_BIT 7
+
+// Current Throttlers
+#define SMU_THROTTLER_TDC_GFX_BIT  16
+#define SMU_THROTTLER_TDC_SOC_BIT  17
+#define SMU_THROTTLER_TDC_MEM_BIT  18
+#define SMU_THROTTLER_TDC_VDD_BIT  19
+#define SMU_THROTTLER_TDC_CVIP_BIT 20
+#define SMU_THROTTLER_EDC_CPU_BIT  21
+#define SMU_THROTTLER_EDC_GFX_BIT  22
+#define SMU_THROTTLER_APCC_BIT 23
+
+// Temperature
+#define SMU_THROTTLER_TEMP_GPU_BIT 32
+#define SMU_THROTTLER_TEMP_CORE_BIT33
+#define SMU_THROTTLER_TEMP_MEM_BIT 34
+#define SMU_THROTTLER_TEMP_EDGE_BIT35
+#define SMU_THROTTLER_TEMP_HOTSPOT_BIT 36
+#define SMU_THROTTLER_TEMP_VR_GFX_BIT  37
+#define SMU_THROTTLER_TEMP_VR_SOC_BIT  38
+#define SMU_THROTTLER_TEMP_VR_MEM_BIT  39
+#define SMU_THROTTLER_TEMP_LIQUID_BIT  40
+#define SMU_THROTTLER_VRHOT0_BIT   41
+#define SMU_THROTTLER_VRHOT1_BIT   42
+#define SMU_THROTTLER_PROCHOT_CPU_BIT  43
+#define SMU_THROTTLER_PROCHOT_GFX_BIT  44
+
+// Other
+#define SMU_THROTTLER_PPM_BIT  48
+#define SMU_THROTTLER_FIT_BIT  49
+
+#define smu_u64_throttler_bit(dep, INDEP_BIT, DEP_BIT) \
+   ((1ULL & (dep >> DEP_BIT)) << INDEP_BIT)
+
 struct smu_hw_power_state {
unsigned int magic;
 };
-- 
2.17.1



[PATCH v2 7/8] drm/amd/pm: Add renoir throttler translation

2021-05-28 Thread Graham Sider
Perform dependent-to-independent throttle status translation
for renoir.

Signed-off-by: Graham Sider 
---
 .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c   | 50 +--
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
index 1c399c4ab4dc..50c03fc413de 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
@@ -153,7 +153,7 @@ static int renoir_init_smc_tables(struct smu_context *smu)
if (!smu_table->watermarks_table)
goto err2_out;
 
-   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_1);
+   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2);
smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
if (!smu_table->gpu_metrics_table)
goto err3_out;
@@ -170,6 +170,44 @@ static int renoir_init_smc_tables(struct smu_context *smu)
return -ENOMEM;
 }
 
+static uint64_t renoir_get_indep_throttler_status(
+   uint32_t dep_status)
+{
+   if (dep_status == 0)
+   return 0;
+
+   uint64_t indep_status = 0;
+
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_SPL_BIT, THROTTLER_STATUS_BIT_SPL);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_FPPT_BIT, THROTTLER_STATUS_BIT_FPPT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_SPPT_BIT, THROTTLER_STATUS_BIT_SPPT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_SPPT_APU_BIT, THROTTLER_STATUS_BIT_SPPT_APU);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_CORE_BIT, THROTTLER_STATUS_BIT_THM_CORE);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_GFX_BIT, THROTTLER_STATUS_BIT_THM_GFX);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_SOC_BIT, THROTTLER_STATUS_BIT_THM_SOC);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_VDD_BIT, THROTTLER_STATUS_BIT_TDC_VDD);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_SOC_BIT, THROTTLER_STATUS_BIT_TDC_SOC);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PROCHOT_CPU_BIT, THROTTLER_STATUS_BIT_PROCHOT_CPU);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PROCHOT_GFX_BIT, THROTTLER_STATUS_BIT_PROCHOT_GFX);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_EDC_CPU_BIT, THROTTLER_STATUS_BIT_EDC_CPU);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_EDC_GFX_BIT, THROTTLER_STATUS_BIT_EDC_GFX);
+
+   return indep_status;
+}
+
 /*
  * This interface just for getting uclk ultimate freq and should't introduce
  * other likewise function result in overmuch callback.
@@ -1264,8 +1302,8 @@ static ssize_t renoir_get_gpu_metrics(struct smu_context *smu,
  void **table)
 {
struct smu_table_context *smu_table = &smu->smu_table;
-   struct gpu_metrics_v2_1 *gpu_metrics =
-   (struct gpu_metrics_v2_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v2_2 *gpu_metrics =
+   (struct gpu_metrics_v2_2 *)smu_table->gpu_metrics_table;
SmuMetrics_t metrics;
int ret = 0;
 
@@ -1273,7 +1311,7 @@ static ssize_t renoir_get_gpu_metrics(struct smu_context *smu,
if (ret)
return ret;
 
-   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 1);
+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 2);
 
gpu_metrics->temperature_gfx = metrics.GfxTemperature;
gpu_metrics->temperature_soc = metrics.SocTemperature;
@@ -1311,6 +1349,8 @@ static ssize_t renoir_get_gpu_metrics(struct smu_context *smu,
gpu_metrics->current_l3clk[1] = metrics.L3Frequency[1];
 
gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+   gpu_metrics->indep_throttle_status =
+   renoir_get_indep_throttler_status(metrics.ThrottlerStatus);
 
gpu_metrics->fan_pwm = metrics.FanPwm;
 
@@ -1318,7 +1358,7 @@ static ssize_t renoir_get_gpu_metrics(struct smu_context *smu,
 
*table = (void *)gpu_metrics;
 
-   return sizeof(struct gpu_metrics_v2_1);
+   return sizeof(struct gpu_metrics_v2_2);
 }
 
 static int renoir_gfx_state_change_set(struct smu_context *smu, uint32_t state)
-- 
2.17.1


[PATCH v2 5/8] drm/amd/pm: Add sienna cichlid throttler translation

2021-05-28 Thread Graham Sider
Perform dependent-to-independent throttle status translation
for sienna cichlid.

Signed-off-by: Graham Sider 
---
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 60 +--
 1 file changed, 55 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 75acdb80c499..21c2fa4af64e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -434,7 +434,7 @@ static int sienna_cichlid_tables_init(struct smu_context *smu)
goto err0_out;
smu_table->metrics_time = 0;
 
-   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_1);
+   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_3);
smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
if (!smu_table->gpu_metrics_table)
goto err1_out;
@@ -453,6 +453,54 @@ static int sienna_cichlid_tables_init(struct smu_context *smu)
return -ENOMEM;
 }
 
+static uint64_t sienna_cichlid_get_indep_throttler_status(
+   uint32_t dep_status)
+{
+   if (dep_status == 0)
+   return 0;
+
+   uint64_t indep_status = 0;
+
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_EDGE_BIT, THROTTLER_TEMP_EDGE_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_HOTSPOT_BIT, THROTTLER_TEMP_HOTSPOT_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_MEM_BIT, THROTTLER_TEMP_MEM_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_GFX_BIT, THROTTLER_TEMP_VR_GFX_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_MEM_BIT, THROTTLER_TEMP_VR_MEM0_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_MEM_BIT, THROTTLER_TEMP_VR_MEM1_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_SOC_BIT, THROTTLER_TEMP_VR_SOC_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_LIQUID_BIT, THROTTLER_TEMP_LIQUID0_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_LIQUID_BIT, THROTTLER_TEMP_LIQUID1_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_GFX_BIT, THROTTLER_TDC_GFX_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_SOC_BIT, THROTTLER_TDC_SOC_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT0_BIT, THROTTLER_PPT0_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT1_BIT, THROTTLER_PPT1_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT2_BIT, THROTTLER_PPT2_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT3_BIT, THROTTLER_PPT3_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_FIT_BIT, THROTTLER_FIT_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPM_BIT, THROTTLER_PPM_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_APCC_BIT, THROTTLER_APCC_BIT);
+
+   return indep_status;
+}
+
 static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu,
   MetricsMember_t member,
   uint32_t *value)
@@ -3617,8 +3665,8 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
  void **table)
 {
struct smu_table_context *smu_table = &smu->smu_table;
-   struct gpu_metrics_v1_1 *gpu_metrics =
-   (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_3 *gpu_metrics =
+   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
SmuMetricsExternal_t metrics_external;
SmuMetrics_t *metrics =
&(metrics_external.SmuMetrics);
@@ -3632,7 +3680,7 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
if (ret)
return ret;
 
-   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);
+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
 
gpu_metrics->temperature_edge = metrics->TemperatureEdge;
gpu_metrics->temperature_hotspot = metrics->TemperatureHotspot;
@@ -3667,6 

[PATCH v2 6/8] drm/amd/pm: Add vangogh throttler translation

2021-05-28 Thread Graham Sider
Perform dependent-to-independent throttle status translation
for vangogh.

Signed-off-by: Graham Sider 
---
 .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 56 ---
 1 file changed, 47 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 77f532a49e37..6c3ffca9c52e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -226,7 +226,7 @@ static int vangogh_tables_init(struct smu_context *smu)
goto err0_out;
smu_table->metrics_time = 0;
 
-   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_1);
+   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2);
smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
if (!smu_table->gpu_metrics_table)
goto err1_out;
@@ -251,6 +251,40 @@ static int vangogh_tables_init(struct smu_context *smu)
return -ENOMEM;
 }
 
+static uint64_t vangogh_get_indep_throttler_status(
+   uint32_t dep_status)
+{
+   if (dep_status == 0)
+   return 0;
+
+   uint64_t indep_status = 0;
+
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_SPL_BIT, THROTTLER_STATUS_BIT_SPL);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_FPPT_BIT, THROTTLER_STATUS_BIT_FPPT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_SPPT_BIT, THROTTLER_STATUS_BIT_SPPT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_SPPT_APU_BIT, THROTTLER_STATUS_BIT_SPPT_APU);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_CORE_BIT, THROTTLER_STATUS_BIT_THM_CORE);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_GFX_BIT, THROTTLER_STATUS_BIT_THM_GFX);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_SOC_BIT, THROTTLER_STATUS_BIT_THM_SOC);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_VDD_BIT, THROTTLER_STATUS_BIT_TDC_VDD);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_SOC_BIT, THROTTLER_STATUS_BIT_TDC_SOC);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_GFX_BIT, THROTTLER_STATUS_BIT_TDC_GFX);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_CVIP_BIT, THROTTLER_STATUS_BIT_TDC_CVIP);
+
+   return indep_status;
+}
+
 static int vangogh_get_legacy_smu_metrics_data(struct smu_context *smu,
   MetricsMember_t member,
   uint32_t *value)
@@ -1632,8 +1666,8 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context *smu,
  void **table)
 {
struct smu_table_context *smu_table = &smu->smu_table;
-   struct gpu_metrics_v2_1 *gpu_metrics =
-   (struct gpu_metrics_v2_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v2_2 *gpu_metrics =
+   (struct gpu_metrics_v2_2 *)smu_table->gpu_metrics_table;
SmuMetrics_legacy_t metrics;
int ret = 0;
 
@@ -1641,7 +1675,7 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context *smu,
if (ret)
return ret;
 
-   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 1);
+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 2);
 
gpu_metrics->temperature_gfx = metrics.GfxTemperature;
gpu_metrics->temperature_soc = metrics.SocTemperature;
@@ -1674,20 +1708,22 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context *smu,
gpu_metrics->current_l3clk[0] = metrics.L3Frequency[0];
 
gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+   gpu_metrics->indep_throttle_status =
+   vangogh_get_indep_throttler_status(metrics.ThrottlerStatus);
 
gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
 
*table = (void *)gpu_metrics;
 
-   return sizeof(struct gpu_metrics_v2_1);
+   return sizeof(struct gpu_metrics_v2_2);
 }
 
 static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu,
  void **table)
 {
struct smu_table_context *smu_table = &smu->smu_table;
-   struct gpu_metrics_v2_1 *gpu_metrics =
-   (struct gpu_metrics_v2_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v2_2 *gpu_metrics =
+   (struct gpu_metrics_v2_2 *)smu_table->gpu_metrics_table;
SmuMetrics_t metrics;
int ret = 0;
 
@@ -1695,7 

[PATCH v2 3/8] drm/amd/pm: Add arcturus throttler translation

2021-05-28 Thread Graham Sider
Perform dependent-to-independent throttle status translation
for arcturus.

Signed-off-by: Graham Sider 
---
 .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 58 +--
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 1735a96dd307..ecc046c929fe 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -237,7 +237,7 @@ static int arcturus_tables_init(struct smu_context *smu)
return -ENOMEM;
smu_table->metrics_time = 0;
 
-   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_1);
+   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_3);
smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
if (!smu_table->gpu_metrics_table) {
kfree(smu_table->metrics_table);
@@ -540,6 +540,52 @@ static int arcturus_freqs_in_same_level(int32_t frequency1,
return (abs(frequency1 - frequency2) <= EPSILON);
 }
 
+static uint64_t arcturus_get_indep_throttler_status(
+   uint32_t dep_status)
+{
+   if (dep_status == 0)
+   return 0;
+
+   uint64_t indep_status = 0;
+
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_EDGE_BIT, THROTTLER_TEMP_EDGE_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_HOTSPOT_BIT, THROTTLER_TEMP_HOTSPOT_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_MEM_BIT, THROTTLER_TEMP_MEM_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_GFX_BIT, THROTTLER_TEMP_VR_GFX_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_MEM_BIT, THROTTLER_TEMP_VR_MEM_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TEMP_VR_SOC_BIT, THROTTLER_TEMP_VR_SOC_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_GFX_BIT, THROTTLER_TDC_GFX_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_TDC_SOC_BIT, THROTTLER_TDC_SOC_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT0_BIT, THROTTLER_PPT0_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT1_BIT, THROTTLER_PPT1_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT2_BIT, THROTTLER_PPT2_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPT3_BIT, THROTTLER_PPT3_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_PPM_BIT, THROTTLER_PPM_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_FIT_BIT, THROTTLER_FIT_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_APCC_BIT, THROTTLER_APCC_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_VRHOT0_BIT, THROTTLER_VRHOT0_BIT);
+   indep_status |= smu_u64_throttler_bit(dep_status,
+   SMU_THROTTLER_VRHOT1_BIT, THROTTLER_VRHOT1_BIT);
+
+   return indep_status;
+}
+
 static int arcturus_get_smu_metrics_data(struct smu_context *smu,
 MetricsMember_t member,
 uint32_t *value)
@@ -2275,8 +2321,8 @@ static ssize_t arcturus_get_gpu_metrics(struct smu_context *smu,
void **table)
 {
struct smu_table_context *smu_table = >smu_table;
-   struct gpu_metrics_v1_1 *gpu_metrics =
-   (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_3 *gpu_metrics =
+   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
SmuMetrics_t metrics;
int ret = 0;
 
@@ -2286,7 +2332,7 @@ static ssize_t arcturus_get_gpu_metrics(struct smu_context *smu,
if (ret)
return ret;
 
-   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);
+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
 
gpu_metrics->temperature_edge = metrics.TemperatureEdge;
gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
@@ -2315,6 +2361,8 @@ static ssize_t arcturus_get_gpu_metrics(struct smu_context *smu,
gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
 
gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+	gpu_metrics->indep_throttle_status =
+		arcturus_get_indep_throttler_status(metrics.ThrottlerStatus);

[PATCH v2 4/8] drm/amd/pm: Add navi1x throttler translation

2021-05-28 Thread Graham Sider
Perform dependent to independent throttle status translation
for navi1x.

Signed-off-by: Graham Sider 
---
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 56 +++
 1 file changed, 56 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 78fe13183e8b..878ec698909c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -524,6 +524,54 @@ static int navi10_tables_init(struct smu_context *smu)
return -ENOMEM;
 }
 
+static uint64_t navi1x_get_indep_throttler_status(uint32_t dep_status)
+{
+	uint64_t indep_status = 0;
+
+	if (dep_status == 0)
+		return 0;
+
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_TEMP_EDGE_BIT, THROTTLER_TEMP_EDGE_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_TEMP_HOTSPOT_BIT, THROTTLER_TEMP_HOTSPOT_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_TEMP_MEM_BIT, THROTTLER_TEMP_MEM_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_TEMP_VR_GFX_BIT, THROTTLER_TEMP_VR_GFX_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_TEMP_VR_MEM_BIT, THROTTLER_TEMP_VR_MEM0_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_TEMP_VR_MEM_BIT, THROTTLER_TEMP_VR_MEM1_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_TEMP_VR_SOC_BIT, THROTTLER_TEMP_VR_SOC_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_TEMP_LIQUID_BIT, THROTTLER_TEMP_LIQUID0_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_TEMP_LIQUID_BIT, THROTTLER_TEMP_LIQUID1_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_TDC_GFX_BIT, THROTTLER_TDC_GFX_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_TDC_SOC_BIT, THROTTLER_TDC_SOC_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_PPT0_BIT, THROTTLER_PPT0_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_PPT1_BIT, THROTTLER_PPT1_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_PPT2_BIT, THROTTLER_PPT2_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_PPT3_BIT, THROTTLER_PPT3_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_FIT_BIT, THROTTLER_FIT_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_PPM_BIT, THROTTLER_PPM_BIT);
+	indep_status |= smu_u64_throttler_bit(dep_status,
+			SMU_THROTTLER_APCC_BIT, THROTTLER_APCC_BIT);
+
+	return indep_status;
+}
+
 static int navi10_get_legacy_smu_metrics_data(struct smu_context *smu,
  MetricsMember_t member,
  uint32_t *value)
@@ -2673,6 +2721,8 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct smu_context *smu,
gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
 
gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+	gpu_metrics->indep_throttle_status =
+		navi1x_get_indep_throttler_status(metrics.ThrottlerStatus);
 
gpu_metrics->current_fan_speed = metrics.CurrFanSpeed;
 
@@ -2750,6 +2800,8 @@ static ssize_t navi10_get_gpu_metrics(struct smu_context *smu,
gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
 
gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+	gpu_metrics->indep_throttle_status =
+		navi1x_get_indep_throttler_status(metrics.ThrottlerStatus);
 
gpu_metrics->current_fan_speed = metrics.CurrFanSpeed;
 
@@ -2826,6 +2878,8 @@ static ssize_t navi12_get_legacy_gpu_metrics(struct smu_context *smu,
gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
 
gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+	gpu_metrics->indep_throttle_status =
+		navi1x_get_indep_throttler_status(metrics.ThrottlerStatus);
 
gpu_metrics->current_fan_speed = metrics.CurrFanSpeed;
 
@@ -2908,6 +2962,8 @@ static ssize_t navi12_get_gpu_metrics(struct smu_context *smu,
gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
 
gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+	gpu_metrics->indep_throttle_status =
+		navi1x_get_indep_throttler_status(metrics.ThrottlerStatus);

[PATCH v2 1/8] drm/amd/pm: Add u64 throttler status field to gpu_metrics

2021-05-28 Thread Graham Sider
This patch piggybacks off the gpu_metrics_v1_3 bump and adds a new ASIC
independent u64 throttler status field (indep_throttle_status). It
similarly bumps the gpu_metrics_v2 version (to v2_2) to add the field.
The alternative to adding this new field would be to overwrite the
original u32 throttle_status (which would still require a version bump
for gpu_metrics_v2). The benefit of adding a new field is that we can
allocate 16 bits to each "type" of throttler information and have more
leeway for adding additional throttler bits in the future.
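
A minimal sketch of what the smu_u64_throttler_bit() helper used in the
later patches of this series could look like; the exact definition here
is an illustrative assumption, not the committed code:

	/* Set independent bit INDEP_BIT iff dependent bit DEP_BIT is set. */
	#define smu_u64_throttler_bit(dep, INDEP_BIT, DEP_BIT) \
		((uint64_t)(!!((dep) & (1U << (DEP_BIT)))) << (INDEP_BIT))

With 16 bits reserved per throttler type, the independent bit positions
can keep a stable layout across ASICs even as each ASIC's dependent
layout differs.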

Signed-off-by: Graham Sider 
---
 .../gpu/drm/amd/include/kgd_pp_interface.h| 58 ++-
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c|  3 +
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index b1cd52a9d684..b50d6bd0833c 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -577,7 +577,7 @@ struct gpu_metrics_v1_3 {
	uint16_t		current_vclk1;
	uint16_t		current_dclk1;
 
-	/* Throttle status */
+	/* Throttle status (ASIC dependent) */
	uint32_t		throttle_status;
 
	/* Fans */
@@ -602,6 +602,9 @@ struct gpu_metrics_v1_3 {
	uint16_t		voltage_gfx;
	uint16_t		voltage_mem;
 
+	/* Throttle status (ASIC independent) */
+	uint64_t		indep_throttle_status;
+
	uint16_t		padding1;
 };
 
@@ -709,4 +712,57 @@ struct gpu_metrics_v2_1 {
	uint16_t		padding[3];
 };
 
+struct gpu_metrics_v2_2 {
+	struct metrics_table_header	common_header;
+
+	/* Temperature */
+	uint16_t		temperature_gfx; // gfx temperature on APUs
+	uint16_t		temperature_soc; // soc temperature on APUs
+	uint16_t		temperature_core[8]; // CPU core temperature on APUs
+	uint16_t		temperature_l3[2];
+
+	/* Utilization */
+	uint16_t		average_gfx_activity;
+	uint16_t		average_mm_activity; // UVD or VCN
+
+	/* Driver attached timestamp (in ns) */
+	uint64_t		system_clock_counter;
+
+	/* Power/Energy */
+	uint16_t		average_socket_power; // dGPU + APU power on A + A platform
+	uint16_t		average_cpu_power;
+	uint16_t		average_soc_power;
+	uint16_t		average_gfx_power;
+	uint16_t		average_core_power[8]; // CPU core power on APUs
+
+	/* Average clocks */
+	uint16_t		average_gfxclk_frequency;
+	uint16_t		average_socclk_frequency;
+	uint16_t		average_uclk_frequency;
+	uint16_t		average_fclk_frequency;
+	uint16_t		average_vclk_frequency;
+	uint16_t		average_dclk_frequency;
+
+	/* Current clocks */
+	uint16_t		current_gfxclk;
+	uint16_t		current_socclk;
+	uint16_t		current_uclk;
+	uint16_t		current_fclk;
+	uint16_t		current_vclk;
+	uint16_t		current_dclk;
+	uint16_t		current_coreclk[8]; // CPU core clocks
+	uint16_t		current_l3clk[2];
+
+	/* Throttle status (ASIC dependent) */
+	uint32_t		throttle_status;
+
+	/* Fans */
+	uint16_t		fan_pwm;
+
+	/* Throttle status (ASIC independent) */
+	uint64_t		indep_throttle_status;
+
+	uint16_t		padding[3];
+};
+
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 0ceb7329838c..01645537d9ab 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -773,6 +773,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev)
case METRICS_VERSION(2, 1):
structure_size = sizeof(struct gpu_metrics_v2_1);
break;
+   case METRICS_VERSION(2, 2):
+   structure_size = sizeof(struct gpu_metrics_v2_2);
+   break;
default:
return;
}
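
For context, the switch key above packs the two revision bytes into one
value; in smu_cmn.c the macro is, to the best of my reading of the file,

	#define METRICS_VERSION(a, b)	((a) << 16 | (b))

so the new case fires for format revision 2, content revision 2.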
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v4 1/1] drm/amdgpu: flush gart changes after all BO recovery

2021-05-28 Thread Christian König

On 28.05.21 at 18:51, Nirmoy Das wrote:

Don't flush gart changes after recovering each BO; instead, do it
after recovering all the BOs. Flushing the gart is also needed
for amdgpu_ttm_alloc_gart().

v4: use container_of to retrieve adev struct.
v3: rename amdgpu_gart_tlb_flush() -> amdgpu_gart_invalidate_tlb().
v2: abstract out gart tlb flushing logic to amdgpu_gart.c

Signed-off-by: Nirmoy Das 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c| 22 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h|  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 +
  4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 5562b5c90c03..35cc8009ac7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -312,8 +312,6 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t 
offset,
 int pages, struct page **pagelist, dma_addr_t *dma_addr,
 uint64_t flags)
  {
-   int r, i;
-
if (!adev->gart.ready) {
WARN(1, "trying to bind memory to uninitialized GART !\n");
return -EINVAL;
@@ -322,16 +320,26 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t 
offset,
if (!adev->gart.ptr)
return 0;

-   r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
-   adev->gart.ptr);
-   if (r)
-   return r;
+   return amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
+  adev->gart.ptr);
+}
+
+/**
+ * amdgpu_gart_invalidate_tlb - invalidate gart TLB
+ *
+ * @adev: amdgpu device driver pointer
+ *
+ * Invalidate gart TLB, which can be used as a way to flush gart changes
+ *
+ */
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
+{
+   int i;

mb();
amdgpu_asic_flush_hdp(adev, NULL);
for (i = 0; i < adev->num_vmhubs; i++)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
-   return 0;
  }

  /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index a25fe97b0196..e104022197ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -66,5 +66,5 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t 
offset,
  int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 int pages, struct page **pagelist,
 dma_addr_t *dma_addr, uint64_t flags);
-
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev);
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8860545344c7..a1cd775fd61c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -206,10 +206,12 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager 
*man)
  int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
  {
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
+   struct amdgpu_device *adev;
struct amdgpu_gtt_node *node;
struct drm_mm_node *mm_node;
int r = 0;

+   adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
spin_lock(>lock);
drm_mm_for_each_node(mm_node, >mm) {
node = container_of(mm_node, struct amdgpu_gtt_node, node);
@@ -219,6 +221,8 @@ int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
}
spin_unlock(>lock);

+   amdgpu_gart_invalidate_tlb(adev);
+
return r;
  }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c0aef327292a..786650a4a493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1014,6 +1014,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return r;
}

+   amdgpu_gart_invalidate_tlb(adev);
ttm_resource_free(bo, >mem);
bo->mem = tmp;
}
--
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[PATCH v4 1/1] drm/amdgpu: flush gart changes after all BO recovery

2021-05-28 Thread Nirmoy Das
Don't flush gart changes after recovering each BO; instead, do it
after recovering all the BOs. Flushing the gart is also needed
for amdgpu_ttm_alloc_gart().

v4: use container_of to retrieve adev struct.
v3: rename amdgpu_gart_tlb_flush() -> amdgpu_gart_invalidate_tlb().
v2: abstract out gart tlb flushing logic to amdgpu_gart.c
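
The resulting call pattern, as a minimal sketch (for_each_recovered_bo()
is illustrative, not a real helper):

	/* before: each bind flushed the TLB; after: one flush per batch */
	for_each_recovered_bo(bo)
		amdgpu_gart_bind(adev, ...);	/* no implicit TLB flush */
	amdgpu_gart_invalidate_tlb(adev);	/* single flush at the end */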

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c| 22 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 +
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 5562b5c90c03..35cc8009ac7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -312,8 +312,6 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 int pages, struct page **pagelist, dma_addr_t *dma_addr,
 uint64_t flags)
 {
-   int r, i;
-
if (!adev->gart.ready) {
WARN(1, "trying to bind memory to uninitialized GART !\n");
return -EINVAL;
@@ -322,16 +320,26 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
if (!adev->gart.ptr)
return 0;

-   r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
-   adev->gart.ptr);
-   if (r)
-   return r;
+   return amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
+  adev->gart.ptr);
+}
+
+/**
+ * amdgpu_gart_invalidate_tlb - invalidate gart TLB
+ *
+ * @adev: amdgpu device driver pointer
+ *
+ * Invalidate gart TLB, which can be used as a way to flush gart changes
+ *
+ */
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
+{
+   int i;

mb();
amdgpu_asic_flush_hdp(adev, NULL);
for (i = 0; i < adev->num_vmhubs; i++)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
-   return 0;
 }

 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index a25fe97b0196..e104022197ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -66,5 +66,5 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
 int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 int pages, struct page **pagelist,
 dma_addr_t *dma_addr, uint64_t flags);
-
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8860545344c7..a1cd775fd61c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -206,10 +206,12 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man)
 int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
 {
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
+   struct amdgpu_device *adev;
struct amdgpu_gtt_node *node;
struct drm_mm_node *mm_node;
int r = 0;

+   adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
spin_lock(>lock);
drm_mm_for_each_node(mm_node, >mm) {
node = container_of(mm_node, struct amdgpu_gtt_node, node);
@@ -219,6 +221,8 @@ int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
}
spin_unlock(>lock);

+   amdgpu_gart_invalidate_tlb(adev);
+
return r;
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c0aef327292a..786650a4a493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1014,6 +1014,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return r;
}

+   amdgpu_gart_invalidate_tlb(adev);
ttm_resource_free(bo, >mem);
bo->mem = tmp;
}
--
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdkfd: move flushing TLBs from map to unmap

2021-05-28 Thread Eric Huang


On 2021-05-28 11:23 a.m., Christian König wrote:



On 27.05.21 at 16:05, philip yang wrote:



On 2021-05-26 5:25 p.m., Felix Kuehling wrote:

On 2021-05-26 at 3:21 p.m., Eric Huang wrote:

On 2021-05-25 3:16 p.m., Felix Kuehling wrote:

Similar to a recent fix by Philip Yang 76e08b37d0aa ("drm/amdgpu: flush
TLB if valid PDE turns into PTE"), there needs to be a conditional TLB
flush after map, if any PDEs were unmapped and turned into PTEs in the
process. This is currently returned by amdgpu_vm_bo_update_mapping in
the "table_freed" parameter. This needs to be also returned by
amdgpu_vm_bo_update and reported back to KFD, so KFD can do the TLB
flush after map, if needed.
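
A hedged sketch of the plumbing being described (the parameter placement
is an assumption for illustration, not the actual patch):

	/* propagate "were page tables freed?" up to the caller */
	int amdgpu_vm_bo_update(struct amdgpu_device *adev,
				struct amdgpu_bo_va *bo_va, bool clear,
				bool *table_freed);

	/* KFD side, after a successful map */
	if (table_freed)
		kfd_flush_tlb(peer_pdd);	/* legacy flush is enough here */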

I followed your suggestion to create another patch (attached) and
tested it. It doesn't seem to improve the latency when the memory size
is bigger than a huge page (2M), because the table_freed parameter will
always be true when the mapping uses huge-page size. I think Philip's
patch fixes the case of remapping memory from small pages to huge pages
in HMM, but it doesn't consider whether the memory was remapped, and it
arbitrarily flushes TLBs when mapping a huge page.

That's unexpected. Turning an invalid PDE into a valid (huge) PTE should
not trigger a TLB flush.


table_freed will be true if the PDE was used by a previous mapping:
unmapping the previous mapping clears the PTEs and leaves the PDE
unchanged as P=0, V=1 (in memory and TLB); the huge-page mapping then
turns the PDE into a PTE (P=1, V=1) in memory and frees the PTE page.




I think there might be a little bug in your patch. See, we set
params.table_freed to true when we call amdgpu_vm_free_pts(), but
amdgpu_vm_free_pts() doesn't necessarily free anything.


It can be that all subsequent page tables were never allocated before.

Christian.


After printing info in amdgpu_vm_update_ptes(): when we map memory of
size 2M (a huge page), the function allocates 9 ptes (2M ==
PAGE_SIZE << 9) until the check "if (frag >= parent_shift)", then the
cursor goes up one level to PDE0 and frees all 9 ptes. That is why
table_freed is always true when mapping memory bigger than 2M.


I will add some code to check whether the PDE entry is valid before
amdgpu_vm_update_flags(), and set table_freed accordingly. That will
fix exactly the page fault in the corner case Philip mentioned above.


Regards,
Eric



For example: test-map 0x7ffe37401000, unmap it, and then map
0x7ffe374 as a 2MB huge page; table_freed will be true, which means
a TLB flush is needed after mapping the huge page.


You can change the test: don't unmap the previous mapping, then the
2MB huge page will get a new GPU virtual address; or close KFD and
open KFD again to create a new GPU vm.


Regards,

Philip


Regards,
   Felix



kfd_flush_tlb probably needs a new parameter to determine the flush
type. The flush after map can be a "legacy" flush (type 0). The flush
after unmap must be a "heavy-weight" flush (type 2) to make sure we
don't evict cache lines into pages that we no longer own.
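
A minimal sketch of such a parameter (the enum name and values here are
assumptions for illustration, not Felix's patch):

	enum kfd_tlb_flush_type {
		TLB_FLUSH_LEGACY	= 0,	/* after map */
		TLB_FLUSH_HEAVYWEIGHT	= 2,	/* after unmap */
	};

	void kfd_flush_tlb(struct kfd_process_device *pdd,
			   enum kfd_tlb_flush_type type);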

Finally, in the ticket I thought about possible optimizations using a
worker to minimize the impact of TLB flushes on unmap latency. That
could be a follow up commit.

It is a good idea to use a worker, but how do we guarantee it is done
before memory is remapped? If remapping depends on it, then more
latency will be introduced in map.

Regards,
Eric

Regards,
    Felix


On 2021-05-25 at 1:53 p.m., Eric Huang wrote:

It is to optimize memory allocation latency.

Signed-off-by: Eric Huang

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 960913a35ee4..ab73741edb97 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1657,20 +1657,6 @@ static int kfd_ioctl_map_memory_to_gpu(struct
file *filep,
  goto sync_memory_failed;
  }

-   /* Flush TLBs after waiting for the page table updates to
complete */
-   for (i = 0; i < args->n_devices; i++) {
-   peer = kfd_device_by_id(devices_arr[i]);
-   if (WARN_ON_ONCE(!peer))
-   continue;
-   peer_pdd = kfd_get_process_device_data(peer, p);
-   if (WARN_ON_ONCE(!peer_pdd))
-   continue;
-   if (!amdgpu_read_lock(peer->ddev, true)) {
-   kfd_flush_tlb(peer_pdd);
-   amdgpu_read_unlock(peer->ddev);
-   }
-   }
-
  kfree(devices_arr);

  trace_kfd_map_memory_to_gpu_end(p,
@@ -1766,6 +1752,7 @@ static int
kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
  amdgpu_read_unlock(peer->ddev);
  goto unmap_memory_from_gpu_failed;
  }
+   kfd_flush_tlb(peer_pdd);
  amdgpu_read_unlock(peer->ddev);
  args->n_success = i+1;
  }
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org

Re: [PATCH 1/1] drm/amd/display: WARN_ON cleanups

2021-05-28 Thread Das, Nirmoy


On 5/28/2021 5:28 PM, Christian König wrote:



On 27.05.21 at 14:14, Nirmoy Das wrote:

Use WARN_ON() inside the if-condition when possible.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 11 ---
  1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

index f2ed51484980..736eb537a8e4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -315,8 +315,7 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev,
  struct drm_crtc *crtc;
  struct amdgpu_crtc *amdgpu_crtc;
  -    if (otg_inst == -1) {
-    WARN_ON(1);
+    if (WARN_ON(otg_inst == -1)) {
  return adev->mode_info.crtcs[0];
  }


While at it you could also drop the extra {} here.



I merged it this morning :/ I will speak with Alex to clean this up.


Nirmoy




Apart from that patch is Acked-by: Christian König 
.


Christian.

  @@ -397,8 +396,7 @@ static void dm_pflip_high_irq(void 
*interrupt_params)

  e = amdgpu_crtc->event;
  amdgpu_crtc->event = NULL;
  -    if (!e)
-    WARN_ON(1);
+    WARN_ON(!e);
    vrr_active = amdgpu_dm_vrr_active_irq(amdgpu_crtc);
  @@ -6698,9 +6696,8 @@ static int dm_crtc_helper_atomic_check(struct 
drm_crtc *crtc,

    dm_update_crtc_active_planes(crtc, crtc_state);
  -    if (unlikely(!dm_crtc_state->stream &&
- modeset_required(crtc_state, NULL, 
dm_crtc_state->stream))) {

-    WARN_ON(1);
+    if (WARN_ON(unlikely(!dm_crtc_state->stream &&
+ modeset_required(crtc_state, NULL, 
dm_crtc_state->stream {

  return ret;
  }



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 1/1] drm/amdgpu: flush gart changes after all BO recovery

2021-05-28 Thread Das, Nirmoy


On 5/28/2021 5:59 PM, Christian König wrote:



On 28.05.21 at 17:54, Nirmoy Das wrote:

Don't flush gart changes after recovering each BO; instead, do it
after recovering all the BOs. Flushing the gart is also needed
for amdgpu_ttm_alloc_gart().

v3: rename amdgpu_gart_tlb_flush() -> amdgpu_gart_invalidate_tlb()
v2: abstract out gart tlb flushing logic to amdgpu_gart.c

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c    | 20 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h    |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  6 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 +
  4 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

index 5562b5c90c03..992b59c29dd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -322,16 +322,26 @@ int amdgpu_gart_bind(struct amdgpu_device 
*adev, uint64_t offset,

  if (!adev->gart.ptr)
  return 0;

-    r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
-    adev->gart.ptr);
-    if (r)
-    return r;
+    return amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
+   adev->gart.ptr);
+}
+
+/**
+ * amdgpu_gart_invalidate_tlb - invalidate gart TLB
+ *
+ * @adev: amdgpu device driver pointer
+ *
+ * Invalidate gart TLB, which can be used as a way to flush gart changes
+ *
+ */
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
+{
+    int i;

  mb();
  amdgpu_asic_flush_hdp(adev, NULL);
  for (i = 0; i < adev->num_vmhubs; i++)
  amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
-    return 0;
  }

  /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h

index a25fe97b0196..e104022197ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -66,5 +66,5 @@ int amdgpu_gart_map(struct amdgpu_device *adev, 
uint64_t offset,

  int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
   int pages, struct page **pagelist,
   dma_addr_t *dma_addr, uint64_t flags);
-
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev);
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c

index 8860545344c7..b01bc2346082 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -205,6 +205,7 @@ uint64_t amdgpu_gtt_mgr_usage(struct 
ttm_resource_manager *man)

   */
  int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
  {
+    struct amdgpu_device *adev = NULL;
  struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
  struct amdgpu_gtt_node *node;
  struct drm_mm_node *mm_node;
@@ -216,9 +217,14 @@ int amdgpu_gtt_mgr_recover(struct 
ttm_resource_manager *man)

  r = amdgpu_ttm_recover_gart(node->tbo);
  if (r)
  break;
+    if (!adev)
+    adev = amdgpu_ttm_adev(node->tbo->bdev);


container_of(mgr, typeof(*adev), mman.gtt_mgr) is probably a bit 
cleaner than this.



This was bothering me as well, thanks for this!


Nirmoy



Christian.


  }
  spin_unlock(>lock);

+    if (adev)
+    amdgpu_gart_invalidate_tlb(adev);
+
  return r;
  }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index c0aef327292a..786650a4a493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1014,6 +1014,7 @@ int amdgpu_ttm_alloc_gart(struct 
ttm_buffer_object *bo)

  return r;
  }

+    amdgpu_gart_invalidate_tlb(adev);
  ttm_resource_free(bo, >mem);
  bo->mem = tmp;
  }
--
2.31.1




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3 1/1] drm/amdgpu: flush gart changes after all BO recovery

2021-05-28 Thread Christian König




On 28.05.21 at 17:54, Nirmoy Das wrote:

Don't flush gart changes after recovering each BO; instead, do it
after recovering all the BOs. Flushing the gart is also needed
for amdgpu_ttm_alloc_gart().

v3: rename amdgpu_gart_tlb_flush() -> amdgpu_gart_invalidate_tlb()
v2: abstract out gart tlb flushing logic to amdgpu_gart.c

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c| 20 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h|  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  6 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 +
  4 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 5562b5c90c03..992b59c29dd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -322,16 +322,26 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t 
offset,
if (!adev->gart.ptr)
return 0;

-   r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
-   adev->gart.ptr);
-   if (r)
-   return r;
+   return amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
+  adev->gart.ptr);
+}
+
+/**
+ * amdgpu_gart_invalidate_tlb - invalidate gart TLB
+ *
+ * @adev: amdgpu device driver pointer
+ *
+ * Invalidate gart TLB, which can be used as a way to flush gart changes
+ *
+ */
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
+{
+   int i;

mb();
amdgpu_asic_flush_hdp(adev, NULL);
for (i = 0; i < adev->num_vmhubs; i++)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
-   return 0;
  }

  /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index a25fe97b0196..e104022197ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -66,5 +66,5 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t 
offset,
  int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 int pages, struct page **pagelist,
 dma_addr_t *dma_addr, uint64_t flags);
-
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev);
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8860545344c7..b01bc2346082 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -205,6 +205,7 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager 
*man)
   */
  int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
  {
+   struct amdgpu_device *adev = NULL;
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
struct amdgpu_gtt_node *node;
struct drm_mm_node *mm_node;
@@ -216,9 +217,14 @@ int amdgpu_gtt_mgr_recover(struct ttm_resource_manager 
*man)
r = amdgpu_ttm_recover_gart(node->tbo);
if (r)
break;
+   if (!adev)
+   adev = amdgpu_ttm_adev(node->tbo->bdev);


container_of(mgr, typeof(*adev), mman.gtt_mgr) is probably a bit cleaner 
than this.


Christian.


}
spin_unlock(>lock);

+   if (adev)
+   amdgpu_gart_invalidate_tlb(adev);
+
return r;
  }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c0aef327292a..786650a4a493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1014,6 +1014,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return r;
}

+   amdgpu_gart_invalidate_tlb(adev);
ttm_resource_free(bo, >mem);
bo->mem = tmp;
}
--
2.31.1



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [RFC PATCH 0/5] Support DEVICE_GENERIC memory in migrate_vma_*

2021-05-28 Thread Felix Kuehling
Am 2021-05-28 um 9:08 a.m. schrieb Jason Gunthorpe:
> On Thu, May 27, 2021 at 07:08:04PM -0400, Felix Kuehling wrote:
>> Now we're trying to migrate data to and from that memory using the
>> migrate_vma_* helpers so we can support page-based migration in our
>> unified memory allocations, while also supporting CPU access to those
>> pages.
> So you have completely coherent and indistinguishable GPU and CPU
> memory and the need of migration is basicaly alot like NUMA policy
> choice - get better access locality?

Yes. For a typical GPU compute application it means the GPU gets the
best bandwidth/latency, and the CPU can coherently access the results
without page faults and migrations. That's especially valuable for
applications with persistent compute kernels that want to exploit
concurrency between CPU and GPU.


>  
>> This patch series makes a few changes to make MEMORY_DEVICE_GENERIC pages
>> behave correctly in the migrate_vma_* helpers. We are looking for feedback
>> about this approach. If we're close, what's needed to make our patches
>> acceptable upstream? If we're not close, any suggestions how else to
>> achieve what we are trying to do (i.e. page migration and coherent CPU
>> access to VRAM)?
> I'm not an expert in migrate, but it doesn't look outrageous.
>
> Have you thought about allowing MEMORY_DEVICE_GENERIC to work with
> hmm_range_fault() so you can have nice uniform RDMA?

Yes. That's our plan for RDMA to unified memory on this system. My
understanding was, that DEVICE_GENERIC pages should already work with
hmm_range_fault. But maybe I'm missing something.
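
For reference, a minimal sketch of the hmm_range_fault() pattern in
question, with the notifier-retry bookkeeping elided for brevity (the
array size is illustrative):

	unsigned long pfns[16];
	struct hmm_range range = {
		.notifier = &notifier,		/* an mmu_interval_notifier */
		.start = addr,
		.end = addr + 16 * PAGE_SIZE,
		.hmm_pfns = pfns,
		.default_flags = HMM_PFN_REQ_FAULT,
	};
	int ret;

	range.notifier_seq = mmu_interval_read_begin(&notifier);
	mmap_read_lock(mm);
	ret = hmm_range_fault(&range);	/* fills pfns[], DEVICE_GENERIC included */
	mmap_read_unlock(mm);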


>
> People have wanted to do that with MEMORY_DEVICE_PRIVATE but nobody
> finished the work

Yeah, for DEVICE_PRIVATE it seems more tricky because the peer device is
not the owner of the pages and would need help from the actual owner to
get proper DMA addresses.

Regards,
  Felix


>
> Jason
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v3 1/1] drm/amdgpu: flush gart changes after all BO recovery

2021-05-28 Thread Nirmoy Das
Don't flush gart changes after recovering each BO; instead, do it
after recovering all the BOs. Flushing the gart is also needed
for amdgpu_ttm_alloc_gart().

v3: rename amdgpu_gart_tlb_flush() -> amdgpu_gart_invalidate_tlb()
v2: abstract out gart tlb flushing logic to amdgpu_gart.c

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c| 20 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 +
 4 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 5562b5c90c03..992b59c29dd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -322,16 +322,26 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
if (!adev->gart.ptr)
return 0;

-   r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
-   adev->gart.ptr);
-   if (r)
-   return r;
+   return amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
+  adev->gart.ptr);
+}
+
+/**
+ * amdgpu_gart_invalidate_tlb - invalidate gart TLB
+ *
+ * @adev: amdgpu device driver pointer
+ *
+ * Invalidate gart TLB, which can be used as a way to flush gart changes
+ *
+ */
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
+{
+   int i;

mb();
amdgpu_asic_flush_hdp(adev, NULL);
for (i = 0; i < adev->num_vmhubs; i++)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
-   return 0;
 }

 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index a25fe97b0196..e104022197ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -66,5 +66,5 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
 int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 int pages, struct page **pagelist,
 dma_addr_t *dma_addr, uint64_t flags);
-
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8860545344c7..b01bc2346082 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -205,6 +205,7 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man)
  */
 int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
 {
+   struct amdgpu_device *adev = NULL;
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
struct amdgpu_gtt_node *node;
struct drm_mm_node *mm_node;
@@ -216,9 +217,14 @@ int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
r = amdgpu_ttm_recover_gart(node->tbo);
if (r)
break;
+   if (!adev)
+   adev = amdgpu_ttm_adev(node->tbo->bdev);
}
spin_unlock(>lock);

+   if (adev)
+   amdgpu_gart_invalidate_tlb(adev);
+
return r;
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c0aef327292a..786650a4a493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1014,6 +1014,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return r;
}

+   amdgpu_gart_invalidate_tlb(adev);
ttm_resource_free(bo, >mem);
bo->mem = tmp;
}
--
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: use resource cursor in svm_migrate_copy_to_vram v2

2021-05-28 Thread Christian König
Access to the mm_node is now forbidden, so instead of hand-wiring that,
use the cursor functionality.

v2: fix handling as pointed out by Philip.
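
For readers new to the cursor helpers, the intended access pattern is
roughly the following (use_chunk() is an illustrative placeholder):

	struct amdgpu_res_cursor cursor;

	amdgpu_res_first(res, offset, size, &cursor);
	while (cursor.remaining) {
		/* cursor.start/.size describe one contiguous chunk */
		use_chunk(cursor.start, cursor.size);
		amdgpu_res_next(&cursor, cursor.size);
	}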

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 68 
 1 file changed, 10 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index fd8f544f0de2..5ce8fa2ddab0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -29,6 +29,7 @@
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
 #include "amdgpu_mn.h"
+#include "amdgpu_res_cursor.h"
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 #include "kfd_migrate.h"
@@ -205,34 +206,6 @@ svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
return r;
 }
 
-static uint64_t
-svm_migrate_node_physical_addr(struct amdgpu_device *adev,
-  struct drm_mm_node **mm_node, uint64_t *offset)
-{
-   struct drm_mm_node *node = *mm_node;
-   uint64_t pos = *offset;
-
-   if (node->start == AMDGPU_BO_INVALID_OFFSET) {
-   pr_debug("drm node is not validated\n");
-   return 0;
-   }
-
-   pr_debug("vram node start 0x%llx npages 0x%llx\n", node->start,
-node->size);
-
-   if (pos >= node->size) {
-   do  {
-   pos -= node->size;
-   node++;
-   } while (pos >= node->size);
-
-   *mm_node = node;
-   *offset = pos;
-   }
-
-   return (node->start + pos) << PAGE_SHIFT;
-}
-
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
 {
@@ -297,11 +270,9 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 {
uint64_t npages = migrate->cpages;
struct device *dev = adev->dev;
-   struct drm_mm_node *node;
+   struct amdgpu_res_cursor cursor;
dma_addr_t *src;
uint64_t *dst;
-   uint64_t vram_addr;
-   uint64_t offset;
uint64_t i, j;
int r;
 
@@ -317,19 +288,12 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
goto out;
}
 
-   node = prange->ttm_res->mm_node;
-   offset = prange->offset;
-   vram_addr = svm_migrate_node_physical_addr(adev, , );
-   if (!vram_addr) {
-   WARN_ONCE(1, "vram node address is 0\n");
-   r = -ENOMEM;
-   goto out;
-   }
-
+   amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
+npages << PAGE_SHIFT, );
for (i = j = 0; i < npages; i++) {
struct page *spage;
 
-   dst[i] = vram_addr + (j << PAGE_SHIFT);
+   dst[i] = cursor.start + (j << PAGE_SHIFT);
migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
svm_migrate_get_vram_page(prange, migrate->dst[i]);
 
@@ -354,18 +318,10 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
mfence);
if (r)
goto out_free_vram_pages;
-   offset += j;
-			vram_addr = (node->start + offset) << PAGE_SHIFT;
+   amdgpu_res_next(, j << PAGE_SHIFT);
j = 0;
} else {
-   offset++;
-   vram_addr += PAGE_SIZE;
-   }
-   if (offset >= node->size) {
-   node++;
-   pr_debug("next node size 0x%llx\n", node->size);
-   vram_addr = node->start << PAGE_SHIFT;
-   offset = 0;
+   amdgpu_res_next(, PAGE_SIZE);
}
continue;
}
@@ -373,19 +329,15 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
 src[i] >> PAGE_SHIFT, page_to_pfn(spage));
 
-   if (j + offset >= node->size - 1 && i < npages - 1) {
+   if (j << PAGE_SHIFT >= cursor.size - 1 && i < npages - 1) {
r = svm_migrate_copy_memory_gart(adev, src + i - j,
 dst + i - j, j + 1,
 FROM_RAM_TO_VRAM,
 mfence);
if (r)
goto out_free_vram_pages;
-
-   node++;
-   pr_debug("next node size 0x%llx\n", 

Re: [PATCH 1/1] drm/amd/display: WARN_ON cleanups

2021-05-28 Thread Christian König



On 27.05.21 at 14:14, Nirmoy Das wrote:

Use WARN_ON() inside the if-condition when possible.
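
This works because WARN_ON(cond) evaluates to the condition itself, so
it can gate the branch directly, e.g.:

	if (WARN_ON(!ptr))	/* prints a backtrace and evaluates to true */
		return -EINVAL;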

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 11 ---
  1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f2ed51484980..736eb537a8e4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -315,8 +315,7 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev,
struct drm_crtc *crtc;
struct amdgpu_crtc *amdgpu_crtc;
  
-	if (otg_inst == -1) {

-   WARN_ON(1);
+   if (WARN_ON(otg_inst == -1)) {
return adev->mode_info.crtcs[0];
}


While at it you could also drop the extra {} here.

Apart from that patch is Acked-by: Christian König 
.


Christian.

  
@@ -397,8 +396,7 @@ static void dm_pflip_high_irq(void *interrupt_params)

e = amdgpu_crtc->event;
amdgpu_crtc->event = NULL;
  
-	if (!e)

-   WARN_ON(1);
+   WARN_ON(!e);
  
  	vrr_active = amdgpu_dm_vrr_active_irq(amdgpu_crtc);
  
@@ -6698,9 +6696,8 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
  
  	dm_update_crtc_active_planes(crtc, crtc_state);
  
-	if (unlikely(!dm_crtc_state->stream &&

-modeset_required(crtc_state, NULL, 
dm_crtc_state->stream))) {
-   WARN_ON(1);
+   if (WARN_ON(unlikely(!dm_crtc_state->stream &&
+modeset_required(crtc_state, NULL, 
dm_crtc_state->stream {
return ret;
}
  


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdkfd: move flushing TLBs from map to unmap

2021-05-28 Thread Christian König



On 27.05.21 at 16:05, philip yang wrote:



On 2021-05-26 5:25 p.m., Felix Kuehling wrote:

On 2021-05-26 at 3:21 p.m., Eric Huang wrote:

On 2021-05-25 3:16 p.m., Felix Kuehling wrote:

Similar to a recent fix by Philip Yang 76e08b37d0aa ("drm/amdgpu: flush
TLB if valid PDE turns into PTE"), there needs to be a conditional TLB
flush after map, if any PDEs were unmapped and turned into PTEs in the
process. This is currently returned by amdgpu_vm_bo_update_mapping in
the "table_freed" parameter. This needs to be also returned by
amdgpu_vm_bo_update and reported back to KFD, so KFD can do the TLB
flush after map, if needed.

I followed your suggestion to create another patch (attached) and
tested it. It doesn't seem to improve the latency when the memory size
is bigger than a huge page (2M), because the table_freed parameter will
always be true when the mapping uses huge-page size. I think Philip's
patch fixes the case of remapping memory from small pages to huge pages
in HMM, but it doesn't consider whether the memory was remapped, and it
arbitrarily flushes TLBs when mapping a huge page.

That's unexpected. Turning an invalid PDE into a valid (huge) PTE should
not trigger a TLB flush.


table_freed will be true if the PDE was used by a previous mapping:
unmapping the previous mapping clears the PTEs and leaves the PDE
unchanged as P=0, V=1 (in memory and TLB); the huge-page mapping then
turns the PDE into a PTE (P=1, V=1) in memory and frees the PTE page.




I think there might be a little bug in your patch. See, we set
params.table_freed to true when we call amdgpu_vm_free_pts(), but
amdgpu_vm_free_pts() doesn't necessarily free anything.


It can be that all subsequent page tables were never allocated before.

Christian.

For example: test-map 0x7ffe37401000, unmap it, and then map
0x7ffe374 as a 2MB huge page; table_freed will be true, which means
a TLB flush is needed after mapping the huge page.


You can change the test: don't unmap the previous mapping, then the
2MB huge page will get a new GPU virtual address; or close KFD and
open KFD again to create a new GPU vm.


Regards,

Philip


Regards,
   Felix



kfd_flush_tlb probably needs a new parameter to determine the flush
type. The flush after map can be a "legacy" flush (type 0). The flush
after unmap must be a "heavy-weight" flush (type 2) to make sure we
don't evict cache lines into pages that we no longer own.

Finally, in the ticket I thought about possible optimizations using a
worker to minimize the impact of TLB flushes on unmap latency. That
could be a follow up commit.

It is a good idea to use a worker, but how do we guarantee it is done
before memory is remapped? If remapping depends on it, then more
latency will be introduced in map.

Regards,
Eric

Regards,
    Felix


On 2021-05-25 at 1:53 p.m., Eric Huang wrote:

It is to optimize memory allocation latency.

Signed-off-by: Eric Huang

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 960913a35ee4..ab73741edb97 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1657,20 +1657,6 @@ static int kfd_ioctl_map_memory_to_gpu(struct
file *filep,
  goto sync_memory_failed;
  }

-   /* Flush TLBs after waiting for the page table updates to
complete */
-   for (i = 0; i < args->n_devices; i++) {
-   peer = kfd_device_by_id(devices_arr[i]);
-   if (WARN_ON_ONCE(!peer))
-   continue;
-   peer_pdd = kfd_get_process_device_data(peer, p);
-   if (WARN_ON_ONCE(!peer_pdd))
-   continue;
-   if (!amdgpu_read_lock(peer->ddev, true)) {
-   kfd_flush_tlb(peer_pdd);
-   amdgpu_read_unlock(peer->ddev);
-   }
-   }
-
  kfree(devices_arr);

  trace_kfd_map_memory_to_gpu_end(p,
@@ -1766,6 +1752,7 @@ static int
kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
  amdgpu_read_unlock(peer->ddev);
  goto unmap_memory_from_gpu_failed;
  }
+   kfd_flush_tlb(peer_pdd);
  amdgpu_read_unlock(peer->ddev);
  args->n_success = i+1;
  }
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org

Re: [PATCH v2 6/6] drm/amdgpu: do not allocate entries separately

2021-05-28 Thread Das, Nirmoy


On 5/28/2021 4:09 PM, Christian König wrote:



On 28.05.21 at 12:56, Nirmoy Das wrote:

Allocate PD/PT entries while allocating VM BOs and use that
instead of allocating those entries separately.

v2: create a new var for num entries.
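
For reference, struct_size() (from <linux/overflow.h>) computes the
size of a struct with a trailing flexible array member, with overflow
checking; roughly:

	/* struct_size(p, entries, n) ~= sizeof(*p) + n * sizeof(p->entries[0]),
	 * saturating to SIZE_MAX on overflow instead of wrapping. */
	bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);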

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 34 +++---
  1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 223c63342ecd..7e478ffb7fdf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -877,6 +877,7 @@ static int amdgpu_vm_pt_create(struct 
amdgpu_device *adev,

  struct amdgpu_bo *bo;
  struct dma_resv *resv;
  int r;
+    unsigned int num_entries;


Move that a bit more up; variables like r and i should always be
declared last.




Thanks, I will keep this in mind next time.



Apart from that Reviewed-by: Christian König 

Next step should probably be to remove entry->entries.



Right, I will do that.


Regards,

Nirmoy



Regards,
Christian.



  memset(, 0, sizeof(bp));

@@ -886,7 +887,14 @@ static int amdgpu_vm_pt_create(struct 
amdgpu_device *adev,

  bp.domain = amdgpu_bo_get_preferred_pin_domain(adev, bp.domain);
  bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
  AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-    bp.bo_ptr_size = sizeof(struct amdgpu_bo_vm);
+
+    if (level < AMDGPU_VM_PTB)
+    num_entries = amdgpu_vm_num_entries(adev, level);
+    else
+    num_entries = 0;
+
+    bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
+
  if (vm->use_cpu_for_update)
  bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

@@ -957,19 +965,14 @@ static int amdgpu_vm_alloc_pts(struct 
amdgpu_device *adev,

  struct amdgpu_bo_vm *pt;
  int r;

-    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
-    unsigned num_entries;
-
-    num_entries = amdgpu_vm_num_entries(adev, cursor->level);
-    entry->entries = kvmalloc_array(num_entries,
-    sizeof(*entry->entries),
-    GFP_KERNEL | __GFP_ZERO);
-    if (!entry->entries)
-    return -ENOMEM;
-    }
-
-    if (entry->base.bo)
+    if (entry->base.bo) {
+    if (cursor->level < AMDGPU_VM_PTB)
+    entry->entries =
+    to_amdgpu_bo_vm(entry->base.bo)->entries;
+    else
+    entry->entries = NULL;
  return 0;
+    }

  r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, );
  if (r)
@@ -981,6 +984,10 @@ static int amdgpu_vm_alloc_pts(struct 
amdgpu_device *adev,

  pt_bo = >bo;
  pt_bo->parent = amdgpu_bo_ref(cursor->parent->base.bo);
  amdgpu_vm_bo_base_init(>base, vm, pt_bo);
+    if (cursor->level < AMDGPU_VM_PTB)
+    entry->entries = pt->entries;
+    else
+    entry->entries = NULL;

  r = amdgpu_vm_clear_bo(adev, vm, pt, immediate);
  if (r)
@@ -1010,7 +1017,6 @@ static void amdgpu_vm_free_table(struct 
amdgpu_vm_pt *entry)

  amdgpu_bo_unref();
  amdgpu_bo_unref(>base.bo);
  }
-    kvfree(entry->entries);
  entry->entries = NULL;
  }

--
2.31.1




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/1] drm/amdgpu: cleanup gart tlb flush logic

2021-05-28 Thread Das, Nirmoy


On 5/28/2021 4:48 PM, Christian König wrote:

On 28.05.21 at 16:44, Nirmoy Das wrote:

Don't flush the gpu tlb after recovering each BO; instead,
do it after recovering all the BOs.

v2: abstract out gart tlb flushing logic to amdgpu_gart.c

Signed-off-by: Nirmoy Das 
---
Is there a better way to get adev in amdgpu_gtt_mgr_recover()?

  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c    | 20 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h    |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  6 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 +
  4 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

index 5562b5c90c03..e2059f7ed639 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -322,16 +322,26 @@ int amdgpu_gart_bind(struct amdgpu_device 
*adev, uint64_t offset,

  if (!adev->gart.ptr)
  return 0;

-    r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
-    adev->gart.ptr);
-    if (r)
-    return r;
+    return amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
+   adev->gart.ptr);
+}
+
+/**
+ * amdgpu_gart_tlb_flush - flush gart TLB


Either change the description to "flush GART changes" and drop the
_tlb_ part of the name, or rename the function to
amdgpu_gart_invalidate_tlb.


Since we flush gart changes by invalidating the TLB, the name is
otherwise a mixup.



Thanks!  amdgpu_gart_invalidate_tlb() sounds better to me, I will resend.


Nirmoy




Sorry, I didn't think about that before; apart from that, the patch
looks good to me.


Christian.



+ *
+ * @adev: amdgpu device driver pointer
+ *
+ * Flush TLB of gart page table.
+ *
+ */
+void amdgpu_gart_tlb_flush(struct amdgpu_device *adev)
+{
+    int i;

  mb();
  amdgpu_asic_flush_hdp(adev, NULL);
  for (i = 0; i < adev->num_vmhubs; i++)
  amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
-    return 0;
  }

  /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h

index a25fe97b0196..c853b70a24cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -66,5 +66,5 @@ int amdgpu_gart_map(struct amdgpu_device *adev, 
uint64_t offset,

  int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
   int pages, struct page **pagelist,
   dma_addr_t *dma_addr, uint64_t flags);
-
+void amdgpu_gart_tlb_flush(struct amdgpu_device *adev);
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c

index 8860545344c7..b61a54f6d95d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -205,6 +205,7 @@ uint64_t amdgpu_gtt_mgr_usage(struct 
ttm_resource_manager *man)

   */
  int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
  {
+    struct amdgpu_device *adev = NULL;
  struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
  struct amdgpu_gtt_node *node;
  struct drm_mm_node *mm_node;
@@ -216,9 +217,14 @@ int amdgpu_gtt_mgr_recover(struct 
ttm_resource_manager *man)

  r = amdgpu_ttm_recover_gart(node->tbo);
  if (r)
  break;
+    if (!adev)
+    adev = amdgpu_ttm_adev(node->tbo->bdev);
  }
  spin_unlock(>lock);

+    if (adev)
+    amdgpu_gart_tlb_flush(adev);
+
  return r;
  }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index c0aef327292a..5e514759d319 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1014,6 +1014,7 @@ int amdgpu_ttm_alloc_gart(struct 
ttm_buffer_object *bo)

  return r;
  }

+    amdgpu_gart_tlb_flush(adev);
  ttm_resource_free(bo, >mem);
  bo->mem = tmp;
  }
--
2.31.1




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/1] drm/amdgpu: make sure we unpin the UVD BO

2021-05-28 Thread Das, Nirmoy


On 5/28/2021 4:36 PM, Christian König wrote:

On 20.04.21 at 12:50, Nirmoy Das wrote:

Releasing pinned BOs is illegal now.
UVD 6 was missing from:
commit 2f40801dc553 ("drm/amdgpu: make sure we unpin the UVD BO")
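
The underlying rule, sketched (the pin domain and error label here are
illustrative, not the exact driver code):

	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM);
	...
error:
	dma_fence_put(fence);
	amdgpu_bo_unpin(bo);		/* balance the earlier pin... */
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);		/* ...before the final unref */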

Signed-off-by: Nirmoy Das 


Reviewed-by: Christian König 

Maybe add a CC:stable tag as well.

Sorry, I somehow overlooked this patch. Can you make sure that it lands
in amd-staging-drm-next ASAP? We have a user complaining.




No worries, I will push it now with CC:stable.


Thanks,

Nirmoy



Thanks,
Christian.


---
  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c

index 760859880c1e..4eebf973a065 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -357,6 +357,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct 
amdgpu_ring *ring, long timeout)

    error:
  dma_fence_put(fence);
+    amdgpu_bo_unpin(bo);
  amdgpu_bo_unreserve(bo);
  amdgpu_bo_unref();
  return r;



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 1/1] drm/amdgpu: cleanup gart tlb flush logic

2021-05-28 Thread Christian König

On 28.05.21 at 16:44, Nirmoy Das wrote:

Don't flush the gpu tlb after recovering each BO; instead,
do it after recovering all the BOs.

v2: abstract out gart tlb flushing logic to amdgpu_gart.c

Signed-off-by: Nirmoy Das 
---
Is there a better way to get adev in amdgpu_gtt_mgr_recover()?

  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c| 20 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h|  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  6 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 +
  4 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 5562b5c90c03..e2059f7ed639 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -322,16 +322,26 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t 
offset,
if (!adev->gart.ptr)
return 0;

-   r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
-   adev->gart.ptr);
-   if (r)
-   return r;
+   return amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
+  adev->gart.ptr);
+}
+
+/**
+ * amdgpu_gart_tlb_flush - flush gart TLB


Either change the description like "flush GART changes" and drop the
_tlb_ part of the name, or rename the function to
amdgpu_gart_invalidate_tlb.


We flush GART changes by invalidating the TLB; otherwise we have a
mix-up in the name.


Sorry, I didn't think about that before; apart from that the patch looks
good to me.


Christian.
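
A minimal sketch of the rename suggested above (hypothetical; the body is
taken verbatim from the v2 patch quoted below, only the name changes so
that "flush GART changes by invalidating the TLB" and the function name
line up):

void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
{
	int i;

	/* make the GART page table updates visible before invalidating */
	mb();
	amdgpu_asic_flush_hdp(adev, NULL);
	for (i = 0; i < adev->num_vmhubs; i++)
		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
}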



+ *
+ * @adev: amdgpu device driver pointer
+ *
+ * Flush TLB of gart page table.
+ *
+ */
+void amdgpu_gart_tlb_flush(struct amdgpu_device *adev)
+{
+   int i;

mb();
amdgpu_asic_flush_hdp(adev, NULL);
for (i = 0; i < adev->num_vmhubs; i++)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
-   return 0;
  }

  /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index a25fe97b0196..c853b70a24cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -66,5 +66,5 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t 
offset,
  int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 int pages, struct page **pagelist,
 dma_addr_t *dma_addr, uint64_t flags);
-
+void amdgpu_gart_tlb_flush(struct amdgpu_device *adev);
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8860545344c7..b61a54f6d95d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -205,6 +205,7 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager 
*man)
   */
  int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
  {
+   struct amdgpu_device *adev = NULL;
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
struct amdgpu_gtt_node *node;
struct drm_mm_node *mm_node;
@@ -216,9 +217,14 @@ int amdgpu_gtt_mgr_recover(struct ttm_resource_manager 
*man)
r = amdgpu_ttm_recover_gart(node->tbo);
if (r)
break;
+   if (!adev)
+   adev = amdgpu_ttm_adev(node->tbo->bdev);
}
	spin_unlock(&mgr->lock);

+   if (adev)
+   amdgpu_gart_tlb_flush(adev);
+
return r;
  }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c0aef327292a..5e514759d319 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1014,6 +1014,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return r;
}

+   amdgpu_gart_tlb_flush(adev);
	ttm_resource_free(bo, &bo->mem);
bo->mem = tmp;
}
--
2.31.1





[PATCH v2 1/1] drm/amdgpu: cleanup gart tlb flush logic

2021-05-28 Thread Nirmoy Das
Don't flush gpu tlb after recovering each BO; instead
do it after recovering all the BOs.

v2: abstract out gart tlb flushing logic to amdgpu_gart.c

Signed-off-by: Nirmoy Das 
---
Is there a better way to get adev in amdgpu_gtt_mgr_recover()? (One possible approach is sketched after the diff below.)

 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c| 20 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 +
 4 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 5562b5c90c03..e2059f7ed639 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -322,16 +322,26 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t 
offset,
if (!adev->gart.ptr)
return 0;

-   r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
-   adev->gart.ptr);
-   if (r)
-   return r;
+   return amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
+  adev->gart.ptr);
+}
+
+/**
+ * amdgpu_gart_tlb_flush - flush gart TLB
+ *
+ * @adev: amdgpu device driver pointer
+ *
+ * Flush TLB of gart page table.
+ *
+ */
+void amdgpu_gart_tlb_flush(struct amdgpu_device *adev)
+{
+   int i;

mb();
amdgpu_asic_flush_hdp(adev, NULL);
for (i = 0; i < adev->num_vmhubs; i++)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
-   return 0;
 }

 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index a25fe97b0196..c853b70a24cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -66,5 +66,5 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t 
offset,
 int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 int pages, struct page **pagelist,
 dma_addr_t *dma_addr, uint64_t flags);
-
+void amdgpu_gart_tlb_flush(struct amdgpu_device *adev);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8860545344c7..b61a54f6d95d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -205,6 +205,7 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager 
*man)
  */
 int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
 {
+   struct amdgpu_device *adev = NULL;
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
struct amdgpu_gtt_node *node;
struct drm_mm_node *mm_node;
@@ -216,9 +217,14 @@ int amdgpu_gtt_mgr_recover(struct ttm_resource_manager 
*man)
r = amdgpu_ttm_recover_gart(node->tbo);
if (r)
break;
+   if (!adev)
+   adev = amdgpu_ttm_adev(node->tbo->bdev);
}
	spin_unlock(&mgr->lock);

+   if (adev)
+   amdgpu_gart_tlb_flush(adev);
+
return r;
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c0aef327292a..5e514759d319 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1014,6 +1014,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return r;
}

+   amdgpu_gart_tlb_flush(adev);
	ttm_resource_free(bo, &bo->mem);
bo->mem = tmp;
}
--
2.31.1
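
On the open question above about getting adev: one possibility (a sketch,
assuming struct amdgpu_gtt_mgr stays embedded in adev->mman as it is in
this tree; the helper name is made up) is to derive it from the manager
itself instead of from the first recovered BO:

static struct amdgpu_device *gtt_mgr_to_adev(struct amdgpu_gtt_mgr *mgr)
{
	/* recover the device from the embedded manager */
	return container_of(mgr, struct amdgpu_device, mman.gtt_mgr);
}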



Re: [PATCH 1/1] drm/amdgpu: make sure we unpin the UVD BO

2021-05-28 Thread Christian König

On 20.04.21 at 12:50, Nirmoy Das wrote:

Releasing pinned BOs is illegal now.
UVD 6 was missing from:
commit 2f40801dc553 ("drm/amdgpu: make sure we unpin the UVD BO")

Signed-off-by: Nirmoy Das 


Reviewed-by: Christian König 

Maybe add a CC:stable tag as well.

Sorry I somehow overlooked this patch. Can you make sure that it lands 
in amd-staging-drm-next ASAP? We have a user complaining.


Thanks,
Christian.


---
  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 760859880c1e..4eebf973a065 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -357,6 +357,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring 
*ring, long timeout)
  
  error:

dma_fence_put(fence);
+   amdgpu_bo_unpin(bo);
amdgpu_bo_unreserve(bo);
amdgpu_bo_unref();
return r;




Re: [PATCH v2 6/6] drm/amdgpu: do not allocate entries separately

2021-05-28 Thread Christian König



On 28.05.21 at 12:56, Nirmoy Das wrote:

Allocate PD/PT entries while allocating VM BOs and use that
instead of allocating those entries separately.

v2: create a new var for num entries.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 34 +++---
  1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 223c63342ecd..7e478ffb7fdf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -877,6 +877,7 @@ static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
struct amdgpu_bo *bo;
struct dma_resv *resv;
int r;
+   unsigned int num_entries;


Move that a bit more up; variables like r and i should always be declared last.

Apart from that Reviewed-by: Christian König 

Next step should probably be to remove entry->entries.

Regards,
Christian.



	memset(&bp, 0, sizeof(bp));

@@ -886,7 +887,14 @@ static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
bp.domain = amdgpu_bo_get_preferred_pin_domain(adev, bp.domain);
bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-   bp.bo_ptr_size = sizeof(struct amdgpu_bo_vm);
+
+   if (level < AMDGPU_VM_PTB)
+   num_entries = amdgpu_vm_num_entries(adev, level);
+   else
+   num_entries = 0;
+
+   bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
+
if (vm->use_cpu_for_update)
bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

@@ -957,19 +965,14 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_bo_vm *pt;
int r;

-   if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
-   unsigned num_entries;
-
-   num_entries = amdgpu_vm_num_entries(adev, cursor->level);
-   entry->entries = kvmalloc_array(num_entries,
-   sizeof(*entry->entries),
-   GFP_KERNEL | __GFP_ZERO);
-   if (!entry->entries)
-   return -ENOMEM;
-   }
-
-   if (entry->base.bo)
+   if (entry->base.bo) {
+   if (cursor->level < AMDGPU_VM_PTB)
+   entry->entries =
+   to_amdgpu_bo_vm(entry->base.bo)->entries;
+   else
+   entry->entries = NULL;
return 0;
+   }

	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
if (r)
@@ -981,6 +984,10 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
	pt_bo = &pt->bo;
	pt_bo->parent = amdgpu_bo_ref(cursor->parent->base.bo);
	amdgpu_vm_bo_base_init(&entry->base, vm, pt_bo);
+   if (cursor->level < AMDGPU_VM_PTB)
+   entry->entries = pt->entries;
+   else
+   entry->entries = NULL;

r = amdgpu_vm_clear_bo(adev, vm, pt, immediate);
if (r)
@@ -1010,7 +1017,6 @@ static void amdgpu_vm_free_table(struct amdgpu_vm_pt 
*entry)
	amdgpu_bo_unref(&shadow);
	amdgpu_bo_unref(&entry->base.bo);
}
-   kvfree(entry->entries);
entry->entries = NULL;
  }

--
2.31.1





Re: [PATCH v4 4/6] drm/amdgpu: switch to amdgpu_bo_vm for vm code

2021-05-28 Thread Christian König



On 28.05.21 at 12:56, Nirmoy Das wrote:

The subclass, amdgpu_bo_vm is intended for PT/PD BOs which are also
shadowed, so switch to amdgpu_bo_vm BO for PT/PD BOs.

v4: update amdgpu_vm_update_funcs to accept amdgpu_bo_vm.
v3: simplify code.
 check also if the shadow BO exists instead of checking only the BO type.
v2: squash three related patches.

Signed-off-by: Nirmoy Das 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 123 
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   5 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  14 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c |  19 +--
  4 files changed, 96 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6bc7566cc193..223c63342ecd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -652,15 +652,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device 
*adev,
 	spin_lock(&adev->mman.bdev.lru_lock);
 	list_for_each_entry(bo_base, &vm->idle, vm_status) {
 		struct amdgpu_bo *bo = bo_base->bo;
+		struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
 
 		if (!bo->parent)
 			continue;
 
 		ttm_bo_move_to_lru_tail(&bo->tbo, &bo->tbo.mem,
 					&vm->lru_bulk_move);
-		if (bo->shadow)
-			ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
-						&bo->shadow->tbo.mem,
+		if (shadow)
+			ttm_bo_move_to_lru_tail(&shadow->tbo, &shadow->tbo.mem,
 						&vm->lru_bulk_move);
 	}
 	spin_unlock(&adev->mman.bdev.lru_lock);
@@ -692,12 +692,13 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,

	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
+   struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);

r = validate(param, bo);
if (r)
return r;
-   if (bo->shadow) {
-   r = validate(param, bo->shadow);
+   if (shadow) {
+   r = validate(param, shadow);
if (r)
return r;
}
@@ -705,7 +706,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
if (bo->tbo.type != ttm_bo_type_kernel) {
amdgpu_vm_bo_moved(bo_base);
} else {
-   vm->update_funcs->map_table(bo);
+   vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
amdgpu_vm_bo_relocated(bo_base);
}
}
@@ -737,7 +738,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
   *
   * @adev: amdgpu_device pointer
   * @vm: VM to clear BO from
- * @bo: BO to clear
+ * @vmbo: BO to clear
   * @immediate: use an immediate update
   *
   * Root PD needs to be reserved when calling this.
@@ -747,13 +748,14 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
   */
  static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
  struct amdgpu_vm *vm,
- struct amdgpu_bo *bo,
+ struct amdgpu_bo_vm *vmbo,
  bool immediate)
  {
struct ttm_operation_ctx ctx = { true, false };
unsigned level = adev->vm_manager.root_level;
struct amdgpu_vm_update_params params;
-   struct amdgpu_bo *ancestor = bo;
+	struct amdgpu_bo *ancestor = &vmbo->bo;
+	struct amdgpu_bo *bo = &vmbo->bo;
unsigned entries, ats_entries;
uint64_t addr;
int r;
@@ -793,14 +795,15 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
if (r)
return r;

-	if (bo->shadow) {
-		r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement,
-				    &ctx);
+	if (vmbo->shadow) {
+		struct amdgpu_bo *shadow = vmbo->shadow;
+
+		r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
if (r)
return r;
}

-   r = vm->update_funcs->map_table(bo);
+   r = vm->update_funcs->map_table(vmbo);
if (r)
return r;

@@ -824,7 +827,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
	amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
}

-	r = vm->update_funcs->update(&params, bo, addr, 0, ats_entries,
+	r = vm->update_funcs->update(&params, vmbo, addr, 0, ats_entries,
 				     value, flags);
if (r)
return r;
@@ -847,7 +850,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
}
   

Re: [PATCH 1/1] drm/amdgpu: flush gpu tlb after a gart allocation

2021-05-28 Thread Das, Nirmoy


On 5/28/2021 3:03 PM, Christian König wrote:



On 28.05.21 at 14:54, Nirmoy Das wrote:

Flush gpu tlb in amdgpu_ttm_alloc_gart(). Also
don't flush gpu tlb after recovering each BO.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 2 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 5 -
  2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

index 5562b5c90c03..68d41063e120 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -329,8 +329,6 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, 
uint64_t offset,

    mb();
  amdgpu_asic_flush_hdp(adev, NULL);


The memory barrier and HDP flush can be moved as well.


-    for (i = 0; i < adev->num_vmhubs; i++)
-    amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
  return 0;
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index c0aef327292a..e68b5dab84a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -977,7 +977,7 @@ int amdgpu_ttm_alloc_gart(struct 
ttm_buffer_object *bo)

  struct ttm_placement placement;
  struct ttm_place placements;
  uint64_t addr, flags;
-    int r;
+    int r, i;
    if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
  return 0;
@@ -1014,6 +1014,9 @@ int amdgpu_ttm_alloc_gart(struct 
ttm_buffer_object *bo)

  return r;
  }
  +    for (i = 0; i < adev->num_vmhubs; i++)
+    amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+


Probably best to make that a function in amdgpu_gart.c to keep the 
GART functionality properly abstracted.


This can then be called from both amdgpu_gtt_mgr_recover() after 
dropping the lock and here.



Thanks, Christian! I will resend.


Nirmoy



Regards,
Christian.


  ttm_resource_free(bo, &bo->mem);
  bo->mem = tmp;
  }





Re: [PATCH 1/1] drm/amdgpu: flush gpu tlb after a gart allocation

2021-05-28 Thread Christian König




On 28.05.21 at 14:54, Nirmoy Das wrote:

Flush gpu tlb in amdgpu_ttm_alloc_gart(). Also
don't flush gpu tlb after recovering each BO.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 2 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 5 -
  2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 5562b5c90c03..68d41063e120 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -329,8 +329,6 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t 
offset,
  
  	mb();

amdgpu_asic_flush_hdp(adev, NULL);


The memory barrier and HDP flush can be moved as well.


-   for (i = 0; i < adev->num_vmhubs; i++)
-   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index c0aef327292a..e68b5dab84a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -977,7 +977,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
struct ttm_placement placement;
struct ttm_place placements;
uint64_t addr, flags;
-   int r;
+   int r, i;
  
  	if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)

return 0;
@@ -1014,6 +1014,9 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return r;
}
  
+		for (i = 0; i < adev->num_vmhubs; i++)

+   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+


Probably best to make that a function in amdgpu_gart.c to keep the GART 
functionality properly abstracted.


This can then be called from both amdgpu_gtt_mgr_recover() after 
dropping the lock and here.


Regards,
Christian.


	ttm_resource_free(bo, &bo->mem);
bo->mem = tmp;
}




[PATCH 1/1] drm/amdgpu: flush gpu tlb after a gart allocation

2021-05-28 Thread Nirmoy Das
Flush gpu tlb in amdgpu_ttm_alloc_gart(). Also
don't flush gpu tlb after recovering each BO.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 2 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 5 -
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 5562b5c90c03..68d41063e120 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -329,8 +329,6 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t 
offset,
 
mb();
amdgpu_asic_flush_hdp(adev, NULL);
-   for (i = 0; i < adev->num_vmhubs; i++)
-   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c0aef327292a..e68b5dab84a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -977,7 +977,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
struct ttm_placement placement;
struct ttm_place placements;
uint64_t addr, flags;
-   int r;
+   int r, i;
 
if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
return 0;
@@ -1014,6 +1014,9 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return r;
}
 
+   for (i = 0; i < adev->num_vmhubs; i++)
+   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+
	ttm_resource_free(bo, &bo->mem);
bo->mem = tmp;
}
-- 
2.31.1



Re: [PATCH 2/2] drm/amdgpu: stop bookkeeping of temporary GTT allocation

2021-05-28 Thread Christian König

On 28.05.21 at 11:47, Yu, Lang wrote:

[AMD Official Use Only]


Inline.


-Original Message-
From: Koenig, Christian 
Sent: Thursday, May 27, 2021 7:51 PM
To: Yu, Lang ; amd-gfx@lists.freedesktop.org; dri-
de...@lists.freedesktop.org
Cc: Huang, Ray ; Deucher, Alexander
; Olsak, Marek 
Subject: Re: [PATCH 2/2] drm/amdgpu: stop bookkeeping of temporary GTT
allocation

Putting Marek on CC.

On 27.05.21 at 03:30, Lang Yu wrote:

To improve buffer migration performance, stop bookkeeping of temporary
GTT allocations, including allocations for BOs evicted from VRAM and
bounce buffers.

Signed-off-by: Lang Yu 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 16 ++--
   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  4 +++-
   2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8860545344c7..32fedd495c7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -111,14 +111,15 @@ static int amdgpu_gtt_mgr_new(struct

ttm_resource_manager *man,

struct amdgpu_gtt_node *node;
int r;

-	spin_lock(&mgr->lock);
-	if ((&tbo->mem == mem || tbo->mem.mem_type != TTM_PL_TT) &&
-	    atomic64_read(&mgr->available) < mem->num_pages) {
+	if (!(mem->placement & TTM_PL_FLAG_TEMPORARY)) {
+		spin_lock(&mgr->lock);
+		if (atomic64_read(&mgr->available) < mem->num_pages) {
+			spin_unlock(&mgr->lock);
+			return -ENOSPC;
+		}
+		atomic64_sub(mem->num_pages, &mgr->available);

After sleeping a night over that I think we need to talk about this part here 
once
more.

While temporary GTT allocations can temporarily exceed the GTT limitation,
we still need to account for them in case the eviction is interrupted for
some reason.

In other words what can happen is that we want to move
VRAM->GTT->SYSTEM, but GTT->SYSTEM never happens because it is
interrupted in the wait (that's unfortunately rather likely).

To solve this I think we should do the following:
1. Change mgr->available into mgr->used (e.g. invert the value).
2. Always account all GTT BOs to the used space.
3. Only when it is not a temporary allocation bail out.

This way temporary allocations are accounted for, but we still allow
memory evictions to happen under pressure.

While at it you could also drop taking the spinlock to check the atomic,
that is pretty much unnecessary.

Regards,
Christian.


[Yu, Lang] Hi Christian,

Yes, it can actually happen that the BO was evicted from VRAM to the GTT
domain but was not moved forward to the SYSTEM domain. It then resides in
the GTT domain waiting for the next validation, eviction, or destruction.

It is reasonable that we count all GTT allocations.
1. I find that if a temporary GTT BO is not counted but is used for command
submission, then we can use more GTT memory than the GTT limit for command
submission. Is that your concern?


Yes, exactly that.


2. Or is it that not counting temporary GTT allocations will mess up the
GTT manager?

In other words, if we don't count it while it resides in the GTT domain,
what is the consequence?


The GTT size is the limit on how much system memory userspace can 
intentionally allocate.


This works around stupid applications which tend to allocate as much 
memory as possible (without actually needing that much) and then 
trigger the OOM killer.



I would like to know your concern. Actually it is already counted by
ttm_pages_allocated.

If we use "used" instead of "available" in the GTT manager, the used size
may exceed the manager size.


Yes, that is intentional.


We should also adjust the GTT manager debug interface.

Rework the logic like this with your idea:

if ((atomic64_add_return(mem->num_pages, &mgr->used) > man->size) &&
    !(mem->placement & TTM_PL_FLAG_TEMPORARY)) {
	atomic64_sub(mem->num_pages, &mgr->used);
return -ENOSPC;
}


Yeah, something like that.

Regards,
Christian.
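
Putting the pieces of this thread together, an illustrative sketch of the
reworked admission check (the helper name is made up; mgr->used and
TTM_PL_FLAG_TEMPORARY come from this series and are assumptions here):

/* Hypothetical helper: account every allocation in mgr->used so that
 * interrupted evictions stay visible, but only refuse the allocation
 * when it is not temporary. */
static int amdgpu_gtt_mgr_account(struct amdgpu_gtt_mgr *mgr,
				  struct ttm_resource_manager *man,
				  struct ttm_resource *mem)
{
	if (atomic64_add_return(mem->num_pages, &mgr->used) > man->size &&
	    !(mem->placement & TTM_PL_FLAG_TEMPORARY)) {
		atomic64_sub(mem->num_pages, &mgr->used);
		return -ENOSPC;
	}
	return 0;
}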



Regards,
Lang


 		spin_unlock(&mgr->lock);
-		return -ENOSPC;
 	}
-	atomic64_sub(mem->num_pages, &mgr->available);
-	spin_unlock(&mgr->lock);

if (!place->lpfn) {
mem->mm_node = NULL;
@@ -178,6 +179,9 @@ static void amdgpu_gtt_mgr_del(struct

ttm_resource_manager *man,

kfree(node);
}

+   if (mem->placement & TTM_PL_FLAG_TEMPORARY)
+   return;
+
	atomic64_add(mem->num_pages, &mgr->available);
   }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index c0aef327292a..129d39392859 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -152,9 +152,11 @@ static void amdgpu_evict_flags(struct

ttm_buffer_object *bo,

abo->placements[0].lpfn = 0;
abo->placement.busy_placement = &abo->placements[1];

Re: [PATCH v3] amdgpu: remove unreachable code

2021-05-28 Thread Christian König

On 28.05.21 at 11:29, Jiapeng Chong wrote:

In the function amdgpu_uvd_cs_msg(), every branch in the switch
statement will have a return, so the code below the switch statement
will not be executed.

Eliminate the follow smatch warning:

drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c:845 amdgpu_uvd_cs_msg() warn:
ignoring unreachable code.

Reported-by: Abaci Robot 
Signed-off-by: Jiapeng Chong 


Reviewed-by: Christian König 


---
Changes in v2:
   -For the following advice: https://lore.kernel.org/patchwork/patch/1435968/

  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index c6dbc08..35f6874 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -829,9 +829,8 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
  
  	default:

DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
-   return -EINVAL;
}
-   BUG();
+
return -EINVAL;
  }
  




[PATCH v2 6/6] drm/amdgpu: do not allocate entries separately

2021-05-28 Thread Nirmoy Das
Allocate PD/PT entries while allocating VM BOs and use that
instead of allocating those entries separately.

v2: create a new var for num entries.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 34 +++---
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 223c63342ecd..7e478ffb7fdf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -877,6 +877,7 @@ static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
struct amdgpu_bo *bo;
struct dma_resv *resv;
int r;
+   unsigned int num_entries;

	memset(&bp, 0, sizeof(bp));

@@ -886,7 +887,14 @@ static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
bp.domain = amdgpu_bo_get_preferred_pin_domain(adev, bp.domain);
bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-   bp.bo_ptr_size = sizeof(struct amdgpu_bo_vm);
+
+   if (level < AMDGPU_VM_PTB)
+   num_entries = amdgpu_vm_num_entries(adev, level);
+   else
+   num_entries = 0;
+
+   bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
+
if (vm->use_cpu_for_update)
bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

@@ -957,19 +965,14 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_bo_vm *pt;
int r;

-   if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
-   unsigned num_entries;
-
-   num_entries = amdgpu_vm_num_entries(adev, cursor->level);
-   entry->entries = kvmalloc_array(num_entries,
-   sizeof(*entry->entries),
-   GFP_KERNEL | __GFP_ZERO);
-   if (!entry->entries)
-   return -ENOMEM;
-   }
-
-   if (entry->base.bo)
+   if (entry->base.bo) {
+   if (cursor->level < AMDGPU_VM_PTB)
+   entry->entries =
+   to_amdgpu_bo_vm(entry->base.bo)->entries;
+   else
+   entry->entries = NULL;
return 0;
+   }

	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
if (r)
@@ -981,6 +984,10 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
	pt_bo = &pt->bo;
	pt_bo->parent = amdgpu_bo_ref(cursor->parent->base.bo);
	amdgpu_vm_bo_base_init(&entry->base, vm, pt_bo);
+   if (cursor->level < AMDGPU_VM_PTB)
+   entry->entries = pt->entries;
+   else
+   entry->entries = NULL;

r = amdgpu_vm_clear_bo(adev, vm, pt, immediate);
if (r)
@@ -1010,7 +1017,6 @@ static void amdgpu_vm_free_table(struct amdgpu_vm_pt 
*entry)
	amdgpu_bo_unref(&shadow);
	amdgpu_bo_unref(&entry->base.bo);
}
-   kvfree(entry->entries);
entry->entries = NULL;
 }

--
2.31.1
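
For readers unfamiliar with it, struct_size() from <linux/overflow.h> is
what makes the single combined allocation above work. An illustrative
fragment (not part of the patch):

struct amdgpu_bo_vm *vmbo;	/* used only for type information here */
size_t sz = struct_size(vmbo, entries, num_entries);
/* sz == sizeof(*vmbo) + num_entries * sizeof(vmbo->entries[0]),
 * saturating to SIZE_MAX on overflow, which is why 'entries' must be
 * the flexible array member at the end of struct amdgpu_bo_vm. */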



[PATCH v2 5/6] drm/amdgpu: remove unused code

2021-05-28 Thread Nirmoy Das
Remove unused code related to shadow BO.

v2: removing shadow bo ptr from base class.

Signed-off-by: Nirmoy Das 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 29 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  6 -
 2 files changed, 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 89ba612a5080..15cee49f11e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -638,35 +638,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
return r;
 }

-int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
-   unsigned long size,
-   struct amdgpu_bo *bo)
-{
-   struct amdgpu_bo_param bp;
-   int r;
-
-   if (bo->shadow)
-   return 0;
-
-	memset(&bp, 0, sizeof(bp));
-	bp.size = size;
-	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
-	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-	bp.type = ttm_bo_type_kernel;
-	bp.resv = bo->tbo.base.resv;
-	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
-
-	r = amdgpu_bo_create(adev, &bp, &bo->shadow);
-	if (!r) {
-		bo->shadow->parent = amdgpu_bo_ref(bo);
-		mutex_lock(&adev->shadow_list_lock);
-		list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
-		mutex_unlock(&adev->shadow_list_lock);
-   }
-
-   return r;
-}
-
 /**
  * amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object
  * @adev: amdgpu device object
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 9afccf6c66f2..fa75251148be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -104,9 +104,6 @@ struct amdgpu_bo {
	struct amdgpu_vm_bo_base	*vm_bo;
	/* Constant after initialization */
	struct amdgpu_bo		*parent;
-	struct amdgpu_bo		*shadow;
-
-

 #ifdef CONFIG_MMU_NOTIFIER
struct mmu_interval_notifiernotifier;
@@ -300,9 +297,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
struct amdgpu_bo_vm **ubo_ptr);
 void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
   void **cpu_addr);
-int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
-   unsigned long size,
-   struct amdgpu_bo *bo);
 int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
 void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
 void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
--
2.31.1



[PATCH v4 4/6] drm/amdgpu: switch to amdgpu_bo_vm for vm code

2021-05-28 Thread Nirmoy Das
The subclass, amdgpu_bo_vm is intended for PT/PD BOs which are also
shadowed, so switch to amdgpu_bo_vm BO for PT/PD BOs.

v4: update amdgpu_vm_update_funcs to accept amdgpu_bo_vm.
v3: simplify code.
check also if the shadow BO exists instead of checking only the BO type.
v2: squash three related patches.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 123 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  14 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c |  19 +--
 4 files changed, 96 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6bc7566cc193..223c63342ecd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -652,15 +652,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device 
*adev,
 	spin_lock(&adev->mman.bdev.lru_lock);
 	list_for_each_entry(bo_base, &vm->idle, vm_status) {
 		struct amdgpu_bo *bo = bo_base->bo;
+		struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
 
 		if (!bo->parent)
 			continue;
 
 		ttm_bo_move_to_lru_tail(&bo->tbo, &bo->tbo.mem,
 					&vm->lru_bulk_move);
-		if (bo->shadow)
-			ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
-						&bo->shadow->tbo.mem,
+		if (shadow)
+			ttm_bo_move_to_lru_tail(&shadow->tbo, &shadow->tbo.mem,
 						&vm->lru_bulk_move);
 	}
 	spin_unlock(&adev->mman.bdev.lru_lock);
@@ -692,12 +692,13 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,

	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
+   struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);

r = validate(param, bo);
if (r)
return r;
-   if (bo->shadow) {
-   r = validate(param, bo->shadow);
+   if (shadow) {
+   r = validate(param, shadow);
if (r)
return r;
}
@@ -705,7 +706,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
if (bo->tbo.type != ttm_bo_type_kernel) {
amdgpu_vm_bo_moved(bo_base);
} else {
-   vm->update_funcs->map_table(bo);
+   vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
amdgpu_vm_bo_relocated(bo_base);
}
}
@@ -737,7 +738,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
  *
  * @adev: amdgpu_device pointer
  * @vm: VM to clear BO from
- * @bo: BO to clear
+ * @vmbo: BO to clear
  * @immediate: use an immediate update
  *
  * Root PD needs to be reserved when calling this.
@@ -747,13 +748,14 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
  */
 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
  struct amdgpu_vm *vm,
- struct amdgpu_bo *bo,
+ struct amdgpu_bo_vm *vmbo,
  bool immediate)
 {
struct ttm_operation_ctx ctx = { true, false };
unsigned level = adev->vm_manager.root_level;
struct amdgpu_vm_update_params params;
-   struct amdgpu_bo *ancestor = bo;
+	struct amdgpu_bo *ancestor = &vmbo->bo;
+	struct amdgpu_bo *bo = &vmbo->bo;
unsigned entries, ats_entries;
uint64_t addr;
int r;
@@ -793,14 +795,15 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
if (r)
return r;

-	if (bo->shadow) {
-		r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement,
-				    &ctx);
+	if (vmbo->shadow) {
+		struct amdgpu_bo *shadow = vmbo->shadow;
+
+		r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
if (r)
return r;
}

-   r = vm->update_funcs->map_table(bo);
+   r = vm->update_funcs->map_table(vmbo);
if (r)
return r;

@@ -824,7 +827,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
	amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
}

-	r = vm->update_funcs->update(&params, bo, addr, 0, ats_entries,
+	r = vm->update_funcs->update(&params, vmbo, addr, 0, ats_entries,
 				     value, flags);
if (r)
return r;
@@ -847,7 +850,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
}
}

-	r = vm->update_funcs->update(&params, bo, addr, 0, entries,
+

[PATCH 3/6] drm/amdgpu: add two shadow BO helper functions

2021-05-28 Thread Nirmoy Das
Add amdgpu_bo_add_to_shadow_list() to handle shadow list
additions and amdgpu_bo_shadowed() to check if a BO is shadowed.

Signed-off-by: Nirmoy Das 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 16 
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 17 +
 2 files changed, 33 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 2e5426ab24a8..89ba612a5080 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -760,6 +760,22 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
return r;
 }
 
+/**
+ * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list
+ *
+ * @bo: BO that will be inserted into the shadow list
+ *
+ * Insert a BO to the shadow list.
+ */
+void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo *bo)
+{
+   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+	mutex_lock(&adev->shadow_list_lock);
+	list_add_tail(&bo->shadow_list, &adev->shadow_list);
+	mutex_unlock(&adev->shadow_list_lock);
+}
+
 /**
  * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index a7fbf5f7051e..9afccf6c66f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -259,6 +259,22 @@ static inline bool amdgpu_bo_encrypted(struct amdgpu_bo 
*bo)
return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED;
 }
 
+/**
+ * amdgpu_bo_shadowed - check if the BO is shadowed
+ *
+ * @bo: BO to be tested.
+ *
+ * Returns:
+ * NULL if not shadowed or else return a BO pointer.
+ */
+static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo)
+{
+   if (bo->tbo.type == ttm_bo_type_kernel)
+   return to_amdgpu_bo_vm(bo)->shadow;
+
+   return NULL;
+}
+
 bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 
@@ -322,6 +338,7 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
 int amdgpu_bo_validate(struct amdgpu_bo *bo);
 void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,
uint64_t *gtt_mem, uint64_t *cpu_mem);
+void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo *bo);
 int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
 struct dma_fence **fence);
 uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
-- 
2.31.1



[PATCH v2 2/6] drm/amdgpu: move shadow bo validation to VM code

2021-05-28 Thread Nirmoy Das
Do the shadow bo validation in the VM code as
VM code knows/owns shadow BOs.

v2: Fix a typo.

Signed-off-by: Nirmoy Das 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  5 +
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 90136f9dedd6..f6a8f0c5a52f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -396,10 +396,10 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device 
*adev, u64 num_bytes,
	spin_unlock(&adev->mm_stats.lock);
 }

-static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
-struct amdgpu_bo *bo)
+static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
 {
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+   struct amdgpu_cs_parser *p = param;
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
@@ -451,21 +451,6 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser 
*p,
return r;
 }

-static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
-{
-   struct amdgpu_cs_parser *p = param;
-   int r;
-
-   r = amdgpu_cs_bo_validate(p, bo);
-   if (r)
-   return r;
-
-   if (bo->shadow)
-   r = amdgpu_cs_bo_validate(p, bo->shadow);
-
-   return r;
-}
-
 static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
struct list_head *validated)
 {
@@ -493,7 +478,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser 
*p,
 lobj->user_pages);
}

-   r = amdgpu_cs_validate(p, bo);
+   r = amdgpu_cs_bo_validate(p, bo);
if (r)
return r;

@@ -593,7 +578,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
p->bytes_moved_vis = 0;

r = amdgpu_vm_validate_pt_bos(p->adev, >vm,
- amdgpu_cs_validate, p);
+ amdgpu_cs_bo_validate, p);
if (r) {
DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
goto error_validate;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index da155c276c51..6bc7566cc193 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -696,6 +696,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
r = validate(param, bo);
if (r)
return r;
+   if (bo->shadow) {
+   r = validate(param, bo->shadow);
+   if (r)
+   return r;
+   }

if (bo->tbo.type != ttm_bo_type_kernel) {
amdgpu_vm_bo_moved(bo_base);
--
2.31.1



[PATCH 1/6] drm/amdgpu: add amdgpu_bo_vm bo type

2021-05-28 Thread Nirmoy Das
Add new BO subclass that will be used by amdgpu vm code.

Signed-off-by: Nirmoy Das 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 32 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 10 +++
 2 files changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e9f8701fd046..2e5426ab24a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -694,6 +694,38 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
*ubo_ptr = to_amdgpu_bo_user(bo_ptr);
return r;
 }
+
+/**
+ * amdgpu_bo_create_vm - create an &amdgpu_bo_vm buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @vmbo_ptr: pointer to the buffer object pointer
+ *
+ * Create a BO to be used by GPUVM.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+
+int amdgpu_bo_create_vm(struct amdgpu_device *adev,
+   struct amdgpu_bo_param *bp,
+   struct amdgpu_bo_vm **vmbo_ptr)
+{
+   struct amdgpu_bo *bo_ptr;
+   int r;
+
+   /* bo_ptr_size will be determined by the caller and it depends on
+* num of amdgpu_vm_pt entries.
+*/
+   BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo_vm));
+	r = amdgpu_bo_create(adev, bp, &bo_ptr);
+   if (r)
+   return r;
+
+   *vmbo_ptr = to_amdgpu_bo_vm(bo_ptr);
+   return r;
+}
+
 /**
  * amdgpu_bo_validate - validate an &amdgpu_bo buffer object
  * @bo: pointer to the buffer object
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 11480c5a2716..a7fbf5f7051e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -44,6 +44,7 @@
 #define AMDGPU_AMDKFD_CREATE_SVM_BO(1ULL << 62)
 
 #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
+#define to_amdgpu_bo_vm(abo) container_of((abo), struct amdgpu_bo_vm, bo)
 
 struct amdgpu_bo_param {
unsigned long   size;
@@ -125,6 +126,12 @@ struct amdgpu_bo_user {
 
 };
 
+struct amdgpu_bo_vm {
+	struct amdgpu_bo	bo;
+	struct amdgpu_bo	*shadow;
+   struct amdgpu_vm_pt entries[];
+};
+
 static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
 {
return container_of(tbo, struct amdgpu_bo, tbo);
@@ -272,6 +279,9 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
 int amdgpu_bo_create_user(struct amdgpu_device *adev,
  struct amdgpu_bo_param *bp,
  struct amdgpu_bo_user **ubo_ptr);
+int amdgpu_bo_create_vm(struct amdgpu_device *adev,
+   struct amdgpu_bo_param *bp,
+   struct amdgpu_bo_vm **ubo_ptr);
 void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
   void **cpu_addr);
 int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
-- 
2.31.1



RE: [PATCH 2/2] drm/amdgpu: stop bookkeeping of temporary GTT allocation

2021-05-28 Thread Yu, Lang
[AMD Official Use Only]


Inline.

>-Original Message-
>From: Koenig, Christian 
>Sent: Thursday, May 27, 2021 7:51 PM
>To: Yu, Lang ; amd-gfx@lists.freedesktop.org; dri-
>de...@lists.freedesktop.org
>Cc: Huang, Ray ; Deucher, Alexander
>; Olsak, Marek 
>Subject: Re: [PATCH 2/2] drm/amdgpu: stop bookkeeping of temporary GTT
>allocation
>
>Putting Marek on CC.
>
>On 27.05.21 at 03:30, Lang Yu wrote:
>> To improve buffer migration performance, stop bookkeeping of temporary
>> GTT allocations, including allocations for BOs evicted from VRAM and
>> bounce buffers.
>>
>> Signed-off-by: Lang Yu 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 16 ++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  4 +++-
>>   2 files changed, 13 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> index 8860545344c7..32fedd495c7f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> @@ -111,14 +111,15 @@ static int amdgpu_gtt_mgr_new(struct
>ttm_resource_manager *man,
>>  struct amdgpu_gtt_node *node;
>>  int r;
>>
>> -spin_lock(&mgr->lock);
>> -if ((&tbo->mem == mem || tbo->mem.mem_type != TTM_PL_TT) &&
>> -atomic64_read(&mgr->available) < mem->num_pages) {
>> +if (!(mem->placement & TTM_PL_FLAG_TEMPORARY)) {
>> +spin_lock(&mgr->lock);
>> +if (atomic64_read(&mgr->available) < mem->num_pages) {
>> +spin_unlock(&mgr->lock);
>> +return -ENOSPC;
>> +}
>> +atomic64_sub(mem->num_pages, &mgr->available);
>
>After sleeping a night over that I think we need to talk about this part here 
>once
>more.
>
>While temporary GTT allocations can temporarily exceed the GTT limitation, we
>still need to account for them in case the eviction is interrupted for some
>reason.
>
>In other words what can happen is that we want to move
>VRAM->GTT->SYSTEM, but GTT->SYSTEM never happens because it is
>interrupted in the wait (that's unfortunately rather likely).
>
>To solve this I think we should do the following:
>1. Change mgr->available into mgr->used (e.g. invert the value).
>2. Always account all GTT BOs to the used space.
>3. Only when it is not a temporary allocation bail out.
>
>This way temporary allocations are accounted for, but we still allow
>memory evictions to happen under pressure.
>
>While at it you could also drop taking the spinlock to check the atomic,
>that is pretty much unnecessary.
>
>Regards,
>Christian.
>
[Yu, Lang] Hi Christian,

Yes, it can actually happen that the BO was evicted from VRAM to the GTT
domain but was not moved forward to the SYSTEM domain. It then resides in
the GTT domain waiting for the next validation, eviction, or destruction.

It is reasonable that we count all GTT allocations.
1. I find that if a temporary GTT BO is not counted but is used for command
submission, then we can use more GTT memory than the GTT limit for command
submission. Is that your concern?
2. Or is it that not counting temporary GTT allocations will mess up the
GTT manager?

In other words, if we don't count it while it resides in the GTT domain,
what is the consequence?
I would like to know your concern. Actually it is already counted by
ttm_pages_allocated.

If we use "used" instead of "available" in the GTT manager, the used size
may exceed the manager size.
We should also adjust the GTT manager debug interface.

Rework the logic like this with your idea:

if ((atomic64_add_return(mem->num_pages, &mgr->used) > man->size) &&
    !(mem->placement & TTM_PL_FLAG_TEMPORARY)) {
	atomic64_sub(mem->num_pages, &mgr->used);
return -ENOSPC;
}

Regards,
Lang

>>  spin_unlock(&mgr->lock);
>> -return -ENOSPC;
>>  }
>> -atomic64_sub(mem->num_pages, &mgr->available);
>> -spin_unlock(&mgr->lock);
>>
>>  if (!place->lpfn) {
>>  mem->mm_node = NULL;
>> @@ -178,6 +179,9 @@ static void amdgpu_gtt_mgr_del(struct
>ttm_resource_manager *man,
>>  kfree(node);
>>  }
>>
>> +if (mem->placement & TTM_PL_FLAG_TEMPORARY)
>> +return;
>> +
>>  atomic64_add(mem->num_pages, &mgr->available);
>>   }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index c0aef327292a..129d39392859 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -152,9 +152,11 @@ static void amdgpu_evict_flags(struct
>ttm_buffer_object *bo,
>>  abo->placements[0].lpfn = 0;
>>  abo->placement.busy_placement = &abo->placements[1];
>>  abo->placement.num_busy_placement = 1;
>> +abo->placements[1].flags |= TTM_PL_FLAG_TEMPORARY;
>>  } else {
>>  /* Move to GTT memory */
>>  amdgpu_bo_placement_from_domain(abo,

RE: [PATCH v3 4/6] drm/amdgpu: switch to amdgpu_bo_vm for vm code

2021-05-28 Thread Das, Nirmoy
[AMD Official Use Only]

Thanks Christian, I will resend with your suggestions included.

Nirmoy

-Original Message-
From: Koenig, Christian  
Sent: Friday, May 28, 2021 10:01 AM
To: Das, Nirmoy ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: Re: [PATCH v3 4/6] drm/amdgpu: switch to amdgpu_bo_vm for vm code



On 27.05.21 at 13:53, Nirmoy Das wrote:
> The subclass, amdgpu_bo_vm is intended for PT/PD BOs which are also 
> shadowed, so switch to amdgpu_bo_vm BO for PT/PD BOs.
>
> v3: simplify code.
>  check also if the shadow BO exists, instead of checking only the BO's type.
> v2: squash three related patches.
>
> Signed-off-by: Nirmoy Das 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 93 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 18 ++--
>   2 files changed, 68 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 6bc7566cc193..d723873df765 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -652,15 +652,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device 
> *adev,
> 	spin_lock(&adev->mman.bdev.lru_lock);
> 	list_for_each_entry(bo_base, &vm->idle, vm_status) {
> 		struct amdgpu_bo *bo = bo_base->bo;
> +		struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
>
> 		if (!bo->parent)
> 			continue;
>
> 		ttm_bo_move_to_lru_tail(&bo->tbo, &bo->tbo.mem,
> 					&vm->lru_bulk_move);
> -		if (bo->shadow)
> -			ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
> -						&bo->shadow->tbo.mem,
> +		if (shadow)
> +			ttm_bo_move_to_lru_tail(&shadow->tbo, &shadow->tbo.mem,
> 						&vm->lru_bulk_move);
> 	}
> 	spin_unlock(&adev->mman.bdev.lru_lock);
> @@ -692,12 +692,13 @@ int amdgpu_vm_validate_pt_bos(struct 
> amdgpu_device *adev, struct amdgpu_vm *vm,
>
> 	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
>   struct amdgpu_bo *bo = bo_base->bo;
> + struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
>
>   r = validate(param, bo);
>   if (r)
>   return r;
> - if (bo->shadow) {
> - r = validate(param, bo->shadow);
> + if (shadow) {
> + r = validate(param, shadow);
>   if (r)
>   return r;
>   }
> @@ -754,6 +755,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>   unsigned level = adev->vm_manager.root_level;
>   struct amdgpu_vm_update_params params;
>   struct amdgpu_bo *ancestor = bo;
> + struct amdgpu_bo *shadow;
>   unsigned entries, ats_entries;
>   uint64_t addr;
>   int r;
> @@ -793,9 +795,9 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>   if (r)
>   return r;
>
> -	if (bo->shadow) {
> -		r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement,
> -				    &ctx);
> +	shadow = amdgpu_bo_shadowed(bo);
> +	if (shadow) {
> +		r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
>   if (r)
>   return r;
>   }
> @@ -863,14 +865,16 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device 
> *adev,
>* @vm: requesting vm
>* @level: the page table level
>* @immediate: use a immediate update
> - * @bo: pointer to the buffer object pointer
> + * @vmbo: pointer to the buffer object pointer
>*/
>   static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
>  struct amdgpu_vm *vm,
>  int level, bool immediate,
> -struct amdgpu_bo **bo)
> +struct amdgpu_bo_vm **vmbo)
>   {
>   struct amdgpu_bo_param bp;
> + struct amdgpu_bo *bo;
> + struct dma_resv *resv;
>   int r;
>
> 	memset(&bp, 0, sizeof(bp));
> @@ -881,7 +885,7 @@ static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
>   bp.domain = amdgpu_bo_get_preferred_pin_domain(adev, bp.domain);
>   bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>   AMDGPU_GEM_CREATE_CPU_GTT_USWC;
> - bp.bo_ptr_size = sizeof(struct amdgpu_bo);
> + bp.bo_ptr_size = sizeof(struct amdgpu_bo_vm);
>   if (vm->use_cpu_for_update)
>   bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>
> @@ -890,26 +894,41 @@ static int amdgpu_vm_pt_create(struct amdgpu_device 
> *adev,
>   if (vm->root.base.bo)
>   bp.resv = vm->root.base.bo->tbo.base.resv;
>
> -	r = amdgpu_bo_create(adev, &bp, bo);
> +	r = amdgpu_bo_create_vm(adev, &bp, vmbo);
>   if (r)
>   return r;
>
> - if (vm->is_compute_context && (adev->flags & AMD_IS_APU))
> + bo = &(*vmbo)->bo;

[PATCH] drm/amdgpu: support atcs method powershift (v4)

2021-05-28 Thread Sathishkumar S
Add support to handle the ATCS method for power shift control,
used to communicate the dGPU device state to the SBIOS.

V2: use defined acpi func for checking psc support (Lijo)
fix alignment (Shashank)
V3: rebased on unified ATCS handling (Alex)
V4: rebased on ATPX/ATCS structures global (Alex)

Signed-off-by: Sathishkumar S 
Reviewed-by: Alex Deucher 
Reviewed-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  6 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 55 
 drivers/gpu/drm/amd/include/amd_acpi.h   | 18 
 3 files changed, 79 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 7b794957515f..0ea2ed3a55f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1343,8 +1343,11 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock);
 int amdgpu_acpi_init(struct amdgpu_device *adev);
 void amdgpu_acpi_fini(struct amdgpu_device *adev);
 bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device 
*adev);
+bool amdgpu_acpi_is_power_shift_control_supported(void);
 int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
u8 perf_req, bool advertise);
+int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
+   u8 dev_state, bool drv_state);
 int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
 
 void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
@@ -1355,6 +1358,9 @@ static inline int amdgpu_acpi_init(struct amdgpu_device 
*adev) { return 0; }
 static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
 static inline bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) { 
return false; }
 static inline void amdgpu_acpi_detect(void) { }
+static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return 
false; }
+static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
+ u8 dev_state, bool drv_state) 
{ return 0; }
 #endif
 
 int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index bbff6c06f943..b631316bfe5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -76,6 +76,7 @@ struct amdgpu_atcs_functions {
bool pcie_perf_req;
bool pcie_dev_rdy;
bool pcie_bus_width;
+   bool power_shift_control;
 };
 
 struct amdgpu_atcs {
@@ -534,6 +535,7 @@ static void amdgpu_atcs_parse_functions(struct 
amdgpu_atcs_functions *f, u32 mas
f->pcie_perf_req = mask & ATCS_PCIE_PERFORMANCE_REQUEST_SUPPORTED;
f->pcie_dev_rdy = mask & ATCS_PCIE_DEVICE_READY_NOTIFICATION_SUPPORTED;
f->pcie_bus_width = mask & ATCS_SET_PCIE_BUS_WIDTH_SUPPORTED;
+   f->power_shift_control = mask & ATCS_SET_POWER_SHIFT_CONTROL_SUPPORTED;
 }
 
 /**
@@ -598,6 +600,18 @@ bool 
amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *ade
return false;
 }
 
+/**
+ * amdgpu_acpi_is_power_shift_control_supported
+ *
+ * Check if the ATCS power shift control method
+ * is supported.
+ * returns true if supported, false if not.
+ */
+bool amdgpu_acpi_is_power_shift_control_supported(void)
+{
+   return amdgpu_acpi_priv.atcs.functions.power_shift_control;
+}
+
 /**
  * amdgpu_acpi_pcie_notify_device_ready
  *
@@ -699,6 +713,47 @@ int amdgpu_acpi_pcie_performance_request(struct 
amdgpu_device *adev,
return 0;
 }
 
+/**
+ * amdgpu_acpi_power_shift_control
+ *
+ * @adev: amdgpu_device pointer
+ * @dev_state: device acpi state
+ * @drv_state: driver state
+ *
+ * Executes the POWER_SHIFT_CONTROL method to
+ * communicate current dGPU device state and
+ * driver state to APU/SBIOS.
+ * returns 0 on success, error on failure.
+ */
+int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
+   u8 dev_state, bool drv_state)
+{
+   union acpi_object *info;
+	struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+   struct atcs_pwr_shift_input atcs_input;
+   struct acpi_buffer params;
+
+   if (!amdgpu_acpi_is_power_shift_control_supported())
+   return -EINVAL;
+
+   atcs_input.size = sizeof(struct atcs_pwr_shift_input);
+   /* dGPU id (bit 2-0: func num, 7-3: dev num, 15-8: bus num) */
+   atcs_input.dgpu_id = adev->pdev->devfn | (adev->pdev->bus->number << 8);
+   atcs_input.dev_acpi_state = dev_state;
+   atcs_input.drv_state = drv_state;
+
+   params.length = sizeof(struct atcs_pwr_shift_input);
+	params.pointer = &atcs_input;
+
+	info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_POWER_SHIFT_CONTROL, &params);
+   if (!info) {
+   DRM_ERROR("ATCS PSC update failed\n");
+   return -EIO;
+   }
+
+   return 0;
+}
+
 /**
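
For context, a minimal sketch of how the two new helpers might compose in a
suspend path. This is hypothetical and not part of the patch; the caller name
and the D-state numbering comment are assumptions for illustration only:

  /* Hypothetical caller: notify the APU/SBIOS that the dGPU is about to
   * enter D3 while the driver stays loaded. dev_state is assumed to use
   * ACPI D-state numbering (0 = D0, 3 = D3); the real contract is
   * whatever the platform's ATCS method defines.
   */
  static void example_power_shift_on_suspend(struct amdgpu_device *adev)
  {
  	if (!amdgpu_acpi_is_power_shift_control_supported())
  		return; /* platform ATCS has no PSC function */

  	/* For bus 0x03, dev 0, func 0 the helper packs dgpu_id = 0x0300. */
  	if (amdgpu_acpi_power_shift_control(adev, 3 /* D3 */, true))
  		DRM_WARN("ATCS power shift notification failed\n");
  }
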
  

[PATCH] drm/amd/pm: use attr_update if the attr has it

2021-05-28 Thread Sathishkumar S
use attr->attr_update if it is available as part of the attribute.
default_attr_update was used even if attr->attr_update was set.

Signed-off-by: Sathishkumar S 
Reviewed-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 13da377888d2..f48132bc089d 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1942,7 +1942,7 @@ static int amdgpu_device_attr_create(struct amdgpu_device *adev,
 
BUG_ON(!attr);
 
-   attr_update = attr->attr_update ? attr_update : default_attr_update;
+   attr_update = attr->attr_update ? attr->attr_update : default_attr_update;
 
 	ret = attr_update(adev, attr, mask, &attr_states);
if (ret) {
-- 
2.17.1
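
For readers skimming the one-liner above: it fixes the classic ternary that
tests one value but yields another. Distilled into a standalone pattern
(names hypothetical):

  /* Buggy shape: the condition tests obj->update, but the true arm
   * evaluates to the local's own (stale) value, so the per-object hook
   * is never actually selected.
   */
  update_fn = obj->update ? update_fn : default_update;

  /* Fixed shape: the true arm yields the member itself. */
  update_fn = obj->update ? obj->update : default_update;
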



Re: [PATCH v2 5/6] drm/amdgpu: remove unused code

2021-05-28 Thread Christian König

On 27.05.21 13:53, Nirmoy Das wrote:

Remove unused code related to shadow BO.

v2: remove the shadow BO pointer from the base class.

Signed-off-by: Nirmoy Das 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 29 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  6 -
  2 files changed, 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index a63b450cd603..db9c64836556 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -638,35 +638,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
return r;
  }

-int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
-   unsigned long size,
-   struct amdgpu_bo *bo)
-{
-   struct amdgpu_bo_param bp;
-   int r;
-
-   if (bo->shadow)
-   return 0;
-
-   memset(&bp, 0, sizeof(bp));
-   bp.size = size;
-   bp.domain = AMDGPU_GEM_DOMAIN_GTT;
-   bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-   bp.type = ttm_bo_type_kernel;
-   bp.resv = bo->tbo.base.resv;
-   bp.bo_ptr_size = sizeof(struct amdgpu_bo);
-
-   r = amdgpu_bo_create(adev, &bp, &bo->shadow);
-   if (!r) {
-   bo->shadow->parent = amdgpu_bo_ref(bo);
-   mutex_lock(&adev->shadow_list_lock);
-   list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
-   mutex_unlock(&adev->shadow_list_lock);
-   }
-
-   return r;
-}
-
  /**
   * amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object
   * @adev: amdgpu device object
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 9afccf6c66f2..fa75251148be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -104,9 +104,6 @@ struct amdgpu_bo {
 	struct amdgpu_vm_bo_base	*vm_bo;
/* Constant after initialization */
 	struct amdgpu_bo		*parent;
-	struct amdgpu_bo		*shadow;
-
-

  #ifdef CONFIG_MMU_NOTIFIER
 	struct mmu_interval_notifier	notifier;
@@ -300,9 +297,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
struct amdgpu_bo_vm **ubo_ptr);
  void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
   void **cpu_addr);
-int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
-   unsigned long size,
-   struct amdgpu_bo *bo);
  int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
  void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
  void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
--
2.31.1



Re: [PATCH v3 4/6] drm/amdgpu: switch to amdgpu_bo_vm for vm code

2021-05-28 Thread Christian König

On 27.05.21 13:53, Nirmoy Das wrote:

The subclass, amdgpu_bo_vm, is intended for PT/PD BOs, which are also
shadowed, so switch to amdgpu_bo_vm BO for PT/PD BOs.

v3: simplify code.
 also check whether the shadow BO exists, instead of checking only the BO's type.
v2: squash three related patches.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 93 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 18 ++--
  2 files changed, 68 insertions(+), 43 deletions(-)
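
For orientation, the subclass layout these hunks rely on looks roughly as
follows; a sketch reconstructed from its uses in this series (bp.bo_ptr_size,
to_amdgpu_bo_vm(), vmbo->shadow), not the verbatim header, with unrelated
fields elided:

  /* PT/PD BOs are allocated with bo_ptr_size = sizeof(struct amdgpu_bo_vm);
   * the base BO must stay the first member so the cast below is valid.
   */
  struct amdgpu_bo_vm {
  	struct amdgpu_bo	bo;		/* base buffer object */
  	struct amdgpu_bo	*shadow;	/* GTT shadow copy for recovery */
  };

  #define to_amdgpu_bo_vm(abo) container_of((abo), struct amdgpu_bo_vm, bo)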

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6bc7566cc193..d723873df765 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -652,15 +652,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 	spin_lock(&adev->mman.bdev.lru_lock);
 	list_for_each_entry(bo_base, &vm->idle, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
+   struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);

if (!bo->parent)
continue;

 		ttm_bo_move_to_lru_tail(&bo->tbo, &bo->tbo.mem,
 					&vm->lru_bulk_move);
-		if (bo->shadow)
-			ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
-						&bo->shadow->tbo.mem,
+		if (shadow)
+			ttm_bo_move_to_lru_tail(&shadow->tbo, &shadow->tbo.mem,
 						&vm->lru_bulk_move);
}
 	spin_unlock(&adev->mman.bdev.lru_lock);
@@ -692,12 +692,13 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,

 	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
+   struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);

r = validate(param, bo);
if (r)
return r;
-   if (bo->shadow) {
-   r = validate(param, bo->shadow);
+   if (shadow) {
+   r = validate(param, shadow);
if (r)
return r;
}
@@ -754,6 +755,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
unsigned level = adev->vm_manager.root_level;
struct amdgpu_vm_update_params params;
struct amdgpu_bo *ancestor = bo;
+   struct amdgpu_bo *shadow;
unsigned entries, ats_entries;
uint64_t addr;
int r;
@@ -793,9 +795,9 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
if (r)
return r;

-   if (bo->shadow) {
-		r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement,
-				    &ctx);
+   shadow = amdgpu_bo_shadowed(bo);
+   if (shadow) {
+		r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
if (r)
return r;
}
@@ -863,14 +865,16 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
   * @vm: requesting vm
   * @level: the page table level
   * @immediate: use a immediate update
- * @bo: pointer to the buffer object pointer
+ * @vmbo: pointer to the buffer object pointer
   */
  static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
   struct amdgpu_vm *vm,
   int level, bool immediate,
-  struct amdgpu_bo **bo)
+  struct amdgpu_bo_vm **vmbo)
  {
struct amdgpu_bo_param bp;
+   struct amdgpu_bo *bo;
+   struct dma_resv *resv;
int r;

 	memset(&bp, 0, sizeof(bp));
@@ -881,7 +885,7 @@ static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
bp.domain = amdgpu_bo_get_preferred_pin_domain(adev, bp.domain);
bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-   bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+   bp.bo_ptr_size = sizeof(struct amdgpu_bo_vm);
if (vm->use_cpu_for_update)
bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

@@ -890,26 +894,41 @@ static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
if (vm->root.base.bo)
bp.resv = vm->root.base.bo->tbo.base.resv;

-	r = amdgpu_bo_create(adev, &bp, bo);
+	r = amdgpu_bo_create_vm(adev, &bp, vmbo);
if (r)
return r;

-   if (vm->is_compute_context && (adev->flags & AMD_IS_APU))
+   bo = &(*vmbo)->bo;
+   if (vm->is_compute_context && (adev->flags & AMD_IS_APU)) {
+   (*vmbo)->shadow = NULL;
return 0;
+   }

if (!bp.resv)
-   WARN_ON(dma_resv_lock((*bo)->tbo.base.resv,
+   WARN_ON(dma_resv_lock(bo->tbo.base.resv,
  NULL));
-   r = amdgpu_bo_create_shadow(adev, bp.size, *bo);
+   resv = bp.resv;
+   

Re: [PATCH 3/6] drm/amdgpu: add two shadow BO helper functions

2021-05-28 Thread Christian König

On 27.05.21 13:53, Nirmoy Das wrote:

Add amdgpu_bo_add_to_shadow_list() to handle shadow list
additions and amdgpu_bo_shadowed() to check if a BO is shadowed.

Signed-off-by: Nirmoy Das 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 16 
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 17 +
  2 files changed, 33 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6870cc297ae6..a63b450cd603 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -760,6 +760,22 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
return r;
  }
  
+/**
+ * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list
+ *
+ * @bo: BO that will be inserted into the shadow list
+ *
+ * Insert a BO to the shadow list.
+ */
+void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo *bo)
+{
+   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+   mutex_lock(&adev->shadow_list_lock);
+   list_add_tail(&bo->shadow_list, &adev->shadow_list);
+   mutex_unlock(&adev->shadow_list_lock);
+}
+
  /**
   * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
   *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index a7fbf5f7051e..9afccf6c66f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -259,6 +259,22 @@ static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo)
return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED;
  }
  
+/**
+ * amdgpu_bo_shadowed - check if the BO is shadowed
+ *
+ * @bo: BO to be tested.
+ *
+ * Returns:
+ * NULL if not shadowed or else return a BO pointer.
+ */
+static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo)
+{
+   if (bo->tbo.type == ttm_bo_type_kernel)
+   return to_amdgpu_bo_vm(bo)->shadow;
+
+   return NULL;
+}
+
  bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
  void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
  
@@ -322,6 +338,7 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);

  int amdgpu_bo_validate(struct amdgpu_bo *bo);
  void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,
uint64_t *gtt_mem, uint64_t *cpu_mem);
+void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo *bo);
  int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
 struct dma_fence **fence);
  uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
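
Taken together, the two helpers keep the subclass layout out of generic code.
A small usage sketch, assuming a freshly created shadow on a kernel (PT/PD)
BO; the pairing shown here is illustrative, not a quote from the series:

  /* Creation side: link the new shadow back to its parent and publish
   * it on the device-wide list walked during GPU reset recovery.
   */
  vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo);
  amdgpu_bo_add_to_shadow_list(vmbo->shadow);

  /* Consumer side: callers only ever ask whether a BO is shadowed. */
  struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);

  if (shadow)
  	r = validate(param, shadow);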




[PATCH -next] drm/radeon/radeon_pm: use DEVICE_ATTR_RW macro

2021-05-28 Thread YueHaibing
Use the DEVICE_ATTR_RW() helper instead of plain DEVICE_ATTR(),
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing 
---
 drivers/gpu/drm/radeon/radeon_pm.c | 56 --
 1 file changed, 23 insertions(+), 33 deletions(-)
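
The mechanics behind the renames, for reference: DEVICE_ATTR_RW(name) implies
mode 0644 (S_IRUGO | S_IWUSR) and derives the handler names as
name_show()/name_store(), which is why every show/store pair below is
renamed. Roughly (the exact pre-conversion line may differ):

  /* Plain form: name, mode and both handlers spelled out. */
  static DEVICE_ATTR(power_profile, S_IRUGO | S_IWUSR,
  		   radeon_get_pm_profile, radeon_set_pm_profile);

  /* Helper form: expands to dev_attr_power_profile with mode 0644 and
   * handlers power_profile_show()/power_profile_store().
   */
  static DEVICE_ATTR_RW(power_profile);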

diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 3861c0b98fcf..edf10cc3947e 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -352,9 +352,8 @@ static void radeon_pm_print_states(struct radeon_device *rdev)
}
 }
 
-static ssize_t radeon_get_pm_profile(struct device *dev,
-struct device_attribute *attr,
-char *buf)
+static ssize_t power_profile_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
 {
struct drm_device *ddev = dev_get_drvdata(dev);
struct radeon_device *rdev = ddev->dev_private;
@@ -366,10 +365,8 @@ static ssize_t radeon_get_pm_profile(struct device *dev,
  (cp == PM_PROFILE_HIGH) ? "high" : "default");
 }
 
-static ssize_t radeon_set_pm_profile(struct device *dev,
-struct device_attribute *attr,
-const char *buf,
-size_t count)
+static ssize_t power_profile_store(struct device *dev, struct device_attribute *attr,
+  const char *buf, size_t count)
 {
struct drm_device *ddev = dev_get_drvdata(dev);
struct radeon_device *rdev = ddev->dev_private;
@@ -406,9 +403,8 @@ static ssize_t radeon_set_pm_profile(struct device *dev,
return count;
 }
 
-static ssize_t radeon_get_pm_method(struct device *dev,
-   struct device_attribute *attr,
-   char *buf)
+static ssize_t power_method_show(struct device *dev,
+struct device_attribute *attr, char *buf)
 {
struct drm_device *ddev = dev_get_drvdata(dev);
struct radeon_device *rdev = ddev->dev_private;
@@ -418,10 +414,9 @@ static ssize_t radeon_get_pm_method(struct device *dev,
  (pm == PM_METHOD_PROFILE) ? "profile" : "dpm");
 }
 
-static ssize_t radeon_set_pm_method(struct device *dev,
-   struct device_attribute *attr,
-   const char *buf,
-   size_t count)
+static ssize_t power_method_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
 {
struct drm_device *ddev = dev_get_drvdata(dev);
struct radeon_device *rdev = ddev->dev_private;
@@ -462,9 +457,8 @@ static ssize_t radeon_set_pm_method(struct device *dev,
return count;
 }
 
-static ssize_t radeon_get_dpm_state(struct device *dev,
-   struct device_attribute *attr,
-   char *buf)
+static ssize_t power_dpm_state_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
 {
struct drm_device *ddev = dev_get_drvdata(dev);
struct radeon_device *rdev = ddev->dev_private;
@@ -475,10 +469,9 @@ static ssize_t radeon_get_dpm_state(struct device *dev,
  (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : 
"performance");
 }
 
-static ssize_t radeon_set_dpm_state(struct device *dev,
-   struct device_attribute *attr,
-   const char *buf,
-   size_t count)
+static ssize_t power_dpm_state_store(struct device *dev,
+struct device_attribute *attr,
+const char *buf, size_t count)
 {
struct drm_device *ddev = dev_get_drvdata(dev);
struct radeon_device *rdev = ddev->dev_private;
@@ -506,9 +499,9 @@ static ssize_t radeon_set_dpm_state(struct device *dev,
return count;
 }
 
-static ssize_t radeon_get_dpm_forced_performance_level(struct device *dev,
-  struct device_attribute *attr,
-  char *buf)
+static ssize_t power_dpm_force_performance_level_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
 {
struct drm_device *ddev = dev_get_drvdata(dev);
struct radeon_device *rdev = ddev->dev_private;
@@ -523,10 +516,9 @@ static ssize_t radeon_get_dpm_forced_performance_level(struct device *dev,
   (level == RADEON_DPM_FORCED_LEVEL_LOW) ? "low" : "high");
 }
 
-static ssize_t