Re: [PATCH 2/2] drm/amdgpu: add sclk/uclk sensor support for vega20

2019-07-17 Thread Deucher, Alexander
Ah, I missed that they were already handled in smu_v11_0.c.  In that case, I 
think I can drop these patches.

Alex

From: Quan, Evan
Sent: Wednesday, July 17, 2019 8:58 PM
To: Alex Deucher; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: RE: [PATCH 2/2] drm/amdgpu: add sclk/uclk sensor support for vega20

I think the AMDGPU_PP_SENSOR_GFX_SCLK and AMDGPU_PP_SENSOR_GFX_MCLK requests 
are already handled in smu_v11_0_read_sensor.
That means they never reach navi10_ppt.c and vega20_ppt.c.
Maybe this should be fixed in smu_v11_0_read_sensor instead.
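
Roughly the kind of fallthrough I mean, as a sketch only (the exact structure of
smu_v11_0_read_sensor and the ppt_funcs callback naming are assumptions here,
based on how vega20_ppt.c hooks read_sensor in the patch below):

static int smu_v11_0_read_sensor(struct smu_context *smu,
				 enum amd_pp_sensors sensor,
				 void *data, uint32_t *size)
{
	int ret;

	switch (sensor) {
	/* ... sensors the common SMU v11 code handles for every ASIC ... */
	default:
		/* Instead of failing for unknown sensors, defer them to the
		 * ASIC-specific handler (e.g. vega20_read_sensor) so that
		 * AMDGPU_PP_SENSOR_GFX_SCLK/MCLK can reach vega20_ppt.c and
		 * navi10_ppt.c. */
		if (smu->ppt_funcs && smu->ppt_funcs->read_sensor)
			ret = smu->ppt_funcs->read_sensor(smu, sensor, data, size);
		else
			ret = -EINVAL;
		break;
	}

	return ret;
}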

Regards,
Evan
> -Original Message-
> From: amd-gfx  On Behalf Of Alex
> Deucher
> Sent: Wednesday, July 17, 2019 10:36 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: [PATCH 2/2] drm/amdgpu: add sclk/uclk sensor support for vega20
>
> Query the metrics table to get the average sclk and uclk.
>
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 35
> ++
>  1 file changed, 35 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> index 9204e4e50d09..763d73af6cd1 100644
> --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> @@ -3117,6 +3117,36 @@ static int vega20_thermal_get_temperature(struct
> smu_context *smu,
>
>return 0;
>  }
> +
> +static int vega20_get_avg_clocks(struct smu_context *smu,
> +  enum amd_pp_sensors sensor,
> +  uint32_t *value)
> +{
> + SmuMetrics_t metrics;
> + int ret = 0;
> +
> + if (!value)
> + return -EINVAL;
> +
> + ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, (void
> *)&metrics, false);
> + if (ret)
> + return ret;
> +
> + switch (sensor) {
> + case AMDGPU_PP_SENSOR_GFX_SCLK:
> + *value = metrics.AverageGfxclkFrequency * 100;
> + break;
> + case AMDGPU_PP_SENSOR_GFX_MCLK:
> + *value = metrics.AverageUclkFrequency * 100;
> + break;
> + default:
> + pr_err("Invalid sensor for retrieving avg clock\n");
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
>  static int vega20_read_sensor(struct smu_context *smu,
> enum amd_pp_sensors sensor,
> void *data, uint32_t *size)
> @@ -3147,6 +3177,11 @@ static int vega20_read_sensor(struct smu_context
> *smu,
>ret = vega20_thermal_get_temperature(smu, sensor,
> (uint32_t *)data);
>*size = 4;
>break;
> + case AMDGPU_PP_SENSOR_GFX_SCLK:
> + case AMDGPU_PP_SENSOR_GFX_MCLK:
> + ret = vega20_get_avg_clocks(smu, sensor, (uint32_t *)data);
> + *size = 4;
> + break;
>default:
>return -EINVAL;
>}
> --
> 2.20.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: Prefer pcie_capability_read_word()

2019-07-17 Thread Frederick Lawler
Commit 8c0d3a02c130 ("PCI: Add accessors for PCI Express Capability")
added accessors for the PCI Express Capability so that drivers didn't
need to be aware of differences between v1 and v2 of the PCI
Express Capability.

Replace pci_read_config_word() and pci_write_config_word() calls with
pcie_capability_read_word() and pcie_capability_write_word().
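
For reference, the caller-visible difference looks roughly like this (sketch
only; pdev stands for any PCIe device, e.g. adev->pdev in the hunks below):

	u16 lnkctl;
	int pos;

	/* before: the caller locates the PCI Express Capability itself */
	pos = pci_pcie_cap(pdev);
	if (pos)
		pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &lnkctl);

	/* after: the accessor finds the capability offset and hides the
	 * v1/v2 register-set differences */
	pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &lnkctl);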

Signed-off-by: Frederick Lawler 
---
 drivers/gpu/drm/amd/amdgpu/cik.c | 70 
 drivers/gpu/drm/amd/amdgpu/si.c  | 70 
 2 files changed, 88 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 07c1f239e9c3..2f33dd0f7a11 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1377,7 +1377,6 @@ static int cik_set_vce_clocks(struct amdgpu_device *adev, 
u32 evclk, u32 ecclk)
 static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
 {
struct pci_dev *root = adev->pdev->bus->self;
-   int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -1412,12 +1411,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device 
*adev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with 
amdgpu.pcie_gen2=0\n");
}
 
-   bridge_pos = pci_pcie_cap(root);
-   if (!bridge_pos)
-   return;
-
-   gpu_pos = pci_pcie_cap(adev->pdev);
-   if (!gpu_pos)
+   if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
return;
 
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1427,14 +1421,17 @@ static void cik_pcie_gen3_enable(struct amdgpu_device 
*adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
 
-   pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, 
&bridge_cfg);
-   pci_read_config_word(adev->pdev, gpu_pos + 
PCI_EXP_LNKCTL, &gpu_cfg);
+   pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+   pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
 
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
-   pci_write_config_word(root, bridge_pos + 
PCI_EXP_LNKCTL, tmp16);
+   pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
 
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
-   pci_write_config_word(adev->pdev, gpu_pos + 
PCI_EXP_LNKCTL, tmp16);
+   pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+  tmp16);
 
tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
max_lw = (tmp & 
PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
@@ -1458,15 +1455,23 @@ static void cik_pcie_gen3_enable(struct amdgpu_device 
*adev)
 
for (i = 0; i < 10; i++) {
/* check status */
-   pci_read_config_word(adev->pdev, gpu_pos + 
PCI_EXP_DEVSTA, &tmp16);
+   pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
 
-   pci_read_config_word(root, bridge_pos + 
PCI_EXP_LNKCTL, &bridge_cfg);
-   pci_read_config_word(adev->pdev, gpu_pos + 
PCI_EXP_LNKCTL, &gpu_cfg);
+   pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+   pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
 
-   pci_read_config_word(root, bridge_pos + 
PCI_EXP_LNKCTL2, &bridge_cfg2);
-   pci_read_config_word(adev->pdev, gpu_pos + 
PCI_EXP_LNKCTL2, &gpu_cfg2);
+   pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+   pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
 
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1479,26 +1484,39 @@ static void cik_pcie_gen3_enable(struct amdgpu_device 
*adev)
msleep(100);
 
/* linkctl */
- 

[PATCH] drm/radeon: Prefer pcie_capability_read_word()

2019-07-17 Thread Frederick Lawler
Commit 8c0d3a02c130 ("PCI: Add accessors for PCI Express Capability")
added accessors for the PCI Express Capability so that drivers didn't
need to be aware of differences between v1 and v2 of the PCI
Express Capability.

Replace pci_read_config_word() and pci_write_config_word() calls with
pcie_capability_read_word() and pcie_capability_write_word().

Signed-off-by: Frederick Lawler 
---
 drivers/gpu/drm/radeon/cik.c | 70 +-
 drivers/gpu/drm/radeon/si.c  | 73 +++-
 2 files changed, 90 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index ab7b4e2ffcd2..f6c91ac5427a 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -9500,7 +9500,6 @@ static void cik_pcie_gen3_enable(struct radeon_device 
*rdev)
 {
struct pci_dev *root = rdev->pdev->bus->self;
enum pci_bus_speed speed_cap;
-   int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -9542,12 +9541,7 @@ static void cik_pcie_gen3_enable(struct radeon_device 
*rdev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with 
radeon.pcie_gen2=0\n");
}
 
-   bridge_pos = pci_pcie_cap(root);
-   if (!bridge_pos)
-   return;
-
-   gpu_pos = pci_pcie_cap(rdev->pdev);
-   if (!gpu_pos)
+   if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
return;
 
if (speed_cap == PCIE_SPEED_8_0GT) {
@@ -9557,14 +9551,17 @@ static void cik_pcie_gen3_enable(struct radeon_device 
*rdev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
 
-   pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, 
&bridge_cfg);
-   pci_read_config_word(rdev->pdev, gpu_pos + 
PCI_EXP_LNKCTL, &gpu_cfg);
+   pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+   pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
 
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
-   pci_write_config_word(root, bridge_pos + 
PCI_EXP_LNKCTL, tmp16);
+   pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
 
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
-   pci_write_config_word(rdev->pdev, gpu_pos + 
PCI_EXP_LNKCTL, tmp16);
+   pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
+  tmp16);
 
tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> 
LC_DETECTED_LINK_WIDTH_SHIFT;
@@ -9582,15 +9579,23 @@ static void cik_pcie_gen3_enable(struct radeon_device 
*rdev)
 
for (i = 0; i < 10; i++) {
/* check status */
-   pci_read_config_word(rdev->pdev, gpu_pos + 
PCI_EXP_DEVSTA, &tmp16);
+   pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
 
-   pci_read_config_word(root, bridge_pos + 
PCI_EXP_LNKCTL, &bridge_cfg);
-   pci_read_config_word(rdev->pdev, gpu_pos + 
PCI_EXP_LNKCTL, &gpu_cfg);
+   pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+   pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
 
-   pci_read_config_word(root, bridge_pos + 
PCI_EXP_LNKCTL2, &bridge_cfg2);
-   pci_read_config_word(rdev->pdev, gpu_pos + 
PCI_EXP_LNKCTL2, &gpu_cfg2);
+   pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+   pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
 
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp |= LC_SET_QUIESCE;
@@ -9603,26 +9608,39 @@ static void cik_pcie_gen3_enable(struct radeon_device 
*rdev)
msleep(100);
 
/* linkctl */
-   pci_read_config_word(root, bridge_pos + 
PCI_EXP_LNKCTL, &tmp16);
+

RE: [PATCH] drm/amdgpu/pm: remove check for pp funcs in freq sysfs handlers

2019-07-17 Thread Quan, Evan
Reviewed-by: Evan Quan 

> -Original Message-
> From: amd-gfx  On Behalf Of Alex
> Deucher
> Sent: Thursday, July 18, 2019 2:15 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: [PATCH] drm/amdgpu/pm: remove check for pp funcs in freq sysfs
> handlers
> 
> The dpm sensor function already does this for us.  This fixes the freq*_input
> files with the new SMU implementation.
> 
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 10 --
>  1 file changed, 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index 397af9094a39..8b7efd0a7028 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -2077,11 +2077,6 @@ static ssize_t amdgpu_hwmon_show_sclk(struct
> device *dev,
>(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
>   return -EINVAL;
> 
> - /* sanity check PP is enabled */
> - if (!(adev->powerplay.pp_funcs &&
> -   adev->powerplay.pp_funcs->read_sensor))
> -   return -EINVAL;
> -
>   /* get the sclk */
>   r = amdgpu_dpm_read_sensor(adev,
> AMDGPU_PP_SENSOR_GFX_SCLK,
>  (void *)&sclk, &size);
> @@ -2112,11 +2107,6 @@ static ssize_t amdgpu_hwmon_show_mclk(struct
> device *dev,
>(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
>   return -EINVAL;
> 
> - /* sanity check PP is enabled */
> - if (!(adev->powerplay.pp_funcs &&
> -   adev->powerplay.pp_funcs->read_sensor))
> -   return -EINVAL;
> -
>   /* get the sclk */
>   r = amdgpu_dpm_read_sensor(adev,
> AMDGPU_PP_SENSOR_GFX_MCLK,
>  (void *)&mclk, &size);
> --
> 2.20.1
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu/dm: Remove unneeded hotplug event in s3_handle_mst()

2019-07-17 Thread Lyude Paul
The DRM DP MST topology manager will send its own hotplug events when
connectors are destroyed, so there's no reason to send an additional
hotplug event here.

Signed-off-by: Lyude Paul 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 0242d693f4f6..156eeacee2ae 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -812,7 +812,6 @@ static void s3_handle_mst(struct drm_device *dev, bool 
suspend)
struct drm_connector *connector;
struct drm_dp_mst_topology_mgr *mgr;
int ret;
-   bool need_hotplug = false;
 
drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 
@@ -831,15 +830,15 @@ static void s3_handle_mst(struct drm_device *dev, bool 
suspend)
ret = drm_dp_mst_topology_mgr_resume(mgr);
if (ret < 0) {
drm_dp_mst_topology_mgr_set_mst(mgr, false);
-   need_hotplug = true;
+   mutex_lock(&aconnector->hpd_lock);
+   aconnector->dc_link->type =
+   dc_connection_single;
+   mutex_unlock(&aconnector->hpd_lock);
}
}
}
 
drm_modeset_unlock(&dev->mode_config.connection_mutex);
-
-   if (need_hotplug)
-   drm_kms_helper_hotplug_event(dev);
 }
 
 /**
-- 
2.21.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 23/26] drm/amdgpu/dm: Resume short HPD IRQs before resuming MST topology

2019-07-17 Thread Lyude Paul
Since we're going to be reprobing the entire topology state on resume
now using sideband transactions, we need to ensure that we actually have
short HPD irqs enabled before calling drm_dp_mst_topology_mgr_resume().
So, do that.
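
The resulting ordering in dm_resume() is then (simplified from the diff below):

	/* enable short-pulse HPD RX IRQs before any MST sideband traffic */
	amdgpu_dm_irq_resume_early(adev);

	/* then resume MST, which may immediately need those interrupts */
	s3_handle_mst(ddev, false);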

Cc: Juston Li 
Cc: Imre Deak 
Cc: Ville Syrjälä 
Cc: Harry Wentland 
Signed-off-by: Lyude Paul 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 0ef49db0f08d..e33e080cf16d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1041,15 +1041,15 @@ static int dm_resume(void *handle)
/* program HPD filter */
dc_resume(dm->dc);
 
-   /* On resume we need to  rewrite the MSTM control bits to enamble MST*/
-   s3_handle_mst(ddev, false);
-
/*
 * early enable HPD Rx IRQ, should be done before set mode as short
 * pulse interrupts are used for MST
 */
amdgpu_dm_irq_resume_early(adev);
 
+   /* On resume we need to  rewrite the MSTM control bits to enamble MST*/
+   s3_handle_mst(ddev, false);
+
/* Do detection*/
drm_connector_list_iter_begin(ddev, &iter);
drm_for_each_connector_iter(connector, &iter) {
-- 
2.21.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 24/26] drm/dp_mst: Add basic topology reprobing when resuming

2019-07-17 Thread Lyude Paul
Finally! For a very long time, our MST helpers have had one very
annoying issue: They don't know how to reprobe the topology state when
coming out of suspend. This means that if a user has a machine connected
to an MST topology and decides to suspend their machine, we lose all
topology changes that happened during that period. That can be a big
problem if the machine was connected to a different topology on the same
port before resuming, as we won't bother reprobing any of the ports and
likely cause the user's monitors not to come back up as expected.

So, we start fixing this by teaching our MST helpers how to reprobe the
link addresses of each connected topology when resuming. As it turns
out, the behavior that we want here is identical to the behavior we want
when initially probing a newly connected MST topology, with a couple of
important differences:

- We need to be more careful about handling the potential races between
  events from the MST hub that could change the topology state as we're
  performing the link address reprobe
- We need to be more careful about handling unlikely state changes on
  ports - such as an input port turning into an output port, something
  that would be far more likely to happen in situations like the MST hub
  we're connected to being changed while we're suspend

Both of which have been solved by previous commits. That leaves one
requirement:

- We need to prune any MST ports in our in-memory topology state that
  were present when suspending, but have not appeared in the post-resume
  link address response from their parent branch device

Which we can now handle in this commit by modifying
drm_dp_send_link_address(). We then introduce suspend/resume reprobing
by introducing drm_dp_mst_topology_mgr_invalidate_mstb(), which we call
in drm_dp_mst_topology_mgr_suspend() to traverse the in-memory topology
state to indicate that each mstb needs its link address resent and PBN
resources reprobed.

On resume, we start back up &mgr->work and have it reprobe the topology
in the same way we would on a hotplug, removing any leftover ports that
no longer appear in the topology state.
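
From a driver's point of view the suspend/resume flow then looks roughly like
the sketch below (the helper changes themselves are in the diffs; the new
second argument to drm_dp_mst_topology_mgr_resume() is introduced by this
patch, and all callers converted here pass true):

	/* suspend: mark each mstb so its link address and PBN resources
	 * get reprobed later */
	drm_dp_mst_topology_mgr_suspend(mgr);

	/* resume: reprobe the topology; on failure fall back to tearing
	 * down MST on the port, as amdgpu_dm already does */
	ret = drm_dp_mst_topology_mgr_resume(mgr, true);
	if (ret < 0)
		drm_dp_mst_topology_mgr_set_mst(mgr, false);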

Cc: Juston Li 
Cc: Imre Deak 
Cc: Ville Syrjälä 
Cc: Harry Wentland 
Signed-off-by: Lyude Paul 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   2 +-
 drivers/gpu/drm/drm_dp_mst_topology.c | 138 +-
 drivers/gpu/drm/i915/display/intel_dp.c   |   3 +-
 drivers/gpu/drm/nouveau/dispnv50/disp.c   |   6 +-
 include/drm/drm_dp_mst_helper.h   |   3 +-
 5 files changed, 112 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e33e080cf16d..2ebd4811ff40 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -829,7 +829,7 @@ static void s3_handle_mst(struct drm_device *dev, bool 
suspend)
if (suspend) {
drm_dp_mst_topology_mgr_suspend(mgr);
} else {
-   ret = drm_dp_mst_topology_mgr_resume(mgr);
+   ret = drm_dp_mst_topology_mgr_resume(mgr, true);
if (ret < 0) {
drm_dp_mst_topology_mgr_set_mst(mgr, false);
need_hotplug = true;
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
b/drivers/gpu/drm/drm_dp_mst_topology.c
index 126db36c9337..320158970e25 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1931,6 +1931,14 @@ drm_dp_mst_handle_link_address_port(struct 
drm_dp_mst_branch *mstb,
goto fail_unlock;
}
 
+   /*
+* If this port wasn't just created, then we're reprobing because
+* we're coming out of suspend. In this case, always resend the link
+* address if there's an MSTB on this port
+*/
+   if (!created && port->pdt == DP_PEER_DEVICE_MST_BRANCHING)
+   send_link_addr = true;
+
if (send_link_addr) {
mutex_lock(>lock);
if (port->mstb) {
@@ -2443,7 +2451,8 @@ static void drm_dp_send_link_address(struct 
drm_dp_mst_topology_mgr *mgr,
 {
struct drm_dp_sideband_msg_tx *txmsg;
struct drm_dp_link_address_ack_reply *reply;
-   int i, len, ret;
+   struct drm_dp_mst_port *port, *tmp;
+   int i, len, ret, port_mask = 0;
 
txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL);
if (!txmsg)
@@ -2473,9 +2482,28 @@ static void drm_dp_send_link_address(struct 
drm_dp_mst_topology_mgr *mgr,
 
drm_dp_check_mstb_guid(mstb, reply->guid);
 
-   for (i = 0; i < reply->nports; i++)
+   for (i = 0; i < reply->nports; i++) {
+   port_mask |= BIT(reply->ports[i].port_number);
drm_dp_mst_handle_link_address_port(mstb, mgr->dev,
&reply->ports[i]);
+   }
+
+  

[PATCH 22/26] drm/amdgpu: Iterate through DRM connectors correctly

2019-07-17 Thread Lyude Paul
Currently, every single piece of code in amdgpu that loops through
connectors does it incorrectly and doesn't use the proper list iteration
helpers, drm_connector_list_iter_begin() and
drm_connector_list_iter_end(). Yeesh.

So, do that.
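
For reference, this is the pattern everything gets switched to (the iter
helpers hold a temporary reference on the connector being visited, so the walk
is safe against concurrent hotplug; breaking out early is fine as long as
drm_connector_list_iter_end() is still called):

	struct drm_connector *connector;
	struct drm_connector_list_iter iter;

	drm_connector_list_iter_begin(dev, &iter);
	drm_for_each_connector_iter(connector, &iter) {
		/* use connector */
	}
	drm_connector_list_iter_end(&iter);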

Cc: Juston Li 
Cc: Imre Deak 
Cc: Ville Syrjälä 
Cc: Harry Wentland 
Signed-off-by: Lyude Paul 
---
 .../gpu/drm/amd/amdgpu/amdgpu_connectors.c| 13 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 20 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   |  5 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c  | 40 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c   |  5 ++-
 drivers/gpu/drm/amd/amdgpu/dce_v10_0.c| 34 
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c| 34 
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 40 ++-
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 34 
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 33 ---
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 10 -
 11 files changed, 195 insertions(+), 73 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 73b2ede773d3..2cabaaecf28a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -1022,8 +1022,12 @@ amdgpu_connector_dvi_detect(struct drm_connector 
*connector, bool force)
 */
if (amdgpu_connector->shared_ddc && (ret == 
connector_status_connected)) {
struct drm_connector *list_connector;
+   struct drm_connector_list_iter iter;
struct amdgpu_connector *list_amdgpu_connector;
-   list_for_each_entry(list_connector, 
&dev->mode_config.connector_list, head) {
+
+   drm_connector_list_iter_begin(dev, &iter);
+   drm_for_each_connector_iter(list_connector,
+   &iter) {
if (connector == list_connector)
continue;
list_amdgpu_connector = 
to_amdgpu_connector(list_connector);
@@ -1040,6 +1044,7 @@ amdgpu_connector_dvi_detect(struct drm_connector 
*connector, bool force)
}
}
}
+   drm_connector_list_iter_end(&iter);
}
}
}
@@ -1501,6 +1506,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 {
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+   struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector;
struct amdgpu_connector_atom_dig *amdgpu_dig_connector;
struct drm_encoder *encoder;
@@ -1514,10 +1520,12 @@ amdgpu_connector_add(struct amdgpu_device *adev,
return;
 
/* see if we already added it */
-   list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+   drm_connector_list_iter_begin(dev, &iter);
+   drm_for_each_connector_iter(connector, &iter) {
amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->connector_id == connector_id) {
amdgpu_connector->devices |= supported_device;
+   drm_connector_list_iter_end(&iter);
return;
}
if (amdgpu_connector->ddc_bus && i2c_bus->valid) {
@@ -1532,6 +1540,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
}
}
}
+   drm_connector_list_iter_end(&iter);
 
/* check if it's a dp bridge */
list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7401bc95c15b..f8d433b013e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2909,6 +2909,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
suspend, bool fbcon)
struct amdgpu_device *adev;
struct drm_crtc *crtc;
struct drm_connector *connector;
+   struct drm_connector_list_iter iter;
int r;
 
if (dev == NULL || dev->dev_private == NULL) {
@@ -2931,9 +2932,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
suspend, bool fbcon)
if (!amdgpu_device_has_dc_support(adev)) {
/* turn off display hw */
drm_modeset_lock_all(dev);
-   list_for_each_entry(connector, 
&dev->mode_config.connector_list, head) {
-   drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
-  

[PATCH 00/26] DP MST Refactors + debugging tools + suspend/resume reprobing

2019-07-17 Thread Lyude Paul
This is the large series for adding MST suspend/resume reprobing that
I've been working on for quite a while now. In addition, I:

- Refactored and cleaned up any code I ended up digging through in the
  process of understanding how some parts of these helpers worked.
- Added some debugging tools along the way that I ended up needing to
  figure out some issues in my own code

Note that there's still one important part of this process missing
that's not included in this patch series: EDID reprobing, which I
believe Stanislav Lisovskiy from Intel is currently working on. The main
purpose of this series is to fix the issue of the in-memory topology
state (e.g. connectors connected to an MST hub, branch devices, etc.)
going out of sync if a topology connected to a connector is swapped out
with a different topology while the system is resumed, or while the
device housing said connector is in runtime suspend.

As well, the debugging tools that are added in this include:
- A limited debugging utility for dumping the list of topology
  references on an MST port or branch connector whose topology reference
  count has reached 0
- Sideband down request dumping, along with some basic selftests for
  testing our encoding/decoding functions

Cc: Juston Li 
Cc: Imre Deak 
Cc: Ville Syrjälä 
Cc: Harry Wentland 

Lyude Paul (26):
  drm/dp_mst: Move link address dumping into a function
  drm/dp_mst: Destroy mstbs from destroy_connector_work
  drm/dp_mst: Move test_calc_pbn_mode() into an actual selftest
  drm/print: Add drm_err_printer()
  drm/dp_mst: Add sideband down request tracing + selftests
  drm/dp_mst: Move PDT teardown for ports into destroy_connector_work
  drm/dp_mst: Get rid of list clear in drm_dp_finish_destroy_port()
  drm/dp_mst: Refactor drm_dp_send_enum_path_resources
  drm/dp_mst: Remove huge conditional in drm_dp_mst_handle_up_req()
  drm/dp_mst: Constify guid in drm_dp_get_mst_branch_by_guid()
  drm/dp_mst: Refactor drm_dp_mst_handle_up_req()
  drm/dp_mst: Refactor drm_dp_mst_handle_down_rep()
  drm/dp_mst: Destroy topology_mgr mutexes
  drm/dp_mst: Cleanup drm_dp_send_link_address() a bit
  drm/dp_mst: Refactor pdt setup/teardown, add more locking
  drm/dp_mst: Rename drm_dp_add_port and drm_dp_update_port
  drm/dp_mst: Remove lies in {up,down}_rep_recv documentation
  drm/dp_mst: Handle UP requests asynchronously
  drm/dp_mst: Protect drm_dp_mst_port members with connection_mutex
  drm/dp_mst: Don't forget to update port->input in
drm_dp_mst_handle_conn_stat()
  drm/nouveau: Don't grab runtime PM refs for HPD IRQs
  drm/amdgpu: Iterate through DRM connectors correctly
  drm/amdgpu/dm: Resume short HPD IRQs before resuming MST topology
  drm/dp_mst: Add basic topology reprobing when resuming
  drm/dp_mst: Also print unhashed pointers for malloc/topology
references
  drm/dp_mst: Add topology ref history tracking for debugging

 drivers/gpu/drm/Kconfig   |   14 +
 .../gpu/drm/amd/amdgpu/amdgpu_connectors.c|   13 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   20 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   |5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c  |   40 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c   |5 +-
 drivers/gpu/drm/amd/amdgpu/dce_v10_0.c|   34 +-
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c|   34 +-
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c |   40 +-
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c |   34 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   41 +-
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c |   10 +-
 drivers/gpu/drm/drm_dp_mst_topology.c | 1592 +
 .../gpu/drm/drm_dp_mst_topology_internal.h|   24 +
 drivers/gpu/drm/drm_print.c   |6 +
 drivers/gpu/drm/i915/display/intel_dp.c   |3 +-
 drivers/gpu/drm/nouveau/dispnv50/disp.c   |6 +-
 drivers/gpu/drm/nouveau/nouveau_connector.c   |   33 +-
 drivers/gpu/drm/selftests/Makefile|2 +-
 .../gpu/drm/selftests/drm_modeset_selftests.h |2 +
 .../drm/selftests/test-drm_dp_mst_helper.c|  248 +++
 .../drm/selftests/test-drm_modeset_common.h   |2 +
 include/drm/drm_dp_mst_helper.h   |  127 +-
 include/drm/drm_print.h   |   17 +
 24 files changed, 1846 insertions(+), 506 deletions(-)
 create mode 100644 drivers/gpu/drm/drm_dp_mst_topology_internal.h
 create mode 100644 drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c

-- 
2.21.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH 2/2] drm/amdgpu: add sclk/uclk sensor support for vega20

2019-07-17 Thread Quan, Evan
I think the AMDGPU_PP_SENSOR_GFX_SCLK and AMDGPU_PP_SENSOR_GFX_MCLK requests 
are already handled in smu_v11_0_read_sensor.
That means they never reach navi10_ppt.c and vega20_ppt.c.
Maybe this should be fixed in smu_v11_0_read_sensor instead.

Regards,
Evan
> -Original Message-
> From: amd-gfx  On Behalf Of Alex
> Deucher
> Sent: Wednesday, July 17, 2019 10:36 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: [PATCH 2/2] drm/amdgpu: add sclk/uclk sensor support for vega20
> 
> Query the metrics table to get the average sclk and uclk.
> 
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 35
> ++
>  1 file changed, 35 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> index 9204e4e50d09..763d73af6cd1 100644
> --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> @@ -3117,6 +3117,36 @@ static int vega20_thermal_get_temperature(struct
> smu_context *smu,
> 
>   return 0;
>  }
> +
> +static int vega20_get_avg_clocks(struct smu_context *smu,
> +  enum amd_pp_sensors sensor,
> +  uint32_t *value)
> +{
> + SmuMetrics_t metrics;
> + int ret = 0;
> +
> + if (!value)
> + return -EINVAL;
> +
> + ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, (void
> *)&metrics, false);
> + if (ret)
> + return ret;
> +
> + switch (sensor) {
> + case AMDGPU_PP_SENSOR_GFX_SCLK:
> + *value = metrics.AverageGfxclkFrequency * 100;
> + break;
> + case AMDGPU_PP_SENSOR_GFX_MCLK:
> + *value = metrics.AverageUclkFrequency * 100;
> + break;
> + default:
> + pr_err("Invalid sensor for retrieving avg clock\n");
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
>  static int vega20_read_sensor(struct smu_context *smu,
>enum amd_pp_sensors sensor,
>void *data, uint32_t *size)
> @@ -3147,6 +3177,11 @@ static int vega20_read_sensor(struct smu_context
> *smu,
>   ret = vega20_thermal_get_temperature(smu, sensor,
> (uint32_t *)data);
>   *size = 4;
>   break;
> + case AMDGPU_PP_SENSOR_GFX_SCLK:
> + case AMDGPU_PP_SENSOR_GFX_MCLK:
> + ret = vega20_get_avg_clocks(smu, sensor, (uint32_t *)data);
> + *size = 4;
> + break;
>   default:
>   return -EINVAL;
>   }
> --
> 2.20.1
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends

2019-07-17 Thread Kuehling, Felix
On 2019-07-17 5:10, Christian König wrote:
> On 2019-07-16 18:40, Kuehling, Felix wrote:
>> On 2019-07-16 9:36 a.m., Christian König wrote:
>>> On 2019-07-02 21:35, Kuehling, Felix wrote:
 This assumes that page tables are resident when a page fault is 
 handled.
>>> Yeah, that is correct. I also haven't completely figured out how we
>>> can prevent the VM from being destroyed while handling the fault.
>> There are other cases I had in mind: Page tables can be evicted. For KFD
>> processes which can be preempted with CWSR, it's possible that a wave
>> that caused a page fault is preempted due to a page table eviction. That
>> means, by the time the page fault is handled, the page table is no
>> longer resident.
>
> This is a corner case we can handle later on. As long as the VM is 
> still alive just allocating page tables again should be sufficient for 
> this.

Do you mean, instead of migrating page tables back, throwing them away 
and allocating new ones?

Also, this may be a corner case. But I feel you're limiting yourself to 
a small range of current use cases. I'm not convinced that the design 
you're building here will scale to future use cases for HMM updating 
page tables for random virtual addresses. I'm looking for a general 
solution that will work for those future use cases. Otherwise we'll end 
up having to rewrite this page-table-update-in-fault-handler code from 
scratch in a month or two.


>
>>> I mean it's perfectly possible that the process is killed while faults
>>> are still in the pipeline.
>>>
 I think it's possible that a page table gets evicted while a page 
 fault
 is pending. Maybe not with graphics, but definitely with compute where
 waves can be preempted while waiting for a page fault. In that case 
 the
 direct access would break.

 Even with graphics I think it's still possible that new page tables 
 need
 to be allocated to handle a page fault. When that happens, you need to
 wait for fences to let new page tables be validated and initialized.
>>> Yeah, the problem here is that when you wait on fences which in turn
>>> depend on your submission your end up in a deadlock.
>>>
>> I think this implies that you have amdgpu_cs fences attached to page
>> tables. I believe this is the fundamental issue that needs to be fixed.
>
> We still need this cause even with page faults the root PD can't be 
> evicted.
>
> What we can probably do is to split up the PDs/PTs into the root PD 
> and everything else.

Yeah, the root PD always exists as long as the VM exists. Everything 
else can be created/destroyed/moved dynamically.


>
>> If you want to manage page tables in page fault interrupt handlers, you
>> can't have command submission fences attached to your page tables. You
>> can allow page tables to be evicted while the command submission is in
>> progress. A page fault will fault it back in if it's needed. If you
>> eliminate command submission fences on the page tables, you remove the
>> potential for deadlocks.
>
> No, there is still a huge potential for deadlocks here.
>
> Additional to the root PDs you can have a MM submission which needs to 
> wait for a compute submission to be finished.

I assume by MM you mean "memory manager", not "multi-media". What kind of 
MM submission specifically? It can't be a VM page table update, with my 
proposal to never add user CS fences to VM page table reservations. Are 
you talking about buffer moves? They could go on a separate scheduler 
entity from VM submissions, so they don't hold up VM updates. VM updates 
only need to wait for MM fences that affect the page table BOs themselves.


>
> If you then make your new allocation depend on the MM submission to be 
> finished you have a classical circle dependency and a deadlock at hand.

I don't see it. Allocate page table, wait for fence associated with that 
page table initialization, update PTEs. At no point do we depend on the 
user CS being stalled by the page fault. There is no user submission on 
the paging ring. Anything that has been scheduled on the paging ring has 
its dependencies satisfied. We may need separate scheduler entities 
(queues) for regular MM submissions that can depend on user fences and 
VM submissions that must not.


>
> The only way around that is to allocate the new page tables with the 
> no_wait_gpu flag set and so avoid having any dependencies on ongoing 
> operations.

We discussed this before. I suggested an emergency pool for page tables. 
That pool can have a limited size. If page tables don't have user fences 
on them, they can always be evicted, so we can always make room in this 
emergency pool.


>
>> But you do need fences on page tables related to the allocation and
>> migration of page tables themselves. And your page table updates must
>> wait for those fences. Therefore I think the whole approach of direct
>> submission for page table updates is fundamentally broken.
>
> For the reasons 

Re: [PATCH v2] drm/amdgpu: Default disable GDS for compute VMIDs

2019-07-17 Thread Kuehling, Felix
On 2019-07-17 14:23, Greathouse, Joseph wrote:
> The GDS and GWS blocks default to allowing all VMIDs to
> access all entries. Graphics VMIDs can handle setting
> these limits when the driver launches work. However,
> compute workloads under HWS control don't go through the
> kernel driver. Instead, HWS firmware should set these
> limits when a process is put into a VMID slot.
>
> Disable access to these devices by default by turning off
> all mask bits (for OA) and setting BASE=SIZE=0 (for GDS
> and GWS) for all compute VMIDs. If a process wants to use
> these resources, they can request this from the HWS
> firmware (when such capabilities are enabled). HWS will
> then handle setting the base and limit for the process when
> it is assigned to a VMID.
>
> This will also prevent user kernels from getting 'stuck' in
> GWS by accident if they write GWS-using code but HWS
> firmware is not set up to handle GWS reset. Until HWS is
> enabled to handle GWS properly, all GWS accesses will
> MEM_VIOL fault the kernel.
>
> v2: Move initialization outside of SRBM mutex
>
> Change-Id: I8edcea9d0b14d16a7444bcf9fbf9451aef8b707d
> Signed-off-by: Joseph Greathouse 

Reviewed-by: Felix Kuehling 


> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 9 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  | 9 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 9 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 9 +
>   4 files changed, 36 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 618291df659b..73dcb632a3ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -1516,6 +1516,15 @@ static void gfx_v10_0_init_compute_vmid(struct 
> amdgpu_device *adev)
>   }
>   nv_grbm_select(adev, 0, 0, 0, 0);
>   mutex_unlock(&adev->srbm_mutex);
> +
> + /* Initialize all compute VMIDs to have no GDS, GWS, or OA
> +acccess. These should be enabled by FW for target VMIDs. */
> + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
> + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
> + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
> + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
> + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
> + }
>   }
>   
>   static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index e1e2a44ee13c..3f98624772a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -1877,6 +1877,15 @@ static void gfx_v7_0_init_compute_vmid(struct 
> amdgpu_device *adev)
>   }
>   cik_srbm_select(adev, 0, 0, 0, 0);
>   mutex_unlock(&adev->srbm_mutex);
> +
> + /* Initialize all compute VMIDs to have no GDS, GWS, or OA
> +acccess. These should be enabled by FW for target VMIDs. */
> + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
> + WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
> + WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
> + WREG32(amdgpu_gds_reg_offset[i].gws, 0);
> + WREG32(amdgpu_gds_reg_offset[i].oa, 0);
> + }
>   }
>   
>   static void gfx_v7_0_config_init(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 8c590a554675..e4028d54f8f7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -3702,6 +3702,15 @@ static void gfx_v8_0_init_compute_vmid(struct 
> amdgpu_device *adev)
>   }
>   vi_srbm_select(adev, 0, 0, 0, 0);
>   mutex_unlock(&adev->srbm_mutex);
> +
> + /* Initialize all compute VMIDs to have no GDS, GWS, or OA
> +acccess. These should be enabled by FW for target VMIDs. */
> + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
> + WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
> + WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
> + WREG32(amdgpu_gds_reg_offset[i].gws, 0);
> + WREG32(amdgpu_gds_reg_offset[i].oa, 0);
> + }
>   }
>   
>   static void gfx_v8_0_config_init(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 5af60e1c735a..259a35395fec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -2025,6 +2025,15 @@ static void gfx_v9_0_init_compute_vmid(struct 
> amdgpu_device *adev)
>   }
>   soc15_grbm_select(adev, 0, 0, 0, 0);
>   mutex_unlock(&adev->srbm_mutex);
> +
> + /* Initialize all compute VMIDs to have no GDS, GWS, or OA
> +acccess. These should be enabled by FW for target VMIDs. */
> + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
> + 

[PATCH v2] drm/amdgpu: Default disable GDS for compute VMIDs

2019-07-17 Thread Greathouse, Joseph
The GDS and GWS blocks default to allowing all VMIDs to
access all entries. Graphics VMIDs can handle setting
these limits when the driver launches work. However,
compute workloads under HWS control don't go through the
kernel driver. Instead, HWS firmware should set these
limits when a process is put into a VMID slot.

Disable access to these devices by default by turning off
all mask bits (for OA) and setting BASE=SIZE=0 (for GDS
and GWS) for all compute VMIDs. If a process wants to use
these resources, they can request this from the HWS
firmware (when such capabilities are enabled). HWS will
then handle setting the base and limit for the process when
it is assigned to a VMID.

This will also prevent user kernels from getting 'stuck' in
GWS by accident if they write GWS-using code but HWS
firmware is not set up to handle GWS reset. Until HWS is
enabled to handle GWS properly, all GWS accesses will
MEM_VIOL fault the kernel.

v2: Move initialization outside of SRBM mutex

Change-Id: I8edcea9d0b14d16a7444bcf9fbf9451aef8b707d
Signed-off-by: Joseph Greathouse 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 9 +
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  | 9 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 9 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 9 +
 4 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 618291df659b..73dcb632a3ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1516,6 +1516,15 @@ static void gfx_v10_0_init_compute_vmid(struct 
amdgpu_device *adev)
}
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+
+   /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+  acccess. These should be enabled by FW for target VMIDs. */
+   for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+   WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
+   WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
+   WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
+   WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
+   }
 }
 
 static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index e1e2a44ee13c..3f98624772a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1877,6 +1877,15 @@ static void gfx_v7_0_init_compute_vmid(struct 
amdgpu_device *adev)
}
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+
+   /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+  acccess. These should be enabled by FW for target VMIDs. */
+   for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+   WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
+   WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
+   WREG32(amdgpu_gds_reg_offset[i].gws, 0);
+   WREG32(amdgpu_gds_reg_offset[i].oa, 0);
+   }
 }
 
 static void gfx_v7_0_config_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8c590a554675..e4028d54f8f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3702,6 +3702,15 @@ static void gfx_v8_0_init_compute_vmid(struct 
amdgpu_device *adev)
}
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+
+   /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+  acccess. These should be enabled by FW for target VMIDs. */
+   for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+   WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
+   WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
+   WREG32(amdgpu_gds_reg_offset[i].gws, 0);
+   WREG32(amdgpu_gds_reg_offset[i].oa, 0);
+   }
 }
 
 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5af60e1c735a..259a35395fec 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2025,6 +2025,15 @@ static void gfx_v9_0_init_compute_vmid(struct 
amdgpu_device *adev)
}
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+
+   /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+  acccess. These should be enabled by FW for target VMIDs. */
+   for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+   WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
+   WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
+   WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
+   WREG32_SOC15_OFFSET(GC, 0, 

[PATCH] drm/amdgpu/pm: remove check for pp funcs in freq sysfs handlers

2019-07-17 Thread Alex Deucher
The dpm sensor function already does this for us.  This fixes
the freq*_input files with the new SMU implementation.
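
In other words, callers can rely on the sensor helper alone; a sketch of what
the read looks like after this patch (same names as in amdgpu_hwmon_show_sclk,
and assuming amdgpu_dpm_read_sensor() keeps validating the read_sensor path
itself):

	uint32_t sclk;
	uint32_t size = sizeof(sclk);
	int r;

	/* no separate adev->powerplay.pp_funcs->read_sensor check needed */
	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
				   (void *)&sclk, &size);
	if (r)
		return r;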

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 10 --
 1 file changed, 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 397af9094a39..8b7efd0a7028 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -2077,11 +2077,6 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
 (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
 
-   /* sanity check PP is enabled */
-   if (!(adev->powerplay.pp_funcs &&
- adev->powerplay.pp_funcs->read_sensor))
- return -EINVAL;
-
/* get the sclk */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
   (void *)&sclk, &size);
@@ -2112,11 +2107,6 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
 (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
 
-   /* sanity check PP is enabled */
-   if (!(adev->powerplay.pp_funcs &&
- adev->powerplay.pp_funcs->read_sensor))
- return -EINVAL;
-
/* get the sclk */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
   (void *)&mclk, &size);
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: Default disable GDS for compute VMIDs

2019-07-17 Thread Kuehling, Felix
On 2019-07-17 13:11, Greathouse, Joseph wrote:
> The GDS and GWS blocks default to allowing all VMIDs to
> access all entries. Graphics VMIDs can handle setting
> these limits when the driver launches work. However,
> compute workloads under HWS control don't go through the
> kernel driver. Instead, HWS firmware should set these
> limits when a process is put into a VMID slot.
>
> Disable access to these devices by default by turning off
> all mask bits (for OA) and setting BASE=SIZE=0 (for GDS
> and GWS) for all compute VMIDs. If a process wants to use
> these resources, they can request this from the HWS
> firmware (when such capabilities are enabled). HWS will
> then handle setting the base and limit for the process when
> it is assigned to a VMID.
>
> This will also prevent user kernels from getting 'stuck' in
> GWS by accident if they write GWS-using code but HWS
> firmware is not set up to handle GWS reset. Until HWS is
> enabled to handle GWS properly, all GWS accesses will
> MEM_VIOL fault the kernel.
>
> Change-Id: I2a709fdcb2468511754f2e5eae75546751c0e6f0
> Signed-off-by: Joseph Greathouse 
> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 ++
>   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  | 6 ++
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 6 ++
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 6 ++
>   4 files changed, 24 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 618291df659b..32da5a91abfb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -1513,6 +1513,12 @@ static void gfx_v10_0_init_compute_vmid(struct 
> amdgpu_device *adev)
>   /* CP and shaders */
>   WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
>   WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
> + /* Initialize all compute VMIDs to have no GDS, GWS, or OA
> +acccess. These should be enabled by FW for target VMID. */
> + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0x);
> + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0x);
> + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0x);
> + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0x);
I'd prefer these register initializations in a separate loop outside the 
srbm_mutex, because they don't depend on the srbm/grbm_select bits. Same 
for the other GFX IP versions.

Regards,
   Felix


>   }
>   nv_grbm_select(adev, 0, 0, 0, 0);
>   mutex_unlock(&adev->srbm_mutex);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index e1e2a44ee13c..89ea0d799c12 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -1874,6 +1874,12 @@ static void gfx_v7_0_init_compute_vmid(struct 
> amdgpu_device *adev)
>   WREG32(mmSH_MEM_APE1_BASE, 1);
>   WREG32(mmSH_MEM_APE1_LIMIT, 0);
>   WREG32(mmSH_MEM_BASES, sh_mem_bases);
> + /* Initialize all compute VMIDs to have no GDS, GWS, or OA
> +acccess. These should be enabled by FW for target VMID. */
> + WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
> + WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
> + WREG32(amdgpu_gds_reg_offset[i].gws, 0);
> + WREG32(amdgpu_gds_reg_offset[i].oa, 0);
>   }
>   cik_srbm_select(adev, 0, 0, 0, 0);
>   mutex_unlock(&adev->srbm_mutex);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 8c590a554675..53d9a223f8c1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -3699,6 +3699,12 @@ static void gfx_v8_0_init_compute_vmid(struct 
> amdgpu_device *adev)
>   WREG32(mmSH_MEM_APE1_BASE, 1);
>   WREG32(mmSH_MEM_APE1_LIMIT, 0);
>   WREG32(mmSH_MEM_BASES, sh_mem_bases);
> + /* Initialize all compute VMIDs to have no GDS, GWS, or OA
> +acccess. These should be enabled by FW for target VMID. */
> + WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
> + WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
> + WREG32(amdgpu_gds_reg_offset[i].gws, 0);
> + WREG32(amdgpu_gds_reg_offset[i].oa, 0);
>   }
>   vi_srbm_select(adev, 0, 0, 0, 0);
>   mutex_unlock(&adev->srbm_mutex);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 5af60e1c735a..b9ed03a8a561 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -2022,6 +2022,12 @@ static void gfx_v9_0_init_compute_vmid(struct 
> amdgpu_device *adev)
>   /* CP and shaders */
>   WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
>   

[PATCH] drm/amdgpu: Default disable GDS for compute VMIDs

2019-07-17 Thread Greathouse, Joseph
The GDS and GWS blocks default to allowing all VMIDs to
access all entries. Graphics VMIDs can handle setting
these limits when the driver launches work. However,
compute workloads under HWS control don't go through the
kernel driver. Instead, HWS firmware should set these
limits when a process is put into a VMID slot.

Disable access to these devices by default by turning off
all mask bits (for OA) and setting BASE=SIZE=0 (for GDS
and GWS) for all compute VMIDs. If a process wants to use
these resources, they can request this from the HWS
firmware (when such capabilities are enabled). HWS will
then handle setting the base and limit for the process when
it is assigned to a VMID.

This will also prevent user kernels from getting 'stuck' in
GWS by accident if they write GWS-using code but HWS
firmware is not set up to handle GWS reset. Until HWS is
enabled to handle GWS properly, all GWS accesses will
MEM_VIOL fault the kernel.

Change-Id: I2a709fdcb2468511754f2e5eae75546751c0e6f0
Signed-off-by: Joseph Greathouse 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  | 6 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 6 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 6 ++
 4 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 618291df659b..32da5a91abfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1513,6 +1513,12 @@ static void gfx_v10_0_init_compute_vmid(struct 
amdgpu_device *adev)
/* CP and shaders */
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
+   /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+  acccess. These should be enabled by FW for target VMID. */
+   WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0x);
+   WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0x);
+   WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0x);
+   WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0x);
}
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index e1e2a44ee13c..89ea0d799c12 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1874,6 +1874,12 @@ static void gfx_v7_0_init_compute_vmid(struct 
amdgpu_device *adev)
WREG32(mmSH_MEM_APE1_BASE, 1);
WREG32(mmSH_MEM_APE1_LIMIT, 0);
WREG32(mmSH_MEM_BASES, sh_mem_bases);
+   /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+  acccess. These should be enabled by FW for target VMID. */
+   WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
+   WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
+   WREG32(amdgpu_gds_reg_offset[i].gws, 0);
+   WREG32(amdgpu_gds_reg_offset[i].oa, 0);
}
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8c590a554675..53d9a223f8c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3699,6 +3699,12 @@ static void gfx_v8_0_init_compute_vmid(struct 
amdgpu_device *adev)
WREG32(mmSH_MEM_APE1_BASE, 1);
WREG32(mmSH_MEM_APE1_LIMIT, 0);
WREG32(mmSH_MEM_BASES, sh_mem_bases);
+   /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+  acccess. These should be enabled by FW for target VMID. */
+   WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
+   WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
+   WREG32(amdgpu_gds_reg_offset[i].gws, 0);
+   WREG32(amdgpu_gds_reg_offset[i].oa, 0);
}
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5af60e1c735a..b9ed03a8a561 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2022,6 +2022,12 @@ static void gfx_v9_0_init_compute_vmid(struct 
amdgpu_device *adev)
/* CP and shaders */
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
+   /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+  acccess. These should be enabled by FW for target VMID. */
+   WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0x);
+   WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 

Re: HMM related use-after-free with amdgpu

2019-07-17 Thread Jason Gunthorpe
On Tue, Jul 16, 2019 at 10:10:46PM +, Kuehling, Felix wrote:
> On 2019-07-16 1:04 p.m., Michel Dänzer wrote:
> > On 2019-07-16 6:35 p.m., Jason Gunthorpe wrote:
> >> On Tue, Jul 16, 2019 at 06:31:09PM +0200, Michel Dänzer wrote:
> >>> On 2019-07-15 7:25 p.m., Jason Gunthorpe wrote:
>  On Mon, Jul 15, 2019 at 06:51:06PM +0200, Michel Dänzer wrote:
> > With a KASAN enabled kernel built from amd-staging-drm-next, the
> > attached use-after-free is pretty reliably detected during a piglit gpu 
> > run.
>  Does this branch you are testing have the hmm.git merged? I think from
>  the name it does not?
> >>> Indeed, no.
> >>>
> >>>
>  Use after free's of this nature were something that was fixed in
>  hmm.git..
> 
>  I don't see an obvious way you can hit something like this with the
>  new code arrangement..
> >>> I tried merging the hmm-devmem-cleanup.4 changes[0] into my 5.2.y +
> >>> drm-next for 5.3 kernel. While the result didn't hit the problem, all
> >>> GL_AMD_pinned_memory piglit tests failed, so I suspect the problem was
> >>> simply avoided by not actually hitting the HMM related functionality.
> >>>
> >>> It's possible that I made a mistake in merging the changes, or that I
> >>> missed some other required changes. But it's also possible that the HMM
> >>> changes broke the corresponding user-pointer functionality in amdgpu.
> >> Not sure, this was all Tested by the AMD team so it should work, I
> >> hope.
> > It can't, due to the issue pointed out by Linus in the "drm pull for
> > 5.3-rc1" thread: DRM_AMDGPU_USERPTR still depends on ARCH_HAS_HMM, which
> > no longer exists, so it can't be enabled.
> 
> As far as I can tell, Linus fixed this up in his merge commit 
> be8454afc50f43016ca8b6130d9673bdd0bd56ec. Jason, is hmm.git going to get 
> rebased or merge to pick up the amdgpu changes for HMM from master?

It will be reset to -rc1 when it comes out, then we start all over
again.

Jason
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10

2019-07-17 Thread Koenig, Christian
On 17.07.19 at 16:54, Marek Olšák wrote:


On Wed., Jul. 17, 2019, 10:43 Christian König, 
mailto:ckoenig.leichtzumer...@gmail.com>> 
wrote:
On 17.07.19 at 16:27, Marek Olšák wrote:


On Wed., Jul. 17, 2019, 03:15 Christian König, 
mailto:ckoenig.leichtzumer...@gmail.com>> 
wrote:
On 17.07.19 at 02:06, Marek Olšák wrote:
> From: Marek Olšák mailto:marek.ol...@amd.com>>
>
> Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
> separate GDS buffers for each gfx ring.
>
> This is a workaround to ensure stability of transform feedback. Shaders hang
> waiting for a GDS instruction (ds_sub, not ds_ordered_count).
>
> The disadvantage is that compute IBs might get a different VMID,
> because now gfx always has GDS and compute doesn't.

So far compute is only using GWS, but I don't think that those
reservations are a good idea in general.

How severe is the ENOMEM problem you see with using an explicit GWS
allocation?

I guess you mean GDS or OA.

Yeah, just a typo. Compute is using GWS and we want to use GDS and OA here.

There is no ENOMEM, it just hangs. I don't know why. The shader is waiting for 
ds_sub and can't continue, but GDS is idle.

Well could it be because we don't correctly handle non zero offsets or stuff 
like that?

I don't know what you mean.

I wanted to try to leak some GDS/OA during allocation to narrow down what is 
going wrong here.



Does it work with this hack when you don't allocate GDS/OA from the start? 
(Just allocate it twice or something like this).

It's only allocated once by the kernel with this hack.

Well my question is why does that actually help?

I mean is it a bug in the memory management or some hardware issue that we 
can't switch GDS/OS?

I think we need to find the root cause of the problem and if it's related to 
switching GDS/OS what the actual restrictions are.

Christian.


Marek


Christian.


Marek


Regards,
Christian.

>
> Signed-off-by: Marek Olšák mailto:marek.ol...@amd.com>>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 
>   4 files changed, 37 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4b514a44184c..cbd55d061b72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
>   struct drm_file *filp;
>   struct amdgpu_ctx   *ctx;
>
>   /* chunks */
>   unsignednchunks;
>   struct amdgpu_cs_chunk  *chunks;
>
>   /* scheduler job object */
>   struct amdgpu_job   *job;
>   struct drm_sched_entity *entity;
> + unsignedhw_ip;
>
>   /* buffer objects */
>   struct ww_acquire_ctx   ticket;
>   struct amdgpu_bo_list   *bo_list;
>   struct amdgpu_mn*mn;
>   struct amdgpu_bo_list_entry vm_pd;
>   struct list_headvalidated;
>   struct dma_fence*fence;
>   uint64_tbytes_moved_threshold;
>   uint64_tbytes_moved_vis_threshold;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index c691df6f7a57..9125cd69a124 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser 
> *p,
>   if (r)
>   goto error_validate;
>
>   amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>p->bytes_moved_vis);
>
>   gds = p->bo_list->gds_obj;
>   gws = p->bo_list->gws_obj;
>   oa = p->bo_list->oa_obj;
>
> + if (p->hw_ip == AMDGPU_HW_IP_GFX) {
> + /* Only gfx10 allocates these. */
> + if (!gds)
> + gds = p->adev->gds.gds_gfx_bo;
> + if (!oa)
> + oa = p->adev->gds.oa_gfx_bo;
> + }
> +
>   amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>
>   /* Make sure we use the exclusive slot for shared BOs */
>   if (bo->prime_shared_count)
>   e->tv.num_shared = 0;
>   e->bo_va = amdgpu_vm_bo_find(vm, bo);
>   }
>
>   if (gds) {
> @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>   struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>   struct drm_sched_entity *entity;
>
>   chunk = &p->chunks[i];
>   ib = &p->job->ibs[j];
>   chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>
>   if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>   
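
For context on the explicit GDS/OA allocation being discussed: user space can already request a GDS-backed buffer through libdrm by picking the GDS heap. A minimal sketch (illustrative size, no error handling; "dev" is assumed to be an already-initialized amdgpu_device_handle):

#include <amdgpu.h>
#include <amdgpu_drm.h>

	struct amdgpu_bo_alloc_request req = {
		.alloc_size = 256,			/* illustrative size in bytes */
		.preferred_heap = AMDGPU_GEM_DOMAIN_GDS,
	};
	amdgpu_bo_handle gds_bo;
	int r = amdgpu_bo_alloc(dev, &req, &gds_bo);	/* dev: assumed device handle */

The workaround in the patch goes the other way: the kernel attaches a per-device GDS/OA buffer to every GFX submission that brings none of its own, so no such per-application allocation is needed.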

Re: [PATCH] drm/amdkfd: Remove GWS from process during uninit

2019-07-17 Thread Kuehling, Felix
On 2019-07-17 10:58, Greathouse, Joseph wrote:
> If we shut down a process without having destroyed its GWS-using
> queues, it is possible that the GWS BO will still be in the process
> BO list during the gpuvm destruction. This list should be empty
> at that time, so we should remove the GWS allocation at the
> process uninit point if it is still around.
>
> Change-Id: I098e7b315070dd5b0165bb7905aef643450f27f2
> Signed-off-by: Joseph Greathouse 

Reviewed-by: Felix Kuehling 


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 +++
>   1 file changed, 3 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index da0958625861..7e6c3ee82f5b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -150,6 +150,9 @@ void pqm_uninit(struct process_queue_manager *pqm)
>   struct process_queue_node *pqn, *next;
>   
>   list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
> + if (pqn->q && pqn->q->gws)
> + 
> amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
> + pqn->q->gws);
>   uninit_queue(pqn->q);
>   list_del(&pqn->process_queue_list);
>   kfree(pqn);
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdkfd: Remove GWS from process during uninit

2019-07-17 Thread Deucher, Alexander
Acked-by: Alex Deucher 

From: amd-gfx  on behalf of Greathouse, 
Joseph 
Sent: Wednesday, July 17, 2019 10:58 AM
To: amd-gfx@lists.freedesktop.org
Cc: Greathouse, Joseph
Subject: [PATCH] drm/amdkfd: Remove GWS from process during uninit

If we shut down a process without having destroyed its GWS-using
queues, it is possible that the GWS BO will still be in the process
BO list during the gpuvm destruction. This list should be empty
at that time, so we should remove the GWS allocation at the
process uninit point if it is still around.

Change-Id: I098e7b315070dd5b0165bb7905aef643450f27f2
Signed-off-by: Joseph Greathouse 
---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index da0958625861..7e6c3ee82f5b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -150,6 +150,9 @@ void pqm_uninit(struct process_queue_manager *pqm)
 struct process_queue_node *pqn, *next;

 list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
+   if (pqn->q && pqn->q->gws)
+   
amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
+   pqn->q->gws);
 uninit_queue(pqn->q);
 list_del(&pqn->process_queue_list);
 kfree(pqn);
--
2.19.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Remove GWS from process during uninit

2019-07-17 Thread Greathouse, Joseph
If we shut down a process without having destroyed its GWS-using
queues, it is possible that the GWS BO will still be in the process
BO list during the gpuvm destruction. This list should be empty
at that time, so we should remove the GWS allocation at the
process uninit point if it is still around.

Change-Id: I098e7b315070dd5b0165bb7905aef643450f27f2
Signed-off-by: Joseph Greathouse 
---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index da0958625861..7e6c3ee82f5b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -150,6 +150,9 @@ void pqm_uninit(struct process_queue_manager *pqm)
struct process_queue_node *pqn, *next;
 
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
+   if (pqn->q && pqn->q->gws)
+   
amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
+   pqn->q->gws);
uninit_queue(pqn->q);
list_del(&pqn->process_queue_list);
kfree(pqn);
-- 
2.19.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10

2019-07-17 Thread Marek Olšák
On Wed., Jul. 17, 2019, 10:43 Christian König, <
ckoenig.leichtzumer...@gmail.com> wrote:

> On 17.07.19 at 16:27, Marek Olšák wrote:
>
>
>
> On Wed., Jul. 17, 2019, 03:15 Christian König, <
> ckoenig.leichtzumer...@gmail.com> wrote:
>
>> On 17.07.19 at 02:06, Marek Olšák wrote:
>> > From: Marek Olšák 
>> >
>> > Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
>> > separate GDS buffers for each gfx ring.
>> >
>> > This is a workaround to ensure stability of transform feedback. Shaders
>> hang
>> > waiting for a GDS instruction (ds_sub, not ds_ordered_count).
>> >
>> > The disadvantage is that compute IBs might get a different VMID,
>> > because now gfx always has GDS and compute doesn't.
>>
>> So far compute is only using GWS, but I don't think that those
>> reservations are a good idea in general.
>>
>> How severe is the ENOMEM problem you see with using an explicit GWS
>> allocation?
>>
>
> I guess you mean GDS or OA.
>
>
> Yeah, just a typo. Compute is using GWS and we want to use GDS and OA here.
>
> There is no ENOMEM, it just hangs. I don't know why. The shader is waiting
> for ds_sub and can't continue, but GDS is idle.
>
>
> Well could it be because we don't correctly handle non zero offsets or
> stuff like that?
>

I don't know what you mean.


> Does it work with this hack when you don't allocate GDS/OA from the start?
> (Just allocate it twice or something like this).
>

It's only allocated once by the kernel with this hack.

Marek


> Christian.
>
>
> Marek
>
>
>> Regards,
>> Christian.
>>
>> >
>> > Signed-off-by: Marek Olšák 
>> > ---
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++
>> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 
>> >   4 files changed, 37 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> > index 4b514a44184c..cbd55d061b72 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> > @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
>> >   struct drm_file *filp;
>> >   struct amdgpu_ctx   *ctx;
>> >
>> >   /* chunks */
>> >   unsignednchunks;
>> >   struct amdgpu_cs_chunk  *chunks;
>> >
>> >   /* scheduler job object */
>> >   struct amdgpu_job   *job;
>> >   struct drm_sched_entity *entity;
>> > + unsignedhw_ip;
>> >
>> >   /* buffer objects */
>> >   struct ww_acquire_ctx   ticket;
>> >   struct amdgpu_bo_list   *bo_list;
>> >   struct amdgpu_mn*mn;
>> >   struct amdgpu_bo_list_entry vm_pd;
>> >   struct list_headvalidated;
>> >   struct dma_fence*fence;
>> >   uint64_tbytes_moved_threshold;
>> >   uint64_tbytes_moved_vis_threshold;
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> > index c691df6f7a57..9125cd69a124 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> > @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct
>> amdgpu_cs_parser *p,
>> >   if (r)
>> >   goto error_validate;
>> >
>> >   amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>> >p->bytes_moved_vis);
>> >
>> >   gds = p->bo_list->gds_obj;
>> >   gws = p->bo_list->gws_obj;
>> >   oa = p->bo_list->oa_obj;
>> >
>> > + if (p->hw_ip == AMDGPU_HW_IP_GFX) {
>> > + /* Only gfx10 allocates these. */
>> > + if (!gds)
>> > + gds = p->adev->gds.gds_gfx_bo;
>> > + if (!oa)
>> > + oa = p->adev->gds.oa_gfx_bo;
>> > + }
>> > +
>> >   amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> >   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>> >
>> >   /* Make sure we use the exclusive slot for shared BOs */
>> >   if (bo->prime_shared_count)
>> >   e->tv.num_shared = 0;
>> >   e->bo_va = amdgpu_vm_bo_find(vm, bo);
>> >   }
>> >
>> >   if (gds) {
>> > @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device
>> *adev,
>> >   struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>> >   struct drm_sched_entity *entity;
>> >
>> >   chunk = &p->chunks[i];
>> >   ib = &p->job->ibs[j];
>> >   chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>> >
>> >   if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>> >   continue;
>> >
>> > + parser->hw_ip = chunk_ib->ip_type;
>> > +
>> >   if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
>> >   

Re: [PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10

2019-07-17 Thread Christian König

On 17.07.19 at 16:27, Marek Olšák wrote:



On Wed., Jul. 17, 2019, 03:15 Christian König, 
> wrote:


On 17.07.19 at 02:06, Marek Olšák wrote:
> From: Marek Olšák mailto:marek.ol...@amd.com>>
>
> Hopefully we'll only use 1 gfx ring, because otherwise we'd have
to have
> separate GDS buffers for each gfx ring.
>
> This is a workaround to ensure stability of transform feedback.
Shaders hang
> waiting for a GDS instruction (ds_sub, not ds_ordered_count).
>
> The disadvantage is that compute IBs might get a different VMID,
> because now gfx always has GDS and compute doesn't.

So far compute is only using GWS, but I don't think that those
reservations are a good idea in general.

How severe is the ENOMEM problem you see with using an explicit GWS
allocation?


I guess you mean GDS or OA.


Yeah, just a typo. Compute is using GWS and we want to use GDS and OA here.

There is no ENOMEM, it just hangs. I don't know why. The shader is 
waiting for ds_sub and can't continue, but GDS is idle.


Well could it be because we don't correctly handle non zero offsets or 
stuff like that?


Does it work with this hack when you don't allocate GDS/OA from the 
start? (Just allocate it twice or something like this).


Christian.



Marek


Regards,
Christian.

>
> Signed-off-by: Marek Olšák mailto:marek.ol...@amd.com>>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 
>   4 files changed, 37 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4b514a44184c..cbd55d061b72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
>       struct drm_file         *filp;
>       struct amdgpu_ctx       *ctx;
>
>       /* chunks */
>       unsigned                nchunks;
>       struct amdgpu_cs_chunk  *chunks;
>
>       /* scheduler job object */
>       struct amdgpu_job       *job;
>       struct drm_sched_entity *entity;
> +     unsigned                hw_ip;
>
>       /* buffer objects */
>       struct ww_acquire_ctx           ticket;
>       struct amdgpu_bo_list           *bo_list;
>       struct amdgpu_mn                *mn;
>       struct amdgpu_bo_list_entry     vm_pd;
>       struct list_head                validated;
>       struct dma_fence                *fence;
>       uint64_t bytes_moved_threshold;
>       uint64_t bytes_moved_vis_threshold;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index c691df6f7a57..9125cd69a124 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct
amdgpu_cs_parser *p,
>       if (r)
>               goto error_validate;
>
>       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
> p->bytes_moved_vis);
>
>       gds = p->bo_list->gds_obj;
>       gws = p->bo_list->gws_obj;
>       oa = p->bo_list->oa_obj;
>
> +     if (p->hw_ip == AMDGPU_HW_IP_GFX) {
> +             /* Only gfx10 allocates these. */
> +             if (!gds)
> +                     gds = p->adev->gds.gds_gfx_bo;
> +             if (!oa)
> +                     oa = p->adev->gds.oa_gfx_bo;
> +     }
> +
>       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo
);
>
>               /* Make sure we use the exclusive slot for shared
BOs */
>               if (bo->prime_shared_count)
>                       e->tv.num_shared = 0;
>               e->bo_va = amdgpu_vm_bo_find(vm, bo);
>       }
>
>       if (gds) {
> @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct
amdgpu_device *adev,
>               struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>               struct drm_sched_entity *entity;
>
>               chunk = &p->chunks[i];
>               ib = &p->job->ibs[j];
>               chunk_ib = (struct drm_amdgpu_cs_chunk_ib
*)chunk->kdata;
>
>               if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>                       continue;
>
> +             parser->hw_ip = chunk_ib->ip_type;
> +
>               if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
>                   (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
>                       if (chunk_ib->flags &
AMDGPU_IB_FLAG_PREEMPT) {

[PATCH 2/2] drm/amdgpu: add sclk/uclk sensor support for vega20

2019-07-17 Thread Alex Deucher
Query the metrics table to get the average sclk and uclk.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 35 ++
 1 file changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c 
b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
index 9204e4e50d09..763d73af6cd1 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
@@ -3117,6 +3117,36 @@ static int vega20_thermal_get_temperature(struct 
smu_context *smu,
 
return 0;
 }
+
+static int vega20_get_avg_clocks(struct smu_context *smu,
+enum amd_pp_sensors sensor,
+uint32_t *value)
+{
+   SmuMetrics_t metrics;
+   int ret = 0;
+
+   if (!value)
+   return -EINVAL;
+
+   ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, (void *)&metrics, 
false);
+   if (ret)
+   return ret;
+
+   switch (sensor) {
+   case AMDGPU_PP_SENSOR_GFX_SCLK:
+   *value = metrics.AverageGfxclkFrequency * 100;
+   break;
+   case AMDGPU_PP_SENSOR_GFX_MCLK:
+   *value = metrics.AverageUclkFrequency * 100;
+   break;
+   default:
+   pr_err("Invalid sensor for retrieving avg clock\n");
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
 static int vega20_read_sensor(struct smu_context *smu,
 enum amd_pp_sensors sensor,
 void *data, uint32_t *size)
@@ -3147,6 +3177,11 @@ static int vega20_read_sensor(struct smu_context *smu,
ret = vega20_thermal_get_temperature(smu, sensor, (uint32_t 
*)data);
*size = 4;
break;
+   case AMDGPU_PP_SENSOR_GFX_SCLK:
+   case AMDGPU_PP_SENSOR_GFX_MCLK:
+   ret = vega20_get_avg_clocks(smu, sensor, (uint32_t *)data);
+   *size = 4;
+   break;
default:
return -EINVAL;
}
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
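
A note on the unit handling above: the metrics table reports the average clocks in MHz, while the sensor interface hands the value back in 10 kHz units, hence the multiplication by 100. For example, an AverageGfxclkFrequency of 1350 MHz is returned as 1350 * 100 = 135000, and consumers divide by 100 again when printing MHz. (The 10 kHz convention is inferred from how SCLK/MCLK sensors are handled elsewhere in the SMU code, so treat it as an assumption rather than something stated in this patch.)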

[PATCH 1/2] drm/amdgpu: add sclk/uclk sensor support for navi

2019-07-17 Thread Alex Deucher
Query the metrics table to get the average sclk and uclk.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c 
b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 7f11c641b7b8..146412c012a9 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -1303,6 +1303,35 @@ static int navi10_thermal_get_temperature(struct 
smu_context *smu,
return 0;
 }
 
+static int navi10_get_avg_clocks(struct smu_context *smu,
+enum amd_pp_sensors sensor,
+uint32_t *value)
+{
+   SmuMetrics_t metrics;
+   int ret = 0;
+
+   if (!value)
+   return -EINVAL;
+
+   ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, (void *)&metrics, 
false);
+   if (ret)
+   return ret;
+
+   switch (sensor) {
+   case AMDGPU_PP_SENSOR_GFX_SCLK:
+   *value = metrics.AverageGfxclkFrequency * 100;
+   break;
+   case AMDGPU_PP_SENSOR_GFX_MCLK:
+   *value = metrics.AverageUclkFrequency * 100;
+   break;
+   default:
+   pr_err("Invalid sensor for retrieving avg clock\n");
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
 static int navi10_read_sensor(struct smu_context *smu,
 enum amd_pp_sensors sensor,
 void *data, uint32_t *size)
@@ -1331,6 +1360,11 @@ static int navi10_read_sensor(struct smu_context *smu,
ret = navi10_thermal_get_temperature(smu, sensor, (uint32_t 
*)data);
*size = 4;
break;
+   case AMDGPU_PP_SENSOR_GFX_SCLK:
+   case AMDGPU_PP_SENSOR_GFX_MCLK:
+   ret = navi10_get_avg_clocks(smu, sensor, (uint32_t *)data);
+   *size = 4;
+   break;
default:
return -EINVAL;
}
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10

2019-07-17 Thread Marek Olšák
On Wed., Jul. 17, 2019, 03:15 Christian König, <
ckoenig.leichtzumer...@gmail.com> wrote:

> On 17.07.19 at 02:06, Marek Olšák wrote:
> > From: Marek Olšák 
> >
> > Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
> > separate GDS buffers for each gfx ring.
> >
> > This is a workaround to ensure stability of transform feedback. Shaders
> hang
> > waiting for a GDS instruction (ds_sub, not ds_ordered_count).
> >
> > The disadvantage is that compute IBs might get a different VMID,
> > because now gfx always has GDS and compute doesn't.
>
> So far compute is only using GWS, but I don't think that those
> reservations are a good idea in general.
>
> How severe is the ENOMEM problem you see with using an explicit GWS
> allocation?
>

I guess you mean GDS or OA.

There is no ENOMEM, it just hangs. I don't know why. The shader is waiting
for ds_sub and can't continue, but GDS is idle.

Marek


> Regards,
> Christian.
>
> >
> > Signed-off-by: Marek Olšák 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 
> >   4 files changed, 37 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index 4b514a44184c..cbd55d061b72 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
> >   struct drm_file *filp;
> >   struct amdgpu_ctx   *ctx;
> >
> >   /* chunks */
> >   unsignednchunks;
> >   struct amdgpu_cs_chunk  *chunks;
> >
> >   /* scheduler job object */
> >   struct amdgpu_job   *job;
> >   struct drm_sched_entity *entity;
> > + unsignedhw_ip;
> >
> >   /* buffer objects */
> >   struct ww_acquire_ctx   ticket;
> >   struct amdgpu_bo_list   *bo_list;
> >   struct amdgpu_mn*mn;
> >   struct amdgpu_bo_list_entry vm_pd;
> >   struct list_headvalidated;
> >   struct dma_fence*fence;
> >   uint64_tbytes_moved_threshold;
> >   uint64_tbytes_moved_vis_threshold;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index c691df6f7a57..9125cd69a124 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct
> amdgpu_cs_parser *p,
> >   if (r)
> >   goto error_validate;
> >
> >   amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
> >p->bytes_moved_vis);
> >
> >   gds = p->bo_list->gds_obj;
> >   gws = p->bo_list->gws_obj;
> >   oa = p->bo_list->oa_obj;
> >
> > + if (p->hw_ip == AMDGPU_HW_IP_GFX) {
> > + /* Only gfx10 allocates these. */
> > + if (!gds)
> > + gds = p->adev->gds.gds_gfx_bo;
> > + if (!oa)
> > + oa = p->adev->gds.oa_gfx_bo;
> > + }
> > +
> >   amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> >   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> >
> >   /* Make sure we use the exclusive slot for shared BOs */
> >   if (bo->prime_shared_count)
> >   e->tv.num_shared = 0;
> >   e->bo_va = amdgpu_vm_bo_find(vm, bo);
> >   }
> >
> >   if (gds) {
> > @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device
> *adev,
> >   struct drm_amdgpu_cs_chunk_ib *chunk_ib;
> >   struct drm_sched_entity *entity;
> >
> >   chunk = &p->chunks[i];
> >   ib = &p->job->ibs[j];
> >   chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
> >
> >   if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
> >   continue;
> >
> > + parser->hw_ip = chunk_ib->ip_type;
> > +
> >   if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
> >   (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
> >   if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
> >   if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
> >   ce_preempt++;
> >   else
> >   de_preempt++;
> >   }
> >
> >   /* each GFX command submit allows 0 or 1 IB
> preemptible for CE & DE */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> > index df8a23554831..0943b8e1d97e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> > +++ 

Re: [PATCH] drm/amd/amdgpu: Fix offset for vmid selection in debugfs interface

2019-07-17 Thread Deucher, Alexander
RB,

thanks,

Alex

From: amd-gfx  on behalf of StDenis, Tom 

Sent: Wednesday, July 17, 2019 8:44 AM
To: Koenig, Christian; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amd/amdgpu: Fix offset for vmid selection in debugfs 
interface

Thanks.


Alex can I grab an R-b please?


Cheers,

Tom

On 2019-07-16 7:30 a.m., Christian König wrote:
> On 16.07.19 at 13:24, StDenis, Tom wrote:
>> The register debugfs interface was using the wrong bitmask for vmid
>> selection for GFX_CNTL.
>>
>> Signed-off-by: Tom St Denis 
>
> Acked-by: Christian König 
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> index 87b32873046f..59849ed9797d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> @@ -132,7 +132,7 @@ static int amdgpu_debugfs_process_reg_op(bool
>> read, struct file *f,
>>   me = (*pos & GENMASK_ULL(33, 24)) >> 24;
>>   pipe = (*pos & GENMASK_ULL(43, 34)) >> 34;
>>   queue = (*pos & GENMASK_ULL(53, 44)) >> 44;
>> -vmid = (*pos & GENMASK_ULL(48, 45)) >> 54;
>> +vmid = (*pos & GENMASK_ULL(58, 54)) >> 54;
>> use_ring = 1;
>>   } else {
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
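
To make the fix concrete: the debugfs file offset packs the ring-selection fields into fixed bit ranges, and the bug was that the vmid mask (bits 45-48) did not match the shift by 54, so the old expression always yielded 0. With GENMASK_ULL(h, l) building a mask over bits l..h, the corrected decoding reads (sketch, field layout taken from the quoted code):

	me    = (*pos & GENMASK_ULL(33, 24)) >> 24;	/* bits 24-33 */
	pipe  = (*pos & GENMASK_ULL(43, 34)) >> 34;	/* bits 34-43 */
	queue = (*pos & GENMASK_ULL(53, 44)) >> 44;	/* bits 44-53 */
	vmid  = (*pos & GENMASK_ULL(58, 54)) >> 54;	/* bits 54-58 */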

Re: [PATCH] drm/amd/powerplay: report bootup clock as max supported on dpm disabled

2019-07-17 Thread Deucher, Alexander
Reviewed-by: Alex Deucher 

From: amd-gfx  on behalf of Evan Quan 

Sent: Wednesday, July 17, 2019 5:34 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander; Xu, Feifei; Quan, Evan; Ma, Le
Subject: [PATCH] drm/amd/powerplay: report bootup clock as max supported on dpm 
disabled

With gfxclk or uclk dpm disabled, it's reasonable to report bootup clock
as the max supported.

Change-Id: If8aa7a912e8a34414b0e9c2b46de9b6e316fd9d7
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 27 ++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 5d5664f..d370b09 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -137,12 +137,37 @@ int smu_get_dpm_freq_range(struct smu_context *smu, enum 
smu_clk_type clk_type,
 {
 int ret = 0, clk_id = 0;
 uint32_t param = 0;
+   uint32_t clock_limit;

 if (!min && !max)
 return -EINVAL;

-   if (!smu_clk_dpm_is_enabled(smu, clk_type))
+   if (!smu_clk_dpm_is_enabled(smu, clk_type)) {
+   switch (clk_type) {
+   case SMU_MCLK:
+   case SMU_UCLK:
+   clock_limit = smu->smu_table.boot_values.uclk;
+   break;
+   case SMU_GFXCLK:
+   case SMU_SCLK:
+   clock_limit = smu->smu_table.boot_values.gfxclk;
+   break;
+   case SMU_SOCCLK:
+   clock_limit = smu->smu_table.boot_values.socclk;
+   break;
+   default:
+   clock_limit = 0;
+   break;
+   }
+
+   /* clock in Mhz unit */
+   if (min)
+   *min = clock_limit / 100;
+   if (max)
+   *max = clock_limit / 100;
+
 return 0;
+   }

 mutex_lock(&smu->mutex);
 clk_id = smu_clk_get_index(smu, clk_type);
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 2/2] drm/amd/display: Add drm_audio_component support to amdgpu_dm

2019-07-17 Thread Takashi Iwai
On Wed, 10 Jul 2019 16:36:19 +0200,
Takashi Iwai wrote:
> 
> On Wed, 10 Jul 2019 16:31:40 +0200,
> Kazlauskas, Nicholas wrote:
> > 
> > On 7/10/19 9:48 AM, Takashi Iwai wrote:
> > > On Tue, 09 Jul 2019 18:30:19 +0200,
> > > Nicholas Kazlauskas wrote:
> > >>
> > >> [Why]
> > >> The drm_audio_component can be used to give pin ELD notifications
> > >> directly to the sound driver. This fixes audio endpoints disappearing
> > >> due to missing unsolicited notifications.
> > >>
> > >> [How]
> > >> Send the notification via the audio component whenever we enable or
> > >> disable audio state on a stream. This matches what i915 does with
> > >> their drm_audio_component and what Takashi Iwai's proposed hack for
> > >> radeon/amdpgu did.
> > >>
> > >> This is a bit delayed in when the notification actually occurs, however.
> > >> We wait until after all the programming is complete rather than sending
> > >> the notification mid sequence.
> > >>
> > >> Particular care is needed for the get ELD callback since it can happen
> > >> outside the locking and fencing DRM does for atomic commits.
> > >>
> > >> Cc: Takashi Iwai 
> > >> Cc: Leo Li 
> > >> Cc: Harry Wentland 
> > >> Signed-off-by: Nicholas Kazlauskas 
> > > 
> > > Thanks for the patch, this has been a long-standing TODO for me, too!
> > > 
> > > Do you have the patch for HD-audio part as well?  Or you tested with
> > > my old patch?  Then I'll resurrect my patch set as well.
> > 
> > I've tested this series with and without that patch. The notifications 
> > work and the driver will query back with get_eld as expected.
> 
> OK, then I'm going to prepare a branch including that patch.

FWIW, my latest patches are found in topic/hda-acomp branch on sound
git tree:
  git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git topic/hda-acomp

There have been a slight refactoring in HD-audio side, and now it
supports both AMD and Nvidia audio component support.  The branch
contains the patches for radeon and nouveau support.

Note that the branch isn't permanent one, it'll be rebased later.
Once after I submit and get the review for the HD-audio patches, I'll
create an immutable branch based on 5.3-rc1 containing those HD-audio
patches (but not DRM ones), so that you can pull into your drm tree
for the completeness.


thanks,

Takashi
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
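
For reference, the notification added by the patch boils down to the DRM side invoking the audio component's ELD-notify hook once the stream's audio state has been programmed. A minimal sketch of that call path (the helper name and the -1 pipe argument are illustrative, not taken from the patch):

#include <drm/drm_audio_component.h>

/* Sketch: tell the bound HD-audio driver that a port's ELD/audio state changed. */
static void dm_notify_audio_eld(struct drm_audio_component *acomp, int port)
{
	if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify)
		acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr,
						 port, -1 /* pipe unknown */);
}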

Re: [PATCH v18 11/15] IB/mlx4: untag user pointers in mlx4_get_umem_mr

2019-07-17 Thread Andrey Konovalov
On Wed, Jul 17, 2019 at 1:58 PM Jason Gunthorpe  wrote:
>
> On Wed, Jul 17, 2019 at 01:44:07PM +0200, Andrey Konovalov wrote:
> > On Tue, Jul 16, 2019 at 2:06 PM Jason Gunthorpe  wrote:
> > >
> > > On Tue, Jul 16, 2019 at 12:42:07PM +0200, Andrey Konovalov wrote:
> > > > On Mon, Jul 15, 2019 at 8:05 PM Jason Gunthorpe  wrote:
> > > > >
> > > > > On Mon, Jul 15, 2019 at 06:01:29PM +0200, Andrey Konovalov wrote:
> > > > > > On Mon, Jun 24, 2019 at 7:40 PM Catalin Marinas 
> > > > > >  wrote:
> > > > > > >
> > > > > > > On Mon, Jun 24, 2019 at 04:32:56PM +0200, Andrey Konovalov wrote:
> > > > > > > > This patch is a part of a series that extends kernel ABI to 
> > > > > > > > allow to pass
> > > > > > > > tagged user pointers (with the top byte set to something else 
> > > > > > > > other than
> > > > > > > > 0x00) as syscall arguments.
> > > > > > > >
> > > > > > > > mlx4_get_umem_mr() uses provided user pointers for vma lookups, 
> > > > > > > > which can
> > > > > > > > only be done with untagged pointers.
> > > > > > > >
> > > > > > > > Untag user pointers in this function.
> > > > > > > >
> > > > > > > > Signed-off-by: Andrey Konovalov 
> > > > > > > >  drivers/infiniband/hw/mlx4/mr.c | 7 ---
> > > > > > > >  1 file changed, 4 insertions(+), 3 deletions(-)
> > > > > > >
> > > > > > > Acked-by: Catalin Marinas 
> > > > > > >
> > > > > > > This patch also needs an ack from the infiniband maintainers 
> > > > > > > (Jason).
> > > > > >
> > > > > > Hi Jason,
> > > > > >
> > > > > > Could you take a look and give your acked-by?
> > > > >
> > > > > Oh, I think I did this a long time ago. Still looks OK.
> > > >
> > > > Hm, maybe that was we who lost it. Thanks!
> > > >
> > > > > You will send it?
> > > >
> > > > I will resend the patchset once the merge window is closed, if that's
> > > > what you mean.
> > >
> > > No.. I mean who send it to Linus's tree? ie do you want me to take
> > > this patch into rdma?
> >
> > I think the plan was to merge the whole series through the mm tree.
> > But I don't mind if you want to take this patch into your tree. It's
> > just that this patch doesn't make much sense without the rest of the
> > series.
>
> Generally I prefer if subsystem changes stay in subsystem trees. If
> the patch is good standalone, and the untag API has already been
> merged, this is a better strategy.

OK, feel free to take this into your tree, this works for me.

>
> Jason


Re: [PATCH] drm/amd/amdgpu: Fix offset for vmid selection in debugfs interface

2019-07-17 Thread StDenis, Tom
Thanks.


Alex can I grab an R-b please?


Cheers,

Tom

On 2019-07-16 7:30 a.m., Christian König wrote:
> On 16.07.19 at 13:24, StDenis, Tom wrote:
>> The register debugfs interface was using the wrong bitmask for vmid
>> selection for GFX_CNTL.
>>
>> Signed-off-by: Tom St Denis 
>
> Acked-by: Christian König 
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> index 87b32873046f..59849ed9797d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> @@ -132,7 +132,7 @@ static int amdgpu_debugfs_process_reg_op(bool 
>> read, struct file *f,
>>   me = (*pos & GENMASK_ULL(33, 24)) >> 24;
>>   pipe = (*pos & GENMASK_ULL(43, 34)) >> 34;
>>   queue = (*pos & GENMASK_ULL(53, 44)) >> 44;
>> -    vmid = (*pos & GENMASK_ULL(48, 45)) >> 54;
>> +    vmid = (*pos & GENMASK_ULL(58, 54)) >> 54;
>>     use_ring = 1;
>>   } else {
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v18 11/15] IB/mlx4: untag user pointers in mlx4_get_umem_mr

2019-07-17 Thread Jason Gunthorpe
On Wed, Jul 17, 2019 at 01:44:07PM +0200, Andrey Konovalov wrote:
> On Tue, Jul 16, 2019 at 2:06 PM Jason Gunthorpe  wrote:
> >
> > On Tue, Jul 16, 2019 at 12:42:07PM +0200, Andrey Konovalov wrote:
> > > On Mon, Jul 15, 2019 at 8:05 PM Jason Gunthorpe  wrote:
> > > >
> > > > On Mon, Jul 15, 2019 at 06:01:29PM +0200, Andrey Konovalov wrote:
> > > > > On Mon, Jun 24, 2019 at 7:40 PM Catalin Marinas 
> > > > >  wrote:
> > > > > >
> > > > > > On Mon, Jun 24, 2019 at 04:32:56PM +0200, Andrey Konovalov wrote:
> > > > > > > This patch is a part of a series that extends kernel ABI to allow 
> > > > > > > to pass
> > > > > > > tagged user pointers (with the top byte set to something else 
> > > > > > > other than
> > > > > > > 0x00) as syscall arguments.
> > > > > > >
> > > > > > > mlx4_get_umem_mr() uses provided user pointers for vma lookups, 
> > > > > > > which can
> > > > > > > only be done with untagged pointers.
> > > > > > >
> > > > > > > Untag user pointers in this function.
> > > > > > >
> > > > > > > Signed-off-by: Andrey Konovalov 
> > > > > > >  drivers/infiniband/hw/mlx4/mr.c | 7 ---
> > > > > > >  1 file changed, 4 insertions(+), 3 deletions(-)
> > > > > >
> > > > > > Acked-by: Catalin Marinas 
> > > > > >
> > > > > > This patch also needs an ack from the infiniband maintainers 
> > > > > > (Jason).
> > > > >
> > > > > Hi Jason,
> > > > >
> > > > > Could you take a look and give your acked-by?
> > > >
> > > > Oh, I think I did this a long time ago. Still looks OK.
> > >
> > > Hm, maybe that was we who lost it. Thanks!
> > >
> > > > You will send it?
> > >
> > > I will resend the patchset once the merge window is closed, if that's
> > > what you mean.
> >
> > No.. I mean who send it to Linus's tree? ie do you want me to take
> > this patch into rdma?
> 
> I think the plan was to merge the whole series through the mm tree.
> But I don't mind if you want to take this patch into your tree. It's
> just that this patch doesn't make much sense without the rest of the
> series.

Generally I prefer if subsystem changes stay in subsystem trees. If
the patch is good standalone, and the untag API has already been
merged, this is a better strategy.

Jason


Re: [PATCH v18 08/15] userfaultfd: untag user pointers

2019-07-17 Thread Andrey Konovalov
On Wed, Jul 17, 2019 at 1:09 PM Mike Rapoport  wrote:
>
> On Mon, Jun 24, 2019 at 06:51:21PM +0100, Catalin Marinas wrote:
> > On Mon, Jun 24, 2019 at 04:32:53PM +0200, Andrey Konovalov wrote:
> > > This patch is a part of a series that extends kernel ABI to allow to pass
> > > tagged user pointers (with the top byte set to something else other than
> > > 0x00) as syscall arguments.
> > >
> > > userfaultfd code uses provided user pointers for vma lookups, which can
> > > only be done with untagged pointers.
> > >
> > > Untag user pointers in validate_range().
> > >
> > > Reviewed-by: Vincenzo Frascino 
> > > Reviewed-by: Catalin Marinas 
> > > Reviewed-by: Kees Cook 
> > > Signed-off-by: Andrey Konovalov 
> > > ---
> > >  fs/userfaultfd.c | 22 --
> > >  1 file changed, 12 insertions(+), 10 deletions(-)
> >
> > Same here, it needs an ack from Al Viro.
>
> The userfault patches usually go via -mm tree, not sure if Al looks at them :)

Ah, OK, I guess than Andrew will take a look at them when merging.

>
> FWIW, you can add
>
> Reviewed-by: Mike Rapoport 

I will, thanks!

>
> > > diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> > > index ae0b8b5f69e6..c2be36a168ca 100644
> > > --- a/fs/userfaultfd.c
> > > +++ b/fs/userfaultfd.c
> > > @@ -1261,21 +1261,23 @@ static __always_inline void wake_userfault(struct 
> > > userfaultfd_ctx *ctx,
> > >  }
> > >
> > >  static __always_inline int validate_range(struct mm_struct *mm,
> > > - __u64 start, __u64 len)
> > > + __u64 *start, __u64 len)
> > >  {
> > > __u64 task_size = mm->task_size;
> > >
> > > -   if (start & ~PAGE_MASK)
> > > +   *start = untagged_addr(*start);
> > > +
> > > +   if (*start & ~PAGE_MASK)
> > > return -EINVAL;
> > > if (len & ~PAGE_MASK)
> > > return -EINVAL;
> > > if (!len)
> > > return -EINVAL;
> > > -   if (start < mmap_min_addr)
> > > +   if (*start < mmap_min_addr)
> > > return -EINVAL;
> > > -   if (start >= task_size)
> > > +   if (*start >= task_size)
> > > return -EINVAL;
> > > -   if (len > task_size - start)
> > > +   if (len > task_size - *start)
> > > return -EINVAL;
> > > return 0;
> > >  }
> > > @@ -1325,7 +1327,7 @@ static int userfaultfd_register(struct 
> > > userfaultfd_ctx *ctx,
> > > goto out;
> > > }
> > >
> > > -   ret = validate_range(mm, uffdio_register.range.start,
> > > +   ret = validate_range(mm, &uffdio_register.range.start,
> > >  uffdio_register.range.len);
> > > if (ret)
> > > goto out;
> > > @@ -1514,7 +1516,7 @@ static int userfaultfd_unregister(struct 
> > > userfaultfd_ctx *ctx,
> > > if (copy_from_user(&uffdio_unregister, buf, 
> > > sizeof(uffdio_unregister)))
> > > goto out;
> > >
> > > -   ret = validate_range(mm, uffdio_unregister.start,
> > > +   ret = validate_range(mm, &uffdio_unregister.start,
> > >  uffdio_unregister.len);
> > > if (ret)
> > > goto out;
> > > @@ -1665,7 +1667,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx 
> > > *ctx,
> > > if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
> > > goto out;
> > >
> > > -   ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
> > > +   ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
> > > if (ret)
> > > goto out;
> > >
> > > @@ -1705,7 +1707,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx 
> > > *ctx,
> > >sizeof(uffdio_copy)-sizeof(__s64)))
> > > goto out;
> > >
> > > -   ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
> > > +   ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
> > > if (ret)
> > > goto out;
> > > /*
> > > @@ -1761,7 +1763,7 @@ static int userfaultfd_zeropage(struct 
> > > userfaultfd_ctx *ctx,
> > >sizeof(uffdio_zeropage)-sizeof(__s64)))
> > > goto out;
> > >
> > > -   ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
> > > +   ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
> > >  uffdio_zeropage.range.len);
> > > if (ret)
> > > goto out;
> > > --
> > > 2.22.0.410.gd8fdbe21b5-goog
>
> --
> Sincerely yours,
> Mike.
>


Re: [PATCH v18 11/15] IB/mlx4: untag user pointers in mlx4_get_umem_mr

2019-07-17 Thread Andrey Konovalov
On Tue, Jul 16, 2019 at 2:06 PM Jason Gunthorpe  wrote:
>
> On Tue, Jul 16, 2019 at 12:42:07PM +0200, Andrey Konovalov wrote:
> > On Mon, Jul 15, 2019 at 8:05 PM Jason Gunthorpe  wrote:
> > >
> > > On Mon, Jul 15, 2019 at 06:01:29PM +0200, Andrey Konovalov wrote:
> > > > On Mon, Jun 24, 2019 at 7:40 PM Catalin Marinas 
> > > >  wrote:
> > > > >
> > > > > On Mon, Jun 24, 2019 at 04:32:56PM +0200, Andrey Konovalov wrote:
> > > > > > This patch is a part of a series that extends kernel ABI to allow 
> > > > > > to pass
> > > > > > tagged user pointers (with the top byte set to something else other 
> > > > > > than
> > > > > > 0x00) as syscall arguments.
> > > > > >
> > > > > > mlx4_get_umem_mr() uses provided user pointers for vma lookups, 
> > > > > > which can
> > > > > > only be done with untagged pointers.
> > > > > >
> > > > > > Untag user pointers in this function.
> > > > > >
> > > > > > Signed-off-by: Andrey Konovalov 
> > > > > >  drivers/infiniband/hw/mlx4/mr.c | 7 ---
> > > > > >  1 file changed, 4 insertions(+), 3 deletions(-)
> > > > >
> > > > > Acked-by: Catalin Marinas 
> > > > >
> > > > > This patch also needs an ack from the infiniband maintainers (Jason).
> > > >
> > > > Hi Jason,
> > > >
> > > > Could you take a look and give your acked-by?
> > >
> > > Oh, I think I did this a long time ago. Still looks OK.
> >
> > Hm, maybe that was we who lost it. Thanks!
> >
> > > You will send it?
> >
> > I will resend the patchset once the merge window is closed, if that's
> > what you mean.
>
> No.. I mean who send it to Linus's tree? ie do you want me to take
> this patch into rdma?

I think the plan was to merge the whole series through the mm tree.
But I don't mind if you want to take this patch into your tree. It's
just that this patch doesn't make much sense without the rest of the
series.

>
> Jason


Re: [PATCH v18 11/15] IB/mlx4: untag user pointers in mlx4_get_umem_mr

2019-07-17 Thread Andrey Konovalov
On Tue, Jul 16, 2019 at 2:06 PM Jason Gunthorpe  wrote:
>
> On Tue, Jul 16, 2019 at 12:42:07PM +0200, Andrey Konovalov wrote:
> > On Mon, Jul 15, 2019 at 8:05 PM Jason Gunthorpe  wrote:
> > >
> > > On Mon, Jul 15, 2019 at 06:01:29PM +0200, Andrey Konovalov wrote:
> > > > On Mon, Jun 24, 2019 at 7:40 PM Catalin Marinas 
> > > >  wrote:
> > > > >
> > > > > On Mon, Jun 24, 2019 at 04:32:56PM +0200, Andrey Konovalov wrote:
> > > > > > This patch is a part of a series that extends kernel ABI to allow 
> > > > > > to pass
> > > > > > tagged user pointers (with the top byte set to something else other 
> > > > > > than
> > > > > > 0x00) as syscall arguments.
> > > > > >
> > > > > > mlx4_get_umem_mr() uses provided user pointers for vma lookups, 
> > > > > > which can
> > > > > > only be done with untagged pointers.
> > > > > >
> > > > > > Untag user pointers in this function.
> > > > > >
> > > > > > Signed-off-by: Andrey Konovalov 
> > > > > >  drivers/infiniband/hw/mlx4/mr.c | 7 ---
> > > > > >  1 file changed, 4 insertions(+), 3 deletions(-)
> > > > >
> > > > > Acked-by: Catalin Marinas 
> > > > >
> > > > > This patch also needs an ack from the infiniband maintainers (Jason).
> > > >
> > > > Hi Jason,
> > > >
> > > > Could you take a look and give your acked-by?
> > >
> > > Oh, I think I did this a long time ago. Still looks OK.
> >
> > Hm, maybe that was we who lost it. Thanks!
> >
> > > You will send it?
> >
> > I will resend the patchset once the merge window is closed, if that's
> > what you mean.
>
> No.. I mean who send it to Linus's tree? ie do you want me to take
> this patch into rdma?
>
> Jason


Re: [PATCH v18 08/15] userfaultfd: untag user pointers

2019-07-17 Thread Mike Rapoport
On Mon, Jun 24, 2019 at 06:51:21PM +0100, Catalin Marinas wrote:
> On Mon, Jun 24, 2019 at 04:32:53PM +0200, Andrey Konovalov wrote:
> > This patch is a part of a series that extends kernel ABI to allow to pass
> > tagged user pointers (with the top byte set to something else other than
> > 0x00) as syscall arguments.
> > 
> > userfaultfd code uses provided user pointers for vma lookups, which can
> > only be done with untagged pointers.
> > 
> > Untag user pointers in validate_range().
> > 
> > Reviewed-by: Vincenzo Frascino 
> > Reviewed-by: Catalin Marinas 
> > Reviewed-by: Kees Cook 
> > Signed-off-by: Andrey Konovalov 
> > ---
> >  fs/userfaultfd.c | 22 --
> >  1 file changed, 12 insertions(+), 10 deletions(-)
> 
> Same here, it needs an ack from Al Viro.

The userfault patches usually go via -mm tree, not sure if Al looks at them :) 
 
FWIW, you can add 

Reviewed-by: Mike Rapoport 

> > diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> > index ae0b8b5f69e6..c2be36a168ca 100644
> > --- a/fs/userfaultfd.c
> > +++ b/fs/userfaultfd.c
> > @@ -1261,21 +1261,23 @@ static __always_inline void wake_userfault(struct 
> > userfaultfd_ctx *ctx,
> >  }
> >  
> >  static __always_inline int validate_range(struct mm_struct *mm,
> > - __u64 start, __u64 len)
> > + __u64 *start, __u64 len)
> >  {
> > __u64 task_size = mm->task_size;
> >  
> > -   if (start & ~PAGE_MASK)
> > +   *start = untagged_addr(*start);
> > +
> > +   if (*start & ~PAGE_MASK)
> > return -EINVAL;
> > if (len & ~PAGE_MASK)
> > return -EINVAL;
> > if (!len)
> > return -EINVAL;
> > -   if (start < mmap_min_addr)
> > +   if (*start < mmap_min_addr)
> > return -EINVAL;
> > -   if (start >= task_size)
> > +   if (*start >= task_size)
> > return -EINVAL;
> > -   if (len > task_size - start)
> > +   if (len > task_size - *start)
> > return -EINVAL;
> > return 0;
> >  }
> > @@ -1325,7 +1327,7 @@ static int userfaultfd_register(struct 
> > userfaultfd_ctx *ctx,
> > goto out;
> > }
> >  
> > -   ret = validate_range(mm, uffdio_register.range.start,
> > +   ret = validate_range(mm, &uffdio_register.range.start,
> >  uffdio_register.range.len);
> > if (ret)
> > goto out;
> > @@ -1514,7 +1516,7 @@ static int userfaultfd_unregister(struct 
> > userfaultfd_ctx *ctx,
> > if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
> > goto out;
> >  
> > -   ret = validate_range(mm, uffdio_unregister.start,
> > +   ret = validate_range(mm, &uffdio_unregister.start,
> >  uffdio_unregister.len);
> > if (ret)
> > goto out;
> > @@ -1665,7 +1667,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx 
> > *ctx,
> > if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
> > goto out;
> >  
> > -   ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
> > +   ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
> > if (ret)
> > goto out;
> >  
> > @@ -1705,7 +1707,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx 
> > *ctx,
> >sizeof(uffdio_copy)-sizeof(__s64)))
> > goto out;
> >  
> > -   ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
> > +   ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
> > if (ret)
> > goto out;
> > /*
> > @@ -1761,7 +1763,7 @@ static int userfaultfd_zeropage(struct 
> > userfaultfd_ctx *ctx,
> >sizeof(uffdio_zeropage)-sizeof(__s64)))
> > goto out;
> >  
> > -   ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
> > +   ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
> >  uffdio_zeropage.range.len);
> > if (ret)
> > goto out;
> > -- 
> > 2.22.0.410.gd8fdbe21b5-goog

-- 
Sincerely yours,
Mike.
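
As background for the validate_range() change discussed above: on arm64 with the tagged-address ABI, untagged_addr() strips the tag from the top byte before the usual PAGE_MASK/mmap_min_addr/task_size checks run; for example, a tagged pointer 0x5a00ffff98765000 becomes 0x0000ffff98765000. Passing start by pointer lets the callers keep using the untagged value afterwards. (The exact untagging behaviour is arch-specific and defined elsewhere in the series, so treat the example as illustrative.)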



[PATCH] drm/amd/powerplay: report bootup clock as max supported on dpm disabled

2019-07-17 Thread Evan Quan
With gfxclk or uclk dpm disabled, it's reasonable to report bootup clock
as the max supported.

Change-Id: If8aa7a912e8a34414b0e9c2b46de9b6e316fd9d7
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 27 ++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 5d5664f..d370b09 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -137,12 +137,37 @@ int smu_get_dpm_freq_range(struct smu_context *smu, enum 
smu_clk_type clk_type,
 {
int ret = 0, clk_id = 0;
uint32_t param = 0;
+   uint32_t clock_limit;
 
if (!min && !max)
return -EINVAL;
 
-   if (!smu_clk_dpm_is_enabled(smu, clk_type))
+   if (!smu_clk_dpm_is_enabled(smu, clk_type)) {
+   switch (clk_type) {
+   case SMU_MCLK:
+   case SMU_UCLK:
+   clock_limit = smu->smu_table.boot_values.uclk;
+   break;
+   case SMU_GFXCLK:
+   case SMU_SCLK:
+   clock_limit = smu->smu_table.boot_values.gfxclk;
+   break;
+   case SMU_SOCCLK:
+   clock_limit = smu->smu_table.boot_values.socclk;
+   break;
+   default:
+   clock_limit = 0;
+   break;
+   }
+
+   /* clock in Mhz unit */
+   if (min)
+   *min = clock_limit / 100;
+   if (max)
+   *max = clock_limit / 100;
+
return 0;
+   }
 
mutex_lock(&smu->mutex);
clk_id = smu_clk_get_index(smu, clk_type);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends

2019-07-17 Thread Christian König

On 16.07.19 at 18:40, Kuehling, Felix wrote:

On 2019-07-16 9:36 a.m., Christian König wrote:

On 02.07.19 at 21:35, Kuehling, Felix wrote:

This assumes that page tables are resident when a page fault is handled.

Yeah, that is correct. I also haven't completely figured out how we
can prevent the VM from being destroyed while handling the fault.

There are other cases I had in mind: Page tables can be evicted. For KFD
processes which can be preempted with CWSR, it's possible that a wave
that caused a page fault is preempted due to a page table eviction. That
means, by the time the page fault is handled, the page table is no
longer resident.


This is a corner case we can handle later on. As long as the VM is still 
alive just allocating page tables again should be sufficient for this.



I mean it's perfectly possible that the process is killed while faults
are still in the pipeline.


I think it's possible that a page table gets evicted while a page fault
is pending. Maybe not with graphics, but definitely with compute where
waves can be preempted while waiting for a page fault. In that case the
direct access would break.

Even with graphics I think it's still possible that new page tables need
to be allocated to handle a page fault. When that happens, you need to
wait for fences to let new page tables be validated and initialized.

Yeah, the problem here is that when you wait on fences which in turn
depend on your submission your end up in a deadlock.


I think this implies that you have amdgpu_cs fences attached to page
tables. I believe this is the fundamental issue that needs to be fixed.


We still need this cause even with page faults the root PD can't be evicted.

What we can probably do is to split up the PDs/PTs into the root PD and 
everything else.



If you want to manage page tables in page fault interrupt handlers, you
can't have command submission fences attached to your page tables. You
can allow page tables to be evicted while the command submission is in
progress. A page fault will fault it back in if it's needed. If you
eliminate command submission fences on the page tables, you remove the
potential for deadlocks.


No, there is still a huge potential for deadlocks here.

Additional to the root PDs you can have a MM submission which needs to 
wait for a compute submission to be finished.


If you then make your new allocation depend on the MM submission to be 
finished you have a classical circle dependency and a deadlock at hand.


The only way around that is to allocate the new page tables with the 
no_wait_gpu flag set and so avoid having any dependencies on ongoing 
operations.
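
(Illustration, not taken from the patch set: with TTM this amounts to validating the freshly allocated page-table BO with a context like

	struct ttm_operation_ctx ctx = {
		.interruptible = false,
		.no_wait_gpu = true,	/* fail instead of waiting on other submissions' fences */
	};
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

where "bo" stands for the new page-table amdgpu_bo; the allocation then either succeeds immediately or returns an error the fault handler can retry later.)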



But you do need fences on page tables related to the allocation and
migration of page tables themselves. And your page table updates must
wait for those fences. Therefore I think the whole approach of direct
submission for page table updates is fundamentally broken.


For the reasons noted above you can't have any fences related to the 
allocation and migration on page tables.


What can happen later on is that you need to wait for a BO move to 
finish before we can update the page tables.


But I think that this is a completely different operation which 
shouldn't be handled in the fault handler.


In those cases the fault handler would just silence the retry fault and 
continue crunching on other faults.


As soon as the BO is moved in place we should update the page tables 
again using the normal SDMA scheduler entity.



Patch #2 deals with updating page directories. That pretty much implies
that page tables have moved or been reallocated. Wouldn't that be a
counter-indication for using direct page table updates? In other words,
if you need to update page directories, a direct page table updates is
unsafe by definition.

No, page tables are allocated because this is for handling invalid
faults.

E.g. we get a fault for an address where nothing is mapped and just
want to silence it.

That's the scenario you have in mind now. But in the future we'll get
page faults for addresses that have a valid VMA, and we want to use HMM
to map that into the GPU page table.


Yeah, but we will still need to use the same infrastructure.

Avoiding waiting on ongoing operations is mandatory or otherwise we will 
immediately run into deadlocks.


Regards,
Christian.



Regards,
    Felix



I will try to implement something to at least avoid accessing a
destructed VM.

Regards,
Christian.


Regards,
     Felix

On 2019-06-28 8:18 a.m., Christian König wrote:

This allows us to update page tables directly while in a page fault.

Signed-off-by: Christian König
---
    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  5 
    drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  4 +++
    drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 29
+
    3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 

kernel panic: stack is corrupted in pointer

2019-07-17 Thread syzbot

Hello,

syzbot found the following crash on:

HEAD commit:    1438cde7 Add linux-next specific files for 20190716
git tree:   linux-next
console output: https://syzkaller.appspot.com/x/log.txt?x=1398805860
kernel config:  https://syzkaller.appspot.com/x/.config?x=3430a151e1452331
dashboard link: https://syzkaller.appspot.com/bug?extid=79f5f028005a77ecb6bb
compiler:   gcc (GCC) 9.0.0 20181231 (experimental)
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=111fc8afa0

The bug was bisected to:

commit 96a5d8d4915f3e241ebb48d5decdd110ab9c7dcf
Author: Leo Liu 
Date:   Fri Jul 13 15:26:28 2018 +

drm/amdgpu: Make sure IB tests flushed after IP resume

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=14a4620060
final crash:https://syzkaller.appspot.com/x/report.txt?x=16a4620060
console output: https://syzkaller.appspot.com/x/log.txt?x=12a4620060

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+79f5f028005a77ecb...@syzkaller.appspotmail.com
Fixes: 96a5d8d4915f ("drm/amdgpu: Make sure IB tests flushed after IP  
resume")


Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in:  
pointer+0x702/0x750 lib/vsprintf.c:2187

Shutting down cpus with NMI
Kernel Offset: disabled


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkal...@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
For information about bisection process see: https://goo.gl/tpsmEJ#bisection
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches


Re: [PATCH 2/3] drm: introduce DRIVER_FORCE_AUTH

2019-07-17 Thread Koenig, Christian
Hi Emil,

Sorry for the delay, finally coming back to this after my vacation.

On 03.07.19 at 17:14, Emil Velikov wrote:
> On Wed, 3 Jul 2019 at 15:58, Koenig, Christian  
> wrote:
>> On 03.07.2019 16:51, Emil Velikov wrote:
>>
>> On Wed, 3 Jul 2019 at 15:33, Koenig, Christian  
>> wrote:
>>> On 03.07.2019 16:00, Emil Velikov wrote:
>>>
>>> On Wed, 3 Jul 2019 at 14:48, Koenig, Christian  
>>> wrote:
 Well this is still a NAK.

 As stated previously please just don't remove DRM_AUTH and keep the 
 functionality as it is.

>>> AFAICT nobody was in favour of your suggestion to remove DRM_AUTH from
>>> the handle to/from fd ioctls.
>>> Thus this seems like the second best option.
>>>
>>>
>>> Well just keep it. As I said please don't change anything here.
>>>
>>> Dropping DRM_AUTH from the driver IOCTLs was sufficient to work around the 
>>> problems at hand, as far as I know.
>>>
>> We also need the DRM_AUTH for handle to/from fd ones. Mesa drivers use
>> those ioctls.
>>
>>
>> Yeah, but only for importing/exporting things.
>>
>> And in those cases we either already gave render nodes or correctly 
>> authenticated primary nodes.
>>
>> So no need to change anything here as far as I see.
>>
> Not quite. When working with the primary node we have the following scenarios:
>   - handle to fd -> pass fd to other APIs - gbm, opencl, vdpau, etc
>   - handle to fd -> fd to handle - use it internally

Yeah, but this would once more mean that we expose the same 
functionality on the primary node that we do on the render node.

And that is exactly what I want to prevent, because I think it is a very 
bad idea and will result in basically deprecating the render node.

For the problem at hand it was sufficient to drop the DRM_AUTH flag from 
the driver IOCTLs and I don't see a reason why we should go further than 
this.

Please just completely drop this whole approach,
Christian.

>
> -Emil

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: HMM related use-after-free with amdgpu

2019-07-17 Thread Michel Dänzer
On 2019-07-17 12:10 a.m., Kuehling, Felix wrote:
> On 2019-07-16 1:04 p.m., Michel Dänzer wrote:
>> On 2019-07-16 6:35 p.m., Jason Gunthorpe wrote:
>>> On Tue, Jul 16, 2019 at 06:31:09PM +0200, Michel Dänzer wrote:
 On 2019-07-15 7:25 p.m., Jason Gunthorpe wrote:
> On Mon, Jul 15, 2019 at 06:51:06PM +0200, Michel Dänzer wrote:
>> With a KASAN enabled kernel built from amd-staging-drm-next, the
>> attached use-after-free is pretty reliably detected during a piglit gpu 
>> run.
> Does this branch you are testing have the hmm.git merged? I think from
> the name it does not?
 Indeed, no.


> Use after free's of this nature were something that was fixed in
> hmm.git..
>
> I don't see an obvious way you can hit something like this with the
> new code arrangement..
 I tried merging the hmm-devmem-cleanup.4 changes[0] into my 5.2.y +
 drm-next for 5.3 kernel. While the result didn't hit the problem, all
 GL_AMD_pinned_memory piglit tests failed, so I suspect the problem was
 simply avoided by not actually hitting the HMM related functionality.

 It's possible that I made a mistake in merging the changes, or that I
 missed some other required changes. But it's also possible that the HMM
 changes broke the corresponding user-pointer functionality in amdgpu.
>>> Not sure, this was all Tested by the AMD team so it should work, I
>>> hope.
>> It can't, due to the issue pointed out by Linus in the "drm pull for
>> 5.3-rc1" thread: DRM_AMDGPU_USERPTR still depends on ARCH_HAS_HMM, which
>> no longer exists, so it can't be enabled.
> 
> As far as I can tell, Linus fixed this up in his merge commit 
> be8454afc50f43016ca8b6130d9673bdd0bd56ec.

Ah! That's the piece I was missing, since I had merged the drm-next
changes before Linus did. Thanks Felix.

Note that AFAICT it was basically luck that Linus noticed this and fixed
it up. It would be better not to push our luck like this. :)


-- 
Earthling Michel Dänzer   |  https://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10

2019-07-17 Thread Christian König

On 17.07.19 at 02:06, Marek Olšák wrote:

From: Marek Olšák 

Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
separate GDS buffers for each gfx ring.

This is a workaround to ensure stability of transform feedback. Shaders hang
waiting for a GDS instruction (ds_sub, not ds_ordered_count).

The disadvantage is that compute IBs might get a different VMID,
because now gfx always has GDS and compute doesn't.


So far compute is only using GWS, but I don't think that those 
reservations are a good idea in general.


How severe is the ENOMEM problem you see with using an explicit GWS 
allocation?


Regards,
Christian.



Signed-off-by: Marek Olšák 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 
  4 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4b514a44184c..cbd55d061b72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
 	struct drm_file		*filp;
 	struct amdgpu_ctx	*ctx;
 
 	/* chunks */
 	unsigned		nchunks;
 	struct amdgpu_cs_chunk	*chunks;
 
 	/* scheduler job object */
 	struct amdgpu_job	*job;
 	struct drm_sched_entity	*entity;
+	unsigned		hw_ip;
 
 	/* buffer objects */
 	struct ww_acquire_ctx	ticket;
 	struct amdgpu_bo_list	*bo_list;
 	struct amdgpu_mn	*mn;
 	struct amdgpu_bo_list_entry	vm_pd;
 	struct list_head	validated;
 	struct dma_fence	*fence;
 	uint64_t		bytes_moved_threshold;
 	uint64_t		bytes_moved_vis_threshold;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c691df6f7a57..9125cd69a124 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_validate;
 
 	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 				     p->bytes_moved_vis);
 
 	gds = p->bo_list->gds_obj;
 	gws = p->bo_list->gws_obj;
 	oa = p->bo_list->oa_obj;
 
+	if (p->hw_ip == AMDGPU_HW_IP_GFX) {
+		/* Only gfx10 allocates these. */
+		if (!gds)
+			gds = p->adev->gds.gds_gfx_bo;
+		if (!oa)
+			oa = p->adev->gds.oa_gfx_bo;
+	}
+
 	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
 		/* Make sure we use the exclusive slot for shared BOs */
 		if (bo->prime_shared_count)
 			e->tv.num_shared = 0;
 		e->bo_va = amdgpu_vm_bo_find(vm, bo);
 	}
 
 	if (gds) {

@@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
 		struct drm_sched_entity *entity;
 
 		chunk = &parser->chunks[i];
 		ib = &parser->job->ibs[j];
 		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
 
 		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 			continue;
 
+		parser->hw_ip = chunk_ib->ip_type;
+
 		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
 		    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
 			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
 				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
 					ce_preempt++;
 				else
 					de_preempt++;
 			}
 
 			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index df8a23554831..0943b8e1d97e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -26,20 +26,26 @@
 
 struct amdgpu_ring;
 struct amdgpu_bo;
 
 struct amdgpu_gds {
 	uint32_t gds_size;
 	uint32_t gws_size;
 	uint32_t oa_size;
 	uint32_t gds_compute_max_wave_id;
 	uint32_t vgt_gs_max_wave_id;
+
+	/* Reserved OA for the gfx ring. (gfx10) */
+	uint32_t gds_gfx_reservation_size;
+	uint32_t oa_gfx_reservation_size;
+	struct amdgpu_bo *gds_gfx_bo;
+	struct amdgpu_bo *oa_gfx_bo;
 };
 
 struct amdgpu_gds_reg_offset {
 	uint32_t	mem_base;
 	uint32_t	mem_size;
 	uint32_t	gws;

Re: [PATCH v3 2/3] drm: plumb attaching dev thru to prime_pin/unpin

2019-07-17 Thread Koenig, Christian
On 16.07.19 at 23:37, Rob Clark wrote:
> From: Rob Clark 
>
> Needed in the following patch for cache operations.

Well have you seen that those callbacks are deprecated?
>* Deprecated hook in favour of &drm_gem_object_funcs.pin.

>* Deprecated hook in favour of &drm_gem_object_funcs.unpin.
>

I would rather say if you want to extend something it would be better to 
switch over to the per GEM object functions first.
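
For illustration, a rough sketch of what using the per GEM object functions
looks like (drm_gem_object_funcs is the existing DRM structure; the example_*
names are hypothetical and not taken from this series):

#include <drm/drm_gem.h>

/* Sketch only: per-object pin/unpin callbacks instead of driver-wide hooks. */
static int example_gem_pin(struct drm_gem_object *obj)
{
	/* pin the backing storage so an importer can access it */
	return 0;
}

static void example_gem_unpin(struct drm_gem_object *obj)
{
	/* drop the pin taken in example_gem_pin() */
}

static const struct drm_gem_object_funcs example_gem_funcs = {
	.pin = example_gem_pin,
	.unpin = example_gem_unpin,
};

/* Each GEM object would then set obj->funcs = &example_gem_funcs at creation. */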

Regards,
Christian.

>
> Signed-off-by: Rob Clark 
> ---
> v3: rebased on drm-tip
>
>   drivers/gpu/drm/drm_gem.c   | 8 
>   drivers/gpu/drm/drm_internal.h  | 4 ++--
>   drivers/gpu/drm/drm_prime.c | 4 ++--
>   drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 4 ++--
>   drivers/gpu/drm/msm/msm_drv.h   | 4 ++--
>   drivers/gpu/drm/msm/msm_gem_prime.c | 4 ++--
>   drivers/gpu/drm/nouveau/nouveau_gem.h   | 4 ++--
>   drivers/gpu/drm/nouveau/nouveau_prime.c | 4 ++--
>   drivers/gpu/drm/qxl/qxl_prime.c | 4 ++--
>   drivers/gpu/drm/radeon/radeon_prime.c   | 4 ++--
>   drivers/gpu/drm/vgem/vgem_drv.c | 4 ++--
>   include/drm/drm_drv.h   | 5 ++---
>   12 files changed, 26 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
> index 84689ccae885..af2549c45027 100644
> --- a/drivers/gpu/drm/drm_gem.c
> +++ b/drivers/gpu/drm/drm_gem.c
> @@ -1215,22 +1215,22 @@ void drm_gem_print_info(struct drm_printer *p, 
> unsigned int indent,
>   obj->dev->driver->gem_print_info(p, indent, obj);
>   }
>   
> -int drm_gem_pin(struct drm_gem_object *obj)
> +int drm_gem_pin(struct drm_gem_object *obj, struct device *dev)
>   {
>   if (obj->funcs && obj->funcs->pin)
>   return obj->funcs->pin(obj);
>   else if (obj->dev->driver->gem_prime_pin)
> - return obj->dev->driver->gem_prime_pin(obj);
> + return obj->dev->driver->gem_prime_pin(obj, dev);
>   else
>   return 0;
>   }
>   
> -void drm_gem_unpin(struct drm_gem_object *obj)
> +void drm_gem_unpin(struct drm_gem_object *obj, struct device *dev)
>   {
>   if (obj->funcs && obj->funcs->unpin)
>   obj->funcs->unpin(obj);
>   else if (obj->dev->driver->gem_prime_unpin)
> - obj->dev->driver->gem_prime_unpin(obj);
> + obj->dev->driver->gem_prime_unpin(obj, dev);
>   }
>   
>   void *drm_gem_vmap(struct drm_gem_object *obj)
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index 51a2055c8f18..e64090373e3a 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -133,8 +133,8 @@ void drm_gem_release(struct drm_device *dev, struct 
> drm_file *file_private);
>   void drm_gem_print_info(struct drm_printer *p, unsigned int indent,
>   const struct drm_gem_object *obj);
>   
> -int drm_gem_pin(struct drm_gem_object *obj);
> -void drm_gem_unpin(struct drm_gem_object *obj);
> +int drm_gem_pin(struct drm_gem_object *obj, struct device *dev);
> +void drm_gem_unpin(struct drm_gem_object *obj, struct device *dev);
>   void *drm_gem_vmap(struct drm_gem_object *obj);
>   void drm_gem_vunmap(struct drm_gem_object *obj, void *vaddr);
>   
> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
> index 189d980402ad..126860432ff9 100644
> --- a/drivers/gpu/drm/drm_prime.c
> +++ b/drivers/gpu/drm/drm_prime.c
> @@ -575,7 +575,7 @@ int drm_gem_map_attach(struct dma_buf *dma_buf,
>   {
>   struct drm_gem_object *obj = dma_buf->priv;
>   
> - return drm_gem_pin(obj);
> + return drm_gem_pin(obj, attach->dev);
>   }
>   EXPORT_SYMBOL(drm_gem_map_attach);
>   
> @@ -593,7 +593,7 @@ void drm_gem_map_detach(struct dma_buf *dma_buf,
>   {
>   struct drm_gem_object *obj = dma_buf->priv;
>   
> - drm_gem_unpin(obj);
> + drm_gem_unpin(obj, attach->dev);
>   }
>   EXPORT_SYMBOL(drm_gem_map_detach);
>   
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c 
> b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
> index a05292e8ed6f..67e69a5f00f2 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
> @@ -43,7 +43,7 @@ int etnaviv_gem_prime_mmap(struct drm_gem_object *obj,
>   return etnaviv_obj->ops->mmap(etnaviv_obj, vma);
>   }
>   
> -int etnaviv_gem_prime_pin(struct drm_gem_object *obj)
> +int etnaviv_gem_prime_pin(struct drm_gem_object *obj, struct device *dev)
>   {
>   if (!obj->import_attach) {
>   struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
> @@ -55,7 +55,7 @@ int etnaviv_gem_prime_pin(struct drm_gem_object *obj)
>   return 0;
>   }
>   
> -void etnaviv_gem_prime_unpin(struct drm_gem_object *obj)
> +void etnaviv_gem_prime_unpin(struct drm_gem_object *obj, struct device *dev)
>   {
>   if (!obj->import_attach) {
>   struct etnaviv_gem_object *etnaviv_obj =