Re: [PATCH] drm/amdkfd: CWSR with sw scheduler on Aldebaran and Arcturus

2021-08-20 Thread Amber Lin

Reviewed-by: Amber Lin 


On 8/20/21 3:11 PM, Mukul Joshi wrote:

Program trap handler settings to enable CWSR with software scheduler
on Aldebaran and Arcturus.

Signed-off-by: Mukul Joshi 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c  | 3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c| 2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h| 2 ++
  4 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index a5434b713856..46cd4ee6bafb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -44,4 +44,5 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
+   .program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings
  };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 6409d6b1b2df..5a7f680bcb3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -305,5 +305,6 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base =
kgd_gfx_v9_set_vm_context_page_table_base,
-   .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy
+   .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+   .program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings
  };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 154244916727..bcc1cbeb8799 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -882,7 +882,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int 
pasid,
adev->gfx.cu_info.max_waves_per_simd;
  }
  
-static void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
+void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
  uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
  {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index e64deba8900f..c63591106879 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -65,3 +65,5 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev 
*kgd,
uint32_t vmid, uint64_t page_table_base);
  void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
int *pasid_wave_cnt, int *max_waves_per_cu);
+void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
+   uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);


RE: [PATCH 2/3] drm/amdgpu: Use SDMA1 for buffer movement on Aldebaran

2021-08-20 Thread Greathouse, Joseph
> -Original Message-
> From: Christian König 
> Sent: Friday, August 20, 2021 2:00 AM
> To: Greathouse, Joseph ; 
> amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/3] drm/amdgpu: Use SDMA1 for buffer movement on 
> Aldebaran
> 
> Am 20.08.21 um 07:32 schrieb Joseph Greathouse:
> > Aldebaran should not use SDMA0 for buffer funcs such as page migration.
> > Instead, we move over to SDMA1 for these features. Leave SDMA0 in
> > charge for all other existing chips to avoid any possibility of
> > regressions.
> 
> The part why we do this is missing, apart from that looks good to me.
> 
> Christian.

How about this for a replacement description?

Because SDMA0 shares an MMHUB port with other engines, the hardware
design team has advised that Aldebaran should not use it for
buffer funcs such as page migration. Instead, we move over to
SDMA1 for these features. Leave SDMA0 in charge for all other
existing chips to avoid any possibility of regressions.
 
Thanks,
-Joe

> >
> > Signed-off-by: Joseph Greathouse 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 8 ++--
> >   1 file changed, 6 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > index 8931000dcd41..771630d7bb3f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > @@ -2689,11 +2689,15 @@ static const struct amdgpu_buffer_funcs 
> > sdma_v4_0_buffer_funcs = {
> >
> >   static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
> >   {
> > + int engine = 0;
> > +
> > + if (adev->asic_type == CHIP_ALDEBARAN)
> > + engine = 1;
> >   adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
> >   if (adev->sdma.has_page_queue)
> > - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
> > + adev->mman.buffer_funcs_ring = &adev->sdma.instance[engine].page;
> >   else
> > - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
> > + adev->mman.buffer_funcs_ring = &adev->sdma.instance[engine].ring;
> >   }
> >
> >   static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {



[PATCH 09/10] drm/amd/display: [FW Promotion] Release 0.0.80

2021-08-20 Thread Qingqing Zhuo
From: Anthony Koo 

- Add volatile to avoid incomplete flushing of data in rb

Reviewed-by: Aric Cyr 
Acked-by: Qingqing Zhuo 
Signed-off-by: Anthony Koo 
---
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   | 29 ---
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h 
b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 7b684e7f60df..b322677dafbd 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -47,10 +47,10 @@
 
 /* Firmware versioning. */
 #ifdef DMUB_EXPOSE_VERSION
-#define DMUB_FW_VERSION_GIT_HASH 0x7383caadc
+#define DMUB_FW_VERSION_GIT_HASH 0x591aacca1
 #define DMUB_FW_VERSION_MAJOR 0
 #define DMUB_FW_VERSION_MINOR 0
-#define DMUB_FW_VERSION_REVISION 79
+#define DMUB_FW_VERSION_REVISION 80
 #define DMUB_FW_VERSION_TEST 0
 #define DMUB_FW_VERSION_VBIOS 0
 #define DMUB_FW_VERSION_HOTFIX 0
@@ -2485,14 +2485,16 @@ static inline bool dmub_rb_full(struct dmub_rb *rb)
 static inline bool dmub_rb_push_front(struct dmub_rb *rb,
  const union dmub_rb_cmd *cmd)
 {
-   uint8_t *dst = (uint8_t *)(rb->base_address) + rb->wrpt;
-   const uint8_t *src = (const uint8_t *)cmd;
+   uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + 
rb->wrpt / sizeof(uint64_t);
+   const uint64_t *src = (const uint64_t *)cmd;
+   uint8_t i;
 
if (dmub_rb_full(rb))
return false;
 
// copying data
-   dmub_memcpy(dst, src, DMUB_RB_CMD_SIZE);
+   for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++)
+   *dst++ = *src++;
 
rb->wrpt += DMUB_RB_CMD_SIZE;
 
@@ -2601,14 +2603,16 @@ static inline bool dmub_rb_peek_offset(struct dmub_rb 
*rb,
 static inline bool dmub_rb_out_front(struct dmub_rb *rb,
 union dmub_rb_out_cmd *cmd)
 {
-   const uint8_t *src = (const uint8_t *)(rb->base_address) + rb->rptr;
-   uint8_t *dst = (uint8_t *)cmd;
+   const uint64_t volatile *src = (const uint64_t volatile 
*)(rb->base_address) + rb->rptr / sizeof(uint64_t);
+   uint64_t *dst = (uint64_t *)cmd;
+   uint8_t i;
 
if (dmub_rb_empty(rb))
return false;
 
// copying data
-   dmub_memcpy(dst, src, DMUB_RB_CMD_SIZE);
+   for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++)
+   *dst++ = *src++;
 
return true;
 }
@@ -2643,14 +2647,17 @@ static inline bool dmub_rb_pop_front(struct dmub_rb *rb)
  */
 static inline void dmub_rb_flush_pending(const struct dmub_rb *rb)
 {
-   uint8_t buf[DMUB_RB_CMD_SIZE];
uint32_t rptr = rb->rptr;
uint32_t wptr = rb->wrpt;
 
while (rptr != wptr) {
-   const uint8_t *data = (const uint8_t *)rb->base_address + rptr;
+   uint64_t volatile *data = (uint64_t volatile *)rb->base_address 
+ rptr / sizeof(uint64_t);
+   //uint64_t volatile *p = (uint64_t volatile *)data;
+   uint64_t temp;
+   uint8_t i;
 
-   dmub_memcpy(buf, data, DMUB_RB_CMD_SIZE);
+   for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++)
+   temp = *data++;
 
rptr += DMUB_RB_CMD_SIZE;
if (rptr >= rb->capacity)
-- 
2.25.1



[PATCH 10/10] drm/amd/display: 3.2.150

2021-08-20 Thread Qingqing Zhuo
From: Aric Cyr 

This version brings along following fixes:
- FW promotion 0.0.80
- Add missing ABM register offsets
- Fix in swizzle mode mapping
- Emulated sink support for freesync
- Improvements in max target bpp

Acked-by: Qingqing Zhuo 
Signed-off-by: Aric Cyr 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 3ab52d9a82cf..28aac18ae1e2 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -45,7 +45,7 @@
 /* forward declaration */
 struct aux_payload;
 
-#define DC_VER "3.2.149"
+#define DC_VER "3.2.150"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
2.25.1



[PATCH 02/10] drm/amd/display: Support for DMUB HPD interrupt handling

2021-08-20 Thread Qingqing Zhuo
From: Jude Shih 

[WHY]
To add support for HPD interrupt handling from DMUB.
The HPD interrupt can be reported by DMUB through outbox1.

[HOW]
1) Use queue_work to handle the HPD task from outbox1
   (see the sketch after this list)

2) Add handle_hpd_irq_helper to share interrupt handling code
between legacy HPD and DMUB HPD from outbox1

3) Add DMUB HPD handling in dmub_srv_stat_get_notification(), which
invokes the registered HPD handling callback and wakes up the DMUB thread.
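
For reference, the offload in (1) follows the standard work_struct pattern
sketched below. This is only an illustration with made-up names
(dmub_hpd_work_item, dm_queue_hpd_notification), not the patch's code, which
additionally handles callback registration; the usual workqueue/slab headers
are assumed:

	/* Sketch: hand a DMUB outbox1 HPD notification off to process context. */
	struct dmub_hpd_work_item {
		struct work_struct handle_hpd_work;
		struct dmub_notification notify;	/* copy taken in IRQ context */
		struct amdgpu_device *adev;
	};

	static void dm_handle_hpd_work_func(struct work_struct *work)
	{
		struct dmub_hpd_work_item *w =
			container_of(work, struct dmub_hpd_work_item, handle_hpd_work);

		/* Runs in process context, so taking modeset locks is safe here. */
		dmub_hpd_callback(w->adev, &w->notify);
		kfree(w);
	}

	/* Called from the outbox1 interrupt path. */
	static void dm_queue_hpd_notification(struct amdgpu_device *adev,
					      const struct dmub_notification *notify)
	{
		struct dmub_hpd_work_item *w = kzalloc(sizeof(*w), GFP_ATOMIC);

		if (!w)
			return;

		w->adev = adev;
		w->notify = *notify;
		INIT_WORK(&w->handle_hpd_work, dm_handle_hpd_work_func);
		queue_work(system_highpri_wq, &w->handle_hpd_work);
	}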

Reviewed-by: Nicholas Kazlauskas 
Acked-by: Qingqing Zhuo 
Signed-off-by: Jude Shih 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 171 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  40 
 2 files changed, 203 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 816723691d51..162a8208f74f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -215,6 +215,8 @@ static void handle_cursor_update(struct drm_plane *plane,
 static const struct drm_format_info *
 amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd);
 
+static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector);
+
 static bool
 is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state,
 struct drm_crtc_state *new_crtc_state);
@@ -618,6 +620,116 @@ static void dm_dcn_vertical_interrupt0_high_irq(void 
*interrupt_params)
 }
 #endif
 
+/**
+ * dmub_aux_setconfig_reply_callback - Callback for AUX or SET_CONFIG command.
+ * @adev: amdgpu_device pointer
+ * @notify: dmub notification structure
+ *
+ * Dmub AUX or SET_CONFIG command completion processing callback
+ * Copies dmub notification to DM which is to be read by the AUX command
+ * issuing thread and also signals the event to wake up the thread.
+ */
+void dmub_aux_setconfig_callback(struct amdgpu_device *adev, struct 
dmub_notification *notify)
+{
+   if (adev->dm.dmub_notify)
+   memcpy(adev->dm.dmub_notify, notify, sizeof(struct 
dmub_notification));
+   if (notify->type == DMUB_NOTIFICATION_AUX_REPLY)
+   complete(&adev->dm.dmub_aux_transfer_done);
+}
+
+/**
+ * dmub_hpd_callback - DMUB HPD interrupt processing callback.
+ * @adev: amdgpu_device pointer
+ * @notify: dmub notification structure
+ *
+ * Dmub Hpd interrupt processing callback. Gets display index through the
+ * link index and calls helper to do the processing.
+ */
+void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification 
*notify)
+{
+   struct amdgpu_dm_connector *aconnector;
+   struct drm_connector *connector;
+   struct drm_connector_list_iter iter;
+   struct dc_link *link;
+   uint8_t link_index = 0;
+   struct drm_device *dev = adev->dm.ddev;
+
+   if (adev == NULL)
+   return;
+
+   if (notify == NULL) {
+   DRM_ERROR("DMUB HPD callback notification was NULL");
+   return;
+   }
+
+   if (notify->link_index > adev->dm.dc->link_count) {
+   DRM_ERROR("DMUB HPD index (%u)is abnormal", notify->link_index);
+   return;
+   }
+
+   drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+
+   link_index = notify->link_index;
+
+   link = adev->dm.dc->links[link_index];
+
+   drm_connector_list_iter_begin(dev, &iter);
+   drm_for_each_connector_iter(connector, &iter) {
+   aconnector = to_amdgpu_dm_connector(connector);
+   if (link && aconnector->dc_link == link) {
+   DRM_INFO("DMUB HPD callback: link_index=%u\n", 
link_index);
+   handle_hpd_irq_helper(aconnector);
+   break;
+   }
+   }
+   drm_connector_list_iter_end(&iter);
+   drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+}
+
+/**
+ * register_dmub_notify_callback - Sets callback for DMUB notify
+ * @adev: amdgpu_device pointer
+ * @type: Type of dmub notification
+ * @callback: Dmub interrupt callback function
+ * @dmub_int_thread_offload: offload indicator
+ *
+ * API to register a dmub callback handler for a dmub notification
+ * Also sets indicator whether callback processing to be offloaded.
+ * to dmub interrupt handling thread
+ * Return: true if successfully registered, false if there is existing 
registration
+ */
+bool register_dmub_notify_callback(struct amdgpu_device *adev, enum 
dmub_notification_type type,
+dmub_notify_interrupt_callback_t callback, bool dmub_int_thread_offload)
+{
+   if (callback != NULL && type < 
ARRAY_SIZE(adev->dm.dmub_thread_offload)) {
+   adev->dm.dmub_callback[type] = callback;
+   adev->dm.dmub_thread_offload[type] = dmub_int_thread_offload;
+   } else
+   return false;
+
+   return true;
+}
+
+static void dm_handle_hpd_work(struct work_struct *work)
+{
+   struct dmub_hpd_work *dmub_hpd_wrk;
+
+   dmub_hpd_wrk = container_of(work, struct 

[PATCH 05/10] drm/amd/display: Limit max DSC target bpp for specific monitors

2021-08-20 Thread Qingqing Zhuo
From: Roman Li 

[Why]
Some monitors exhibit corruption at 16bpp DSC.

[How]
- Add helpers for patching edid caps.
- Use it for limiting DSC target bitrate to 15bpp for known monitors

Reviewed-by: Rodrigo Siqueira 
Acked-by: Qingqing Zhuo 
Signed-off-by: Roman Li 
Cc: sta...@vger.kernel.org
---
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 6fee12c91ef5..d793eec69d61 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -40,6 +40,39 @@
 
 #include "dm_helpers.h"
 
+struct monitor_patch_info {
+   unsigned int manufacturer_id;
+   unsigned int product_id;
+   void (*patch_func)(struct dc_edid_caps *edid_caps, unsigned int param);
+   unsigned int patch_param;
+};
+static void set_max_dsc_bpp_limit(struct dc_edid_caps *edid_caps, unsigned int 
param);
+
+static const struct monitor_patch_info monitor_patch_table[] = {
+{0x6D1E, 0x5BBF, set_max_dsc_bpp_limit, 15},
+{0x6D1E, 0x5B9A, set_max_dsc_bpp_limit, 15},
+};
+
+static void set_max_dsc_bpp_limit(struct dc_edid_caps *edid_caps, unsigned int 
param)
+{
+   if (edid_caps)
+   edid_caps->panel_patch.max_dsc_target_bpp_limit = param;
+}
+
+static int amdgpu_dm_patch_edid_caps(struct dc_edid_caps *edid_caps)
+{
+   int i, ret = 0;
+
+   for (i = 0; i < ARRAY_SIZE(monitor_patch_table); i++)
+   if ((edid_caps->manufacturer_id == 
monitor_patch_table[i].manufacturer_id)
+   &&  (edid_caps->product_id == 
monitor_patch_table[i].product_id)) {
+   monitor_patch_table[i].patch_func(edid_caps, 
monitor_patch_table[i].patch_param);
+   ret++;
+   }
+
+   return ret;
+}
+
 /* dm_helpers_parse_edid_caps
  *
  * Parse edid caps
@@ -125,6 +158,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
kfree(sads);
kfree(sadb);
 
+   amdgpu_dm_patch_edid_caps(edid_caps);
+
return result;
 }
 
-- 
2.25.1



[PATCH 04/10] drm/amd/display: Use max target bpp override option

2021-08-20 Thread Qingqing Zhuo
From: Roman Li 

[Why]
Max target bpp override is an option for working around
DSC issues. It is supported on DC level, but was not
used in DM.

[How]
Use actual option value instead of 0.

Reviewed-by: Rodrigo Siqueira 
Acked-by: Qingqing Zhuo 
Signed-off-by: Roman Li 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c   | 4 ++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 162a8208f74f..e5ff59e2fc7c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5753,9 +5753,15 @@ static void apply_dsc_policy_for_stream(struct 
amdgpu_dm_connector *aconnector,
 {
	struct drm_connector *drm_connector = &aconnector->base;
uint32_t link_bandwidth_kbps;
+   uint32_t max_dsc_target_bpp_limit_override = 0;
 
link_bandwidth_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,

dc_link_get_link_cap(aconnector->dc_link));
+
+   if (stream->link && stream->link->local_sink)
+   max_dsc_target_bpp_limit_override =
+   
stream->link->local_sink->edid_caps.panel_patch.max_dsc_target_bpp_limit;
+   
/* Set DSC policy according to dsc_clock_en */
dc_dsc_policy_set_enable_dsc_when_not_needed(
aconnector->dsc_settings.dsc_force_enable == 
DSC_CLK_FORCE_ENABLE);
@@ -5765,7 +5771,7 @@ static void apply_dsc_policy_for_stream(struct 
amdgpu_dm_connector *aconnector,
if 
(dc_dsc_compute_config(aconnector->dc_link->ctx->dc->res_pool->dscs[0],
dsc_caps,

aconnector->dc_link->ctx->dc->debug.dsc_min_slice_height_override,
-   0,
+   
max_dsc_target_bpp_limit_override,
link_bandwidth_kbps,
&stream->timing,
&stream->timing.dsc_cfg)) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 1bcba6943fd7..705f2e67edb5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -547,7 +547,7 @@ static void set_dsc_configs_from_fairness_vars(struct 
dsc_mst_fairness_params *p

params[i].sink->ctx->dc->res_pool->dscs[0],
&params[i].sink->dsc_caps.dsc_dec_caps,

params[i].sink->ctx->dc->debug.dsc_min_slice_height_override,
-   0,
+   
params[i].sink->edid_caps.panel_patch.max_dsc_target_bpp_limit,
0,
params[i].timing,
&params[i].timing->dsc_cfg)) {
@@ -579,7 +579,7 @@ static int bpp_x16_from_pbn(struct dsc_mst_fairness_params 
param, int pbn)
param.sink->ctx->dc->res_pool->dscs[0],
&param.sink->dsc_caps.dsc_dec_caps,

param.sink->ctx->dc->debug.dsc_min_slice_height_override,
-   0,
+   
param.sink->edid_caps.panel_patch.max_dsc_target_bpp_limit,
(int) kbps, param.timing, &dsc_config);
 
return dsc_config.bits_per_pixel;
-- 
2.25.1



[PATCH 07/10] drm/amd/display: Initialize GSP1 SDP header

2021-08-20 Thread Qingqing Zhuo
From: Wyatt Wood 

[Why + How]
Initialize GSP1 SDP header for use in DMCUB FW.

Reviewed-by: Anthony Koo 
Acked-by: Qingqing Zhuo 
Signed-off-by: Wyatt Wood 
---
 .../drm/amd/display/dc/dcn10/dcn10_stream_encoder.c| 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
index f1a08a7736ac..58ff201a54e7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
@@ -715,6 +715,16 @@ void enc1_stream_encoder_update_dp_info_packets(
0,  /* packetIndex */
&info_frame->vsc);
 
+   /* VSC SDP at packetIndex 1 is used by PSR in DMCUB FW.
+* Note that the enablement of GSP1 is not done below,
+* it's done in FW.
+*/
+   if (info_frame->vsc.valid)
+   enc1_update_generic_info_packet(
+   enc1,
+   1,  /* packetIndex */
+   &info_frame->vsc);
+
if (info_frame->spd.valid)
enc1_update_generic_info_packet(
enc1,
-- 
2.25.1



[PATCH 03/10] drm/amd/display: Set min dcfclk if pipe count is 0

2021-08-20 Thread Qingqing Zhuo
From: Michael Strauss 

[WHY]
Clocks don't get recalculated in 0 stream/0 pipe configs,
blocking S0i3 if dcfclk gets high enough

[HOW]
Create DCN31 copy of DCN30 bandwidth validation func which
doesn't entirely skip validation in 0 pipe scenarios

Override dcfclk to vlevel 0/min value during validation if pipe
count is 0

Reviewed-by: Eric Yang 
Acked-by: Qingqing Zhuo 
Signed-off-by: Michael Strauss 
---
 .../drm/amd/display/dc/dcn30/dcn30_resource.c |  2 +-
 .../drm/amd/display/dc/dcn30/dcn30_resource.h |  7 +++
 .../drm/amd/display/dc/dcn31/dcn31_resource.c | 63 ++-
 3 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index 28e15ebf2f43..1333f0541f1b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -1856,7 +1856,7 @@ static struct pipe_ctx *dcn30_find_split_pipe(
return pipe;
 }
 
-static noinline bool dcn30_internal_validate_bw(
+noinline bool dcn30_internal_validate_bw(
struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
index b754b89beadf..b92e4cc0232f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
@@ -55,6 +55,13 @@ unsigned int dcn30_calc_max_scaled_time(
 
 bool dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context,
bool fast_validate);
+bool dcn30_internal_validate_bw(
+   struct dc *dc,
+   struct dc_state *context,
+   display_e2e_pipe_params_st *pipes,
+   int *pipe_cnt_out,
+   int *vlevel_out,
+   bool fast_validate);
 void dcn30_calculate_wm_and_dlg(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 3f92f27dac20..49563cfabdcd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -1647,6 +1647,15 @@ static void dcn31_calculate_wm_and_dlg_fp(
if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
 
+   /* We don't recalculate clocks for 0 pipe configs, which can block
+* S0i3 as high clocks will block low power states
+* Override any clocks that can block S0i3 to min here
+*/
+   if (pipe_cnt == 0) {
+   context->bw_ctx.bw.dcn.clk.dcfclk_khz = dcfclk; // always 
should be vlevel 0
+   return;
+   }
+
pipes[0].clks_cfg.voltage = vlevel;
pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
pipes[0].clks_cfg.socclk_mhz = 
context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
@@ -1772,6 +1781,58 @@ static void dcn31_calculate_wm_and_dlg(
DC_FP_END();
 }
 
+bool dcn31_validate_bandwidth(struct dc *dc,
+   struct dc_state *context,
+   bool fast_validate)
+{
+   bool out = false;
+
+   BW_VAL_TRACE_SETUP();
+
+   int vlevel = 0;
+   int pipe_cnt = 0;
+   display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * 
sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
+   DC_LOGGER_INIT(dc->ctx->logger);
+
+   BW_VAL_TRACE_COUNT();
+
+   out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
+
+   // Disable fast_validate to set min dcfclk in calculate_wm_and_dlg
+   if (pipe_cnt == 0)
+   fast_validate = false;
+
+   if (!out)
+   goto validate_fail;
+
+   BW_VAL_TRACE_END_VOLTAGE_LEVEL();
+
+   if (fast_validate) {
+   BW_VAL_TRACE_SKIP(fast);
+   goto validate_out;
+   }
+
+   dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, 
vlevel);
+
+   BW_VAL_TRACE_END_WATERMARKS();
+
+   goto validate_out;
+
+validate_fail:
+   DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n",
+   
dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states]));
+
+   BW_VAL_TRACE_SKIP(fail);
+   out = false;
+
+validate_out:
+   kfree(pipes);
+
+   BW_VAL_TRACE_FINISH();
+
+   return out;
+}
+
 static struct dc_cap_funcs cap_funcs = {
.get_dcc_compression_cap = dcn20_get_dcc_compression_cap
 };
@@ -1854,7 +1915,7 @@ static struct resource_funcs dcn31_res_pool_funcs = {
.link_encs_assign = link_enc_cfg_link_encs_assign,
.link_enc_unassign = link_enc_cfg_link_enc_unassign,
.panel_cntl_create = dcn31_panel_cntl_create,
-  

[PATCH 06/10] drm/amd/display: Add emulated sink support for updating FS

2021-08-20 Thread Qingqing Zhuo
From: Aurabindo Pillai 

[Why]
When forced modes are used during certain IGT tests,
without a real connector, dc_sink would be null when
standard modes are added by the driver. Calling the
function to update freesync capabilities at this
point will result in an error being printed

[How]
Use emulated sink when available. If both the normal
and emulated sink are not available, set all freesync
parameters to 0.

Reviewed-by: Nicholas Kazlauskas 
Acked-by: Qingqing Zhuo 
Signed-off-by: Aurabindo Pillai 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 20 +++
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e5ff59e2fc7c..e1e57e7465a7 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10917,6 +10917,7 @@ void amdgpu_dm_update_freesync_caps(struct 
drm_connector *connector,
struct amdgpu_dm_connector *amdgpu_dm_connector =
to_amdgpu_dm_connector(connector);
struct dm_connector_state *dm_con_state = NULL;
+   struct dc_sink *sink = amdgpu_dm_connector->dc_sink;
 
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -10928,28 +10929,31 @@ void amdgpu_dm_update_freesync_caps(struct 
drm_connector *connector,
goto update;
}
 
-   if (!edid) {
+   sink = amdgpu_dm_connector->dc_sink ?
+   amdgpu_dm_connector->dc_sink :
+   amdgpu_dm_connector->dc_em_sink;
+
+   if (!edid || !sink) {
dm_con_state = to_dm_connector_state(connector->state);
 
amdgpu_dm_connector->min_vfreq = 0;
amdgpu_dm_connector->max_vfreq = 0;
amdgpu_dm_connector->pixel_clock_mhz = 0;
+   connector->display_info.monitor_range.min_vfreq = 0;
+   connector->display_info.monitor_range.max_vfreq = 0;
+   freesync_capable = false;
 
goto update;
}
 
dm_con_state = to_dm_connector_state(connector->state);
 
-   if (!amdgpu_dm_connector->dc_sink) {
-   DRM_ERROR("dc_sink NULL, could not add free_sync module.\n");
-   goto update;
-   }
if (!adev->dm.freesync_module)
goto update;
 
 
-   if (amdgpu_dm_connector->dc_sink->sink_signal == 
SIGNAL_TYPE_DISPLAY_PORT
-   || amdgpu_dm_connector->dc_sink->sink_signal == 
SIGNAL_TYPE_EDP) {
+   if (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT
+   || sink->sink_signal == SIGNAL_TYPE_EDP) {
bool edid_check_required = false;
 
if (edid) {
@@ -10996,7 +11000,7 @@ void amdgpu_dm_update_freesync_caps(struct 
drm_connector *connector,
freesync_capable = true;
}
}
-   } else if (edid && amdgpu_dm_connector->dc_sink->sink_signal == 
SIGNAL_TYPE_HDMI_TYPE_A) {
+   } else if (edid && sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) {
	i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info);
if (i >= 0 && vsdb_info.freesync_supported) {
	timing  = &edid->detailed_timings[i];
-- 
2.25.1



[PATCH 01/10] drm/amd/display: add missing ABM register offsets

2021-08-20 Thread Qingqing Zhuo
From: Josip Pavic 

[Why]
Some ABM registers don't exist on DCN 3.01, so are
missing from its register offset list. However,
this list was copied to later versions of DCN that
do have these registers. As a result, they're
inaccessible from the driver on those DCN versions
even though they exist.

[How]
Add the missing ABM register offsets to DCN 3.02+

Reviewed-by: Anthony Koo 
Acked-by: Qingqing Zhuo 
Signed-off-by: Josip Pavic 
---
 drivers/gpu/drm/amd/display/dc/dce/dce_abm.h | 16 
 .../drm/amd/display/dc/dcn302/dcn302_resource.c  |  2 +-
 .../drm/amd/display/dc/dcn303/dcn303_resource.c  |  2 +-
 .../drm/amd/display/dc/dcn31/dcn31_resource.c|  2 +-
 4 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h 
b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
index 456fadbbfac7..b699d1b2ba83 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
@@ -96,6 +96,22 @@
SRI(DC_ABM1_HGLS_REG_READ_PROGRESS, ABM, id), \
NBIO_SR(BIOS_SCRATCH_2)
 
+#define ABM_DCN302_REG_LIST(id)\
+   ABM_COMMON_REG_LIST_DCE_BASE(), \
+   SRI(DC_ABM1_HG_SAMPLE_RATE, ABM, id), \
+   SRI(DC_ABM1_LS_SAMPLE_RATE, ABM, id), \
+   SRI(BL1_PWM_BL_UPDATE_SAMPLE_RATE, ABM, id), \
+   SRI(DC_ABM1_HG_MISC_CTRL, ABM, id), \
+   SRI(DC_ABM1_IPCSC_COEFF_SEL, ABM, id), \
+   SRI(BL1_PWM_CURRENT_ABM_LEVEL, ABM, id), \
+   SRI(BL1_PWM_TARGET_ABM_LEVEL, ABM, id), \
+   SRI(BL1_PWM_USER_LEVEL, ABM, id), \
+   SRI(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM, id), \
+   SRI(DC_ABM1_HGLS_REG_READ_PROGRESS, ABM, id), \
+   SRI(DC_ABM1_ACE_OFFSET_SLOPE_0, ABM, id), \
+   SRI(DC_ABM1_ACE_THRES_12, ABM, id), \
+   NBIO_SR(BIOS_SCRATCH_2)
+
 #define ABM_DCN30_REG_LIST(id)\
ABM_COMMON_REG_LIST_DCE_BASE(), \
SRI(DC_ABM1_HG_SAMPLE_RATE, ABM, id), \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
index 7d3ff5d44402..5cd55e8573f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
@@ -1462,7 +1462,7 @@ static const struct dccg_mask dccg_mask = {
 };
 
 #define abm_regs(id)\
-   [id] = { ABM_DCN301_REG_LIST(id) }
+   [id] = { ABM_DCN302_REG_LIST(id) }
 
 static const struct dce_abm_registers abm_regs[] = {
abm_regs(0),
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
index dc7823d23ba8..4c4046eb20a3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
@@ -1390,7 +1390,7 @@ static const struct dccg_mask dccg_mask = {
 };
 
 #define abm_regs(id)\
-   [id] = { ABM_DCN301_REG_LIST(id) }
+   [id] = { ABM_DCN302_REG_LIST(id) }
 
 static const struct dce_abm_registers abm_regs[] = {
abm_regs(0),
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index a7702d3c75cd..3f92f27dac20 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -363,7 +363,7 @@ static const struct dce110_clk_src_mask cs_mask = {
 
 #define abm_regs(id)\
 [id] = {\
-   ABM_DCN301_REG_LIST(id)\
+   ABM_DCN302_REG_LIST(id)\
 }
 
 static const struct dce_abm_registers abm_regs[] = {
-- 
2.25.1



[PATCH 08/10] drm/amd/display: Update swizzle mode enums

2021-08-20 Thread Qingqing Zhuo
From: Alvin Lee 

[Why]
The swizzle mode enum for DC_SW_VAR_R_X already existed,
but was not mapped correctly.

[How]
Update mapping and conversion for DC_SW_VAR_R_X.

Reviewed-by: XiangBing Foo 
Reviewed-by: Martin Leung 
Acked-by: Qingqing Zhuo 
Signed-off-by: Alvin Lee 
Cc: sta...@vger.kernel.org
---
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c   | 4 +++-
 drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index e3e01b17c164..8ad296a0ee68 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -1854,7 +1854,9 @@ static void swizzle_to_dml_params(
case DC_SW_VAR_D_X:
*sw_mode = dm_sw_var_d_x;
break;
-
+   case DC_SW_VAR_R_X:
+   *sw_mode = dm_sw_var_r_x;
+   break;
default:
ASSERT(0); /* Not supported */
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
index 1051ca1a23b8..edb9f7567d6d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
@@ -80,11 +80,11 @@ enum dm_swizzle_mode {
dm_sw_SPARE_13 = 24,
dm_sw_64kb_s_x = 25,
dm_sw_64kb_d_x = 26,
-   dm_sw_SPARE_14 = 27,
+   dm_sw_64kb_r_x = 27,
dm_sw_SPARE_15 = 28,
dm_sw_var_s_x = 29,
dm_sw_var_d_x = 30,
-   dm_sw_64kb_r_x,
+   dm_sw_var_r_x = 31,
dm_sw_gfx7_2d_thin_l_vp,
dm_sw_gfx7_2d_thin_gl,
 };
-- 
2.25.1



[PATCH 00/10] DC Patches Aug 23, 2021

2021-08-20 Thread Qingqing Zhuo
This DC patchset brings improvements in multiple areas. In summary, we 
highlight:

  - DC version 3.2.150
  - FW promotion 0.0.80
  - Add missing ABM register offsets
  - Fix in swizzle mode mapping
  - Emulated sink support for freesync
  - Improvements in max target bpp

---

Alvin Lee (1):
  drm/amd/display: Update swizzle mode enums

Anthony Koo (1):
  drm/amd/display: [FW Promotion] Release 0.0.80

Aric Cyr (1):
  drm/amd/display: 3.2.150

Aurabindo Pillai (1):
  drm/amd/display: Add emulated sink support for updating FS

Josip Pavic (1):
  drm/amd/display: add missing ABM register offsets

Jude Shih (1):
  drm/amd/display: Support for DMUB HPD interrupt handling

Michael Strauss (1):
  drm/amd/display: Set min dcfclk if pipe count is 0

Roman Li (2):
  drm/amd/display: Use max target bpp override option
  drm/amd/display: Limit max DSC target bpp for specific monitors

Wyatt Wood (1):
  drm/amd/display: Initialize GSP1 SDP header

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 199 --
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  40 
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c |  35 +++
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   |   4 +-
 drivers/gpu/drm/amd/display/dc/dc.h   |   2 +-
 drivers/gpu/drm/amd/display/dc/dce/dce_abm.h  |  16 ++
 .../display/dc/dcn10/dcn10_stream_encoder.c   |  10 +
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |   4 +-
 .../drm/amd/display/dc/dcn30/dcn30_resource.c |   2 +-
 .../drm/amd/display/dc/dcn30/dcn30_resource.h |   7 +
 .../amd/display/dc/dcn302/dcn302_resource.c   |   2 +-
 .../amd/display/dc/dcn303/dcn303_resource.c   |   2 +-
 .../drm/amd/display/dc/dcn31/dcn31_resource.c |  65 +-
 .../amd/display/dc/dml/display_mode_enums.h   |   4 +-
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |  29 ++-
 15 files changed, 382 insertions(+), 39 deletions(-)

-- 
2.25.1



Re: [PATCH v2 1/2] drm/amdkfd: check access permisson to restore retry fault

2021-08-20 Thread Felix Kuehling
On 2021-08-20 at 2:31 p.m., Philip Yang wrote:
> Check the range's access permission when restoring a GPU retry fault: if the
> GPU retry fault is on an address that belongs to a VMA, and the VMA does not
> have the read or write permission requested by the GPU, fail to restore the
> address. The VM fault event is then passed back to user space.
>
> Signed-off-by: Philip Yang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  5 +++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  3 ++-
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  3 ++-
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 29 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.h   |  5 +++--
>  6 files changed, 39 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 831f00644460..bbb892d8f489 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -3347,12 +3347,13 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>   * @adev: amdgpu device pointer
>   * @pasid: PASID of the VM
>   * @addr: Address of the fault
> + * @write_fault: 0 is read fault, 1 is write fault

That should say true/false instead of 0/1. With that fixed, the series is

Reviewed-by: Felix Kuehling 


>   *
>   * Try to gracefully handle a VM fault. Return true if the fault was handled 
> and
>   * shouldn't be reported any more.
>   */
>  bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
> - uint64_t addr)
> + uint64_t addr, bool write_fault)
>  {
>   bool is_compute_context = false;
>   struct amdgpu_bo *root;
> @@ -3377,7 +3378,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
> u32 pasid,
>   addr /= AMDGPU_GPU_PAGE_SIZE;
>  
>   if (is_compute_context &&
> - !svm_range_restore_pages(adev, pasid, addr)) {
> + !svm_range_restore_pages(adev, pasid, addr, write_fault)) {
> >   amdgpu_bo_unref(&root);
>   return true;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 80cc9ab2c1d0..85fcfb8c5efd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -448,7 +448,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device 
> *adev);
>  void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
>struct amdgpu_task_info *task_info);
>  bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
> - uint64_t addr);
> + uint64_t addr, bool write_fault);
>  
>  void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 24b781e90bef..41c3a0d70b7c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -93,6 +93,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device 
> *adev,
>  struct amdgpu_iv_entry *entry)
>  {
>   bool retry_fault = !!(entry->src_data[1] & 0x80);
> + bool write_fault = !!(entry->src_data[1] & 0x20);
> >   struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
>   struct amdgpu_task_info task_info;
>   uint32_t status = 0;
> @@ -121,7 +122,7 @@ static int gmc_v10_0_process_interrupt(struct 
> amdgpu_device *adev,
>   /* Try to handle the recoverable page faults by filling page
>* tables
>*/
> - if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
> + if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, 
> write_fault))
>   return 1;
>   }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 097230b5e946..a7dfb370f642 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -506,6 +506,7 @@ static int gmc_v9_0_process_interrupt(struct 
> amdgpu_device *adev,
> struct amdgpu_iv_entry *entry)
>  {
>   bool retry_fault = !!(entry->src_data[1] & 0x80);
> + bool write_fault = !!(entry->src_data[1] & 0x20);
>   uint32_t status = 0, cid = 0, rw = 0;
>   struct amdgpu_task_info task_info;
>   struct amdgpu_vmhub *hub;
> @@ -536,7 +537,7 @@ static int gmc_v9_0_process_interrupt(struct 
> amdgpu_device *adev,
>   /* Try to handle the recoverable page faults by filling page
>* tables
>*/
> - if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
> + if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, 
> write_fault))
>   return 1;
>   }
>  
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 

Re: [PATCH 1/3] drm/amdkfd: Allocate SDMA engines more fairly

2021-08-20 Thread Felix Kuehling
On 2021-08-20 at 1:32 a.m., Joseph Greathouse wrote:
> Give every process at most one queue from each SDMA engine.
> Previously, we allocated all SDMA engines and queues on a first-
> come-first-serve basis. This meant that it was possible for two
> processes racing on their allocation requests to each end up with
> two queues on the same SDMA engine. That could serialize the
> performance of commands to different queues.
>
> This new mechanism allows each process at most a single queue
> on each SDMA engine. Processes will check for queue availability on
> SDMA engine 0, then 1, then 2, etc. and only allocate on that
> engine if there is an available queue slot. If there are no
> queue slots available (or if this process has already allocated
> a queue on this engine), it moves on and tries the next engine.
>
> The Aldebaran chip has a small quirk that SDMA0 should only be
> used by the very first allocation from each process. It is OK to
> use any other SDMA engine at any time, but after the first SDMA
> allocation request from a process, the resulting engine must
> be from SDMA1 or above. This patch handles this case as well.
>
> Signed-off-by: Joseph Greathouse 

One final nit-pick inline. With that fixed, the series is

Reviewed-by: Felix Kuehling 


> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 135 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   2 +
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c  |   3 +
>  3 files changed, 107 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index f8fce9d05f50..86bdb765f350 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -52,12 +52,14 @@ static int unmap_queues_cpsch(struct device_queue_manager 
> *dqm,
>  static int map_queues_cpsch(struct device_queue_manager *dqm);
>  
>  static void deallocate_sdma_queue(struct device_queue_manager *dqm,
> + struct qcm_process_device *qpd,
>   struct queue *q);
>  
>  static inline void deallocate_hqd(struct device_queue_manager *dqm,
>   struct queue *q);
>  static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
>  static int allocate_sdma_queue(struct device_queue_manager *dqm,
> + struct qcm_process_device *qpd,
>   struct queue *q);
>  static void kfd_process_hw_exception(struct work_struct *work);
>  
> @@ -349,7 +351,7 @@ static int create_queue_nocpsch(struct 
> device_queue_manager *dqm,
>   q->pipe, q->queue);
>   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
>   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
> - retval = allocate_sdma_queue(dqm, q);
> + retval = allocate_sdma_queue(dqm, qpd, q);
>   if (retval)
>   goto deallocate_vmid;
>   dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
> @@ -410,7 +412,7 @@ static int create_queue_nocpsch(struct 
> device_queue_manager *dqm,
>   deallocate_hqd(dqm, q);
>   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
>   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
> - deallocate_sdma_queue(dqm, q);
> + deallocate_sdma_queue(dqm, qpd, q);
>  deallocate_vmid:
>   if (list_empty(>queues_list))
>   deallocate_vmid(dqm, qpd, q);
> @@ -475,9 +477,9 @@ static int destroy_queue_nocpsch_locked(struct 
> device_queue_manager *dqm,
>   if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
>   deallocate_hqd(dqm, q);
>   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
> - deallocate_sdma_queue(dqm, q);
> + deallocate_sdma_queue(dqm, qpd, q);
>   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
> - deallocate_sdma_queue(dqm, q);
> + deallocate_sdma_queue(dqm, qpd, q);
>   else {
>   pr_debug("q->properties.type %d is invalid\n",
>   q->properties.type);
> @@ -1039,42 +1041,93 @@ static void pre_reset(struct device_queue_manager 
> *dqm)
>   dqm_unlock(dqm);
>  }
>  
> +static uint64_t sdma_engine_mask(int engine, int num_engines)
> +{
> + uint64_t mask = 0;
> +
> + engine %= num_engines;
> + while (engine < (sizeof(uint64_t)*8)) {
> + mask |= 1ULL << engine;
> + engine += num_engines;
> + }
> + return mask;
> +}
> +
>  static int allocate_sdma_queue(struct device_queue_manager *dqm,
> + struct qcm_process_device *qpd,
>   struct queue *q)
>  {
> - int bit;
> + uint64_t available_queue_bitmap;
> + unsigned int bit, engine, num_engines;
> + bool found_sdma = false;
>  
>   

[PATCH] drm/amdkfd: CWSR with sw scheduler on Aldebaran and Arcturus

2021-08-20 Thread Mukul Joshi
Program trap handler settings to enable CWSR with software scheduler
on Aldebaran and Arcturus.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c  | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c| 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h| 2 ++
 4 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index a5434b713856..46cd4ee6bafb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -44,4 +44,5 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
+   .program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 6409d6b1b2df..5a7f680bcb3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -305,5 +305,6 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base =
kgd_gfx_v9_set_vm_context_page_table_base,
-   .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy
+   .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+   .program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 154244916727..bcc1cbeb8799 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -882,7 +882,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int 
pasid,
adev->gfx.cu_info.max_waves_per_simd;
 }
 
-static void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
+void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
 uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index e64deba8900f..c63591106879 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -65,3 +65,5 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev 
*kgd,
uint32_t vmid, uint64_t page_table_base);
 void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
int *pasid_wave_cnt, int *max_waves_per_cu);
+void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
+   uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
-- 
2.17.1



[PATCH v2 2/2] drm/amdkfd: map SVM range with correct access permission

2021-08-20 Thread Philip Yang
When restoring a retry fault, prefetching a range, or restoring an svm range
after eviction, map the range to the GPU with the correct read or write
access permission.

A range may include multiple VMAs; update the GPU page table using the offset
into prange and the number of pages of each VMA, according to that VMA's
access permission.
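
In essence, the per-VMA split is the loop sketched below. This is only an
illustration (the helper name map_pages_to_gpus and the exact bounds handling
are made up for brevity), not the patch itself; the caller is assumed to hold
mmap_read_lock():

	/* Walk the CPU VMAs covering [prange->start, prange->last] (page units)
	 * and map each piece with permissions derived from that VMA.
	 */
	static int map_range_per_vma(struct mm_struct *mm, struct svm_range *prange)
	{
		unsigned long addr = prange->start << PAGE_SHIFT;
		unsigned long end = (prange->last + 1) << PAGE_SHIFT;
		int r = 0;

		while (addr < end) {
			struct vm_area_struct *vma = find_vma(mm, addr);
			unsigned long next, offset, npages;
			bool readonly;

			if (!vma || addr < vma->vm_start)
				return -EFAULT;

			next = min(vma->vm_end, end);
			offset = (addr >> PAGE_SHIFT) - prange->start;
			npages = (next - addr) >> PAGE_SHIFT;
			readonly = !(vma->vm_flags & VM_WRITE);

			/* hypothetical stand-in for the patch's map helper */
			r = map_pages_to_gpus(prange, offset, npages, readonly);
			if (r)
				break;

			addr = next;
		}
		return r;
	}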

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 134 +--
 1 file changed, 86 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index b6d1268bc38f..8f9b5b53dab5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -120,6 +120,7 @@ static void svm_range_remove_notifier(struct svm_range 
*prange)
 
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
+ unsigned long offset, unsigned long npages,
  unsigned long *hmm_pfns, uint32_t gpuidx)
 {
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
@@ -136,7 +137,8 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct 
svm_range *prange,
prange->dma_addr[gpuidx] = addr;
}
 
-   for (i = 0; i < prange->npages; i++) {
+   addr += offset;
+   for (i = 0; i < npages; i++) {
if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
  "leaking dma mapping\n"))
dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
@@ -167,6 +169,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct 
svm_range *prange,
 
 static int
 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
+ unsigned long offset, unsigned long npages,
  unsigned long *hmm_pfns)
 {
struct kfd_process *p;
@@ -187,7 +190,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long 
*bitmap,
}
adev = (struct amdgpu_device *)pdd->dev->kgd;
 
-   r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx);
+   r = svm_range_dma_map_dev(adev, prange, offset, npages,
+ hmm_pfns, gpuidx);
if (r)
break;
}
@@ -1088,11 +1092,6 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, 
struct svm_range *prange,
pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
 
pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
-
-   pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
-prange->svms, prange->start, prange->last,
-(domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, 
mapping_flags);
-
return pte_flags;
 }
 
@@ -1156,7 +1155,8 @@ svm_range_unmap_from_gpus(struct svm_range *prange, 
unsigned long start,
 
 static int
 svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-struct svm_range *prange, dma_addr_t *dma_addr,
+struct svm_range *prange, unsigned long offset,
+unsigned long npages, bool readonly, dma_addr_t *dma_addr,
 struct amdgpu_device *bo_adev, struct dma_fence **fence)
 {
struct amdgpu_bo_va bo_va;
@@ -1167,14 +1167,15 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
int r = 0;
int64_t i;
 
-   pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
-prange->last);
+   last_start = prange->start + offset;
+
+   pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
+last_start, last_start + npages - 1, readonly);
 
if (prange->svm_bo && prange->ttm_res)
bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
 
-   last_start = prange->start;
-   for (i = 0; i < prange->npages; i++) {
+   for (i = offset; i < offset + npages; i++) {
last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
if ((prange->start + i) < prange->last &&
@@ -1183,13 +1184,21 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
 
pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
 last_start, prange->start + i, last_domain ? "GPU" : 
"CPU");
+
pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
-   r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, 
false, NULL,
-   last_start,
+   if (readonly)
+   pte_flags &= ~AMDGPU_PTE_WRITEABLE;
+
+   pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
+prange->svms, last_start, prange->start + i,
+(last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
+pte_flags);
+
+   r = 

[PATCH v2 1/2] drm/amdkfd: check access permisson to restore retry fault

2021-08-20 Thread Philip Yang
Check the range's access permission when restoring a GPU retry fault: if the
GPU retry fault is on an address that belongs to a VMA, and the VMA does not
have the read or write permission requested by the GPU, fail to restore the
address. The VM fault event is then passed back to user space.
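
The check itself amounts to comparing the faulting access against the VMA
protection bits, roughly as below. The helper name and signature are
illustrative only (the patch adds its own small helper in kfd_svm.c), and the
caller is assumed to hold mmap_read_lock():

	/* Return true if a GPU retry fault at byte address @addr may be
	 * restored, i.e. the covering VMA grants the requested access.
	 */
	static bool gpu_fault_allowed(struct mm_struct *mm, unsigned long addr,
				      bool write_fault)
	{
		unsigned long requested = write_fault ? VM_WRITE : VM_READ;
		struct vm_area_struct *vma = find_vma(mm, addr);

		if (!vma || addr < vma->vm_start)
			return false;	/* no VMA covers this address */

		return (vma->vm_flags & requested) != 0;
	}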

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  3 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 29 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h   |  5 +++--
 6 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 831f00644460..bbb892d8f489 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3347,12 +3347,13 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
  * @adev: amdgpu device pointer
  * @pasid: PASID of the VM
  * @addr: Address of the fault
+ * @write_fault: 0 is read fault, 1 is write fault
  *
  * Try to gracefully handle a VM fault. Return true if the fault was handled 
and
  * shouldn't be reported any more.
  */
 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
-   uint64_t addr)
+   uint64_t addr, bool write_fault)
 {
bool is_compute_context = false;
struct amdgpu_bo *root;
@@ -3377,7 +3378,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
addr /= AMDGPU_GPU_PAGE_SIZE;
 
if (is_compute_context &&
-   !svm_range_restore_pages(adev, pasid, addr)) {
+   !svm_range_restore_pages(adev, pasid, addr, write_fault)) {
	amdgpu_bo_unref(&root);
return true;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 80cc9ab2c1d0..85fcfb8c5efd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -448,7 +448,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device 
*adev);
 void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
 struct amdgpu_task_info *task_info);
 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
-   uint64_t addr);
+   uint64_t addr, bool write_fault);
 
 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 24b781e90bef..41c3a0d70b7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -93,6 +93,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device 
*adev,
   struct amdgpu_iv_entry *entry)
 {
bool retry_fault = !!(entry->src_data[1] & 0x80);
+   bool write_fault = !!(entry->src_data[1] & 0x20);
	struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
struct amdgpu_task_info task_info;
uint32_t status = 0;
@@ -121,7 +122,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device 
*adev,
/* Try to handle the recoverable page faults by filling page
 * tables
 */
-   if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+   if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, 
write_fault))
return 1;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 097230b5e946..a7dfb370f642 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -506,6 +506,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device 
*adev,
  struct amdgpu_iv_entry *entry)
 {
bool retry_fault = !!(entry->src_data[1] & 0x80);
+   bool write_fault = !!(entry->src_data[1] & 0x20);
uint32_t status = 0, cid = 0, rw = 0;
struct amdgpu_task_info task_info;
struct amdgpu_vmhub *hub;
@@ -536,7 +537,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device 
*adev,
/* Try to handle the recoverable page faults by filling page
 * tables
 */
-   if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+   if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, 
write_fault))
return 1;
}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index d4a43c94bcf9..b6d1268bc38f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2400,9 +2400,29 @@ svm_range_count_fault(struct amdgpu_device *adev, struct 
kfd_process *p,
WRITE_ONCE(pdd->faults, pdd->faults + 1);
 }
 
+static bool

[pull] amdgpu, amdkfd, radeon drm-next-5.15

2021-08-20 Thread Alex Deucher
Hi Dave, Daniel,

Updates for 5.15.  Mainly bug fixes and cleanups.

The following changes since commit 554594567b1fa3da74f88ec7b2dc83d000c58e98:

  drm/display: fix possible null-pointer dereference in dcn10_set_clock() 
(2021-08-11 17:19:54 -0400)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-next-5.15-2021-08-20

for you to fetch changes up to 90a9266269eb9f71af1f323c33e1dca53527bd22:

  drm/amdgpu: Cancel delayed work when GFXOFF is disabled (2021-08-20 12:09:44 
-0400)


amd-drm-next-5.15-2021-08-20:

amdgpu:
- embed hw fence into job
- Misc SMU fixes
- PSP TA code cleanup
- RAS fixes
- PWM fan speed fixes
- DC workqueue cleanups
- SR-IOV fixes
- gfxoff delayed work fix
- Pin domain check fix

amdkfd:
- SVM fixes

radeon:
- Code cleanup


Anthony Koo (1):
  drm/amd/display: [FW Promotion] Release 0.0.79

Aric Cyr (1):
  drm/amd/display: 3.2.149

Candice Li (3):
  drm/amd/amdgpu: consolidate PSP TA context
  drm/amd/amdgpu: remove unnecessary RAS context field
  drm/amd: consolidate TA shared memory structures

Christian König (1):
  drm/amdgpu: use the preferred pin domain after the check

Colin Ian King (1):
  drm/amd/pm: Fix spelling mistake "firwmare" -> "firmware"

Evan Quan (9):
  drm/amd/pm: correct the fan speed RPM setting
  drm/amd/pm: record the RPM and PWM based fan speed settings
  drm/amd/pm: correct the fan speed PWM retrieving
  drm/amd/pm: correct the fan speed RPM retrieving
  drm/amd/pm: drop the unnecessary intermediate percent-based transition
  drm/amd/pm: drop unnecessary manual mode check
  drm/amd/pm: correct the address of Arcturus fan related registers
  drm/amdgpu: disable BACO support for 699F:C7 polaris12 SKU temporarily
  drm/amd/pm: a quick fix for "divided by zero" error

Hawking Zhang (1):
  drm/amdgpu: increase max xgmi physical node for aldebaran

Jack Zhang (1):
  drm/amd/amdgpu embed hw_fence into amdgpu_job

Jake Wang (1):
  drm/amd/display: Ensure DCN save after VM setup

Jiange Zhao (1):
  drm/amdgpu: Add MB_REQ_MSG_READY_TO_RESET response when VF get FLR 
notification.

Jonathan Kim (1):
  drm/amdgpu: get extended xgmi topology data

Kenneth Feng (2):
  Revert "drm/amd/pm: fix workload mismatch on vega10"
  drm/amd/pm: change the workload type for some cards

Kevin Wang (5):
  drm/amd/pm: correct DPM_XGMI/VCN_DPM feature name
  drm/amd/pm: skip to load smu microcode on sriov for aldebaran
  drm/amd/pm: change return value in aldebaran_get_power_limit()
  drm/amd/pm: change smu msg's attribute to allow working under sriov
  drm/amd/pm: change pp_dpm_sclk/mclk/fclk attribute is RO for aldebaran

Lukas Bulwahn (1):
  drm: amdgpu: remove obsolete reference to config CHASH

Michel Dänzer (1):
  drm/amdgpu: Cancel delayed work when GFXOFF is disabled

Nathan Chancellor (1):
  drm/radeon: Add break to switch statement in 
radeonfb_create_pinned_object()

Nicholas Kazlauskas (3):
  drm/amd/display: Fix multi-display support for idle opt workqueue
  drm/amd/display: Use vblank control events for PSR enable/disable
  drm/amd/display: Guard vblank wq flush with DCN guards

Wayne Lin (1):
  drm/amd/display: Create dc_sink when EDID fail

Yifan Zhang (1):
  drm/amdkfd: fix random KFDSVMRangeTest.SetGetAttributesTest test failure

YuBiao Wang (1):
  drm/amd/amdgpu:flush ttm delayed work before cancel_sync

Zhan Liu (1):
  drm/amd/display: Use DCN30 watermark calc for DCN301

Zhigang Luo (1):
  drm/amdgpu: correct MMSCH 1.0 version

 drivers/gpu/drm/Kconfig|   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c|   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  28 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  |  86 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c|  37 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c|   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c|  39 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h|   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  16 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c  |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c   |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 432 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h| 111 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c|   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c|   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h

Re: [PATCH v2 02/14] drm/arm/hdlcd: Convert to Linux IRQ interfaces

2021-08-20 Thread Liviu Dudau
On Tue, Aug 03, 2021 at 11:06:52AM +0200, Thomas Zimmermann wrote:
> Drop the DRM IRQ midlayer in favor of Linux IRQ interfaces. DRM's
> IRQ helpers are mostly useful for UMS drivers. Modern KMS drivers
> don't benefit from using it.
> 
> DRM IRQ callbacks are now being called directly or inlined.
> 
> Calls to platform_get_irq() can fail with a negative errno code.
> Abort initialization in this case. The DRM IRQ midlayer does not
> handle this case correctly.
> 
> v2:
>   * name struct drm_device variables 'drm' (Sam)
> 
> Signed-off-by: Thomas Zimmermann 
> Acked-by: Sam Ravnborg 

Sorry for the delayed response due to holidays.

Acked-by: Liviu Dudau 

Best regards,
Liviu

> ---
>  drivers/gpu/drm/arm/hdlcd_drv.c | 174 ++--
>  drivers/gpu/drm/arm/hdlcd_drv.h |   1 +
>  2 files changed, 97 insertions(+), 78 deletions(-)
> 
> diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
> index 81ae92390736..479c2422a2e0 100644
> --- a/drivers/gpu/drm/arm/hdlcd_drv.c
> +++ b/drivers/gpu/drm/arm/hdlcd_drv.c
> @@ -29,7 +29,6 @@
>  #include 
>  #include 
>  #include 
> -#include 
>  #include 
>  #include 
>  #include 
> @@ -38,6 +37,94 @@
>  #include "hdlcd_drv.h"
>  #include "hdlcd_regs.h"
>  
> +static irqreturn_t hdlcd_irq(int irq, void *arg)
> +{
> + struct drm_device *drm = arg;
> + struct hdlcd_drm_private *hdlcd = drm->dev_private;
> + unsigned long irq_status;
> +
> + irq_status = hdlcd_read(hdlcd, HDLCD_REG_INT_STATUS);
> +
> +#ifdef CONFIG_DEBUG_FS
> + if (irq_status & HDLCD_INTERRUPT_UNDERRUN)
> + atomic_inc(&hdlcd->buffer_underrun_count);
> +
> + if (irq_status & HDLCD_INTERRUPT_DMA_END)
> + atomic_inc(&hdlcd->dma_end_count);
> +
> + if (irq_status & HDLCD_INTERRUPT_BUS_ERROR)
> + atomic_inc(&hdlcd->bus_error_count);
> +
> + if (irq_status & HDLCD_INTERRUPT_VSYNC)
> + atomic_inc(&hdlcd->vsync_count);
> +
> +#endif
> + if (irq_status & HDLCD_INTERRUPT_VSYNC)
> + drm_crtc_handle_vblank(&hdlcd->crtc);
> +
> + /* acknowledge interrupt(s) */
> + hdlcd_write(hdlcd, HDLCD_REG_INT_CLEAR, irq_status);
> +
> + return IRQ_HANDLED;
> +}
> +
> +static void hdlcd_irq_preinstall(struct drm_device *drm)
> +{
> + struct hdlcd_drm_private *hdlcd = drm->dev_private;
> + /* Ensure interrupts are disabled */
> + hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, 0);
> + hdlcd_write(hdlcd, HDLCD_REG_INT_CLEAR, ~0);
> +}
> +
> +static void hdlcd_irq_postinstall(struct drm_device *drm)
> +{
> +#ifdef CONFIG_DEBUG_FS
> + struct hdlcd_drm_private *hdlcd = drm->dev_private;
> + unsigned long irq_mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK);
> +
> + /* enable debug interrupts */
> + irq_mask |= HDLCD_DEBUG_INT_MASK;
> +
> + hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, irq_mask);
> +#endif
> +}
> +
> +static int hdlcd_irq_install(struct drm_device *drm, int irq)
> +{
> + int ret;
> +
> + if (irq == IRQ_NOTCONNECTED)
> + return -ENOTCONN;
> +
> + hdlcd_irq_preinstall(drm);
> +
> + ret = request_irq(irq, hdlcd_irq, 0, drm->driver->name, drm);
> + if (ret)
> + return ret;
> +
> + hdlcd_irq_postinstall(drm);
> +
> + return 0;
> +}
> +
> +static void hdlcd_irq_uninstall(struct drm_device *drm)
> +{
> + struct hdlcd_drm_private *hdlcd = drm->dev_private;
> + /* disable all the interrupts that we might have enabled */
> + unsigned long irq_mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK);
> +
> +#ifdef CONFIG_DEBUG_FS
> + /* disable debug interrupts */
> + irq_mask &= ~HDLCD_DEBUG_INT_MASK;
> +#endif
> +
> + /* disable vsync interrupts */
> + irq_mask &= ~HDLCD_INTERRUPT_VSYNC;
> + hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, irq_mask);
> +
> + free_irq(hdlcd->irq, drm);
> +}
> +
>  static int hdlcd_load(struct drm_device *drm, unsigned long flags)
>  {
>   struct hdlcd_drm_private *hdlcd = drm->dev_private;
> @@ -90,7 +177,12 @@ static int hdlcd_load(struct drm_device *drm, unsigned 
> long flags)
>   goto setup_fail;
>   }
>  
> - ret = drm_irq_install(drm, platform_get_irq(pdev, 0));
> + ret = platform_get_irq(pdev, 0);
> + if (ret < 0)
> + goto irq_fail;
> + hdlcd->irq = ret;
> +
> + ret = hdlcd_irq_install(drm, hdlcd->irq);
>   if (ret < 0) {
>   DRM_ERROR("failed to install IRQ handler\n");
>   goto irq_fail;
> @@ -122,76 +214,6 @@ static void hdlcd_setup_mode_config(struct drm_device 
> *drm)
>   drm->mode_config.funcs = &hdlcd_mode_config_funcs;
>  }
>  
> -static irqreturn_t hdlcd_irq(int irq, void *arg)
> -{
> - struct drm_device *drm = arg;
> - struct hdlcd_drm_private *hdlcd = drm->dev_private;
> - unsigned long irq_status;
> -
> - irq_status = hdlcd_read(hdlcd, HDLCD_REG_INT_STATUS);
> -
> -#ifdef CONFIG_DEBUG_FS
> - if (irq_status & HDLCD_INTERRUPT_UNDERRUN)
> - 

Re: [PATCH 2/2] drm/amdkfd: map SVM range with correct access permission

2021-08-20 Thread philip yang

  


On 2021-08-19 5:32 p.m., Felix Kuehling wrote:


  On 2021-08-19 at 10:56 a.m., Philip Yang wrote:

  
Restore retry fault or prefetch range, or restore svm range after
eviction, to map the range to the GPU with the correct read or write
access permission.

A range may include multiple VMAs. Update the GPU page table with the
offset of prange and the number of pages for each VMA, according to the
VMA access permission.

Signed-off-by: Philip Yang 

  
  
Minor nitpicks, and one question. See inline. It looks good otherwise.



  
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 131 +--
 1 file changed, 84 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index cf1009bb532a..94612581963f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -120,6 +120,7 @@ static void svm_range_remove_notifier(struct svm_range *prange)
 
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
+		  unsigned long offset, unsigned long npages,
 		  unsigned long *hmm_pfns, uint32_t gpuidx)
 {
 	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
@@ -136,7 +137,8 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		prange->dma_addr[gpuidx] = addr;
 	}
 
-	for (i = 0; i < prange->npages; i++) {
+	addr += offset;
+	for (i = 0; i < npages; i++) {
 		if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
 			  "leaking dma mapping\n"))
 			dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
@@ -167,6 +169,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 
 static int
 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
+		  unsigned long offset, unsigned long npages,
 		  unsigned long *hmm_pfns)
 {
 	struct kfd_process *p;
@@ -187,7 +190,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 		}
 		adev = (struct amdgpu_device *)pdd->dev->kgd;
 
-		r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx);
+		r = svm_range_dma_map_dev(adev, prange, offset, npages,
+	  hmm_pfns, gpuidx);
 		if (r)
 			break;
 	}
@@ -1088,11 +1092,6 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
 	pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
 
 	pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
-
-	pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
-		 prange->svms, prange->start, prange->last,
-		 (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags);
-
 	return pte_flags;
 }
 
@@ -1156,7 +1155,8 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
 
 static int
 svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		 struct svm_range *prange, dma_addr_t *dma_addr,
+		 struct svm_range *prange, unsigned long offset,
+		 unsigned long npages, bool readonly, dma_addr_t *dma_addr,
 		 struct amdgpu_device *bo_adev, struct dma_fence **fence)
 {
 	struct amdgpu_bo_va bo_va;
@@ -1167,14 +1167,15 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	int r = 0;
 	int64_t i;
 
-	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
-		 prange->last);
+	last_start = prange->start + offset;
+
+	pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
+		 last_start, last_start + npages - 1, readonly);
 
 	if (prange->svm_bo && prange->ttm_res)
 		bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
 
-	last_start = prange->start;
-	for (i = 0; i < prange->npages; i++) {
+	for (i = offset; i < offset + npages; i++) {
 		last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
 		dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
 		if ((prange->start + i) < prange->last &&
@@ -1183,13 +1184,21 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 		pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
 			 last_start, prange->start + i, last_domain ? "GPU" : "CPU");
+
 		pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
-		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
-		last_start,
+		if (readonly)
+			pte_flags &= ~AMDGPU_PTE_WRITEABLE;
+
+		pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
+			 prange->svms, last_start, prange->start + i,
+			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
+			 pte_flags);
+
+		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
+		NULL, last_start,
 		prange->start + i, pte_flags,
 		last_start - prange->start,
-		NULL,
-		dma_addr,
+		NULL, dma_addr,
		&vm->last_update,
		&table_freed);
 		if (r) {
@@ -1220,8 +1229,10 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	return r;
 }
 
-static int svm_range_map_to_gpus(struct svm_range *prange,
- unsigned long *bitmap, bool wait)
+static int
+svm_range_map_to_gpus(struct 

Re: [PATCH] drm/amd/pm: And destination bounds checking to struct copy

2021-08-20 Thread Alex Deucher
On Thu, Aug 19, 2021 at 4:14 PM Kees Cook  wrote:
>
> In preparation for FORTIFY_SOURCE performing compile-time and run-time
> field bounds checking for memcpy(), memmove(), and memset(), avoid
> intentionally writing across neighboring fields.
>
> The "Board Parameters" members of the structs:
> struct atom_smc_dpm_info_v4_5
> struct atom_smc_dpm_info_v4_6
> struct atom_smc_dpm_info_v4_7
> struct atom_smc_dpm_info_v4_10
> are written to the corresponding members of the corresponding PPTable_t
> variables, but they lack destination size bounds checking, which means
> the compiler cannot verify at compile time that this is an intended and
> safe memcpy().
>
> Since the header files are effectively immutable[1] and a struct_group()
> cannot be used, nor a common struct referenced by both sides of the
> memcpy() arguments, add a new helper, memcpy_trailing(), to perform the
> bounds checking at compile time. Replace the open-coded memcpy()s with
> memcpy_trailing() which includes enough context for the bounds checking.
>
> "objdump -d" shows no object code changes.
>
> [1] https://lore.kernel.org/lkml/e56aad3c-a06f-da07-f491-a894a570d...@amd.com
>
> Cc: Lijo Lazar 
> Cc: "Christian König" 
> Cc: "Pan, Xinhui" 
> Cc: David Airlie 
> Cc: Daniel Vetter 
> Cc: Hawking Zhang 
> Cc: Feifei Xu 
> Cc: Likun Gao 
> Cc: Jiawei Gu 
> Cc: Evan Quan 
> Cc: amd-gfx@lists.freedesktop.org
> Cc: dri-de...@lists.freedesktop.org
> Signed-off-by: Kees Cook 
> Link: 
> https://lore.kernel.org/lkml/cadnq5_npb8uyvd+r4uhgf-w8-cqj3joodjvijr_y9w9wqj7...@mail.gmail.com
> ---
> Alex, I dropped your prior Acked-by, since the implementation is very
> different. If you're still happy with it, I can add it back. :)

This looks reasonable to me:
Acked-by: Alex Deucher 
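
As a side note, a toy illustration of how the new helper reads (the
structs here are made up, not from the patch; the real call sites are
in the hunks below). The BUILD_BUG_ON() inside memcpy_trailing() fails
the build if the source tail and the destination member span ever
differ in size:

struct example_src { u32 a; u32 b; u32 c; };	/* hypothetical */
struct example_dst { u32 x; u32 y; u32 z; };	/* hypothetical */

static void example_copy(struct example_dst *dst,
			 const struct example_src *src)
{
	/* copies src->b..src->c into dst->y..dst->z (8 bytes each side) */
	memcpy_trailing(dst, y, z, src, b);
}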

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 25 +++
>  .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c |  6 ++---
>  .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   |  8 +++---
>  .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c|  5 ++--
>  4 files changed, 33 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 96e895d6be35..4605934a4fb7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1446,4 +1446,29 @@ static inline int amdgpu_in_reset(struct amdgpu_device 
> *adev)
>  {
> return atomic_read(&adev->in_gpu_reset);
>  }
> +
> +/**
> + * memcpy_trailing - Copy the end of one structure into the middle of another
> + *
> + * @dst: Pointer to destination struct
> + * @first_dst_member: The member name in @dst where the overwrite begins
> + * @last_dst_member: The member name in @dst where the overwrite ends after
> + * @src: Pointer to the source struct
> + * @first_src_member: The member name in @src where the copy begins
> + *
> + */
> +#define memcpy_trailing(dst, first_dst_member, last_dst_member,  
>  \
> +   src, first_src_member) \
> +({\
> +   size_t __src_offset = offsetof(typeof(*(src)), first_src_member);  \
> +   size_t __src_size = sizeof(*(src)) - __src_offset; \
> +   size_t __dst_offset = offsetof(typeof(*(dst)), first_dst_member);  \
> +   size_t __dst_size = offsetofend(typeof(*(dst)), last_dst_member) - \
> +   __dst_offset;  \
> +   BUILD_BUG_ON(__src_size != __dst_size);\
> +   __builtin_memcpy((u8 *)(dst) + __dst_offset,   \
> +(u8 *)(src) + __src_offset,   \
> +__dst_size);  \
> +})
> +
>  #endif
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> index 8ab58781ae13..1918e6232319 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
> @@ -465,10 +465,8 @@ static int arcturus_append_powerplay_table(struct 
> smu_context *smu)
>
> if ((smc_dpm_table->table_header.format_revision == 4) &&
> (smc_dpm_table->table_header.content_revision == 6))
> -   memcpy(&smc_pptable->MaxVoltageStepGfx,
> -  &smc_dpm_table->maxvoltagestepgfx,
> -  sizeof(*smc_dpm_table) - offsetof(struct 
> atom_smc_dpm_info_v4_6, maxvoltagestepgfx));
> -
> +   memcpy_trailing(smc_pptable, MaxVoltageStepGfx, BoardReserved,
> +   smc_dpm_table, maxvoltagestepgfx);
> return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> index 2e5d3669652b..b738042e064d 100644
> --- 

Re: [PATCH v3] drm/amdgpu/OLAND: clip the ref divider max value

2021-08-20 Thread Christian König

On 20.08.21 at 15:54, Shashank Sharma wrote:

This patch limits the ref_div_max value to 100 during the
calculation of the PLL feedback reference divider. With the current
value (128), the produced fb_ref_div value generates unstable
output at particular frequencies. The Radeon driver limits this
value to 100.

On Oland, when we try to set up mode 2048x1280@60 (a bit weird,
I know), it demands a clock of 221270 kHz. It's been observed
that the PLL calculations using values 128 and 100 are vastly
different, and look like this:

+------------------+----------+----------+
| Parameter        | AMDGPU   | Radeon   |
+------------------+----------+----------+
| Clock feedback   |          |          |
| divider max      |  128     |  100     |
| cap value        |          |          |
+------------------+----------+----------+
| ref_div_max      |  42      |  20      |
+------------------+----------+----------+
| ref_div          |  42      |  20      |
+------------------+----------+----------+
| fb_div           |  10326   |  8195    |
+------------------+----------+----------+
| fb_div           |  1024    |  163     |
+------------------+----------+----------+
| fb_dev_p         |  4       |  9       |
| frac fb_dev_p    |          |          |
+------------------+----------+----------+

With the ref_div_max value clipped at 100, the AMDGPU driver can also
drive video mode 2048x1280@60 (221 MHz) and produce proper output
without any blanking or distortion on the screen.

PS: This value was changed from 128 to 100 in Radeon driver also, here:
https://github.com/freedesktop/drm-tip/commit/4b21ce1b4b5d262e7d4656b8ececc891fc3cb806

V1:
Got acks from:
Acked-by: Alex Deucher 
Acked-by: Christian König 

V2:
- Restricting the changes only for OLAND, just to avoid any regression
   for other cards.
- Changed unsigned -> unsigned int to make checkpatch quiet.

V3: Apply the change on SI family (not only oland) (Christian)

Cc: Alex Deucher 
Cc: Christian König 
Cc: Eddy Qin 
Signed-off-by: Shashank Sharma 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c| 20 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h|  3 ++-
  drivers/gpu/drm/amd/amdgpu/atombios_crtc.c |  2 +-
  3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
index f2e20666c9c1..4eaec446b49d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
@@ -80,12 +80,17 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom, unsigned 
*den,
   * Calculate feedback and reference divider for a given post divider. Makes
   * sure we stay within the limits.
   */
-static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, unsigned 
post_div,
- unsigned fb_div_max, unsigned ref_div_max,
- unsigned *fb_div, unsigned *ref_div)
+static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int 
nom,
+ unsigned int den, unsigned int post_div,
+ unsigned int fb_div_max, unsigned int 
ref_div_max,
+ unsigned int *fb_div, unsigned int 
*ref_div)
  {
+
/* limit reference * post divider to a maximum */
-   ref_div_max = min(128 / post_div, ref_div_max);
+   if (adev->family == AMDGPU_FAMILY_SI)
+   ref_div_max = min(100 / post_div, ref_div_max);
+   else
+   ref_div_max = min(128 / post_div, ref_div_max);
  
  	/* get matching reference and feedback divider */

*ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max);
@@ -112,7 +117,8 @@ static void amdgpu_pll_get_fb_ref_div(unsigned nom, 
unsigned den, unsigned post_
   * Try to calculate the PLL parameters to generate the given frequency:
   * dot_clock = (ref_freq * feedback_div) / (ref_div * post_div)
   */
-void amdgpu_pll_compute(struct amdgpu_pll *pll,
+void amdgpu_pll_compute(struct amdgpu_device *adev,
+   struct amdgpu_pll *pll,
u32 freq,
u32 *dot_clock_p,
u32 *fb_div_p,
@@ -199,7 +205,7 @@ void amdgpu_pll_compute(struct amdgpu_pll *pll,
  
  	for (post_div = post_div_min; post_div <= post_div_max; ++post_div) {

unsigned diff;
-   amdgpu_pll_get_fb_ref_div(nom, den, post_div, fb_div_max,
+   amdgpu_pll_get_fb_ref_div(adev, nom, den, post_div, fb_div_max,
  ref_div_max, &fb_div, 

Re: [PATCH] drm/amdgpu: add missing cleanups for Polaris12 UVD/VCE on suspend

2021-08-20 Thread Alex Deucher
On Thu, Aug 19, 2021 at 10:15 PM Quan, Evan  wrote:
>
> [AMD Official Use Only]
>
>
>
>
>
>
>
> From: Lazar, Lijo 
> Sent: Thursday, August 19, 2021 10:36 PM
> To: Zhu, James ; Quan, Evan ; 
> amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Chen, Guchun 
> ; Pan, Xinhui 
> Subject: RE: [PATCH] drm/amdgpu: add missing cleanups for Polaris12 UVD/VCE 
> on suspend
>
>
>
> [AMD Official Use Only]
>
>
>
> If that is done  –
>
>
>
> +   amdgpu_device_ip_set_powergating_state(adev, 
> AMD_IP_BLOCK_TYPE_UVD,
> +  AMD_PG_STATE_GATE);
> +   amdgpu_device_ip_set_clockgating_state(adev, 
> AMD_IP_BLOCK_TYPE_UVD,
> +  AMD_CG_STATE_GATE);
>
>
>
> Usual order is CG followed by PG. It comes in the else part, so less likely 
> to happen. Nice to fix for code correctness purpose.
>
> [Quan, Evan] Thanks Lijo. Make sense to me. However, actually these code were 
> copied from amdgpu_uvd_idle_work_handler() of amdgpu_uvd.c. Same logic was 
> used there. So, maybe @Zhu, James or @Liu, Leo can share some insights about 
> this.
>

It looks like it is wrong there as well.  We should be gating the
clocks before the power.  The order is also wrong in
amdgpu_uvd_ring_begin_use(): there we need to ungate the power before
the clocks.
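
For illustration, a minimal sketch of that ordering with the existing
IP-block helpers (not an actual patch, just the sequence being
described):

static void example_uvd_idle(struct amdgpu_device *adev)
{
	/* when idling: gate clocks first, then power */
	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
					       AMD_CG_STATE_GATE);
	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
					       AMD_PG_STATE_GATE);
}

static void example_uvd_begin_use(struct amdgpu_device *adev)
{
	/* before use: ungate power first, then clocks */
	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
					       AMD_PG_STATE_UNGATE);
	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
					       AMD_CG_STATE_UNGATE);
}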

Alex


>
>
> BR
>
> Evan
>
>
>
> Thanks,
>
> Lijo
>
>
>
> From: Zhu, James 
> Sent: Thursday, August 19, 2021 7:49 PM
> To: Quan, Evan ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Chen, Guchun 
> ; Lazar, Lijo ; Pan, Xinhui 
> 
> Subject: Re: [PATCH] drm/amdgpu: add missing cleanups for Polaris12 UVD/VCE 
> on suspend
>
>
>
> [AMD Official Use Only]
>
>
>
>
>
> Why not move changes into hw_fini?
>
>
>
> Best Regards!
>
>
>
> James Zhu
>
> 
>
> From: amd-gfx  on behalf of Evan Quan 
> 
> Sent: Wednesday, August 18, 2021 11:08 PM
> To: amd-gfx@lists.freedesktop.org 
> Cc: Deucher, Alexander ; Chen, Guchun 
> ; Lazar, Lijo ; Quan, Evan 
> ; Pan, Xinhui 
> Subject: [PATCH] drm/amdgpu: add missing cleanups for Polaris12 UVD/VCE on 
> suspend
>
>
>
> Perform proper cleanups on UVD/VCE suspend: powergate enablement,
> clockgating enablement and dpm disablement. This can fix some hangs
> observed on suspending when UVD/VCE still using(e.g. issue
> "pm-suspend" when video is still playing).
>
> Change-Id: I36f39d9731e0a9638b52d5d92558b0ee9c23a9ed
> Signed-off-by: Evan Quan 
> Signed-off-by: xinhui pan 
> ---
>  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 24 
>  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 23 +++
>  2 files changed, 47 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c 
> b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> index 4eebf973a065..d0fc6ec18c29 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> @@ -554,6 +554,30 @@ static int uvd_v6_0_suspend(void *handle)
>  int r;
>  struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> +   /*
> +* Proper cleanups before halting the HW engine:
> +*   - cancel the delayed idle work
> +*   - enable powergating
> +*   - enable clockgating
> +*   - disable dpm
> +*
> +* TODO: to align with the VCN implementation, move the
> +* jobs for clockgating/powergating/dpm setting to
> +* ->set_powergating_state().
> +*/
> +   cancel_delayed_work_sync(>uvd.idle_work);
> +
> +   if (adev->pm.dpm_enabled) {
> +   amdgpu_dpm_enable_uvd(adev, false);
> +   } else {
> +   amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> +   /* shutdown the UVD block */
> +   amdgpu_device_ip_set_powergating_state(adev, 
> AMD_IP_BLOCK_TYPE_UVD,
> +  AMD_PG_STATE_GATE);
> +   amdgpu_device_ip_set_clockgating_state(adev, 
> AMD_IP_BLOCK_TYPE_UVD,
> +  AMD_CG_STATE_GATE);
> +   }
> +
>  r = uvd_v6_0_hw_fini(adev);
>  if (r)
>  return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c 
> b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> index 6d9108fa22e0..a594ade5d30a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> @@ -503,6 +503,29 @@ static int vce_v3_0_suspend(void *handle)
>  int r;
>  struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> +   /*
> +* Proper cleanups before halting the HW engine:
> +*   - cancel the delayed idle work
> +*   - enable powergating
> +*   - enable clockgating
> +*   - disable dpm
> +*
> +* TODO: to align with the VCN implementation, move the
> +* jobs for clockgating/powergating/dpm setting to
> +* ->set_powergating_state().
> +   

Re: [PATCH v3 5/6] drm/amd/display: Add DP 2.0 BIOS and DMUB Support

2021-08-20 Thread Kazlauskas, Nicholas

On 2021-08-19 2:58 p.m., Fangzhi Zuo wrote:

Parse DP2 encoder caps and hpo instance from bios

Signed-off-by: Fangzhi Zuo 
---
  drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 10 ++
  drivers/gpu/drm/amd/display/dc/bios/command_table2.c   | 10 ++
  .../drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c  |  4 
  drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h   |  6 ++
  drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h|  4 
  .../gpu/drm/amd/display/include/bios_parser_types.h| 10 ++
  drivers/gpu/drm/amd/include/atomfirmware.h |  6 ++
  7 files changed, 50 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c 
b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 6dbde74c1e06..cdb5c027411a 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1604,6 +1604,16 @@ static enum bp_result bios_parser_get_encoder_cap_info(
ATOM_ENCODER_CAP_RECORD_HBR3_EN) ? 1 : 0;
info->HDMI_6GB_EN = (record->encodercaps &
ATOM_ENCODER_CAP_RECORD_HDMI6Gbps_EN) ? 1 : 0;
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+   info->IS_DP2_CAPABLE = (record->encodercaps &
+   ATOM_ENCODER_CAP_RECORD_DP2) ? 1 : 0;
+   info->DP_UHBR10_EN = (record->encodercaps &
+   ATOM_ENCODER_CAP_RECORD_UHBR10_EN) ? 1 : 0;
+   info->DP_UHBR13_5_EN = (record->encodercaps &
+   ATOM_ENCODER_CAP_RECORD_UHBR13_5_EN) ? 1 : 0;
+   info->DP_UHBR20_EN = (record->encodercaps &
+   ATOM_ENCODER_CAP_RECORD_UHBR20_EN) ? 1 : 0;
+#endif
info->DP_IS_USB_C = (record->encodercaps &
ATOM_ENCODER_CAP_RECORD_USB_C_TYPE) ? 1 : 0;
  
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c

index f1f672a997d7..6e333b4af7d6 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -340,6 +340,13 @@ static enum bp_result transmitter_control_v1_7(
const struct command_table_helper *cmd = bp->cmd_helper;
struct dmub_dig_transmitter_control_data_v1_7 dig_v1_7 = {0};
  
+#if defined(CONFIG_DRM_AMD_DC_DCN)

+   uint8_t hpo_instance = (uint8_t)cntl->hpo_engine_id - ENGINE_ID_HPO_0;
+
+   if (dc_is_dp_signal(cntl->signal))
+   hpo_instance = (uint8_t)cntl->hpo_engine_id - 
ENGINE_ID_HPO_DP_0;
+#endif
+
dig_v1_7.phyid = cmd->phy_id_to_atom(cntl->transmitter);
dig_v1_7.action = (uint8_t)cntl->action;
  
@@ -353,6 +360,9 @@ static enum bp_result transmitter_control_v1_7(

dig_v1_7.hpdsel = cmd->hpd_sel_to_atom(cntl->hpd_sel);
dig_v1_7.digfe_sel = cmd->dig_encoder_sel_to_atom(cntl->engine_id);
dig_v1_7.connobj_id = (uint8_t)cntl->connector_obj_id.id;
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+   dig_v1_7.HPO_instance = hpo_instance;
+#endif
dig_v1_7.symclk_units.symclk_10khz = cntl->pixel_clock/10;
  
  	if (cntl->action == TRANSMITTER_CONTROL_ENABLE ||

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c
index 46ea39f5ef8d..6f3c2fb60790 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c
@@ -192,6 +192,10 @@ void dcn30_link_encoder_construct(
enc10->base.features.flags.bits.IS_HBR3_CAPABLE =
bp_cap_info.DP_HBR3_EN;
enc10->base.features.flags.bits.HDMI_6GB_EN = 
bp_cap_info.HDMI_6GB_EN;
+   enc10->base.features.flags.bits.IS_DP2_CAPABLE = 
bp_cap_info.IS_DP2_CAPABLE;
+   enc10->base.features.flags.bits.IS_UHBR10_CAPABLE = 
bp_cap_info.DP_UHBR10_EN;
+   enc10->base.features.flags.bits.IS_UHBR13_5_CAPABLE = 
bp_cap_info.DP_UHBR13_5_EN;
+   enc10->base.features.flags.bits.IS_UHBR20_CAPABLE = 
bp_cap_info.DP_UHBR20_EN;


Please drop the DCN guards around this section - don't want to modify 
the bit field structure based on DCN vs DCE only.



enc10->base.features.flags.bits.DP_IS_USB_C =
bp_cap_info.DP_IS_USB_C;
} else {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h 
b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
index fa3a725e11dc..b99efcf4712f 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
@@ -59,6 +59,12 @@ struct encoder_feature_support {
uint32_t IS_TPS3_CAPABLE:1;
uint32_t IS_TPS4_CAPABLE:1;
uint32_t HDMI_6GB_EN:1;
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+   uint32_t IS_DP2_CAPABLE:1;
+   

Re: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

2021-08-20 Thread Andrey Grodzovsky



On 2021-08-20 3:12 a.m., Liu, Monk wrote:

[AMD Official Use Only]

@Daniel Vetter @Grodzovsky, Andrey @Koenig, Christian
  
Do you have any concern on the kthread_park() approach ?


Theoretically speaking sched_main shall run there exclusively with job_timeout 
since they both touches jobs, and stop scheduler during job_timeout won't 
impact performance since in that scenario
There was already something wrong/stuck on that ring/scheduler



Regarding the last paragraph, and specifically the claim that there was
already something wrong if the TO handler starts execution: I'm not sure
about this, and I wonder if we have a potential bug here. When we start
the timeout timer in drm_sched_job_begin we do it for each new incoming
job. In a constant, rapid stream of jobs, each new job coming in will
try to start the timer, but most of the time this operation just bails
out because there is already a pending timer from one of the previous
jobs, which cancels out any new ones [1]. So when the TO handler does
eventually execute, it's not because something is wrong but simply
because the TO has expired. If in this case the pending list is not
empty, a false TDR will be triggered. I think long ago we used a TO
handler per job and not per scheduler; that would solve this problem
but hurt the serialization issue we are trying to solve. So I'm not
sure what to do.


[1] - 
https://elixir.bootlin.com/linux/v5.14-rc1/source/kernel/workqueue.c#L1665
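
Just to make the scenario concrete, a sketch of the kind of check that
could filter such a false TDR (purely illustrative, not a proposed
patch; the per-job begin timestamp is hypothetical, drm_sched_job does
not record one today):

static bool example_job_really_hung(unsigned long job_begin_jiffies,
				    long sched_timeout)
{
	/* Only treat the oldest pending job as hung if it has itself been
	 * pending for the full timeout, instead of trusting the shared
	 * per-scheduler timer, which may have been armed by an older job. */
	return time_after(jiffies, job_begin_jiffies + sched_timeout);
}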


Andrey



Thanks

--
Monk Liu | Cloud-GPU Core team
--

-Original Message-
From: Liu, Monk
Sent: Thursday, August 19, 2021 6:26 PM
To: Daniel Vetter ; Grodzovsky, Andrey 

Cc: Alex Deucher ; Chen, JingWen ; Maling list - 
DRI developers ; amd-gfx list ; 
Koenig, Christian 
Subject: RE: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

[AMD Official Use Only]

Hi Daniel


Why can't we stop the scheduler thread first, so that there's guaranteed no 
race? I've recently had a lot of discussions with panfrost folks about their 
reset that spawns across engines, and without stopping the scheduler thread 
first before you touch anything it's just plain impossible.

Yeah we had this though as well in our mind.

Our second approach is to call ktrhead_stop() in job_timedout() routine so that  the 
"bad" job is guaranteed to be used without scheduler's touching or freeing, 
Check this sample patch one as well please:

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index a2a9536..50a49cb 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -319,17 +319,12 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
 sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
  
 /* Protects against concurrent deletion in drm_sched_get_cleanup_job */

+   kthread_park(sched->thread);
 spin_lock(&sched->job_list_lock);
 job = list_first_entry_or_null(&sched->pending_list,
struct drm_sched_job, list);
  
 if (job) {

-   /*
-* Remove the bad job so it cannot be freed by concurrent
-* drm_sched_cleanup_jobs. It will be reinserted back after 
sched->thread
-* is parked at which point it's safe.
-*/
-   list_del_init(&job->list);
 spin_unlock(&sched->job_list_lock);
  
 status = job->sched->ops->timedout_job(job);

@@ -345,6 +340,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
 } else {
 spin_unlock(&sched->job_list_lock);
 }
+   kthread_unpark(sched->thread);
  
 if (status != DRM_GPU_SCHED_STAT_ENODEV) {

 spin_lock(&sched->job_list_lock);
@@ -393,20 +389,6 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 kthread_park(sched->thread);
  
 /*

-* Reinsert back the bad job here - now it's safe as
-* drm_sched_get_cleanup_job cannot race against us and release the
-* bad job at this point - we parked (waited for) any in progress
-* (earlier) cleanups and drm_sched_get_cleanup_job will not be called
-* now until the scheduler thread is unparked.
-*/
-   if (bad && bad->sched == sched)
-   /*
-* Add at the head of the queue to reflect it was the earliest
-* job extracted.
-*/
-   list_add(&bad->list, &sched->pending_list);
-
-   /*
  * Iterate the job list from later to  earlier one and either deactive
  * their HW callbacks or remove them from pending list if they already
  * signaled.


Thanks

--
Monk Liu | Cloud-GPU Core team
--

-Original Message-
From: Daniel Vetter 
Sent: Thursday, August 19, 

[PATCH v3] drm/amdgpu/OLAND: clip the ref divider max value

2021-08-20 Thread Shashank Sharma
This patch limits the ref_div_max value to 100 during the
calculation of the PLL feedback reference divider. With the current
value (128), the produced fb_ref_div value generates unstable
output at particular frequencies. The Radeon driver limits this
value to 100.

On Oland, when we try to set up mode 2048x1280@60 (a bit weird,
I know), it demands a clock of 221270 kHz. It's been observed
that the PLL calculations using values 128 and 100 are vastly
different, and look like this:

+------------------+----------+----------+
| Parameter        | AMDGPU   | Radeon   |
+------------------+----------+----------+
| Clock feedback   |          |          |
| divider max      |  128     |  100     |
| cap value        |          |          |
+------------------+----------+----------+
| ref_div_max      |  42      |  20      |
+------------------+----------+----------+
| ref_div          |  42      |  20      |
+------------------+----------+----------+
| fb_div           |  10326   |  8195    |
+------------------+----------+----------+
| fb_div           |  1024    |  163     |
+------------------+----------+----------+
| fb_dev_p         |  4       |  9       |
| frac fb_dev_p    |          |          |
+------------------+----------+----------+

With the ref_div_max value clipped at 100, the AMDGPU driver can also
drive video mode 2048x1280@60 (221 MHz) and produce proper output
without any blanking or distortion on the screen.

PS: This value was changed from 128 to 100 in Radeon driver also, here:
https://github.com/freedesktop/drm-tip/commit/4b21ce1b4b5d262e7d4656b8ececc891fc3cb806

V1:
Got acks from:
Acked-by: Alex Deucher 
Acked-by: Christian König 

V2:
- Restricting the changes only for OLAND, just to avoid any regression
  for other cards.
- Changed unsigned -> unsigned int to make checkpatch quiet.

V3: Apply the change on SI family (not only oland) (Christian)

Cc: Alex Deucher 
Cc: Christian König 
Cc: Eddy Qin 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c| 20 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h|  3 ++-
 drivers/gpu/drm/amd/amdgpu/atombios_crtc.c |  2 +-
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
index f2e20666c9c1..4eaec446b49d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
@@ -80,12 +80,17 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom, unsigned 
*den,
  * Calculate feedback and reference divider for a given post divider. Makes
  * sure we stay within the limits.
  */
-static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, unsigned 
post_div,
- unsigned fb_div_max, unsigned ref_div_max,
- unsigned *fb_div, unsigned *ref_div)
+static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int 
nom,
+ unsigned int den, unsigned int post_div,
+ unsigned int fb_div_max, unsigned int 
ref_div_max,
+ unsigned int *fb_div, unsigned int 
*ref_div)
 {
+
/* limit reference * post divider to a maximum */
-   ref_div_max = min(128 / post_div, ref_div_max);
+   if (adev->family == AMDGPU_FAMILY_SI)
+   ref_div_max = min(100 / post_div, ref_div_max);
+   else
+   ref_div_max = min(128 / post_div, ref_div_max);
 
/* get matching reference and feedback divider */
*ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max);
@@ -112,7 +117,8 @@ static void amdgpu_pll_get_fb_ref_div(unsigned nom, 
unsigned den, unsigned post_
  * Try to calculate the PLL parameters to generate the given frequency:
  * dot_clock = (ref_freq * feedback_div) / (ref_div * post_div)
  */
-void amdgpu_pll_compute(struct amdgpu_pll *pll,
+void amdgpu_pll_compute(struct amdgpu_device *adev,
+   struct amdgpu_pll *pll,
u32 freq,
u32 *dot_clock_p,
u32 *fb_div_p,
@@ -199,7 +205,7 @@ void amdgpu_pll_compute(struct amdgpu_pll *pll,
 
for (post_div = post_div_min; post_div <= post_div_max; ++post_div) {
unsigned diff;
-   amdgpu_pll_get_fb_ref_div(nom, den, post_div, fb_div_max,
+   amdgpu_pll_get_fb_ref_div(adev, nom, den, post_div, fb_div_max,
  ref_div_max, &fb_div, &ref_div);
diff = abs(target_clock - (pll->reference_freq * fb_div) /
   

Re: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

2021-08-20 Thread Andrey Grodzovsky

I believe we have some minor confusion here

On 2021-08-20 4:09 a.m., Jingwen Chen wrote:

Hi all,

I just submit a v3 patch according your opinion on using kthread_park
instead.

Thanks,
Jingwen
On Fri Aug 20, 2021 at 09:20:42AM +0200, Christian König wrote:

No, that perfectly works for me.

The problem we used to have with this approach was that we potentially have
multiple timeouts at the same time.

But when we serialize the timeout handling by using a single workqueue as
suggested by Daniel now as well then that isn't an issue any more.



While we do use a single workqueue by default (system_wq) for this, we
use different work items, one per scheduler, which means they still run
in parallel. I didn't see the original mail by Daniel, but from what
Christian mentioned I assume he suggested serializing all TO handlers
from all possible engines, either by using a single work item for the
TO handler or by using a single-threaded queue for all TO handlers.
So I believe it's premature to send a v3 patch without also switching
all TDR handling to actual single-threaded handling per entire ASIC;
in the case of amdgpu we actually need to consider XGMI hives as well,
so it goes beyond a single device.

Andrey
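
To illustrate the direction (a sketch only, with made-up names; not a
submitted patch): one ordered workqueue shared by all schedulers of an
ASIC or hive would make their timeout works run strictly one after
another.

static struct workqueue_struct *example_tdr_wq; /* one per ASIC or XGMI hive */

static int example_tdr_wq_init(void)
{
	example_tdr_wq = alloc_ordered_workqueue("drm-sched-tdr", 0);
	return example_tdr_wq ? 0 : -ENOMEM;
}

/* drm_sched_start_timeout() would then queue on the ordered queue
 * instead of system_wq, e.g.: */
static void example_start_timeout(struct drm_gpu_scheduler *sched)
{
	queue_delayed_work(example_tdr_wq, &sched->work_tdr, sched->timeout);
}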




Regards,
Christian.

On 20.08.21 at 09:12, Liu, Monk wrote:

[AMD Official Use Only]

@Daniel Vetter @Grodzovsky, Andrey @Koenig, Christian
Do you have any concern on the kthread_park() approach ?

Theoretically speaking sched_main shall run there exclusively with job_timeout 
since they both touches jobs, and stop scheduler during job_timeout won't 
impact performance since in that scenario
There was already something wrong/stuck on that ring/scheduler

Thanks

--
Monk Liu | Cloud-GPU Core team
--

-Original Message-
From: Liu, Monk
Sent: Thursday, August 19, 2021 6:26 PM
To: Daniel Vetter ; Grodzovsky, Andrey 

Cc: Alex Deucher ; Chen, JingWen ; Maling list - 
DRI developers ; amd-gfx list ; 
Koenig, Christian 
Subject: RE: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

[AMD Official Use Only]

Hi Daniel


Why can't we stop the scheduler thread first, so that there's guaranteed no 
race? I've recently had a lot of discussions with panfrost folks about their 
reset that spawns across engines, and without stopping the scheduler thread 
first before you touch anything it's just plain impossible.

Yeah we had this though as well in our mind.

Our second approach is to call ktrhead_stop() in job_timedout() routine so that  the 
"bad" job is guaranteed to be used without scheduler's touching or freeing, 
Check this sample patch one as well please:

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index a2a9536..50a49cb 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -319,17 +319,12 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
  sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
  /* Protects against concurrent deletion in drm_sched_get_cleanup_job 
*/
+   kthread_park(sched->thread);
  spin_lock(&sched->job_list_lock);
  job = list_first_entry_or_null(&sched->pending_list,
 struct drm_sched_job, list);
  if (job) {
-   /*
-* Remove the bad job so it cannot be freed by concurrent
-* drm_sched_cleanup_jobs. It will be reinserted back after 
sched->thread
-* is parked at which point it's safe.
-*/
-   list_del_init(&job->list);
  spin_unlock(&sched->job_list_lock);
  status = job->sched->ops->timedout_job(job);
@@ -345,6 +340,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
  } else {
  spin_unlock(&sched->job_list_lock);
  }
+   kthread_unpark(sched->thread);
  if (status != DRM_GPU_SCHED_STAT_ENODEV) {
  spin_lock(&sched->job_list_lock);
@@ -393,20 +389,6 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
  kthread_park(sched->thread);
  /*
-* Reinsert back the bad job here - now it's safe as
-* drm_sched_get_cleanup_job cannot race against us and release the
-* bad job at this point - we parked (waited for) any in progress
-* (earlier) cleanups and drm_sched_get_cleanup_job will not be called
-* now until the scheduler thread is unparked.
-*/
-   if (bad && bad->sched == sched)
-   /*
-* Add at the head of the queue to reflect it was the earliest
-* job extracted.
-*/
-   list_add(&bad->list, &sched->pending_list);
-
-   /*
   * Iterate the job list from later to  earlier one and either deactive
   * their HW callbacks or remove them from pending list if they 

RE: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value

2021-08-20 Thread Sharma, Shashank
[Public]

Thanks Alex, 

Regards
Shashank
-Original Message-
From: Alex Deucher  
Sent: Friday, August 20, 2021 6:42 PM
To: Sharma, Shashank 
Cc: Koenig, Christian ; 
amd-gfx@lists.freedesktop.org; Deucher, Alexander 
Subject: Re: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value

It's SI.

Alex

On Fri, Aug 20, 2021 at 4:08 AM Sharma, Shashank  
wrote:
>
> [AMD Official Use Only]
>
> No problem, let me dig for this information.
>
> Regards
> Shashank
> -Original Message-
> From: Koenig, Christian 
> Sent: Friday, August 20, 2021 1:36 PM
> To: Sharma, Shashank ; 
> amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: Re: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max 
> value
>
> Uff, I think that was SI but could be CIK as well.
>
> We have a table for this somewhere, but I don't have it at hand.
>
> Regards,
> Christian.
>
> On 20.08.21 at 09:43, Sharma, Shashank wrote:
> > [AMD Official Use Only]
> >
> > Agree, on the similar note, which Gen is OLAND BTW  ?
> >
> > Regards
> > Shashank
> > -Original Message-
> > From: Koenig, Christian 
> > Sent: Friday, August 20, 2021 12:08 PM
> > To: Sharma, Shashank ; 
> > amd-gfx@lists.freedesktop.org
> > Cc: Deucher, Alexander 
> > Subject: Re: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max 
> > value
> >
> > Sounds like a good idea to me, but I would limit this generally or at least 
> > for the whole generation and not just one particular chipset.
> >
> > Regards,
> > Christian.
> >
> > On 20.08.21 at 08:05, Sharma, Shashank wrote:
> >>  From 4841e5ba60e33ff798bde6cb69fbd7e137b6db9c Mon Sep 17 00:00:00
> >> 2001
> >> From: Shashank Sharma 
> >> Date: Fri, 20 Aug 2021 10:20:02 +0530
> >> Subject: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max 
> >> value
> >> MIME-Version: 1.0
> >> Content-Type: text/plain; charset=UTF-8
> >> Content-Transfer-Encoding: 8bit
> >>
> >> This patch limits the ref_div_max value to 100, during the 
> >> calculation of PLL feedback reference divider. With current value 
> >> (128), the produced fb_ref_div value generates unstable output at 
> >> particular frequencies. Radeon driver limits this value at 100.
> >>
> >> On Oland, when we try to setup mode 2048x1280@60 (a bit weird, I 
> >> know), it demands a clock of 221270 Khz. It's been observed that 
> >> the PLL calculations using values 128 and 100 are vastly different, 
> >> and look like this:
> >>
> >> +------------------+----------+----------+
> >> | Parameter        | AMDGPU   | Radeon   |
> >> +------------------+----------+----------+
> >> | Clock feedback   |          |          |
> >> | divider max      |  128     |  100     |
> >> | cap value        |          |          |
> >> +------------------+----------+----------+
> >> | ref_div_max      |  42      |  20      |
> >> +------------------+----------+----------+
> >> | ref_div          |  42      |  20      |
> >> +------------------+----------+----------+
> >> | fb_div           |  10326   |  8195    |
> >> +------------------+----------+----------+
> >> | fb_div           |  1024    |  163     |
> >> +------------------+----------+----------+
> >> | fb_dev_p         |  4       |  9       |
> >> | frac fb_dev_p    |          |          |
> >> +------------------+----------+----------+
> >>
> >> With ref_div_max value clipped at 100, AMDGPU driver can also drive 
> >> videmode 2048x1280@60 (221Mhz) and produce proper output without 
> >> any blanking and distortion on the screen.
> >>
> >> PS: This value was changed from 128 to 100 in Radeon driver also, here:
> >> https://github.com/freedesktop/drm-tip/commit/4b21ce1b4b5d262e7d4656b8ececc891fc3cb806
> >>
> >>
> >> V1:
> >> Got acks from:
> >> Acked-by: Alex Deucher 
> >> Acked-by: Christian König 
> >>
> >> V2:
> >> - Restricting the changes only for OLAND, just to avoid any 
> >> regression
> >>for other cards.
> >> - Changed unsigned -> unsigned int to make checkpatch quiet.
> >>
> >> Cc: Alex Deucher 
> >> Cc: Christian König 
> >> Cc: Eddy Qin 
> >> Signed-off-by: Shashank Sharma 
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c| 20
> >> +---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h|  3 ++-
> >> 

Re: [PATCH] drm/amdgpu: use the preferred pin domain after the check

2021-08-20 Thread Alex Deucher
On Fri, Aug 20, 2021 at 4:30 AM Christian König
 wrote:
>
> For some reason we run into an use case where a BO is already pinned
> into GTT, but should be pinned into VRAM|GTT again.
>
> Handle that case gracefully as well.
>
> Signed-off-by: Christian König 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 795fa7445abe..92c8e6e7f346 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -920,11 +920,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
> domain,
> return -EINVAL;
> }
>
> -   /* This assumes only APU display buffers are pinned with (VRAM|GTT).
> -* See function amdgpu_display_supported_domains()
> -*/
> -   domain = amdgpu_bo_get_preferred_pin_domain(adev, domain);
> -
> if (bo->tbo.pin_count) {
> uint32_t mem_type = bo->tbo.resource->mem_type;
> uint32_t mem_flags = bo->tbo.resource->placement;
> @@ -949,6 +944,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
> domain,
> return 0;
> }
>
> +   /* This assumes only APU display buffers are pinned with (VRAM|GTT).
> +* See function amdgpu_display_supported_domains()
> +*/
> +   domain = amdgpu_bo_get_preferred_pin_domain(adev, domain);
> +
> if (bo->tbo.base.import_attach)
> dma_buf_pin(bo->tbo.base.import_attach);
>
> --
> 2.25.1
>


Re: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value

2021-08-20 Thread Alex Deucher
It's SI.

Alex

On Fri, Aug 20, 2021 at 4:08 AM Sharma, Shashank
 wrote:
>
> [AMD Official Use Only]
>
> No problem, let me dig for this information.
>
> Regards
> Shashank
> -Original Message-
> From: Koenig, Christian 
> Sent: Friday, August 20, 2021 1:36 PM
> To: Sharma, Shashank ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: Re: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value
>
> Uff, I think that was SI but could be CIK as well.
>
> We have a table for this somewhere, but I don't have it at hand.
>
> Regards,
> Christian.
>
> On 20.08.21 at 09:43, Sharma, Shashank wrote:
> > [AMD Official Use Only]
> >
> > Agree, on the similar note, which Gen is OLAND BTW  ?
> >
> > Regards
> > Shashank
> > -Original Message-
> > From: Koenig, Christian 
> > Sent: Friday, August 20, 2021 12:08 PM
> > To: Sharma, Shashank ;
> > amd-gfx@lists.freedesktop.org
> > Cc: Deucher, Alexander 
> > Subject: Re: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max
> > value
> >
> > Sounds like a good idea to me, but I would limit this generally or at least 
> > for the whole generation and not just one particular chipset.
> >
> > Regards,
> > Christian.
> >
> > On 20.08.21 at 08:05, Sharma, Shashank wrote:
> >>  From 4841e5ba60e33ff798bde6cb69fbd7e137b6db9c Mon Sep 17 00:00:00
> >> 2001
> >> From: Shashank Sharma 
> >> Date: Fri, 20 Aug 2021 10:20:02 +0530
> >> Subject: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value
> >> MIME-Version: 1.0
> >> Content-Type: text/plain; charset=UTF-8
> >> Content-Transfer-Encoding: 8bit
> >>
> >> This patch limits the ref_div_max value to 100, during the
> >> calculation of PLL feedback reference divider. With current value
> >> (128), the produced fb_ref_div value generates unstable output at
> >> particular frequencies. Radeon driver limits this value at 100.
> >>
> >> On Oland, when we try to setup mode 2048x1280@60 (a bit weird, I
> >> know), it demands a clock of 221270 Khz. It's been observed that the
> >> PLL calculations using values 128 and 100 are vastly different, and
> >> look like this:
> >>
> >> +------------------+----------+----------+
> >> | Parameter        | AMDGPU   | Radeon   |
> >> +------------------+----------+----------+
> >> | Clock feedback   |          |          |
> >> | divider max      |  128     |  100     |
> >> | cap value        |          |          |
> >> +------------------+----------+----------+
> >> | ref_div_max      |  42      |  20      |
> >> +------------------+----------+----------+
> >> | ref_div          |  42      |  20      |
> >> +------------------+----------+----------+
> >> | fb_div           |  10326   |  8195    |
> >> +------------------+----------+----------+
> >> | fb_div           |  1024    |  163     |
> >> +------------------+----------+----------+
> >> | fb_dev_p         |  4       |  9       |
> >> | frac fb_dev_p    |          |          |
> >> +------------------+----------+----------+
> >>
> >> With ref_div_max value clipped at 100, AMDGPU driver can also drive
> >> videmode 2048x1280@60 (221Mhz) and produce proper output without any
> >> blanking and distortion on the screen.
> >>
> >> PS: This value was changed from 128 to 100 in Radeon driver also, here:
> >> https://github.com/freedesktop/drm-tip/commit/4b21ce1b4b5d262e7d4656b8ececc891fc3cb806
> >>
> >>
> >> V1:
> >> Got acks from:
> >> Acked-by: Alex Deucher 
> >> Acked-by: Christian König 
> >>
> >> V2:
> >> - Restricting the changes only for OLAND, just to avoid any
> >> regression
> >>for other cards.
> >> - Changed unsigned -> unsigned int to make checkpatch quiet.
> >>
> >> Cc: Alex Deucher 
> >> Cc: Christian König 
> >> Cc: Eddy Qin 
> >> Signed-off-by: Shashank Sharma 
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c| 20
> >> +---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h|  3 ++-
> >>   drivers/gpu/drm/amd/amdgpu/atombios_crtc.c |  2 +-
> >>   3 files changed, 16 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
> >> index f2e20666c9c1..6d04c1d25bfb 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
> >> @@ -80,12 +80,17 @@ static void amdgpu_pll_reduce_ratio(unsigned
> >> *nom, unsigned *den,
> >>* Calculate feedback and reference divider for a given post divider.
> >> Makes
> >>* sure we stay within the limits.
> >>*/
> >> -static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den,
> >> unsigned post_div,
> >> - 

Re: [PATCH v6 08/13] mm: call pgmap->ops->page_free for DEVICE_GENERIC pages

2021-08-20 Thread Christoph Hellwig
On Tue, Aug 17, 2021 at 11:44:54AM -0400, Felix Kuehling wrote:
> >> That's a good catch. Existing drivers shouldn't need a page_free
> >> callback if they didn't have one before. That means we need to add a
> >> NULL-pointer check in free_device_page.
> > Also the other state clearing (__ClearPageWaiters/mem_cgroup_uncharge/
> > ->mapping = NULL).
> >
> > In many ways this seems like you want to bring back the DEVICE_PUBLIC
> > pgmap type that was removed a while ago due to the lack of users
> > instead of overloading the generic type.
> 
> I think so. I'm not clear about how DEVICE_PUBLIC differed from what
> DEVICE_GENERIC is today. As I understand it, DEVICE_PUBLIC was removed
> because it was unused and also known to be broken in some ways.
> DEVICE_GENERIC seemed close enough to what we need, other than not being
> supported in the migration helpers.
> 
> Would you see benefit in re-introducing DEVICE_PUBLIC as a distinct
> memory type from DEVICE_GENERIC? What would be the benefits of making
> that distinction?

The old DEVICE_PUBLIC mostly different in that it allowed the page
to be returned from vm_normal_page, which I think was horribly buggy.

But the point is not to bring back these old semantics.  The idea
is to be able to differentiate between your new coherent on-device
memory and the existing DEVICE_GENERIC.  That is, call the
code in free_devmap_managed_page that is currently only used
for device private pages also for your new public device pages, without
affecting the devdax and xen use cases.
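
A rough sketch of that dispatch, just to make the idea concrete
(illustrative only: MEMORY_DEVICE_PUBLIC is the hypothetical
re-introduced type, and free_device_page() is the helper name used
earlier in this thread, not necessarily what upstream ends up with):

static void example_free_devmap_managed_page(struct page *page)
{
	switch (page->pgmap->type) {
	case MEMORY_DEVICE_PRIVATE:
	case MEMORY_DEVICE_PUBLIC:	/* hypothetical coherent device memory */
		/* clear page state and hand the page back to the driver */
		free_device_page(page);
		break;
	default:
		/* devdax, xen, plain DEVICE_GENERIC: keep current behaviour */
		break;
	}
}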


Re: [PATCH v6 02/13] mm: remove extra ZONE_DEVICE struct page refcount

2021-08-20 Thread Christoph Hellwig
On Thu, Aug 19, 2021 at 03:59:56PM -0400, Felix Kuehling wrote:
> I got lost trying to understand how DAX counts page references and how
> the PTE_SPECIAL option affects that. Theodore, can you help with this?
> Is there an easy way to test without CONFIG_ARCH_HAS_PTE_SPECIAL on x86,
> or do we need to test on a CPU architecture that doesn't support this
> feature?

I think the right answer is to simply disallow ZONE_DEVICE pages
if ARCH_HAS_PTE_SPECIAL is not supported.  ARCH_HAS_PTE_SPECIAL is
supported by all modern architecture ports that can make use of
ZONE_DEVICE / dev_pagemap, so we can avoid this pocket of barely
testable code entirely:


diff --git a/mm/Kconfig b/mm/Kconfig
index 40a9bfcd5062e1..2823bbfd1c8c70 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -775,6 +775,7 @@ config ZONE_DMA32
 
 config ZONE_DEVICE
bool "Device memory (pmem, HMM, etc...) hotplug support"
+   depends on ARCH_HAS_PTE_SPECIAL
depends on MEMORY_HOTPLUG
depends on MEMORY_HOTREMOVE
depends on SPARSEMEM_VMEMMAP


Re: [PATCH v6 02/13] mm: remove extra ZONE_DEVICE struct page refcount

2021-08-20 Thread Christoph Hellwig
On Wed, Aug 18, 2021 at 12:28:30PM -0700, Ralph Campbell wrote:
> Did you test on a system without CONFIG_ARCH_HAS_PTE_SPECIAL defined?
> In that case, mmap() of a DAX device will call insert_page() which calls
> get_page() which would trigger VM_BUG_ON_PAGE().

__vm_insert_mixed still ends up calling insert_pfn for the
!CONFIG_ARCH_HAS_PTE_SPECIAL case if pfn_t_devmap() is true, which it
should be for DAX.  (And as said in my other mail, I suspect we should
disallow that case anyway, as no one can test it in practice.)
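
For reference, the condition in question boils down to something like
this (paraphrased from mm/memory.c; see __vm_insert_mixed() for the
authoritative logic):

static bool example_uses_insert_page(pfn_t pfn)
{
	/* The insert_page()/get_page() path is only taken when all of these
	 * hold; a devmap (DAX) pfn fails the !pfn_t_devmap() test and goes
	 * through insert_pfn() instead, even without ARCH_HAS_PTE_SPECIAL. */
	return !IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) &&
	       !pfn_t_devmap(pfn) && pfn_t_valid(pfn);
}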


Re: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

2021-08-20 Thread Jingwen Chen
Hi all,

I just submitted a v3 patch according to your opinion on using kthread_park
instead.

Thanks,
Jingwen
On Fri Aug 20, 2021 at 09:20:42AM +0200, Christian König wrote:
> No, that perfectly works for me.
> 
> The problem we used to have with this approach was that we potentially have
> multiple timeouts at the same time.
> 
> But when we serialize the timeout handling by using a single workqueue,
> as Daniel now suggests as well, that isn't an issue any more.
> 
> Regards,
> Christian.
> 
> Am 20.08.21 um 09:12 schrieb Liu, Monk:
> > [AMD Official Use Only]
> > 
> > @Daniel Vetter @Grodzovsky, Andrey @Koenig, Christian
> > Do you have any concern on the kthread_park() approach ?
> > 
> > Theoretically speaking, sched_main shall run there exclusively with
> > job_timeout since they both touch jobs, and stopping the scheduler during
> > job_timeout won't impact performance since in that scenario
> > there was already something wrong/stuck on that ring/scheduler.
> > 
> > Thanks
> > 
> > --
> > Monk Liu | Cloud-GPU Core team
> > --
> > 
> > -Original Message-
> > From: Liu, Monk
> > Sent: Thursday, August 19, 2021 6:26 PM
> > To: Daniel Vetter ; Grodzovsky, Andrey 
> > 
> > Cc: Alex Deucher ; Chen, JingWen 
> > ; Maling list - DRI developers 
> > ; amd-gfx list 
> > ; Koenig, Christian 
> > 
> > Subject: RE: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad 
> > job."
> > 
> > [AMD Official Use Only]
> > 
> > Hi Daniel
> > 
> > > > Why can't we stop the scheduler thread first, so that there's 
> > > > guaranteed no race? I've recently had a lot of discussions with 
> > > > panfrost folks about their reset that spawns across engines, and 
> > > > without stopping the scheduler thread first before you touch anything 
> > > > it's just plain impossible.
> > Yeah we had this thought as well in our mind.
> > 
> > Our second approach is to call kthread_stop() in the job_timedout() routine so
> > that the "bad" job is guaranteed to be used without the scheduler touching
> > or freeing it. Check this sample patch as well please:
> > 
> > diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
> > b/drivers/gpu/drm/scheduler/sched_main.c
> > index a2a9536..50a49cb 100644
> > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > @@ -319,17 +319,12 @@ static void drm_sched_job_timedout(struct work_struct 
> > *work)
> >  sched = container_of(work, struct drm_gpu_scheduler, 
> > work_tdr.work);
> >  /* Protects against concurrent deletion in 
> > drm_sched_get_cleanup_job */
> > +   kthread_park(sched->thread);
> >  spin_lock(>job_list_lock);
> >  job = list_first_entry_or_null(>pending_list,
> > struct drm_sched_job, list);
> >  if (job) {
> > -   /*
> > -* Remove the bad job so it cannot be freed by concurrent
> > -* drm_sched_cleanup_jobs. It will be reinserted back after 
> > sched->thread
> > -* is parked at which point it's safe.
> > -*/
> > -   list_del_init(>list);
> >  spin_unlock(>job_list_lock);
> >  status = job->sched->ops->timedout_job(job);
> > @@ -345,6 +340,7 @@ static void drm_sched_job_timedout(struct work_struct 
> > *work)
> >  } else {
> >  spin_unlock(>job_list_lock);
> >  }
> > +   kthread_unpark(sched->thread);
> >  if (status != DRM_GPU_SCHED_STAT_ENODEV) {
> >  spin_lock(>job_list_lock); @@ -393,20 +389,6 @@ 
> > void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job 
> > *bad)
> >  kthread_park(sched->thread);
> >  /*
> > -* Reinsert back the bad job here - now it's safe as
> > -* drm_sched_get_cleanup_job cannot race against us and release the
> > -* bad job at this point - we parked (waited for) any in progress
> > -* (earlier) cleanups and drm_sched_get_cleanup_job will not be 
> > called
> > -* now until the scheduler thread is unparked.
> > -*/
> > -   if (bad && bad->sched == sched)
> > -   /*
> > -* Add at the head of the queue to reflect it was the 
> > earliest
> > -* job extracted.
> > -*/
> > -   list_add(>list, >pending_list);
> > -
> > -   /*
> >   * Iterate the job list from later to  earlier one and either 
> > deactive
> >   * their HW callbacks or remove them from pending list if they 
> > already
> >   * signaled.
> > 
> > 
> > Thanks
> > 
> > --
> > Monk Liu | Cloud-GPU Core team
> > --
> > 
> > -Original Message-
> > From: Daniel Vetter 
> > Sent: Thursday, August 19, 2021 5:31 PM
> > To: Grodzovsky, Andrey 
> 

RE: [PATCH] drm/amd/pm: a quick fix for "divided by zero" error

2021-08-20 Thread Chen, Guchun
[Public]

Reviewed-by: Guchun Chen 

Regards,
Guchun

-Original Message-
From: Quan, Evan  
Sent: Friday, August 20, 2021 5:01 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Chen, Guchun 
; Teng, Rui ; Quan, Evan 

Subject: [PATCH] drm/amd/pm: a quick fix for "divided by zero" error

Considering Arcturus is a dedicated ASIC for computing, it would be more proper
to drop the support for fan speed reading and setting altogether; that's on the
TODO list. For now, guard the fan speed calculations against a zero divisor.

Change-Id: Id83a7a88f26644ba66c4fd15034b4fc861cc6901
Signed-off-by: Evan Quan 
Reported-by: Rui Teng 
---
 .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 20 ---
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c|  9 +++--
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index fbf71fc92b16..273df66cac14 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -1227,8 +1227,12 @@ static int arcturus_get_fan_speed_rpm(struct smu_context 
*smu,
 
tmp64 = (uint64_t)crystal_clock_freq * 60 * 1;
tach_status = RREG32_SOC15(THM, 0, mmCG_TACH_STATUS_ARCT);
-   do_div(tmp64, tach_status);
-   *speed = (uint32_t)tmp64;
+   if (tach_status) {
+   do_div(tmp64, tach_status);
+   *speed = (uint32_t)tmp64;
+   } else {
+   *speed = 0;
+   }
 
break;
}
@@ -1303,12 +1307,14 @@ static int arcturus_get_fan_speed_pwm(struct 
smu_context *smu,
CG_FDO_CTRL1, FMAX_DUTY100);
duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS_ARCT),
CG_THERMAL_STATUS, FDO_PWM_DUTY);
-   if (!duty100)
-   return -EINVAL;
 
-   tmp64 = (uint64_t)duty * 255;
-   do_div(tmp64, duty100);
-   *speed = MIN((uint32_t)tmp64, 255);
+   if (duty100) {
+   tmp64 = (uint64_t)duty * 255;
+   do_div(tmp64, duty100);
+   *speed = MIN((uint32_t)tmp64, 255);
+   } else {
+   *speed = 0;
+   }
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index 01b9653c39c7..87b055466a33 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -1306,8 +1306,13 @@ int smu_v11_0_get_fan_speed_rpm(struct smu_context *smu,
tmp64 = (uint64_t)crystal_clock_freq * 60 * 1;
 
tach_status = RREG32_SOC15(THM, 0, mmCG_TACH_STATUS);
-   do_div(tmp64, tach_status);
-   *speed = (uint32_t)tmp64;
+   if (tach_status) {
+   do_div(tmp64, tach_status);
+   *speed = (uint32_t)tmp64;
+   } else {
+   dev_warn_once(adev->dev, "Got zero output on CG_TACH_STATUS 
reading!\n");
+   *speed = 0;
+   }
 
return 0;
 }
--
2.29.0


[PATCH] drm/amd/pm: a quick fix for "divided by zero" error

2021-08-20 Thread Evan Quan
Considering Arcturus is a dedicated ASIC for computing, it
would be more proper to drop the support for fan speed reading
and setting altogether; that's on the TODO list. For now, guard
the fan speed calculations against a zero divisor.

Change-Id: Id83a7a88f26644ba66c4fd15034b4fc861cc6901
Signed-off-by: Evan Quan 
Reported-by: Rui Teng 
---
 .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 20 ---
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c|  9 +++--
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index fbf71fc92b16..273df66cac14 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -1227,8 +1227,12 @@ static int arcturus_get_fan_speed_rpm(struct smu_context 
*smu,
 
tmp64 = (uint64_t)crystal_clock_freq * 60 * 1;
tach_status = RREG32_SOC15(THM, 0, mmCG_TACH_STATUS_ARCT);
-   do_div(tmp64, tach_status);
-   *speed = (uint32_t)tmp64;
+   if (tach_status) {
+   do_div(tmp64, tach_status);
+   *speed = (uint32_t)tmp64;
+   } else {
+   *speed = 0;
+   }
 
break;
}
@@ -1303,12 +1307,14 @@ static int arcturus_get_fan_speed_pwm(struct 
smu_context *smu,
CG_FDO_CTRL1, FMAX_DUTY100);
duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS_ARCT),
CG_THERMAL_STATUS, FDO_PWM_DUTY);
-   if (!duty100)
-   return -EINVAL;
 
-   tmp64 = (uint64_t)duty * 255;
-   do_div(tmp64, duty100);
-   *speed = MIN((uint32_t)tmp64, 255);
+   if (duty100) {
+   tmp64 = (uint64_t)duty * 255;
+   do_div(tmp64, duty100);
+   *speed = MIN((uint32_t)tmp64, 255);
+   } else {
+   *speed = 0;
+   }
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index 01b9653c39c7..87b055466a33 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -1306,8 +1306,13 @@ int smu_v11_0_get_fan_speed_rpm(struct smu_context *smu,
tmp64 = (uint64_t)crystal_clock_freq * 60 * 1;
 
tach_status = RREG32_SOC15(THM, 0, mmCG_TACH_STATUS);
-   do_div(tmp64, tach_status);
-   *speed = (uint32_t)tmp64;
+   if (tach_status) {
+   do_div(tmp64, tach_status);
+   *speed = (uint32_t)tmp64;
+   } else {
+   dev_warn_once(adev->dev, "Got zero output on CG_TACH_STATUS 
reading!\n");
+   *speed = 0;
+   }
 
return 0;
 }
-- 
2.29.0



RE: [PATCH] drm/amdgpu: use the preferred pin domain after the check

2021-08-20 Thread Sharma, Shashank
[AMD Official Use Only]

Please feel free to use:
Reviewed-by: Shashank Sharma  

Regards
Shashank
-Original Message-
From: amd-gfx  On Behalf Of Christian 
König
Sent: Friday, August 20, 2021 2:01 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: [PATCH] drm/amdgpu: use the preferred pin domain after the check

For some reason we run into a use case where a BO is already pinned into GTT, 
but should be pinned into VRAM|GTT again.

Handle that case gracefully as well.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 795fa7445abe..92c8e6e7f346 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -920,11 +920,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
domain,
return -EINVAL;
}
 
-   /* This assumes only APU display buffers are pinned with (VRAM|GTT).
-* See function amdgpu_display_supported_domains()
-*/
-   domain = amdgpu_bo_get_preferred_pin_domain(adev, domain);
-
if (bo->tbo.pin_count) {
uint32_t mem_type = bo->tbo.resource->mem_type;
uint32_t mem_flags = bo->tbo.resource->placement; @@ -949,6 
+944,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return 0;
}
 
+   /* This assumes only APU display buffers are pinned with (VRAM|GTT).
+* See function amdgpu_display_supported_domains()
+*/
+   domain = amdgpu_bo_get_preferred_pin_domain(adev, domain);
+
if (bo->tbo.base.import_attach)
dma_buf_pin(bo->tbo.base.import_attach);
 
--
2.25.1


[PATCH] drm/amdgpu: use the preferred pin domain after the check

2021-08-20 Thread Christian König
For some reason we run into a use case where a BO is already pinned
into GTT, but should be pinned into VRAM|GTT again.

Handle that case gracefully as well.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 795fa7445abe..92c8e6e7f346 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -920,11 +920,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
domain,
return -EINVAL;
}
 
-   /* This assumes only APU display buffers are pinned with (VRAM|GTT).
-* See function amdgpu_display_supported_domains()
-*/
-   domain = amdgpu_bo_get_preferred_pin_domain(adev, domain);
-
if (bo->tbo.pin_count) {
uint32_t mem_type = bo->tbo.resource->mem_type;
uint32_t mem_flags = bo->tbo.resource->placement;
@@ -949,6 +944,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
domain,
return 0;
}
 
+   /* This assumes only APU display buffers are pinned with (VRAM|GTT).
+* See function amdgpu_display_supported_domains()
+*/
+   domain = amdgpu_bo_get_preferred_pin_domain(adev, domain);
+
if (bo->tbo.base.import_attach)
dma_buf_pin(bo->tbo.base.import_attach);
 
-- 
2.25.1



RE: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value

2021-08-20 Thread Sharma, Shashank
[AMD Official Use Only]

No problem, let me dig for this information. 

Regards
Shashank
-Original Message-
From: Koenig, Christian  
Sent: Friday, August 20, 2021 1:36 PM
To: Sharma, Shashank ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: Re: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value

Uff, I think that was SI but could be CIK as well.

We have a table for this somewhere, but I don't have it at hand.

Regards,
Christian.
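
If the cap does get applied per generation rather than per chip, the check could
look roughly like this; a sketch only, assuming Oland's SI family is the right
granularity (AMDGPU_FAMILY_SI), not the actual patch:

static unsigned int pll_ref_div_cap(struct amdgpu_device *adev,
				    unsigned int post_div,
				    unsigned int ref_div_max)
{
	/* hypothetical per-generation cap instead of a per-chip check */
	unsigned int cap = (adev->family == AMDGPU_FAMILY_SI) ? 100 : 128;

	return min(cap / post_div, ref_div_max);
}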

Am 20.08.21 um 09:43 schrieb Sharma, Shashank:
> [AMD Official Use Only]
>
> Agree. On a similar note, which Gen is OLAND BTW?
>
> Regards
> Shashank
> -Original Message-
> From: Koenig, Christian 
> Sent: Friday, August 20, 2021 12:08 PM
> To: Sharma, Shashank ; 
> amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander 
> Subject: Re: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max 
> value
>
> Sounds like a good idea to me, but I would limit this generally or at least 
> for the whole generation and not just one particular chipset.
>
> Regards,
> Christian.
>
> Am 20.08.21 um 08:05 schrieb Sharma, Shashank:
>>  From 4841e5ba60e33ff798bde6cb69fbd7e137b6db9c Mon Sep 17 00:00:00 
>> 2001
>> From: Shashank Sharma 
>> Date: Fri, 20 Aug 2021 10:20:02 +0530
>> Subject: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value
>> MIME-Version: 1.0
>> Content-Type: text/plain; charset=UTF-8
>> Content-Transfer-Encoding: 8bit
>>
>> This patch limits the ref_div_max value to 100, during the 
>> calculation of PLL feedback reference divider. With current value 
>> (128), the produced fb_ref_div value generates unstable output at 
>> particular frequencies. Radeon driver limits this value at 100.
>>
>> On Oland, when we try to setup mode 2048x1280@60 (a bit weird, I 
>> know), it demands a clock of 221270 Khz. It's been observed that the 
>> PLL calculations using values 128 and 100 are vastly different, and 
>> look like this:
>>
>> +-----------------+----------+---------+
>> |Parameter        |AMDGPU    |Radeon   |
>> +-----------------+----------+---------+
>> |Clock feedback   |          |         |
>> |divider max      |  128     |  100    |
>> |cap value        |          |         |
>> +-----------------+----------+---------+
>> |ref_div_max      |  42      |  20     |
>> +-----------------+----------+---------+
>> |ref_div          |  42      |  20     |
>> +-----------------+----------+---------+
>> |fb_div           |  10326   |  8195   |
>> +-----------------+----------+---------+
>> |fb_div           |  1024    |  163    |
>> +-----------------+----------+---------+
>> |fb_dev_p         |  4       |  9      |
>> |frac fb_dev_p    |          |         |
>> +-----------------+----------+---------+
>>
>> With ref_div_max value clipped at 100, AMDGPU driver can also drive 
>> video mode 2048x1280@60 (221 MHz) and produce proper output without any 
>> blanking and distortion on the screen.
>>
>> PS: This value was changed from 128 to 100 in Radeon driver also, here:
>> https://github.com/freedesktop/drm-tip/commit/4b21ce1b4b5d262e7d4656b
>> 8
>> ececc891fc3cb806
>>
>>
>> V1:
>> Got acks from:
>> Acked-by: Alex Deucher 
>> Acked-by: Christian König 
>>
>> V2:
>> - Restricting the changes only for OLAND, just to avoid any 
>> regression
>>    for other cards.
>> - Changed unsigned -> unsigned int to make checkpatch quiet.
>>
>> Cc: Alex Deucher 
>> Cc: Christian König 
>> Cc: Eddy Qin 
>> Signed-off-by: Shashank Sharma 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c    | 20 
>> +---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h    |  3 ++-
>>   drivers/gpu/drm/amd/amdgpu/atombios_crtc.c |  2 +-
>>   3 files changed, 16 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
>> index f2e20666c9c1..6d04c1d25bfb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
>> @@ -80,12 +80,17 @@ static void amdgpu_pll_reduce_ratio(unsigned 
>> *nom, unsigned *den,
>>    * Calculate feedback and reference divider for a given post divider.
>> Makes
>>    * sure we stay within the limits.
>>    */
>> -static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, 
>> unsigned post_div,
>> -  unsigned fb_div_max, unsigned ref_div_max,
>> -  unsigned *fb_div, unsigned *ref_div)
>> +static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev,
>> unsigned int nom,
>> +  unsigned int den, unsigned int post_div,
>> +  unsigned int fb_div_max, 

[PATCH v3] Revert "drm/scheduler: Avoid accessing freed bad job."

2021-08-20 Thread Jingwen Chen
[Why]
For the bailing job, this commit will delete it from the pending list, thus the
bailing job will never have a chance to be resubmitted, even in advance
tdr mode.

[How]
After embedding the hw_fence into amdgpu_job is done, the race condition that
this commit tries to work around is completely solved. So revert this
commit.
This reverts commit 135517d3565b48f4def3b1b82008bc17eb5d1c90.
v2:
add dma_fence_get/put() around timedout_job to avoid concurrent deletion
while processing timedout_job
v3:
park sched->thread instead during timedout_job.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/scheduler/sched_main.c | 22 ++
 1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index a2a953693b45..c187fd3a6bb6 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -319,17 +319,12 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
 
/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
+   kthread_park(sched->thread);
spin_lock(&sched->job_list_lock);
job = list_first_entry_or_null(&sched->pending_list,
   struct drm_sched_job, list);
 
if (job) {
-   /*
-* Remove the bad job so it cannot be freed by concurrent
-* drm_sched_cleanup_jobs. It will be reinserted back after 
sched->thread
-* is parked at which point it's safe.
-*/
-   list_del_init(&job->list);
spin_unlock(&sched->job_list_lock);
 
status = job->sched->ops->timedout_job(job);
@@ -345,6 +340,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
} else {
spin_unlock(&sched->job_list_lock);
}
+   kthread_unpark(sched->thread);
 
if (status != DRM_GPU_SCHED_STAT_ENODEV) {
spin_lock(&sched->job_list_lock);
@@ -392,20 +388,6 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, 
struct drm_sched_job *bad)
 
kthread_park(sched->thread);
 
-   /*
-* Reinsert back the bad job here - now it's safe as
-* drm_sched_get_cleanup_job cannot race against us and release the
-* bad job at this point - we parked (waited for) any in progress
-* (earlier) cleanups and drm_sched_get_cleanup_job will not be called
-* now until the scheduler thread is unparked.
-*/
-   if (bad && bad->sched == sched)
-   /*
-* Add at the head of the queue to reflect it was the earliest
-* job extracted.
-*/
-   list_add(&bad->list, &sched->pending_list);
-
/*
 * Iterate the job list from later to  earlier one and either deactive
 * their HW callbacks or remove them from pending list if they already
-- 
2.25.1



Re: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value

2021-08-20 Thread Christian König

Uff, I think that was SI but could be CIK as well.

We have a table for this somewhere, but I don't have it at hand.

Regards,
Christian.

Am 20.08.21 um 09:43 schrieb Sharma, Shashank:

[AMD Official Use Only]

Agree. On a similar note, which Gen is OLAND BTW?

Regards
Shashank
-Original Message-
From: Koenig, Christian 
Sent: Friday, August 20, 2021 12:08 PM
To: Sharma, Shashank ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: Re: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value

Sounds like a good idea to me, but I would limit this generally or at least for 
the whole generation and not just one particular chipset.

Regards,
Christian.

Am 20.08.21 um 08:05 schrieb Sharma, Shashank:

 From 4841e5ba60e33ff798bde6cb69fbd7e137b6db9c Mon Sep 17 00:00:00 2001
From: Shashank Sharma 
Date: Fri, 20 Aug 2021 10:20:02 +0530
Subject: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch limits the ref_div_max value to 100, during the calculation
of PLL feedback reference divider. With current value (128), the
produced fb_ref_div value generates unstable output at particular
frequencies. Radeon driver limits this value at 100.

On Oland, when we try to setup mode 2048x1280@60 (a bit weird, I
know), it demands a clock of 221270 Khz. It's been observed that the
PLL calculations using values 128 and 100 are vastly different, and
look like this:

+-----------------+----------+---------+
|Parameter        |AMDGPU    |Radeon   |
+-----------------+----------+---------+
|Clock feedback   |          |         |
|divider max      |  128     |  100    |
|cap value        |          |         |
+-----------------+----------+---------+
|ref_div_max      |  42      |  20     |
+-----------------+----------+---------+
|ref_div          |  42      |  20     |
+-----------------+----------+---------+
|fb_div           |  10326   |  8195   |
+-----------------+----------+---------+
|fb_div           |  1024    |  163    |
+-----------------+----------+---------+
|fb_dev_p         |  4       |  9      |
|frac fb_dev_p    |          |         |
+-----------------+----------+---------+

With ref_div_max value clipped at 100, AMDGPU driver can also drive
video mode 2048x1280@60 (221 MHz) and produce proper output without any
blanking and distortion on the screen.

PS: This value was changed from 128 to 100 in Radeon driver also, here:
https://github.com/freedesktop/drm-tip/commit/4b21ce1b4b5d262e7d4656b8
ececc891fc3cb806


V1:
Got acks from:
Acked-by: Alex Deucher 
Acked-by: Christian König 

V2:
- Restricting the changes only for OLAND, just to avoid any regression
   for other cards.
- Changed unsigned -> unsigned int to make checkpatch quiet.

Cc: Alex Deucher 
Cc: Christian König 
Cc: Eddy Qin 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c    | 20 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h    |  3 ++-
  drivers/gpu/drm/amd/amdgpu/atombios_crtc.c |  2 +-
  3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
index f2e20666c9c1..6d04c1d25bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
@@ -80,12 +80,17 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom,
unsigned *den,
   * Calculate feedback and reference divider for a given post divider.
Makes
   * sure we stay within the limits.
   */
-static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den,
unsigned post_div,
-  unsigned fb_div_max, unsigned ref_div_max,
-  unsigned *fb_div, unsigned *ref_div)
+static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev,
unsigned int nom,
+  unsigned int den, unsigned int post_div,
+  unsigned int fb_div_max, unsigned int
+ref_div_max,
+  unsigned int *fb_div, unsigned int *ref_div)
  {
+
  /* limit reference * post divider to a maximum */
-    ref_div_max = min(128 / post_div, ref_div_max);
+    if (adev->asic_type == CHIP_OLAND)
+    ref_div_max = min(100 / post_div, ref_div_max);
+    else
+    ref_div_max = min(128 / post_div, ref_div_max);

  /* get matching reference and feedback divider */
  *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u),
ref_div_max); @@ -112,7 +117,8 @@ static void
amdgpu_pll_get_fb_ref_div(unsigned
nom, unsigned den, unsigned post_
   * Try to calculate the PLL parameters to 

RE: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value

2021-08-20 Thread Sharma, Shashank
[AMD Official Use Only]

Agree. On a similar note, which Gen is OLAND BTW?

Regards
Shashank
-Original Message-
From: Koenig, Christian  
Sent: Friday, August 20, 2021 12:08 PM
To: Sharma, Shashank ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: Re: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value

Sounds like a good idea to me, but I would limit this generally or at least for 
the whole generation and not just one particular chipset.

Regards,
Christian.

Am 20.08.21 um 08:05 schrieb Sharma, Shashank:
> From 4841e5ba60e33ff798bde6cb69fbd7e137b6db9c Mon Sep 17 00:00:00 2001
> From: Shashank Sharma 
> Date: Fri, 20 Aug 2021 10:20:02 +0530
> Subject: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> This patch limits the ref_div_max value to 100, during the calculation 
> of PLL feedback reference divider. With current value (128), the 
> produced fb_ref_div value generates unstable output at particular 
> frequencies. Radeon driver limits this value at 100.
>
> On Oland, when we try to setup mode 2048x1280@60 (a bit weird, I 
> know), it demands a clock of 221270 Khz. It's been observed that the 
> PLL calculations using values 128 and 100 are vastly different, and 
> look like this:
>
> +-----------------+----------+---------+
> |Parameter        |AMDGPU    |Radeon   |
> +-----------------+----------+---------+
> |Clock feedback   |          |         |
> |divider max      |  128     |  100    |
> |cap value        |          |         |
> +-----------------+----------+---------+
> |ref_div_max      |  42      |  20     |
> +-----------------+----------+---------+
> |ref_div          |  42      |  20     |
> +-----------------+----------+---------+
> |fb_div           |  10326   |  8195   |
> +-----------------+----------+---------+
> |fb_div           |  1024    |  163    |
> +-----------------+----------+---------+
> |fb_dev_p         |  4       |  9      |
> |frac fb_dev_p    |          |         |
> +-----------------+----------+---------+
>
> With ref_div_max value clipped at 100, AMDGPU driver can also drive 
> video mode 2048x1280@60 (221 MHz) and produce proper output without any 
> blanking and distortion on the screen.
>
> PS: This value was changed from 128 to 100 in Radeon driver also, here:
> https://github.com/freedesktop/drm-tip/commit/4b21ce1b4b5d262e7d4656b8
> ececc891fc3cb806
>
>
> V1:
> Got acks from:
> Acked-by: Alex Deucher 
> Acked-by: Christian König 
>
> V2:
> - Restricting the changes only for OLAND, just to avoid any regression
>   for other cards.
> - Changed unsigned -> unsigned int to make checkpatch quiet.
>
> Cc: Alex Deucher 
> Cc: Christian König 
> Cc: Eddy Qin 
> Signed-off-by: Shashank Sharma 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c    | 20 +---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h    |  3 ++-
>  drivers/gpu/drm/amd/amdgpu/atombios_crtc.c |  2 +-
>  3 files changed, 16 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
> index f2e20666c9c1..6d04c1d25bfb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
> @@ -80,12 +80,17 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom, 
> unsigned *den,
>   * Calculate feedback and reference divider for a given post divider. 
> Makes
>   * sure we stay within the limits.
>   */
> -static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, 
> unsigned post_div,
> -  unsigned fb_div_max, unsigned ref_div_max,
> -  unsigned *fb_div, unsigned *ref_div)
> +static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev,
> unsigned int nom,
> +  unsigned int den, unsigned int post_div,
> +  unsigned int fb_div_max, unsigned int 
> +ref_div_max,
> +  unsigned int *fb_div, unsigned int *ref_div)
>  {
> +
>  /* limit reference * post divider to a maximum */
> -    ref_div_max = min(128 / post_div, ref_div_max);
> +    if (adev->asic_type == CHIP_OLAND)
> +    ref_div_max = min(100 / post_div, ref_div_max);
> +    else
> +    ref_div_max = min(128 / post_div, ref_div_max);
>
>  /* get matching reference and feedback divider */
>  *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), 
> ref_div_max); @@ -112,7 +117,8 @@ static void 
> amdgpu_pll_get_fb_ref_div(unsigned
> nom, unsigned den, unsigned post_
>   * Try 

Re: [PATCH v6 08/13] mm: call pgmap->ops->page_free for DEVICE_GENERIC pages

2021-08-20 Thread Jerome Glisse
On Thu, Aug 19, 2021 at 10:05 PM Christoph Hellwig  wrote:
>
> On Tue, Aug 17, 2021 at 11:44:54AM -0400, Felix Kuehling wrote:
> > >> That's a good catch. Existing drivers shouldn't need a page_free
> > >> callback if they didn't have one before. That means we need to add a
> > >> NULL-pointer check in free_device_page.
> > > Also the other state clearing (__ClearPageWaiters/mem_cgroup_uncharge/
> > > ->mapping = NULL).
> > >
> > > In many ways this seems like you want to bring back the DEVICE_PUBLIC
> > > pgmap type that was removed a while ago due to the lack of users
> > > instead of overloading the generic type.
> >
> > I think so. I'm not clear about how DEVICE_PUBLIC differed from what
> > DEVICE_GENERIC is today. As I understand it, DEVICE_PUBLIC was removed
> > because it was unused and also known to be broken in some ways.
> > DEVICE_GENERIC seemed close enough to what we need, other than not being
> > supported in the migration helpers.
> >
> > Would you see benefit in re-introducing DEVICE_PUBLIC as a distinct
> > memory type from DEVICE_GENERIC? What would be the benefits of making
> > that distinction?
>
> The old DEVICE_PUBLIC mostly different in that it allowed the page
> to be returned from vm_normal_page, which I think was horribly buggy.

Why was that buggy? If I were to do it now, I would return
DEVICE_PUBLIC pages from vm_normal_page but I would ban pinning, as
pinning is exceptionally wrong for GPUs. If you migrate some random
anonymous/file page back to your GPU memory and it gets pinned there, then
there is no way for the GPU to migrate the page out. Quickly you will
run out of physically contiguous memory and things like big graphics
buffer allocations (anything that needs physically contiguous memory)
will fail. It is less of an issue on some hardware that relies less and
less on physically contiguous memory, but I do not think it is
completely gone from all hw.

> But the point is not to bring back these old semantics.  The idea
> is to be able to differentiate between your new coherent on-device
> memory and the existing DEVICE_GENERIC.  That is, call the
> code in free_devmap_managed_page that is currently only used
> for device private pages also for your new public device pages without
> affecting the devdax and xen use cases.

Yes, I would rather bring back DEVICE_PUBLIC than try to use
DEVICE_GENERIC; the GENERIC change was done for users that closely
matched DAX semantics, and that is not the case here, at least not from
my point of view.

Jerome
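
What re-introducing a distinct type could look like in include/linux/memremap.h;
the naming and placement are hypothetical, purely for illustration:

enum memory_type {
	/* 0 is reserved to catch uninitialized type fields */
	MEMORY_DEVICE_PRIVATE = 1,
	MEMORY_DEVICE_PUBLIC,	/* device memory that is coherent with the CPU */
	MEMORY_DEVICE_FS_DAX,
	MEMORY_DEVICE_GENERIC,
	MEMORY_DEVICE_PCI_P2PDMA,
};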


Re: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

2021-08-20 Thread Christian König

No, that perfectly works for me.

The problem we used to have with this approach was that we potentially 
have multiple timeouts at the same time.


But when we serialize the timeout handling by using a single workqueue,
as Daniel now suggests as well, that isn't an issue any more.


Regards,
Christian.
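
As a rough sketch of that serialization idea (hypothetical helper, not the
drm_sched API as it stands): give every scheduler the same ordered workqueue
for its TDR work, so only one timeout handler can run at a time across rings.

static struct workqueue_struct *tdr_wq;	/* shared per device, hypothetical */

static void sched_arm_timeout_serialized(struct drm_gpu_scheduler *sched)
{
	/* a real version would allocate this once at device init */
	if (!tdr_wq)
		tdr_wq = alloc_ordered_workqueue("gpu-tdr", 0);

	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !list_empty(&sched->pending_list))
		queue_delayed_work(tdr_wq, &sched->work_tdr, sched->timeout);
}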

Am 20.08.21 um 09:12 schrieb Liu, Monk:

[AMD Official Use Only]

@Daniel Vetter @Grodzovsky, Andrey @Koenig, Christian
  
Do you have any concern on the kthread_park() approach ?


Theoretically speaking, sched_main shall run there exclusively with job_timeout
since they both touch jobs, and stopping the scheduler during job_timeout won't
impact performance since in that scenario
there was already something wrong/stuck on that ring/scheduler.

Thanks

--
Monk Liu | Cloud-GPU Core team
--

-Original Message-
From: Liu, Monk
Sent: Thursday, August 19, 2021 6:26 PM
To: Daniel Vetter ; Grodzovsky, Andrey 

Cc: Alex Deucher ; Chen, JingWen ; Maling list - 
DRI developers ; amd-gfx list ; 
Koenig, Christian 
Subject: RE: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

[AMD Official Use Only]

Hi Daniel


Why can't we stop the scheduler thread first, so that there's guaranteed no 
race? I've recently had a lot of discussions with panfrost folks about their 
reset that spawns across engines, and without stopping the scheduler thread 
first before you touch anything it's just plain impossible.

Yeah we had this thought as well in our mind.

Our second approach is to call kthread_stop() in the job_timedout() routine so that the
"bad" job is guaranteed to be used without the scheduler touching or freeing it.
Check this sample patch as well please:

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index a2a9536..50a49cb 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -319,17 +319,12 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
 sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
  
 /* Protects against concurrent deletion in drm_sched_get_cleanup_job */

+   kthread_park(sched->thread);
 spin_lock(>job_list_lock);
 job = list_first_entry_or_null(>pending_list,
struct drm_sched_job, list);
  
 if (job) {

-   /*
-* Remove the bad job so it cannot be freed by concurrent
-* drm_sched_cleanup_jobs. It will be reinserted back after 
sched->thread
-* is parked at which point it's safe.
-*/
-   list_del_init(>list);
 spin_unlock(>job_list_lock);
  
 status = job->sched->ops->timedout_job(job);

@@ -345,6 +340,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
 } else {
 spin_unlock(>job_list_lock);
 }
+   kthread_unpark(sched->thread);
  
 if (status != DRM_GPU_SCHED_STAT_ENODEV) {

 spin_lock(>job_list_lock); @@ -393,20 +389,6 @@ void 
drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 kthread_park(sched->thread);
  
 /*

-* Reinsert back the bad job here - now it's safe as
-* drm_sched_get_cleanup_job cannot race against us and release the
-* bad job at this point - we parked (waited for) any in progress
-* (earlier) cleanups and drm_sched_get_cleanup_job will not be called
-* now until the scheduler thread is unparked.
-*/
-   if (bad && bad->sched == sched)
-   /*
-* Add at the head of the queue to reflect it was the earliest
-* job extracted.
-*/
-   list_add(>list, >pending_list);
-
-   /*
  * Iterate the job list from later to  earlier one and either deactive
  * their HW callbacks or remove them from pending list if they already
  * signaled.


Thanks

--
Monk Liu | Cloud-GPU Core team
--

-Original Message-
From: Daniel Vetter 
Sent: Thursday, August 19, 2021 5:31 PM
To: Grodzovsky, Andrey 
Cc: Daniel Vetter ; Alex Deucher ; Chen, JingWen 
; Maling list - DRI developers ; amd-gfx list 
; Liu, Monk ; Koenig, Christian 

Subject: Re: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

On Wed, Aug 18, 2021 at 10:51:00AM -0400, Andrey Grodzovsky wrote:

On 2021-08-18 10:42 a.m., Daniel Vetter wrote:

On Wed, Aug 18, 2021 at 10:36:32AM -0400, Andrey Grodzovsky wrote:

On 2021-08-18 10:32 a.m., Daniel Vetter wrote:

On Wed, Aug 18, 2021 at 10:26:25AM -0400, Andrey Grodzovsky wrote:

On 2021-08-18 10:02 a.m., Alex Deucher wrote:


+ dri-devel

Since scheduler is a shared component, please add dri-devel
on all scheduler 

Re: [PATCH v6 02/13] mm: remove extra ZONE_DEVICE struct page refcount

2021-08-20 Thread Jerome Glisse
On Thu, Aug 19, 2021 at 11:00 AM Sierra Guiza, Alejandro (Alex)
 wrote:
>
>
> On 8/18/2021 2:28 PM, Ralph Campbell wrote:
> > On 8/17/21 5:35 PM, Felix Kuehling wrote:
> >> Am 2021-08-17 um 8:01 p.m. schrieb Ralph Campbell:
> >>> On 8/12/21 11:31 PM, Alex Sierra wrote:
>  From: Ralph Campbell 
> 
>  ZONE_DEVICE struct pages have an extra reference count that
>  complicates the
>  code for put_page() and several places in the kernel that need to
>  check the
>  reference count to see that a page is not being used (gup, compaction,
>  migration, etc.). Clean up the code so the reference count doesn't
>  need to
>  be treated specially for ZONE_DEVICE.
> 
>  v2:
>  AS: merged this patch in linux 5.11 version
> 
>  v5:
>  AS: add condition at try_grab_page to check for the zone device type,
>  while
>  page ref counter is checked less/equal to zero. In case of device
>  zone, pages
>  ref counter are initialized to zero.
> 
>  Signed-off-by: Ralph Campbell 
>  Signed-off-by: Alex Sierra 
>  ---
> arch/powerpc/kvm/book3s_hv_uvmem.c |  2 +-
> drivers/gpu/drm/nouveau/nouveau_dmem.c |  2 +-
> fs/dax.c   |  4 +-
> include/linux/dax.h|  2 +-
> include/linux/memremap.h   |  7 +--
> include/linux/mm.h | 13 +
> lib/test_hmm.c |  2 +-
> mm/internal.h  |  8 +++
> mm/memremap.c  | 68
>  +++---
> mm/migrate.c   |  5 --
> mm/page_alloc.c|  3 ++
> mm/swap.c  | 45 ++---
> 12 files changed, 46 insertions(+), 115 deletions(-)
> 
> >>> I haven't seen a response to the issues I raised back at v3 of this
> >>> series.
> >> https://lore.kernel.org/linux-mm/4f6dd918-d79b-1aa7-3a4c-caa67ddc29bc@nvidia.com/
> >>>
> >>>
> >>>
> >>> Did I miss something?
> >> I think part of the response was that we did more testing. Alex added
> >> support for DEVICE_GENERIC pages to test_hmm and he ran DAX tests
> >> recommended by Theodore Tso. In that testing he ran into a WARN_ON_ONCE
> >> about a zero page refcount in try_get_page. The fix is in the latest
> >> version of patch 2. But it's already obsolete because John Hubbard is
> >> about to remove that function altogether.
> >>
> >> I think the issues you raised were more uncertainty than known bugs. It
> >> seems the fact that you can have DAX pages with 0 refcount is a feature
> >> more than a bug.
> >>
> >> Regards,
> >>Felix
> >
> > Did you test on a system without CONFIG_ARCH_HAS_PTE_SPECIAL defined?
> > In that case, mmap() of a DAX device will call insert_page() which calls
> > get_page() which would trigger VM_BUG_ON_PAGE().
> >
> > I can believe it is OK for PTE_SPECIAL page table entries to have no
> > struct page or that MEMORY_DEVICE_GENERIC struct pages be mapped with
> > a zero reference count using insert_pfn().
> Hi Ralph,
> We have tried the DAX tests with and without CONFIG_ARCH_HAS_PTE_SPECIAL
> defined.
> Apparently none of the tests touches that condition for a DAX device. Of
> course,
> that doesn't mean it couldn't happen.
>
> Regards,
> Alex S.
>
> >
> >
> > I find it hard to believe that other MM developers don't see an issue
> > with a struct page with refcount == 0 and mapcount == 1.
> >
> > I don't see where init_page_count() is being called for the
> > MEMORY_DEVICE_GENERIC or MEMORY_DEVICE_PRIVATE struct pages the AMD
> > driver allocates and passes to migrate_vma_setup().
> > Looks like svm_migrate_get_vram_page() needs to call init_page_count()
> > instead of get_page(). (I'm looking at branch
> > origin/alexsierrag/device_generic
> > https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver.git)
> Yes, you're right. My bad. Thanks for catching this. I didn't realize
> I had missed
> defining CONFIG_DEBUG_VM in my build. Therefore this BUG was never caught.
> It worked after I replaced get_page() with init_page_count() at
> svm_migrate_get_vram_page. However, I don't think this is the best 
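
For reference, the change being discussed there would look roughly like this;
a hypothetical, simplified sketch rather than the actual KFD code:

/* hypothetical sketch, not the actual svm_migrate_get_vram_page() */
static void svm_migrate_get_vram_page_sketch(struct page *page)
{
	/* ZONE_DEVICE pages start with a zero refcount in this series, so take
	 * the initial reference with init_page_count() instead of get_page()
	 * before handing the page to migrate_vma_setup(). */
	init_page_count(page);
	lock_page(page);
}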

RE: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

2021-08-20 Thread Liu, Monk
[AMD Official Use Only]

@Daniel Vetter @Grodzovsky, Andrey @Koenig, Christian
 
Do you have any concern on the kthread_park() approach ?

Theoretically speaking, sched_main shall run there exclusively with job_timeout
since they both touch jobs, and stopping the scheduler during job_timeout won't
impact performance since in that scenario
there was already something wrong/stuck on that ring/scheduler.

Thanks 

--
Monk Liu | Cloud-GPU Core team
--

-Original Message-
From: Liu, Monk 
Sent: Thursday, August 19, 2021 6:26 PM
To: Daniel Vetter ; Grodzovsky, Andrey 

Cc: Alex Deucher ; Chen, JingWen 
; Maling list - DRI developers 
; amd-gfx list 
; Koenig, Christian 
Subject: RE: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

[AMD Official Use Only]

Hi Daniel

>> Why can't we stop the scheduler thread first, so that there's guaranteed no 
>> race? I've recently had a lot of discussions with panfrost folks about their 
>> reset that spawns across engines, and without stopping the scheduler thread 
>> first before you touch anything it's just plain impossible.

Yeah we had this thought as well in our mind.

Our second approach is to call kthread_stop() in the job_timedout() routine so that
the "bad" job is guaranteed to be used without the scheduler touching or
freeing it. Check this sample patch as well please:

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index a2a9536..50a49cb 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -319,17 +319,12 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
 
/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
+   kthread_park(sched->thread);
 spin_lock(&sched->job_list_lock);
 job = list_first_entry_or_null(&sched->pending_list,
   struct drm_sched_job, list);
 
if (job) {
-   /*
-* Remove the bad job so it cannot be freed by concurrent
-* drm_sched_cleanup_jobs. It will be reinserted back after 
sched->thread
-* is parked at which point it's safe.
-*/
-   list_del_init(&job->list);
 spin_unlock(&sched->job_list_lock);
 
status = job->sched->ops->timedout_job(job);
@@ -345,6 +340,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
} else {
 spin_unlock(&sched->job_list_lock);
}
+   kthread_unpark(sched->thread);
 
if (status != DRM_GPU_SCHED_STAT_ENODEV) {
 spin_lock(&sched->job_list_lock); @@ -393,20 +389,6 @@ void 
drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
kthread_park(sched->thread);
 
/*
-* Reinsert back the bad job here - now it's safe as
-* drm_sched_get_cleanup_job cannot race against us and release the
-* bad job at this point - we parked (waited for) any in progress
-* (earlier) cleanups and drm_sched_get_cleanup_job will not be called
-* now until the scheduler thread is unparked.
-*/
-   if (bad && bad->sched == sched)
-   /*
-* Add at the head of the queue to reflect it was the earliest
-* job extracted.
-*/
-   list_add(&bad->list, &sched->pending_list);
-
-   /*
 * Iterate the job list from later to  earlier one and either deactive
 * their HW callbacks or remove them from pending list if they already
 * signaled.


Thanks 

--
Monk Liu | Cloud-GPU Core team
--

-Original Message-
From: Daniel Vetter 
Sent: Thursday, August 19, 2021 5:31 PM
To: Grodzovsky, Andrey 
Cc: Daniel Vetter ; Alex Deucher ; 
Chen, JingWen ; Maling list - DRI developers 
; amd-gfx list 
; Liu, Monk ; Koenig, 
Christian 
Subject: Re: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

On Wed, Aug 18, 2021 at 10:51:00AM -0400, Andrey Grodzovsky wrote:
> 
> On 2021-08-18 10:42 a.m., Daniel Vetter wrote:
> > On Wed, Aug 18, 2021 at 10:36:32AM -0400, Andrey Grodzovsky wrote:
> > > On 2021-08-18 10:32 a.m., Daniel Vetter wrote:
> > > > On Wed, Aug 18, 2021 at 10:26:25AM -0400, Andrey Grodzovsky wrote:
> > > > > On 2021-08-18 10:02 a.m., Alex Deucher wrote:
> > > > > 
> > > > > > + dri-devel
> > > > > > 
> > > > > > Since scheduler is a shared component, please add dri-devel 
> > > > > > on all scheduler patches.
> > > > > > 
> > > > > > On Wed, Aug 18, 2021 at 7:21 AM Jingwen Chen 
> > > > > >  wrote:
> > > > > > > [Why]
> > > > > > > for bailing job, this commit will delete it from pending 
> > > > > > > list thus the bailing job will never have a chance to be 
> > > > > > > 

Re: [PATCH 2/3] drm/amdgpu: Use SDMA1 for buffer movement on Aldebaran

2021-08-20 Thread Christian König




Am 20.08.21 um 07:32 schrieb Joseph Greathouse:

Aldebaran should not use SDMA0 for buffer funcs such as page migration.
Instead, we move over to SDMA1 for these features. Leave SDMA0 in
charge for all other existing chips to avoid any possibility of
regressions.


The explanation of why we do this is missing; apart from that, it looks good to me.

Christian.



Signed-off-by: Joseph Greathouse 
---
  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 8931000dcd41..771630d7bb3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2689,11 +2689,15 @@ static const struct amdgpu_buffer_funcs 
sdma_v4_0_buffer_funcs = {
  
  static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)

  {
+   int engine = 0;
+
+   if (adev->asic_type == CHIP_ALDEBARAN)
+   engine = 1;
adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
if (adev->sdma.has_page_queue)
-   adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
+   adev->mman.buffer_funcs_ring = &adev->sdma.instance[engine].page;
else
-   adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+   adev->mman.buffer_funcs_ring = &adev->sdma.instance[engine].ring;
  }
  
  static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {




Re: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value

2021-08-20 Thread Christian König
Sounds like a good idea to me, but I would limit this generally or at 
least for the whole generation and not just one particular chipset.


Regards,
Christian.

Am 20.08.21 um 08:05 schrieb Sharma, Shashank:

From 4841e5ba60e33ff798bde6cb69fbd7e137b6db9c Mon Sep 17 00:00:00 2001
From: Shashank Sharma 
Date: Fri, 20 Aug 2021 10:20:02 +0530
Subject: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch limits the ref_div_max value to 100, during the
calculation of PLL feedback reference divider. With current
value (128), the produced fb_ref_div value generates unstable
output at particular frequencies. Radeon driver limits this
value at 100.

On Oland, when we try to setup mode 2048x1280@60 (a bit weird,
I know), it demands a clock of 221270 Khz. It's been observed
that the PLL calculations using values 128 and 100 are vastly
different, and look like this:

+--+
|Parameter    |AMDGPU    |Radeon   |
| |  | |
+-++
|Clock feedback  | |
|divider max  |  128 |   100   |
|cap value    |  | |
| |  | |
| |  | |
+--+
|ref_div_max  |  | |
| |  42  |  20 |
| |  | |
| |  | |
+--+
|ref_div  |  42  |  20 |
| |  | |
+--+
|fb_div   |  10326   |  8195   |
+--+
|fb_div   |  1024    |  163    |
+--+
|fb_dev_p |  4   |  9  |
|frac fb_dev_p|  | |
++-+

With ref_div_max value clipped at 100, AMDGPU driver can also
drive video mode 2048x1280@60 (221 MHz) and produce proper output
without any blanking and distortion on the screen.

PS: This value was changed from 128 to 100 in Radeon driver also, here:
https://github.com/freedesktop/drm-tip/commit/4b21ce1b4b5d262e7d4656b8ececc891fc3cb806 



V1:
Got acks from:
Acked-by: Alex Deucher 
Acked-by: Christian König 

V2:
- Restricting the changes only for OLAND, just to avoid any regression
  for other cards.
- Changed unsigned -> unsigned int to make checkpatch quiet.

Cc: Alex Deucher 
Cc: Christian König 
Cc: Eddy Qin 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c    | 20 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h    |  3 ++-
 drivers/gpu/drm/amd/amdgpu/atombios_crtc.c |  2 +-
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c

index f2e20666c9c1..6d04c1d25bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
@@ -80,12 +80,17 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom, 
unsigned *den,
  * Calculate feedback and reference divider for a given post divider. 
Makes

  * sure we stay within the limits.
  */
-static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, 
unsigned post_div,

-  unsigned fb_div_max, unsigned ref_div_max,
-  unsigned *fb_div, unsigned *ref_div)
+static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, 
unsigned int nom,

+  unsigned int den, unsigned int post_div,
+  unsigned int fb_div_max, unsigned int ref_div_max,
+  unsigned int *fb_div, unsigned int *ref_div)
 {
+
 /* limit reference * post divider to a maximum */
-    ref_div_max = min(128 / post_div, ref_div_max);
+    if (adev->asic_type == CHIP_OLAND)
+    ref_div_max = min(100 / post_div, ref_div_max);
+    else
+    ref_div_max = min(128 / post_div, ref_div_max);

 /* get matching reference and feedback divider */
 *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), 
ref_div_max);
@@ -112,7 +117,8 @@ static void amdgpu_pll_get_fb_ref_div(unsigned 
nom, unsigned den, unsigned post_

  * Try to calculate the PLL parameters to generate the given frequency:
  * dot_clock = (ref_freq * feedback_div) / (ref_div * post_div)
  */
-void amdgpu_pll_compute(struct amdgpu_pll *pll,
+void amdgpu_pll_compute(struct amdgpu_device *adev,
+    struct amdgpu_pll *pll,
 u32 freq,
 u32 *dot_clock_p,
 u32 *fb_div_p,
@@ -199,7 +205,7 @@ void amdgpu_pll_compute(struct amdgpu_pll *pll,

 for (post_div = post_div_min; post_div <= post_div_max; 
++post_div) {

 unsigned diff;
-    

Re: [PATCH v6 02/13] mm: remove extra ZONE_DEVICE struct page refcount

2021-08-20 Thread Jerome Glisse
Note that you do not want GUP to succeed on device pages; I do not see
where that is handled in the new code.
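
A sketch of the kind of guard being asked about, assuming the policy is simply
to refuse GUP pins that the device memory cannot tolerate; illustrative only,
not part of the series:

static inline bool gup_can_grab_page(struct page *page, unsigned int gup_flags)
{
	if (!is_zone_device_page(page))
		return true;

	/* hypothetical policy: never allow long-term pins on device memory */
	return !(gup_flags & FOLL_LONGTERM);
}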

On Sun, Aug 15, 2021 at 1:40 PM John Hubbard  wrote:
>
> On 8/15/21 8:37 AM, Christoph Hellwig wrote:
> >> diff --git a/include/linux/mm.h b/include/linux/mm.h
> >> index 8ae31622deef..d48a1f0889d1 100644
> >> --- a/include/linux/mm.h
> >> +++ b/include/linux/mm.h
> >> @@ -1218,7 +1218,7 @@ __maybe_unused struct page 
> >> *try_grab_compound_head(struct page *page, int refs,
> >>   static inline __must_check bool try_get_page(struct page *page)
> >>   {
> >>  page = compound_head(page);
> >> -if (WARN_ON_ONCE(page_ref_count(page) <= 0))
> >> +if (WARN_ON_ONCE(page_ref_count(page) < 
> >> (int)!is_zone_device_page(page)))
> >
> > Please avoid the overly long line.  In fact I'd be tempted to just not
> > bother here and keep the old, more loose check.  Especially given that
> > John has a patch ready that removes try_get_page entirely.
> >
>
> Yes. Andrew has accepted it into mmotm.
>
> Ralph's patch here was written well before my cleanup that removed
> try_grab_page() [1]. But now that we're here, if you drop this hunk then
> it will make merging easier, I think.
>
>
> [1] https://lore.kernel.org/r/20210813044133.1536842-4-jhubb...@nvidia.com
>
> thanks,
> --
> John Hubbard
> NVIDIA
>


[PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value

2021-08-20 Thread Sharma, Shashank

From 4841e5ba60e33ff798bde6cb69fbd7e137b6db9c Mon Sep 17 00:00:00 2001
From: Shashank Sharma 
Date: Fri, 20 Aug 2021 10:20:02 +0530
Subject: [PATCH v2] drm/amdgpu/OLAND: clip the ref divider max value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch limits the ref_div_max value to 100 during the
calculation of the PLL feedback reference divider. With the current
value (128), the produced fb_ref_div value generates unstable
output at particular frequencies. The Radeon driver limits this
value to 100.

On Oland, when we try to set up mode 2048x1280@60 (a bit weird,
I know), it demands a clock of 221270 kHz. It's been observed
that the PLL calculations using values 128 and 100 are vastly
different, and look like this:

+--+
|Parameter|AMDGPU|Radeon   |
| |  | |
+-++
|Clock feedback  | |
|divider max  |  128 |   100   |
|cap value|  | |
| |  | |
| |  | |
+--+
|ref_div_max  |  | |
| |  42  |  20 |
| |  | |
| |  | |
+--+
|ref_div  |  42  |  20 |
| |  | |
+--+
|fb_div   |  10326   |  8195   |
+--+
|fb_div   |  1024|  163|
+--+
|fb_dev_p |  4   |  9  |
|frac fb_dev_p|  | |
++-+

With the ref_div_max value clipped at 100, the AMDGPU driver can also
drive video mode 2048x1280@60 (221 MHz) and produce proper output
without any blanking or distortion on the screen.

PS: This value was changed from 128 to 100 in Radeon driver also, here:
https://github.com/freedesktop/drm-tip/commit/4b21ce1b4b5d262e7d4656b8ececc891fc3cb806

V1:
Got acks from:
Acked-by: Alex Deucher 
Acked-by: Christian König 

V2:
- Restricting the changes only for OLAND, just to avoid any regression
  for other cards.
- Changed unsigned -> unsigned int to make checkpatch quiet.

Cc: Alex Deucher 
Cc: Christian König 
Cc: Eddy Qin 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c| 20 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h|  3 ++-
 drivers/gpu/drm/amd/amdgpu/atombios_crtc.c |  2 +-
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c

index f2e20666c9c1..6d04c1d25bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
@@ -80,12 +80,17 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom, 
unsigned *den,
  * Calculate feedback and reference divider for a given post divider. 
Makes

  * sure we stay within the limits.
  */
-static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, 
unsigned post_div,

- unsigned fb_div_max, unsigned ref_div_max,
- unsigned *fb_div, unsigned *ref_div)
+static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, 
unsigned int nom,

+ unsigned int den, unsigned int post_div,
+ unsigned int fb_div_max, unsigned int 
ref_div_max,
+ unsigned int *fb_div, unsigned int 
*ref_div)
 {
+
/* limit reference * post divider to a maximum */
-   ref_div_max = min(128 / post_div, ref_div_max);
+   if (adev->asic_type == CHIP_OLAND)
+   ref_div_max = min(100 / post_div, ref_div_max);
+   else
+   ref_div_max = min(128 / post_div, ref_div_max);

/* get matching reference and feedback divider */
*ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max);
@@ -112,7 +117,8 @@ static void amdgpu_pll_get_fb_ref_div(unsigned nom, 
unsigned den, unsigned post_

  * Try to calculate the PLL parameters to generate the given frequency:
  * dot_clock = (ref_freq * feedback_div) / (ref_div * post_div)
  */
-void amdgpu_pll_compute(struct amdgpu_pll *pll,
+void amdgpu_pll_compute(struct amdgpu_device *adev,
+   struct amdgpu_pll *pll,
u32 freq,
u32 *dot_clock_p,
u32 *fb_div_p,
@@ -199,7 +205,7 @@ void amdgpu_pll_compute(struct amdgpu_pll *pll,

for (post_div = post_div_min; post_div <= post_div_max; ++post_div) {
unsigned diff;
-   amdgpu_pll_get_fb_ref_div(nom, den, post_div, fb_div_max,