Re: [PATCH v2 3/3] drm/amd: Add special handling for system s0ix state w/ dGPUs

2023-02-28 Thread Lazar, Lijo




On 2/28/2023 10:13 AM, Mario Limonciello wrote:

With dGPUs that support BACO or BOCO we want them to go into those
states when the system goes to s2idle.  Detect that the system will
be targeting this state and force the call into runtime suspend.

If the runtime suspend call fails for any reason, then fallback to
standard suspend flow.

Signed-off-by: Mario Limonciello 
---
v1->v2:
  * New patch
  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c |  3 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 12 +++-
  2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 711f2a1bf525..7c3c6380135a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1073,8 +1073,7 @@ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
   */
  bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
  {
-   if (!(adev->flags & AMD_IS_APU) ||
-   (pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
+   if (pm_suspend_target_state != PM_SUSPEND_TO_IDLE)
return false;



This will set the adev->in_s0ix flag to true for all dGPUs. There are
many places throughout the suspend/resume logic where it is assumed that
adev->in_s0ix is set only for APUs. For example, it skips suspend of GFX
assuming GFXOFF is a precondition for s0ix.


Basically this will break suspend/resume of dGPUs in s2idle if the 
device is not already suspended.
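
To illustrate the kind of APU-only assumption described above, here is a
hypothetical sketch (not the actual driver code) of such a check in the
suspend path:

	/* assumes GFXOFF already powered the GFX block down, which is
	 * only guaranteed on APUs -- with in_s0ix now set for dGPUs too,
	 * GFX would wrongly be left running */
	if (adev->in_s0ix)
		return 0; /* skip suspending the GFX block */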


Thanks,
Lijo



if (adev->asic_type < CHIP_RAVEN)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 750984517192..acc032c4c250 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2415,8 +2415,18 @@ static int amdgpu_pmops_suspend(struct device *dev)
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
  
-	if (amdgpu_acpi_is_s0ix_active(adev))
+	if (amdgpu_acpi_is_s0ix_active(adev)) {
+		/* try to explicitly enter runtime suspend for s2idle on BACO/BOCO */
+		if (dev_pm_test_driver_flags(drm_dev->dev, DPM_FLAG_SMART_SUSPEND)) {
+			int ret;
+
+			ret = pm_runtime_suspend(dev);
+			if (!ret)
+				return 0;
+			DRM_WARN("failed to enter runtime suspend, running system suspend: %d\n", ret);
+		}
 		adev->in_s0ix = true;
+	}
else if (amdgpu_acpi_is_s3_active(adev))
adev->in_s3 = true;
if (!adev->in_s0ix && !adev->in_s3)


Common DRM execution context v3

2023-02-28 Thread Christian König
Hi guys,

third round for those patches. They have been in my queue for nearly a
year now because I couldn't find much time to push on this.

Danilo wants to use this for his GPU VAs tracker work and Arun needs it
for his secure semaphore work, so we should probably get it reviewed
now.

Compared to the last version I've fixed one memory leak found by Danilo
and removed the support for duplicate tracking. Only radeon really needs
that and we can trivially handle it differently there.
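
(A minimal sketch of how a driver that can see the same BO twice could
handle duplicates itself, assuming the underlying ww_mutex semantics where
locking an object a second time in the same acquire context returns
-EALREADY:)

	ret = drm_exec_prepare_obj(&exec, obj, 1);
	if (ret == -EALREADY) {
		/* duplicate entry: the BO is already locked and its fence
		 * slots are reserved, so treat it as success */
		ret = 0;
	}
	drm_exec_continue_on_contention(&exec);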

Please review and/or comment,
Christian.




[PATCH 1/9] drm: execution context for GEM buffers v3

2023-02-28 Thread Christian König
This adds the infrastructure for an execution context for GEM buffers
which is similar to the existing TTM execbuf util and intended to replace
it in the long term.

The basic functionality is that we abstract the necessary loop to lock
many different GEM buffers with automated deadlock and duplicate handling.

v2: drop xarray and use a dynamically resized array instead, the locking
overhead is unnecessary and measurable.
v3: drop duplicate tracking, radeon is really the only one needing that.

Signed-off-by: Christian König 
---
 Documentation/gpu/drm-mm.rst |  12 ++
 drivers/gpu/drm/Kconfig  |   6 +
 drivers/gpu/drm/Makefile |   2 +
 drivers/gpu/drm/drm_exec.c   | 249 +++
 include/drm/drm_exec.h   | 115 
 5 files changed, 384 insertions(+)
 create mode 100644 drivers/gpu/drm/drm_exec.c
 create mode 100644 include/drm/drm_exec.h

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index a79fd3549ff8..a52e6f4117d6 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -493,6 +493,18 @@ DRM Sync Objects
 .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
:export:
 
+DRM Execution context
+=====================
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :doc: Overview
+
+.. kernel-doc:: include/drm/drm_exec.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :export:
+
 GPU Scheduler
 =============
 
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 17d252dc25e2..84a5fc28c48d 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -200,6 +200,12 @@ config DRM_TTM
  GPU memory types. Will be enabled automatically if a device driver
  uses it.
 
+config DRM_EXEC
+   tristate
+   depends on DRM
+   help
+ Execution context for command submissions
+
 config DRM_BUDDY
tristate
depends on DRM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index ab4460fcd63f..d40defbb0347 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -78,6 +78,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += 
drm_panel_orientation_quirks.o
 #
 # Memory-management helpers
 #
+#
+obj-$(CONFIG_DRM_EXEC) += drm_exec.o
 
 obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
 
diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
new file mode 100644
index ..df546cc5a227
--- /dev/null
+++ b/drivers/gpu/drm/drm_exec.c
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+#include <drm/drm_exec.h>
+#include <drm/drm_gem.h>
+#include <linux/dma-resv.h>
+
+/**
+ * DOC: Overview
+ *
+ * This component mainly abstracts the retry loop necessary for locking
+ * multiple GEM objects while preparing hardware operations (e.g. command
+ * submissions, page table updates etc.).
+ *
+ * If contention is detected while locking a GEM object the cleanup procedure
+ * unlocks all previously locked GEM objects and locks the contended one first
+ * before locking any further objects.
+ *
+ * After an object is locked fence slots can optionally be reserved on the
+ * dma_resv object inside the GEM object.
+ *
+ * A typical usage pattern should look like this::
+ *
+ * struct drm_gem_object *obj;
+ * struct drm_exec exec;
+ * unsigned long index;
+ * int ret;
+ *
+ * drm_exec_init(&exec, true);
+ * drm_exec_while_not_all_locked(&exec) {
+ * ret = drm_exec_prepare_obj(&exec, boA, 1);
+ * drm_exec_continue_on_contention(&exec);
+ * if (ret)
+ * goto error;
+ *
+ * ret = drm_exec_lock(&exec, boB, 1);
+ * drm_exec_continue_on_contention(&exec);
+ * if (ret)
+ * goto error;
+ * }
+ *
+ * drm_exec_for_each_locked_object(&exec, index, obj) {
+ * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ);
+ * ...
+ * }
+ * drm_exec_fini(&exec);
+ *
+ * See struct drm_exec for more details.
+ */
+
+/* Dummy value used to initially enter the retry loop */
+#define DRM_EXEC_DUMMY (void*)~0
+
+/* Unlock all objects and drop references */
+static void drm_exec_unlock_all(struct drm_exec *exec)
+{
+   struct drm_gem_object *obj;
+   unsigned long index;
+
+   drm_exec_for_each_locked_object(exec, index, obj) {
+   dma_resv_unlock(obj->resv);
+   drm_gem_object_put(obj);
+   }
+
+   if (exec->prelocked) {
+   dma_resv_unlock(exec->prelocked->resv);
+   drm_gem_object_put(exec->prelocked);
+   exec->prelocked = NULL;
+   }
+}
+
+/**
+ * drm_exec_init - initialize a drm_exec object
+ * @exec: the drm_exec object to initialize
+ * @interruptible: if locks should be acquired interruptible
+ *
+ * Initialize the object and make sure that we can track locked and duplicate
+ * objects.
+ */
+void drm_exec_init(struct drm_exec *exec, bool interruptible)
+{
+   exec->interruptible = interruptible;
+   

[PATCH 2/9] drm: add drm_exec selftests

2023-02-28 Thread Christian König
Largely just the initial skeleton.
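
(A hypothetical sketch of where this skeleton could grow, not part of the
patch: a test for the duplicate behaviour, assuming a second prepare of an
already locked object returns -EALREADY from the underlying ww_mutex:)

	static void drm_exec_lock_duplicate(struct kunit *test)
	{
		struct drm_gem_object gobj = { };
		struct drm_exec exec;
		int ret;

		drm_gem_private_object_init(&dev, &gobj, PAGE_SIZE);

		drm_exec_init(&exec, true);
		drm_exec_while_not_all_locked(&exec) {
			ret = drm_exec_prepare_obj(&exec, &gobj, 1);
			drm_exec_continue_on_contention(&exec);
			if (!ret) /* lock the same object a second time */
				ret = drm_exec_prepare_obj(&exec, &gobj, 1);
			drm_exec_continue_on_contention(&exec);
		}
		KUNIT_EXPECT_EQ(test, ret, -EALREADY);
		drm_exec_fini(&exec);
	}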

Signed-off-by: Christian König 
---
 drivers/gpu/drm/Kconfig   |  1 +
 drivers/gpu/drm/tests/Makefile|  3 +-
 drivers/gpu/drm/tests/drm_exec_test.c | 73 +++
 3 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tests/drm_exec_test.c

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 84a5fc28c48d..0c8d8ed69154 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -79,6 +79,7 @@ config DRM_KUNIT_TEST
select DRM_BUDDY
select DRM_EXPORT_FOR_TESTS if m
select DRM_KUNIT_TEST_HELPERS
+   select DRM_EXEC
default KUNIT_ALL_TESTS
help
  This builds unit tests for DRM. This option is not useful for
diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
index bca726a8f483..ba7baa622675 100644
--- a/drivers/gpu/drm/tests/Makefile
+++ b/drivers/gpu/drm/tests/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
drm_modes_test.o \
drm_plane_helper_test.o \
drm_probe_helper_test.o \
-   drm_rect_test.o
+   drm_rect_test.o \
+   drm_exec_test.o
 
 CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
diff --git a/drivers/gpu/drm/tests/drm_exec_test.c b/drivers/gpu/drm/tests/drm_exec_test.c
new file mode 100644
index ..78eb61eb27cc
--- /dev/null
+++ b/drivers/gpu/drm/tests/drm_exec_test.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#define pr_fmt(fmt) "drm_exec: " fmt
+
+#include <kunit/test.h>
+
+#include <linux/module.h>
+#include <linux/prime_numbers.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_gem.h>
+
+#include "../lib/drm_random.h"
+
+static struct drm_device dev;
+
+static void drm_exec_sanitycheck(struct kunit *test)
+{
+   struct drm_exec exec;
+
+   drm_exec_init(&exec, true);
+   drm_exec_fini(&exec);
+   pr_info("%s - ok!\n", __func__);
+}
+
+static void drm_exec_lock1(struct kunit *test)
+{
+   struct drm_gem_object gobj = { };
+   struct drm_exec exec;
+   int ret;
+
+   drm_gem_private_object_init(&dev, &gobj, PAGE_SIZE);
+
+   drm_exec_init(&exec, true);
+   drm_exec_while_not_all_locked(&exec) {
+   ret = drm_exec_prepare_obj(&exec, &gobj, 1);
+   drm_exec_continue_on_contention(&exec);
+   if (ret) {
+   drm_exec_fini(&exec);
+   pr_err("%s - err %d!\n", __func__, ret);
+   return;
+   }
+   }
+   drm_exec_fini(&exec);
+   pr_info("%s - ok!\n", __func__);
+}
+
+static int drm_exec_suite_init(struct kunit_suite *suite)
+{
+   kunit_info(suite, "Testing DRM exec manager\n");
+   return 0;
+}
+
+static struct kunit_case drm_exec_tests[] = {
+   KUNIT_CASE(drm_exec_sanitycheck),
+   KUNIT_CASE(drm_exec_lock1),
+   {}
+};
+
+static struct kunit_suite drm_exec_test_suite = {
+   .name = "drm_exec",
+   .suite_init = drm_exec_suite_init,
+   .test_cases = drm_exec_tests,
+};
+
+kunit_test_suite(drm_exec_test_suite);
+
+MODULE_AUTHOR("AMD");
+MODULE_LICENSE("GPL and additional rights");
-- 
2.34.1



[PATCH 3/9] drm/amdkfd: switch over to using drm_exec

2023-02-28 Thread Christian König
Avoids quite a bit of logic and kmalloc overhead.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   5 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 302 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  14 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   3 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  |  32 +-
 5 files changed, 151 insertions(+), 205 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 333780491867..e9ef493091a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -25,13 +25,13 @@
 #ifndef AMDGPU_AMDKFD_H_INCLUDED
 #define AMDGPU_AMDKFD_H_INCLUDED
 
+#include <drm/drm_exec.h>
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
-#include <drm/ttm/ttm_execbuf_util.h>
 #include "amdgpu_sync.h"
 #include "amdgpu_vm.h"
 
@@ -69,8 +69,7 @@ struct kgd_mem {
struct hmm_range *range;
struct list_head attachments;
/* protected by amdkfd_process_info.lock */
-   struct ttm_validate_buffer validate_list;
-   struct ttm_validate_buffer resv_list;
+   struct list_head validate_list;
uint32_t domain;
unsigned int mapped_to_gpu_memory;
uint64_t va;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d6320c836251..2f4aeaf711a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -27,6 +27,8 @@
 #include 
 #include 
 
+#include <drm/drm_exec.h>
+
 #include "amdgpu_object.h"
 #include "amdgpu_gem.h"
 #include "amdgpu_vm.h"
@@ -897,28 +899,19 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
struct amdkfd_process_info *process_info,
bool userptr)
 {
-   struct ttm_validate_buffer *entry = &mem->validate_list;
-   struct amdgpu_bo *bo = mem->bo;
-
-   INIT_LIST_HEAD(&entry->head);
-   entry->num_shared = 1;
-   entry->bo = &bo->tbo;
-   mutex_lock(&process_info->lock);
if (userptr)
-   list_add_tail(&entry->head, &process_info->userptr_valid_list);
+   list_add_tail(&mem->validate_list,
+ &process_info->userptr_valid_list);
else
-   list_add_tail(&entry->head, &process_info->kfd_bo_list);
+   list_add_tail(&mem->validate_list, &process_info->kfd_bo_list);
mutex_unlock(&process_info->lock);
 }
 
 static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
struct amdkfd_process_info *process_info)
 {
-   struct ttm_validate_buffer *bo_list_entry;
-
-   bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
-   list_del(&bo_list_entry->head);
+   list_del(&mem->validate_list);
mutex_unlock(&process_info->lock);
 }
 
@@ -1005,13 +998,12 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
  * object can track VM updates.
  */
 struct bo_vm_reservation_context {
-   struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
-   unsigned int n_vms; /* Number of VMs reserved   */
-   struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries  */
-   struct ww_acquire_ctx ticket;   /* Reservation ticket   */
-   struct list_head list, duplicates;  /* BO lists */
-   struct amdgpu_sync *sync;   /* Pointer to sync object   */
-   bool reserved;  /* Whether BOs are reserved */
+   /* DRM execution context for the reservation */
+   struct drm_exec exec;
+   /* Number of VMs reserved */
+   unsigned int n_vms;
+   /* Pointer to sync object */
+   struct amdgpu_sync *sync;
 };
 
 enum bo_vm_match {
@@ -1035,35 +1027,24 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 
WARN_ON(!vm);
 
-   ctx->reserved = false;
ctx->n_vms = 1;
ctx->sync = &mem->sync;
-
-   INIT_LIST_HEAD(&ctx->list);
-   INIT_LIST_HEAD(&ctx->duplicates);
-
-   ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
-   if (!ctx->vm_pd)
-   return -ENOMEM;
-
-   ctx->kfd_bo.priority = 0;
-   ctx->kfd_bo.tv.bo = &bo->tbo;
-   ctx->kfd_bo.tv.num_shared = 1;
-   list_add(&ctx->kfd_bo.tv.head, &ctx->list);
-
-   amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
-
-   ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
-false, &ctx->duplicates);
-   if (ret) {
-   pr_err("Failed to reserve buffers in ttm.\n");
-   kfree(ctx->vm_pd);
-   ctx->vm_pd = NULL;
-   return ret;
+   drm_exec_init(&ctx->exec, true);
+   drm_exec_while_not_all_locked(&ctx->exec) {
+   ret = amdgpu_vm_lock_pd

[PATCH 4/9] drm/amdgpu: use drm_exec for GEM and CSA handling

2023-02-28 Thread Christian König
Start using the new component here as well.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 42 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 77 +++--
 2 files changed, 53 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index c6d4d41c4393..ea434c8de047 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -22,6 +22,8 @@
  * * Author: monk@amd.com
  */
 
+#include <drm/drm_exec.h>
+
 #include "amdgpu.h"
 
 uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
@@ -65,31 +67,25 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
  uint64_t csa_addr, uint32_t size)
 {
-   struct ww_acquire_ctx ticket;
-   struct list_head list;
-   struct amdgpu_bo_list_entry pd;
-   struct ttm_validate_buffer csa_tv;
+   struct drm_exec exec;
int r;
 
-   INIT_LIST_HEAD(&list);
-   INIT_LIST_HEAD(&csa_tv.head);
-   csa_tv.bo = &bo->tbo;
-   csa_tv.num_shared = 1;
-
-   list_add(&csa_tv.head, &list);
-   amdgpu_vm_get_pd_bo(vm, &list, &pd);
-
-   r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
-   if (r) {
-   DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
-   return r;
+   drm_exec_init(&exec, true);
+   drm_exec_while_not_all_locked(&exec) {
+   r = amdgpu_vm_lock_pd(vm, &exec);
+   if (likely(!r))
+   r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 0);
+   drm_exec_continue_on_contention(&exec);
+   if (unlikely(r)) {
+   DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+   goto error;
+   }
}
 
*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
if (!*bo_va) {
-   ttm_eu_backoff_reservation(&ticket, &list);
-   DRM_ERROR("failed to create bo_va for static CSA\n");
-   return -ENOMEM;
+   r = -ENOMEM;
+   goto error;
}
 
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
@@ -99,10 +95,10 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (r) {
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
amdgpu_vm_bo_del(adev, *bo_va);
-   ttm_eu_backoff_reservation(&ticket, &list);
-   return r;
+   goto error;
}
 
-   ttm_eu_backoff_reservation(&ticket, &list);
-   return 0;
+error:
+   drm_exec_fini(&exec);
+   return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index ed1164a87fce..b070f3ae1569 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -33,6 +33,7 @@
 
 #include 
 #include 
+#include <drm/drm_exec.h>
 #include 
 #include 
 
@@ -197,29 +198,23 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
 
-   struct amdgpu_bo_list_entry vm_pd;
-   struct list_head list, duplicates;
struct dma_fence *fence = NULL;
-   struct ttm_validate_buffer tv;
-   struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
+   struct drm_exec exec;
long r;
 
-   INIT_LIST_HEAD(&list);
-   INIT_LIST_HEAD(&duplicates);
-
-   tv.bo = &bo->tbo;
-   tv.num_shared = 2;
-   list_add(&tv.head, &list);
-
-   amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
-
-   r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
-   if (r) {
-   dev_err(adev->dev, "leaking bo va because "
-   "we fail to reserve bo (%ld)\n", r);
-   return;
+   drm_exec_init(&exec, false);
+   drm_exec_while_not_all_locked(&exec) {
+   r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 0);
+   if (likely(!r))
+   r = amdgpu_vm_lock_pd(vm, &exec);
+   drm_exec_continue_on_contention(&exec);
+   if (unlikely(r)) {
+   dev_err(adev->dev, "leaking bo va (%ld)\n", r);
+   goto out_unlock;
+   }
}
+
bo_va = amdgpu_vm_bo_find(vm, bo);
if (!bo_va || --bo_va->ref_count)
goto out_unlock;
@@ -229,6 +224,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
goto out_unlock;
 
r = amdgpu_vm_clear_freed(adev, vm, &fence);
+   if (unlikely(r < 0))
+   dev_err(adev->dev, "failed to clear page "
+   "tables on GEM object close (%ld)\n", r);
if (r || !fence)
  

[PATCH 6/9] drm/amdgpu: use the new drm_exec object for CS v2

2023-02-28 Thread Christian König
Use the new component here as well and remove the old handling.

v2: drop duplicate handling

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c |  71 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |   5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 210 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h  |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  22 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   3 -
 7 files changed, 115 insertions(+), 204 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4e4efd10cb89..255161dd05f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -54,7 +54,6 @@
 
 #include 
 #include 
-#include <drm/ttm/ttm_execbuf_util.h>
 
 #include 
 #include 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 252a876b0725..b6298e901cbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -28,6 +28,7 @@
  *Christian König 
  */
 
+#include <linux/sort.h>
 #include 
 
 #include "amdgpu.h"
@@ -50,13 +51,20 @@ static void amdgpu_bo_list_free(struct kref *ref)
   refcount);
struct amdgpu_bo_list_entry *e;
 
-   amdgpu_bo_list_for_each_entry(e, list) {
-   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+   amdgpu_bo_list_for_each_entry(e, list)
+   amdgpu_bo_unref(&e->bo);
+   call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
+}
 
-   amdgpu_bo_unref(&bo);
-   }
+static int amdgpu_bo_list_entry_cmp(const void *_a, const void *_b)
+{
+   const struct amdgpu_bo_list_entry *a = _a, *b = _b;
 
-   call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
+   if (a->priority > b->priority)
+   return 1;
+   if (a->priority < b->priority)
+   return -1;
+   return 0;
 }
 
 int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
@@ -118,7 +126,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
 
entry->priority = min(info[i].bo_priority,
  AMDGPU_BO_LIST_MAX_PRIORITY);
-   entry->tv.bo = &bo->tbo;
+   entry->bo = bo;
 
if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
list->gds_obj = bo;
@@ -133,6 +141,8 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
 
list->first_userptr = first_userptr;
list->num_entries = num_entries;
+   sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry),
+amdgpu_bo_list_entry_cmp, NULL);
 
trace_amdgpu_cs_bo_status(list->num_entries, total_size);
 
@@ -141,16 +151,10 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
return 0;
 
 error_free:
-   for (i = 0; i < last_entry; ++i) {
-   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
-
-   amdgpu_bo_unref(&bo);
-   }
-   for (i = first_userptr; i < num_entries; ++i) {
-   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
-
-   amdgpu_bo_unref(&bo);
-   }
+   for (i = 0; i < last_entry; ++i)
+   amdgpu_bo_unref(&array[i].bo);
+   for (i = first_userptr; i < num_entries; ++i)
+   amdgpu_bo_unref(&array[i].bo);
kvfree(list);
return r;
 
@@ -182,41 +186,6 @@ int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
return -ENOENT;
 }
 
-void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
-struct list_head *validated)
-{
-   /* This is based on the bucket sort with O(n) time complexity.
-* An item with priority "i" is added to bucket[i]. The lists are then
-* concatenated in descending order.
-*/
-   struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS];
-   struct amdgpu_bo_list_entry *e;
-   unsigned i;
-
-   for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
-   INIT_LIST_HEAD(&bucket[i]);
-
-   /* Since buffers which appear sooner in the relocation list are
-* likely to be used more often than buffers which appear later
-* in the list, the sort mustn't change the ordering of buffers
-* with the same priority, i.e. it must be stable.
-*/
-   amdgpu_bo_list_for_each_entry(e, list) {
-   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-   unsigned priority = e->priority;
-
-   if (!bo->parent)
-   list_add_tail(&e->tv.head, &bucket[priority]);
-
-   e->user_pages = NULL;
-   e->range = NULL;
-   }
-
-   /* Connect the sorted buckets in the output list. */
-   for (i =

[PATCH 7/9] drm/radeon: switch over to drm_exec

2023-02-28 Thread Christian König
Just a straightforward conversion without any optimization.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/radeon.h|  7 ++--
 drivers/gpu/drm/radeon/radeon_cs.c | 45 +-
 drivers/gpu/drm/radeon/radeon_gem.c| 40 +--
 drivers/gpu/drm/radeon/radeon_object.c | 25 +++---
 drivers/gpu/drm/radeon/radeon_object.h |  2 +-
 drivers/gpu/drm/radeon/radeon_vm.c | 10 +++---
 6 files changed, 66 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 57e20780a458..c67b537170e7 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -75,8 +75,8 @@
 
 #include 
 #include 
-#include <drm/ttm/ttm_execbuf_util.h>
 
+#include <drm/drm_exec.h>
 #include 
 #include 
 
@@ -457,7 +457,8 @@ struct radeon_mman {
 
 struct radeon_bo_list {
struct radeon_bo*robj;
-   struct ttm_validate_buffer  tv;
+   struct list_headlist;
+   boolshared;
uint64_tgpu_offset;
unsignedpreferred_domains;
unsignedallowed_domains;
@@ -1068,6 +1069,7 @@ struct radeon_cs_parser {
struct radeon_bo_list   *vm_bos;
struct list_headvalidated;
unsigneddma_reloc_idx;
+   struct drm_exec exec;
/* indices of various chunks */
struct radeon_cs_chunk  *chunk_ib;
struct radeon_cs_chunk  *chunk_relocs;
@@ -1081,7 +1083,6 @@ struct radeon_cs_parser {
u32 cs_flags;
u32 ring;
s32 priority;
-   struct ww_acquire_ctx   ticket;
 };
 
 static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 46a27ebf4588..5c681a44cec7 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -182,11 +182,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
}
}
 
-   p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
-   p->relocs[i].tv.num_shared = !r->write_domain;
-
-   radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
- priority);
+   p->relocs[i].shared = !r->write_domain;
+   radeon_cs_buckets_add(&buckets, &p->relocs[i].list, priority);
}
 
radeon_cs_buckets_get_list(&buckets, &p->validated);
@@ -197,7 +194,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
if (need_mmap_lock)
mmap_read_lock(current->mm);
 
-	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
+   r = radeon_bo_list_validate(p->rdev, &p->exec, &p->validated, p->ring);
 
if (need_mmap_lock)
mmap_read_unlock(current->mm);
@@ -253,12 +250,11 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
struct radeon_bo_list *reloc;
int r;
 
-   list_for_each_entry(reloc, &p->validated, tv.head) {
+   list_for_each_entry(reloc, &p->validated, list) {
struct dma_resv *resv;
 
resv = reloc->robj->tbo.base.resv;
-   r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
-reloc->tv.num_shared);
+   r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, reloc->shared);
if (r)
return r;
}
@@ -275,6 +271,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
s32 priority = 0;
 
INIT_LIST_HEAD(&p->validated);
+   drm_exec_init(&p->exec, true);
 
if (!cs->num_chunks) {
return 0;
@@ -396,8 +393,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 static int cmp_size_smaller_first(void *priv, const struct list_head *a,
  const struct list_head *b)
 {
-	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
-	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);
+   struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, list);
+   struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, list);
 
/* Sort A before B if A is smaller. */
if (la->robj->tbo.base.size > lb->robj->tbo.base.size)
@@ -416,11 +413,13 @@ static int cmp_size_smaller_first(void *priv, const struct list_head *a,
  * If error is set than unvalidate buffer, otherwise just free memory
  * used by parsing context.
  **/
-static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
+static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
 {
unsigned i;
 
if (!error) {

[PATCH 8/9] drm/qxl: switch to using drm_exec

2023-02-28 Thread Christian König
Just a straightforward conversion without any optimization.

Only compile tested for now.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/qxl/qxl_drv.h |  7 ++--
 drivers/gpu/drm/qxl/qxl_release.c | 67 ---
 2 files changed, 38 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index ea993d7162e8..3e732648b332 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -38,12 +38,12 @@
 
 #include 
 #include 
+#include <drm/drm_exec.h>
 #include 
 #include 
 #include 
 #include 
 #include 
-#include <drm/ttm/ttm_execbuf_util.h>
 #include 
 
 #include "qxl_dev.h"
@@ -101,7 +101,8 @@ struct qxl_gem {
 };
 
 struct qxl_bo_list {
-   struct ttm_validate_buffer tv;
+   struct qxl_bo   *bo;
+   struct list_headlist;
 };
 
 struct qxl_crtc {
@@ -151,7 +152,7 @@ struct qxl_release {
struct qxl_bo *release_bo;
uint32_t release_offset;
uint32_t surface_release_id;
-   struct ww_acquire_ctx ticket;
+   struct drm_exec exec;
struct list_head bos;
 };
 
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 368d26da0d6a..da7cd9cd58f9 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -121,13 +121,11 @@ qxl_release_free_list(struct qxl_release *release)
 {
while (!list_empty(&release->bos)) {
struct qxl_bo_list *entry;
-   struct qxl_bo *bo;
 
entry = container_of(release->bos.next,
-struct qxl_bo_list, tv.head);
-   bo = to_qxl_bo(entry->tv.bo);
-   qxl_bo_unref(&bo);
-   list_del(&entry->tv.head);
+struct qxl_bo_list, list);
+   qxl_bo_unref(&entry->bo);
+   list_del(&entry->list);
kfree(entry);
}
release->release_bo = NULL;
@@ -172,8 +170,8 @@ int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo)
 {
struct qxl_bo_list *entry;
 
-   list_for_each_entry(entry, &release->bos, tv.head) {
-   if (entry->tv.bo == &bo->tbo)
+   list_for_each_entry(entry, &release->bos, list) {
+   if (entry->bo == bo)
return 0;
}
 
@@ -182,9 +180,8 @@ int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo)
return -ENOMEM;
 
qxl_bo_ref(bo);
-   entry->tv.bo = &bo->tbo;
-   entry->tv.num_shared = 0;
-   list_add_tail(&entry->tv.head, &release->bos);
+   entry->bo = bo;
+   list_add_tail(&entry->list, &release->bos);
return 0;
 }
 
@@ -221,21 +218,27 @@ int qxl_release_reserve_list(struct qxl_release *release, bool no_intr)
if (list_is_singular(&release->bos))
return 0;
 
-   ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos,
-!no_intr, NULL);
-   if (ret)
-   return ret;
-
-   list_for_each_entry(entry, &release->bos, tv.head) {
-   struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
-
-   ret = qxl_release_validate_bo(bo);
-   if (ret) {
-			ttm_eu_backoff_reservation(&release->ticket, &release->bos);
-   return ret;
+   drm_exec_init(&release->exec, !no_intr);
+   drm_exec_while_not_all_locked(&release->exec) {
+   list_for_each_entry(entry, &release->bos, list) {
+   ret = drm_exec_prepare_obj(&release->exec,
+  &entry->bo->tbo.base,
+  1);
+   drm_exec_break_on_contention(&release->exec);
+   if (ret)
+   goto error;
}
}
+
+   list_for_each_entry(entry, &release->bos, list) {
+   ret = qxl_release_validate_bo(entry->bo);
+   if (ret)
+   goto error;
+   }
return 0;
+error:
+   drm_exec_fini(&release->exec);
+   return ret;
 }
 
 void qxl_release_backoff_reserve_list(struct qxl_release *release)
@@ -245,7 +248,7 @@ void qxl_release_backoff_reserve_list(struct qxl_release *release)
if (list_is_singular(&release->bos))
return;
 
-   ttm_eu_backoff_reservation(&release->ticket, &release->bos);
+   drm_exec_fini(&release->exec);
 }
 
 int qxl_alloc_surface_release_reserved(struct qxl_device *qdev,
@@ -404,18 +407,18 @@ void qxl_release_unmap(struct qxl_device *qdev,
 
 void qxl_release_fence_buffer_objects(struct qxl_release *release)
 {
-   struct ttm_buffer_object *bo;
struct ttm_device *bdev;
-   struct ttm_validate_buffer *entry;
+   struct qxl_bo_list *entry;
struct qxl_device *qdev;
+   struct qxl_bo *bo;
 
/* if only one object on t

[PATCH 5/9] drm/amdgpu: use drm_exec for MES testing

2023-02-28 Thread Christian König
Start using the new component here as well.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 86 +++--
 1 file changed, 39 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 82e27bd4f038..95292a65fd25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -22,6 +22,7 @@
  */
 
 #include 
+#include <drm/drm_exec.h>
 
 #include "amdgpu_mes.h"
 #include "amdgpu.h"
@@ -1126,34 +1127,29 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
 struct amdgpu_mes_ctx_data *ctx_data)
 {
struct amdgpu_bo_va *bo_va;
-   struct ww_acquire_ctx ticket;
-   struct list_head list;
-   struct amdgpu_bo_list_entry pd;
-   struct ttm_validate_buffer csa_tv;
struct amdgpu_sync sync;
+   struct drm_exec exec;
int r;
 
amdgpu_sync_create(&sync);
-   INIT_LIST_HEAD(&list);
-   INIT_LIST_HEAD(&csa_tv.head);
 
-   csa_tv.bo = &ctx_data->meta_data_obj->tbo;
-   csa_tv.num_shared = 1;
-
-   list_add(&csa_tv.head, &list);
-   amdgpu_vm_get_pd_bo(vm, &list, &pd);
-
-   r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
-   if (r) {
-   DRM_ERROR("failed to reserve meta data BO: err=%d\n", r);
-   return r;
+   drm_exec_init(&exec, false);
+   drm_exec_while_not_all_locked(&exec) {
+   r = drm_exec_prepare_obj(&exec,
+&ctx_data->meta_data_obj->tbo.base,
+0);
+   if (likely(!r))
+   r = amdgpu_vm_lock_pd(vm, &exec);
+   drm_exec_continue_on_contention(&exec);
+		if (unlikely(r))
+   goto error_fini_exec;
}
 
bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);
if (!bo_va) {
-   ttm_eu_backoff_reservation(&ticket, &list);
DRM_ERROR("failed to create bo_va for meta data BO\n");
-   return -ENOMEM;
+   r = -ENOMEM;
+   goto error_fini_exec;
}
 
r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0,
@@ -1163,33 +1159,35 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
 
if (r) {
DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r);
-   goto error;
+   goto error_del_bo_va;
}
 
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r) {
DRM_ERROR("failed to do vm_bo_update on meta data\n");
-   goto error;
+   goto error_del_bo_va;
}
amdgpu_sync_fence(&sync, bo_va->last_pt_update);
 
r = amdgpu_vm_update_pdes(adev, vm, false);
if (r) {
DRM_ERROR("failed to update pdes on meta data\n");
-   goto error;
+   goto error_del_bo_va;
}
amdgpu_sync_fence(&sync, vm->last_update);
 
amdgpu_sync_wait(&sync, false);
-   ttm_eu_backoff_reservation(&ticket, &list);
+   drm_exec_fini(&exec);
 
amdgpu_sync_free(&sync);
ctx_data->meta_data_va = bo_va;
return 0;
 
-error:
+error_del_bo_va:
amdgpu_vm_bo_del(adev, bo_va);
-   ttm_eu_backoff_reservation(&ticket, &list);
+
+error_fini_exec:
+   drm_exec_fini(&exec);
amdgpu_sync_free(&sync);
return r;
 }
@@ -1200,34 +1198,28 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
struct amdgpu_bo *bo = ctx_data->meta_data_obj;
struct amdgpu_vm *vm = bo_va->base.vm;
-   struct amdgpu_bo_list_entry vm_pd;
-   struct list_head list, duplicates;
-   struct dma_fence *fence = NULL;
-   struct ttm_validate_buffer tv;
-   struct ww_acquire_ctx ticket;
-   long r = 0;
-
-   INIT_LIST_HEAD(&list);
-   INIT_LIST_HEAD(&duplicates);
-
-   tv.bo = &bo->tbo;
-   tv.num_shared = 2;
-   list_add(&tv.head, &list);
-
-   amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
-
-   r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
-   if (r) {
-   dev_err(adev->dev, "leaking bo va because "
-   "we fail to reserve bo (%ld)\n", r);
-   return r;
+   struct dma_fence *fence;
+   struct drm_exec exec;
+   long r;
+
+   drm_exec_init(&exec, false);
+   drm_exec_while_not_all_locked(&exec) {
+   r = drm_exec_prepare_obj(&exec,
+&ctx_data->meta_data_obj->tbo.base,
+0);
+   if (likely(!r))
+   r = amdgpu_vm_lock_pd(vm, &exec);
+   drm_exec_continue_on_contention(&exec);
+		if (unlikely(r))
+

[PATCH 9/9] drm: move ttm_execbuf_util into vmwgfx

2023-02-28 Thread Christian König
VMWGFX is the only remaining user of this and should probably be moved
over to drm_exec when it starts using GEM as well.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/Makefile   | 4 ++--
 drivers/gpu/drm/vmwgfx/Makefile| 2 +-
 drivers/gpu/drm/{ttm => vmwgfx}/ttm_execbuf_util.c | 7 ++-
 .../drm/ttm => drivers/gpu/drm/vmwgfx}/ttm_execbuf_util.h  | 0
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h| 2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.h | 2 +-
 6 files changed, 11 insertions(+), 6 deletions(-)
 rename drivers/gpu/drm/{ttm => vmwgfx}/ttm_execbuf_util.c (97%)
 rename {include/drm/ttm => drivers/gpu/drm/vmwgfx}/ttm_execbuf_util.h (100%)

diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
index f906b22959cf..b05a8477d0d0 100644
--- a/drivers/gpu/drm/ttm/Makefile
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -3,8 +3,8 @@
 # Makefile for the drm device driver.  This driver provides support for the
 
 ttm-y := ttm_tt.o ttm_bo.o ttm_bo_util.o ttm_bo_vm.o ttm_module.o \
-   ttm_execbuf_util.o ttm_range_manager.o ttm_resource.o ttm_pool.o \
-   ttm_device.o ttm_sys_manager.o
+   ttm_range_manager.o ttm_resource.o ttm_pool.o ttm_device.o \
+   ttm_sys_manager.o
 ttm-$(CONFIG_AGP) += ttm_agp_backend.o
 
 obj-$(CONFIG_DRM_TTM) += ttm.o
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index e94479d9cd5b..e30e10e25c53 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
-   vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_ttm_buffer.o \
+	vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_ttm_buffer.o ttm_execbuf_util.o \
vmwgfx_cmd.o vmwgfx_irq.o vmwgfx_ldu.o \
vmwgfx_overlay.o vmwgfx_gmrid_manager.o vmwgfx_fence.o \
vmwgfx_bo.o vmwgfx_scrn.o vmwgfx_context.o \
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/vmwgfx/ttm_execbuf_util.c
similarity index 97%
rename from drivers/gpu/drm/ttm/ttm_execbuf_util.c
rename to drivers/gpu/drm/vmwgfx/ttm_execbuf_util.c
index f1c60fa80c2d..5e4e28899acd 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_execbuf_util.c
@@ -26,8 +26,13 @@
  *
  **/
 
-#include <drm/ttm/ttm_execbuf_util.h>
 #include 
+#include 
+#include 
+#include 
+#include 
+
+#include "ttm_execbuf_util.h"
 
 static void ttm_eu_backoff_reservation_reverse(struct list_head *list,
  struct ttm_validate_buffer *entry)
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/drivers/gpu/drm/vmwgfx/ttm_execbuf_util.h
similarity index 100%
rename from include/drm/ttm/ttm_execbuf_util.h
rename to drivers/gpu/drm/vmwgfx/ttm_execbuf_util.h
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index fb8f0c0642c0..49e3dd8c04ec 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -37,11 +37,11 @@
 #include 
 #include 
 
-#include <drm/ttm/ttm_execbuf_util.h>
 #include 
 #include 
 #include 
 
+#include "ttm_execbuf_util.h"
 #include "ttm_object.h"
 
 #include "vmwgfx_fence.h"
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
index 240ee0c4ebfd..927fc8afdbfe 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
@@ -32,7 +32,7 @@
 #include 
 #include 
 
-#include <drm/ttm/ttm_execbuf_util.h>
+#include "ttm_execbuf_util.h"
 
 #define VMW_RES_DIRTY_NONE 0
 #define VMW_RES_DIRTY_SET BIT(0)
-- 
2.34.1



Re: [PATCH 2/6] drm/amdgpu: Implement a new userqueue fence driver

2023-02-28 Thread Arunpravin Paneer Selvam

Hi Christian,

On 2/27/2023 6:12 PM, Christian König wrote:

On 26.02.23 at 17:54, Arunpravin Paneer Selvam wrote:

Developed a userqueue fence driver for the userqueue process shared
BO synchronization.

Create a dma fence having the write pointer as the seqno and allocate
seq64 memory for each user queue process and feed this memory address
into the firmware/hardware, thus the firmware writes the read pointer
into the given address when the process completes its execution.
Compare wptr and rptr; if rptr >= wptr, signal the fences for the
waiting process to consume the buffers.
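
(Not part of the patch: a hedged sketch of the rptr/wptr check described
above, with hypothetical names; the fence seqno is the queue wptr and the
firmware writes the rptr into the per-process seq64 slot:)

	struct amdgpu_userq_fence {          /* hypothetical layout */
		struct dma_fence base;
		u64 *rptr_cpu_addr;          /* CPU view of the seq64 slot */
	};

	static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
	{
		struct amdgpu_userq_fence *fence =
			container_of(f, struct amdgpu_userq_fence, base);
		u64 rptr = READ_ONCE(*fence->rptr_cpu_addr);

		/* the seqno is the queue wptr; signaled once the firmware's
		 * read pointer has caught up with it */
		return rptr >= f->seqno;
	}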

v2: Worked on review comments from Christian for the following
 modifications

 - Add wptr as sequence number into the fence
 - Add a reference count for the fence driver
 - Add dma_fence_put below the list_del as it might free the
   userq fence.
 - Trim unnecessary code in interrupt handler.
 - Check dma fence signaled state in dma fence creation function for a
   potential problem of hardware completing the job processing beforehand.
 - Add necessary locks.
 - Create a list and process all the unsignaled fences.
 - Clean up fences in destroy function.
 - Implement .enabled callback function


A few more nitpicks below, but from the technical side that looks
mostly clean.




Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   6 +
  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 251 ++
  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |  61 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |  20 ++
  .../gpu/drm/amd/include/amdgpu_userqueue.h    |   2 +
  6 files changed, 341 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index a239533a895f..ea09273b585f 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -59,7 +59,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
  amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
  amdgpu_fw_attestation.o amdgpu_securedisplay.o \
  amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-    amdgpu_ring_mux.o amdgpu_seq64.o
+    amdgpu_ring_mux.o amdgpu_seq64.o amdgpu_userq_fence.o
    amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index bd3462d0da5f..6b7ac1ebd04c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -53,6 +53,7 @@
  #include "amdgpu_xgmi.h"
  #include "amdgpu_reset.h"
  #include "amdgpu_userqueue.h"
+#include "amdgpu_userq_fence.h"
    /*
   * KMS wrapper.
@@ -2827,6 +2828,10 @@ static int __init amdgpu_init(void)
  if (r)
  goto error_fence;
  +    r = amdgpu_userq_fence_slab_init();
+    if (r)
+    goto error_fence;
+
  DRM_INFO("amdgpu kernel modesetting enabled.\n");
  amdgpu_register_atpx_handler();
  amdgpu_acpi_detect();
@@ -2851,6 +2856,7 @@ static void __exit amdgpu_exit(void)
  amdgpu_unregister_atpx_handler();
  amdgpu_sync_fini();
  amdgpu_fence_slab_fini();
+    amdgpu_userq_fence_slab_fini();
  mmu_notifier_synchronize();
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
new file mode 100644
index ..609a7328e9a6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include 
+#include 
+
+#include 
+
+#include "amdgpu.h"
+#include "a

Re: amdgpu didn't start with pci=nocrs parameter, get error "Fatal error during GPU init"

2023-02-28 Thread Mikhail Gavrilov
On Mon, Feb 27, 2023 at 3:22 PM Christian König
>
> Unfortunately yes. We could clean that up a bit more so that you don't
> run into a BUG() assertion, but what essentially happens here is that we
> completely fail to talk to the hardware.
>
> In this situation we can't even re-enable vesa or text console any more.
>
Then I don't understand why, when amdgpu is blacklisted via
modprobe.blacklist=amdgpu, I see graphics and can log in to
GNOME. Yes, without hardware acceleration, but it is better than
non-working graphics. It means there is some other driver (I assume this
is "video") which can successfully talk to the AMD hardware in
conditions where amdgpu cannot. My suggestion is that if
amdgpu fails to talk to the hardware, then let another suitable driver
do it. I attached a system log where I apply "pci=nocrs" with
"modprobe.blacklist=amdgpu" to show that graphics work right in
this case.
To do this, does the Linux module loading mechanism need to be refined?


-- 
Best Regards,
Mike Gavrilov.


system-without-amdgpu.tar.xz
Description: application/xz


Re: [PATCH v2 1/1] drm/doc: Document DRM device reset expectations

2023-02-28 Thread Pekka Paalanen
On Mon, 27 Feb 2023 15:40:00 -0500
André Almeida  wrote:

> Create a section that specifies how to deal with DRM device resets for
> kernel and userspace drivers.
> 
> Signed-off-by: André Almeida 
> ---
>  Documentation/gpu/drm-uapi.rst | 51 ++
>  1 file changed, 51 insertions(+)
> 
> diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst
> index 65fb3036a580..3d6c3ed392ea 100644
> --- a/Documentation/gpu/drm-uapi.rst
> +++ b/Documentation/gpu/drm-uapi.rst
> @@ -285,6 +285,57 @@ for GPU1 and GPU2 from different vendors, and a third handler for
>  mmapped regular files. Threads cause additional pain with signal
>  handling as well.
>  
> +Device reset
> +============
> +
> +The GPU stack is really complex and is prone to errors, from hardware bugs,
> +faulty applications and everything in the many layers in between. To recover
> +from this kind of state, sometimes it is needed to reset the GPU. Improper
> +handling of GPU resets can lead to an unstable userspace. This section
> +describes the expected behaviour of DRM drivers in those situations, and of
> +usermode drivers and compositors as well. The end goal is an experience as
> +seamless as possible, with the stack either recovering by itself or resetting
> +to a new stable state.
> +
> +Robustness
> +----------
> +
> +First of all, application robust APIs, when available, should be used. This
> +allows the application to correctly recover and continue to run after a reset.
> +Apps that don't use this should be promptly killed when the kernel driver
> +detects that it's in a broken state. Specific guidelines for some APIs:

Hi,

the "kill" wording is still here. It feels too harsh to me, like I say
in my comments below, but let's see what others think.

Even the device hot-unplug guide above this does not call for killing
anything and is prepared for userspace to keep going indefinitely if
userspace is broken enough.

> +
> +- OpenGL: KMD signals the abortion of submitted commands and the UMD should
> +  then react accordingly and abort the application.

No, not abort. Just return failures and make sure no API call will
block indefinitely.

> +
> +- Vulkan: Assumes that every app is able to deal with ``VK_ERROR_DEVICE_LOST``.
> +  If it doesn't do it right, it's considered a broken application and UMD
> +  will deal with it, aborting it.

Is it even possible to detect if an app does it right?

What if the app does do it right, but not before it attempts to hammer
a few more jobs in?

> +
> +Kernel mode driver
> +------------------
> +
> +The KMD must be able to detect that something is wrong with the application
> +and that a reset needs to take place to recover the device (e.g. an endless
> +wait). It needs to properly track the context that is broken and mark it as
> +dead, so any further syscalls to that context should be rejected. The
> +other contexts should be preserved when possible, avoiding crashing the rest
> +of userspace. KMD can ban a file descriptor that keeps causing resets, as it's
> +likely in a broken loop.

If userspace is in a broken loop repeatedly causing GPU reset, would it
keep using the same (render node) fd? To me it would be more likely to
close the fd and open a new one, then crash again. Robust or not, the
gfx library API would probably require tearing everything down and
starting from scratch. In fact, only robust apps would likely exhibit
this behaviour, and non-robust just get stuck or quit themselves.

I suppose in e.g. EGL, it is possible to just create a new context
instead of a new EGLDisplay, so both re-using and not using the old fd
are possible.
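
For illustration only (not from the patch), the robust-app flow alluded to
here might look roughly like this with GL_EXT_robustness, rebuilding the
context on the same EGLDisplay after a reset:

	#include <GLES2/gl2.h>
	#include <GLES2/gl2ext.h>

	/* returns 1 when the context was lost and must be rebuilt */
	static int check_for_device_reset(void)
	{
		GLenum status = glGetGraphicsResetStatusEXT();

		if (status == GL_NO_ERROR)
			return 0;

		/* GL_GUILTY_CONTEXT_RESET_EXT / GL_INNOCENT_CONTEXT_RESET_EXT /
		 * GL_UNKNOWN_CONTEXT_RESET_EXT: destroy the old EGL context,
		 * create a new one and re-upload all GPU resources */
		return 1;
	}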

The process identity would usually remain, I believe, except in cases
like Chromium with its separate rendering processes, but then, would
you really want to ban whole Chromium in that case...

> +

Another thing for the kernel mode driver maybe worth mentioning is that
the driver could also pretend a hot-unplug if the GPU crash is so bad
that everything is at risk being lost or corrupted.

> +User mode driver
> +----------------
> +
> +During a reset, the UMD should be aware that rejected syscalls indicate that
> +the context is broken and, for robust apps, recovery should happen for the
> +context. Non-robust apps must be terminated.

I think the termination thing probably needs to be much more nuanced,
and also interact with the repeat-offender policy.

Repeat-offender policy could be implemented in userspace too,
especially if userspace keeps using the same device fd which is likely
hidden by the gfx API.

> +
> +Compositors
> +-----------
> +
> +Compositors should be robust as well to properly deal with their errors.

What is the worth of this note? To me as a compositor developer it is
obvious.


Thanks,
pq

> +
> +
>  .. _drm_driver_ioctl:
>  
>  IOCTL Support on Device Nodes





Re: amdgpu didn't start with pci=nocrs parameter, get error "Fatal error during GPU init"

2023-02-28 Thread Christian König

On 28.02.23 at 10:52, Mikhail Gavrilov wrote:

On Mon, Feb 27, 2023 at 3:22 PM Christian König

Unfortunately yes. We could clean that up a bit more so that you don't
run into a BUG() assertion, but what essentially happens here is that we
completely fail to talk to the hardware.

In this situation we can't even re-enable vesa or text console any more.


Then I don't understand why, when amdgpu is blacklisted via
modprobe.blacklist=amdgpu, I see graphics and can log in to
GNOME. Yes, without hardware acceleration, but it is better than
non-working graphics. It means there is some other driver (I assume
this is "video") which can successfully talk to the AMD hardware in
conditions where amdgpu cannot.


The point is it doesn't need to talk to the amdgpu hardware. What it
does is talk to the good old VGA/VESA emulation, which just
happens to still be enabled by the BIOS/GRUB.


And that VGA/VESA emulation doesn't need any BAR or whatever to keep the 
hw running in the state where it was initialized before the kernel 
started. The kernel just grabs the addresses where it needs to write the 
display data and keeps going with that.


But when a hw specific driver wants to load, this is the first thing
which gets disabled, because we need to load new firmware. And with the
BARs disabled this can't be re-enabled without rebooting the system.



My suggestion is that if
amdgpu fails to talk to the hardware, then let another suitable driver
do it. I attached a system log where I apply "pci=nocrs" with
"modprobe.blacklist=amdgpu" to show that graphics work right in
this case.
To do this, does the Linux module loading mechanism need to be refined?


That's actually working as expected. The real problem is that the BIOS 
on that system is so broken that we can't access the hw correctly.


What we could do is check the BARs very early on and refuse to
load when they are disabled. The problem with this approach is that there
are systems where it is normal that the BARs are disabled until the
driver loads and get enabled during the hardware initialization process.
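
An early sanity check like the one described could look roughly like this
(illustrative sketch only, with hypothetical names):

	#include <linux/pci.h>

	/* bail out of probe early when the BIOS left the register BAR
	 * unassigned, before the VGA/VESA console has been torn down */
	static bool gpu_bars_look_sane(struct pci_dev *pdev)
	{
		return pci_resource_len(pdev, 0) != 0 &&
		       (pci_resource_flags(pdev, 0) & IORESOURCE_MEM);
	}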


What you might want to look into is to find a quirk for the BIOS to 
properly enable the nvme controller.


Regards,
Christian.



[linux-next:master] BUILD REGRESSION 058f4df42121baadbb8a980c06011e912784dbd2

2023-02-28 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 058f4df42121baadbb8a980c06011e912784dbd2  Add linux-next specific files for 20230228

Error/Warning reports:

https://lore.kernel.org/oe-kbuild-all/202302111601.jty4lkra-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202302170355.ljqlzucu-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202302210350.lynwcl4t-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_optc.c:294:6: warning: no previous prototype for 'optc3_wait_drr_doublebuffer_pending_clear' [-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/umc_v8_10.c:212:6: warning: no previous prototype for 'umc_v8_10_convert_error_address' [-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/umc_v8_10.c:212:6: warning: no previous prototype for function 'umc_v8_10_convert_error_address' [-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/umc_v8_10.c:437:37: warning: variable 'channel_index' set but not used [-Wunused-but-set-variable]
drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c:81:29: warning: variable 'ring' set but not used [-Wunused-but-set-variable]
include/asm-generic/div64.h:238:36: error: passing argument 1 of '__div64_32' from incompatible pointer type [-Werror=incompatible-pointer-types]

Unverified Error/Warning (likely false positive, please contact us if interested):

arch/parisc/mm/fault.c:427 do_page_fault() error: uninitialized symbol 'msg'.
drivers/iommu/apple-dart.c:1281:1: sparse: sparse: symbol 'apple_dart_pm_ops' was not declared. Should it be static?
drivers/usb/gadget/composite.c:2082:33: sparse: sparse: restricted __le16 degrades to integer
drivers/watchdog/imx2_wdt.c:442:22: sparse: sparse: symbol 'imx_wdt' was not declared. Should it be static?
drivers/watchdog/imx2_wdt.c:446:22: sparse: sparse: symbol 'imx_wdt_legacy' was not declared. Should it be static?
net/bluetooth/hci_sync.c:2403 hci_pause_addr_resolution() warn: missing error code? 'err'

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- alpha-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:no-previous-prototype-for-umc_v8_10_convert_error_address
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:variable-channel_index-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-vcn_v4_0.c:warning:variable-ring-set-but-not-used
|-- alpha-randconfig-c041-20230226
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:no-previous-prototype-for-umc_v8_10_convert_error_address
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:variable-channel_index-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-vcn_v4_0.c:warning:variable-ring-set-but-not-used
|-- alpha-randconfig-c043-20230226
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:no-previous-prototype-for-umc_v8_10_convert_error_address
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:variable-channel_index-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-vcn_v4_0.c:warning:variable-ring-set-but-not-used
|-- arc-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:no-previous-prototype-for-umc_v8_10_convert_error_address
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:variable-channel_index-set-but-not-used
|   |-- 
drivers-gpu-drm-amd-amdgpu-vcn_v4_0.c:warning:variable-ring-set-but-not-used
|   `-- 
include-asm-generic-div64.h:error:passing-argument-of-__div64_32-from-incompatible-pointer-type
|-- arc-randconfig-r033-20230226
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:no-previous-prototype-for-umc_v8_10_convert_error_address
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:variable-channel_index-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-vcn_v4_0.c:warning:variable-ring-set-but-not-used
|-- arm-allmodconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:no-previous-prototype-for-umc_v8_10_convert_error_address
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:variable-channel_index-set-but-not-used
|   |-- 
drivers-gpu-drm-amd-amdgpu-vcn_v4_0.c:warning:variable-ring-set-but-not-used
|   `-- 
include-asm-generic-div64.h:error:passing-argument-of-__div64_32-from-incompatible-pointer-type
|-- arm-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:no-previous-prototype-for-umc_v8_10_convert_error_address
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:variable-channel_index-set-but-not-used
|   |-- 
drivers-gpu-drm-amd-amdgpu-vcn_v4_0.c:warning:variable-ring-set-but-not-used
|   `-- 
include-asm-generic-div64.h:error:passing-argument-of-__div64_32-from-incompatible-pointer-type
|-- arm-randconfig-s041-20230226
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:no-previous-prototype-for-umc_v8_10_convert_error_address
|   |-- 
drivers-gpu-drm-amd-amdgpu-umc_v8_10.c:warning:variable-c
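
The include-asm-generic-div64.h entries above are the classic do_div() misuse:
on 32-bit architectures do_div() expands into a call to __div64_32(), which
takes a pointer to a u64, so the dividend must be a u64 lvalue. A minimal
sketch of the contract (illustrative only, not taken from any of the reports):

	u64 total_ns = jiffies_to_nsecs(jiffies);	/* via linux/math64.h */
	u32 rem;

	rem = do_div(total_ns, NSEC_PER_SEC);	/* quotient stays in total_ns */
	/* passing a u32 (or any non-u64) dividend here is exactly what
	 * produces the "passing argument 1 of '__div64_32'" error above */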

Re: [PATCH v2 1/1] drm/doc: Document DRM device reset expectations

2023-02-28 Thread André Almeida

Hi Pekka,

Thank you for your feedback,

On 2/28/23 05:02, Pekka Paalanen wrote:

On Mon, 27 Feb 2023 15:40:00 -0500
André Almeida  wrote:


Create a section that specifies how to deal with DRM device resets for
kernel and userspace drivers.

Signed-off-by: André Almeida 
---
  Documentation/gpu/drm-uapi.rst | 51 ++
  1 file changed, 51 insertions(+)

diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst
index 65fb3036a580..3d6c3ed392ea 100644
--- a/Documentation/gpu/drm-uapi.rst
+++ b/Documentation/gpu/drm-uapi.rst
@@ -285,6 +285,57 @@ for GPU1 and GPU2 from different vendors, and a third 
handler for
  mmapped regular files. Threads cause additional pain with signal
  handling as well.
  
+Device reset
+============
+
+The GPU stack is really complex and is prone to errors, from hardware bugs,
+faulty applications and everything in the many layers in between. To recover
+from this kind of state, it is sometimes necessary to reset the GPU. Improper
+handling of GPU resets can lead to an unstable userspace. This section
+describes the expected behaviour of DRM drivers in those situations, and of
+usermode drivers and compositors as well. The end goal is an experience as
+seamless as possible, with the stack either recovering by itself or resetting
+to a new stable state.
+
+Robustness
+----------
+
+First of all, application robustness APIs, when available, should be used. This
+allows the application to correctly recover and continue to run after a reset.
+Apps that don't use them should be promptly killed when the kernel driver
+detects that they are in a broken state. Specific guidelines for some APIs:

Hi,

the "kill" wording is still here. It feels too harsh to me, like I say
in my comments below, but let's see what others think.

Even the device hot-unplug guide above this does not call for killing
anything and is prepared for userspace to keep going indefinitely if
userspace is broken enough.


If I understood correctly, you don't think that either KMD or UMD 
should terminate apps that hang the GPU, right? Should those apps run 
indefinitely until the user decides to do something about it?


At least on Intel GPUs, if I run an OpenGL infinite loop the app will be 
terminated in a few moments, and the rest of userspace is preserved. 
There's an app that does just that if you want to have a look at how it 
works: https://gitlab.freedesktop.org/andrealmeid/gpu-timeout
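
For reference, this is what a robust client checks on its own side; a minimal
sketch assuming GL_EXT_robustness is exposed (the function pointer would be
resolved via eglGetProcAddress()):

	#include <GLES2/gl2.h>
	#include <GLES2/gl2ext.h>

	/* resolved once via eglGetProcAddress("glGetGraphicsResetStatusEXT") */
	static PFNGLGETGRAPHICSRESETSTATUSEXTPROC get_reset_status;

	static int context_lost(void)
	{
		switch (get_reset_status()) {
		case GL_NO_ERROR:
			return 0;	/* keep rendering */
		case GL_GUILTY_CONTEXT_RESET_EXT:	/* this context hung the GPU */
		case GL_INNOCENT_CONTEXT_RESET_EXT:	/* collateral damage */
		case GL_UNKNOWN_CONTEXT_RESET_EXT:
		default:
			return 1;	/* tear down and recreate the context */
		}
	}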





+
+- OpenGL: KMD signals the abortion of submitted commands and the UMD should 
then
+  react accordingly and abort the application.

No, not abort. Just return failures and make sure no API call will
block indefinitely.


+
+- Vulkan: Assumes that every app is able to deal with ``VK_ERROR_DEVICE_LOST``.
+  If it doesn't do it right, it's considered a broken application and UMD will
+  deal with it, aborting it.

Is it even possible to detect if an app does it right?

What if the app does do it right, but not before it attempts to hammer
a few more jobs in?


I think what I meant was

+ If it doesn't support VK_ERROR_DEVICE_LOST, it's considered a broken 
app [...]


In the sense that if it doesn't support this, it is impossible for the 
app to recover gracefully from a reset, so it's considered broken
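
A minimal sketch of the non-broken pattern (core Vulkan names; the recreate
helper is hypothetical and app-specific):

	#include <vulkan/vulkan.h>

	static void recreate_device_and_resources(void);	/* hypothetical */

	static VkResult submit_frame(VkQueue queue, const VkSubmitInfo *info,
				     VkFence fence)
	{
		VkResult res = vkQueueSubmit(queue, 1, info, fence);

		if (res == VK_ERROR_DEVICE_LOST) {
			/* every object from the lost VkDevice is dead: destroy
			 * them, create a fresh device and swapchain, redraw */
			recreate_device_and_resources();
		}
		return res;
	}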



+
+Kernel mode driver
+------------------
+
+The KMD must be able to detect that something is wrong with the application
+and that a reset needs to take place to recover the device (e.g. an endless
+wait). It needs to properly track the broken context and mark it as dead, so
+any further syscalls to that context are rejected. The other contexts should
+be preserved when possible, avoiding crashes in the rest of userspace. KMD can
+ban a file descriptor that keeps causing resets, as it's likely in a broken
+loop.

If userspace is in a broken loop repeatedly causing GPU reset, would it
keep using the same (render node) fd? To me it would be more likely to
close the fd and open a new one, then crash again. Robust or not, the
gfx library API would probably require tearing everything down and
starting from scratch. In fact, only robust apps would likely exhibit
this behaviour, and non-robust just get stuck or quit themselves.

I suppose in e.g. EGL, it is possible to just create a new context
instead of a new EGLDisplay, so both re-using and not using the old fd
are possible.

The process identity would usually remain, I believe, except in cases
like Chromium with its separate rendering processes, but then, would
you really want to ban whole Chromium in that case...

Right, so userspace is the right place to implement the repeat-offender 
policy, as you noted below.



+

Another thing maybe worth mentioning for the kernel mode driver is that
the driver could also pretend a hot-unplug if the GPU crash is so bad
that everything is at risk of being lost or corrupted.


Ack, I'll add that




+User mode driver
+----------------
+
+During a reset, UMD should be aware that rejected syscalls indicate that the
+context is broken and for robust apps the recovery should happen for the
+context. Non-robust apps must be terminated.
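
Regarding the hot-unplug fallback suggested above, the core already provides
the hook; a minimal sketch, with the fatal-reset condition left as a
hypothetical driver flag:

	/* at the end of a reset path the driver deems unrecoverable */
	if (reset_was_fatal)		/* hypothetical driver state */
		drm_dev_unplug(ddev);	/* device then behaves as hot-unplugged;
					 * further ioctls fail with -ENODEV */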

Re: [PATCH 8/9] drm/qxl: switch to using drm_exec

2023-02-28 Thread kernel test robot
Hi Christian,

I love your patch! Yet something to improve:

[auto build test ERROR on drm-misc/drm-misc-next]
[also build test ERROR on drm/drm-next drm-intel/for-linux-next linus/master 
next-20230228]
[cannot apply to drm-exynos/exynos-drm-next drm-intel/for-linux-next-fixes v6.2]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Christian-K-nig/drm-add-drm_exec-selftests/20230228-173404
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:
https://lore.kernel.org/r/20230228083406.1720795-9-christian.koenig%40amd.com
patch subject: [PATCH 8/9] drm/qxl: switch to using drm_exec
config: i386-randconfig-a014-20230227 
(https://download.01.org/0day-ci/archive/20230228/202302282339.welaynwc-...@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# 
https://github.com/intel-lab-lkp/linux/commit/435d2421797eb683d27984c9a823b48704069df9
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Christian-K-nig/drm-add-drm_exec-selftests/20230228-173404
git checkout 435d2421797eb683d27984c9a823b48704069df9
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=i386 olddefconfig
make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot 
| Link: 
https://lore.kernel.org/oe-kbuild-all/202302282339.welaynwc-...@intel.com/

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "drm_exec_init" [drivers/gpu/drm/qxl/qxl.ko] undefined!
>> ERROR: modpost: "drm_exec_prepare_obj" [drivers/gpu/drm/qxl/qxl.ko] 
>> undefined!
>> ERROR: modpost: "drm_exec_cleanup" [drivers/gpu/drm/qxl/qxl.ko] undefined!
>> ERROR: modpost: "drm_exec_fini" [drivers/gpu/drm/qxl/qxl.ko] undefined!

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests
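
This and the other drm_exec build failures below (qxl again, then radeon)
share the same root cause: the driver now calls into drm_exec without
selecting the new Kconfig symbol, so drm_exec.o is never built in. A likely
fix, assuming the DRM_EXEC symbol from patch 1/9, is a select in each
converted driver, e.g.:

	config DRM_QXL
		tristate "QXL virtual GPU"
		select DRM_EXEC
		...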


Re: [PATCH 8/9] drm/qxl: switch to using drm_exec

2023-02-28 Thread kernel test robot
Hi Christian,

I love your patch! Yet something to improve:

[auto build test ERROR on drm-misc/drm-misc-next]
[also build test ERROR on drm/drm-next drm-intel/for-linux-next linus/master 
next-20230228]
[cannot apply to drm-intel/for-linux-next-fixes v6.2]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Christian-K-nig/drm-add-drm_exec-selftests/20230228-173404
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:
https://lore.kernel.org/r/20230228083406.1720795-9-christian.koenig%40amd.com
patch subject: [PATCH 8/9] drm/qxl: switch to using drm_exec
config: x86_64-randconfig-a016-20230227 
(https://download.01.org/0day-ci/archive/20230301/202303010013.szzncscw-...@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# 
https://github.com/intel-lab-lkp/linux/commit/435d2421797eb683d27984c9a823b48704069df9
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Christian-K-nig/drm-add-drm_exec-selftests/20230228-173404
git checkout 435d2421797eb683d27984c9a823b48704069df9
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=x86_64 olddefconfig
make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot 
| Link: 
https://lore.kernel.org/oe-kbuild-all/202303010013.szzncscw-...@intel.com/

All errors (new ones prefixed by >>):

   ld: vmlinux.o: in function `qxl_release_reserve_list':
>> drivers/gpu/drm/qxl/qxl_release.c:221: undefined reference to `drm_exec_init'
>> ld: drivers/gpu/drm/qxl/qxl_release.c:222: undefined reference to 
>> `drm_exec_cleanup'
>> ld: drivers/gpu/drm/qxl/qxl_release.c:224: undefined reference to 
>> `drm_exec_prepare_obj'
>> ld: drivers/gpu/drm/qxl/qxl_release.c:240: undefined reference to 
>> `drm_exec_fini'
   ld: vmlinux.o: in function `qxl_release_backoff_reserve_list':
>> drivers/gpu/drm/qxl/qxl_release.c:251: undefined reference to `drm_exec_fini'
   ld: vmlinux.o: in function `qxl_release_fence_buffer_objects':
   drivers/gpu/drm/qxl/qxl_release.c:439: undefined reference to `drm_exec_fini'


vim +221 drivers/gpu/drm/qxl/qxl_release.c

   210  
   211  int qxl_release_reserve_list(struct qxl_release *release, bool no_intr)
   212  {
   213  int ret;
   214  struct qxl_bo_list *entry;
   215  
   216  /* if only one object on the release its the release itself
   217 since these objects are pinned no need to reserve */
   218  if (list_is_singular(&release->bos))
   219  return 0;
   220  
 > 221  drm_exec_init(&release->exec, !no_intr);
 > 222  drm_exec_while_not_all_locked(&release->exec) {
   223  list_for_each_entry(entry, &release->bos, list) {
 > 224  ret = drm_exec_prepare_obj(&release->exec,
   225 &entry->bo->tbo.base,
   226 1);
   227  drm_exec_break_on_contention(&release->exec);
   228  if (ret)
   229  goto error;
   230  }
   231  }
   232  
   233  list_for_each_entry(entry, &release->bos, list) {
   234  ret = qxl_release_validate_bo(entry->bo);
   235  if (ret)
   236  goto error;
   237  }
   238  return 0;
   239  error:
 > 240  drm_exec_fini(&release->exec);
   241  return ret;
   242  }
   243  
   244  void qxl_release_backoff_reserve_list(struct qxl_release *release)
   245  {
   246  /* if only one object on the release its the release itself
   247 since these objects are pinned no need to reserve */
   248  if (list_is_singular(&release->bos))
   249  return;
   250  
 > 251  drm_exec_fini(&release->exec);
   252  }
   253  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests


Re: [PATCH 7/9] drm/radeon: switch over to drm_exec

2023-02-28 Thread kernel test robot
Hi Christian,

I love your patch! Yet something to improve:

[auto build test ERROR on drm-misc/drm-misc-next]
[also build test ERROR on drm/drm-next drm-intel/for-linux-next linus/master 
next-20230228]
[cannot apply to drm-exynos/exynos-drm-next drm-intel/for-linux-next-fixes v6.2]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Christian-K-nig/drm-add-drm_exec-selftests/20230228-173404
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:
https://lore.kernel.org/r/20230228083406.1720795-8-christian.koenig%40amd.com
patch subject: [PATCH 7/9] drm/radeon: switch over to drm_exec
config: openrisc-randconfig-r016-20230226 
(https://download.01.org/0day-ci/archive/20230301/202303010052.xjcf8umu-...@intel.com/config)
compiler: or1k-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/intel-lab-lkp/linux/commit/93b3fd6c23deae79357cfb6bc0a7fcb07ed819f9
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Christian-K-nig/drm-add-drm_exec-selftests/20230228-173404
git checkout 93b3fd6c23deae79357cfb6bc0a7fcb07ed819f9
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=openrisc olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=openrisc SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot 
| Link: 
https://lore.kernel.org/oe-kbuild-all/202303010052.xjcf8umu-...@intel.com/

All errors (new ones prefixed by >>):

   or1k-linux-ld: drivers/gpu/drm/radeon/radeon_object.o: in function 
`radeon_bo_list_validate':
>> radeon_object.c:(.text+0xf10): undefined reference to `drm_exec_cleanup'
   radeon_object.c:(.text+0xf10): relocation truncated to fit: 
R_OR1K_INSN_REL_26 against undefined symbol `drm_exec_cleanup'
>> or1k-linux-ld: radeon_object.c:(.text+0xf9c): undefined reference to 
>> `drm_exec_prepare_obj'
   radeon_object.c:(.text+0xf9c): relocation truncated to fit: 
R_OR1K_INSN_REL_26 against undefined symbol `drm_exec_prepare_obj'
   or1k-linux-ld: drivers/gpu/drm/radeon/radeon_gem.o: in function 
`radeon_gem_va_update_vm':
>> radeon_gem.c:(.text+0x218): undefined reference to `drm_exec_init'
   radeon_gem.c:(.text+0x218): relocation truncated to fit: R_OR1K_INSN_REL_26 
against undefined symbol `drm_exec_init'
>> or1k-linux-ld: radeon_gem.c:(.text+0x228): undefined reference to 
>> `drm_exec_cleanup'
   radeon_gem.c:(.text+0x228): relocation truncated to fit: R_OR1K_INSN_REL_26 
against undefined symbol `drm_exec_cleanup'
>> or1k-linux-ld: radeon_gem.c:(.text+0x27c): undefined reference to 
>> `drm_exec_fini'
   radeon_gem.c:(.text+0x27c): relocation truncated to fit: R_OR1K_INSN_REL_26 
against undefined symbol `drm_exec_fini'
>> or1k-linux-ld: radeon_gem.c:(.text+0x2b8): undefined reference to 
>> `drm_exec_prepare_obj'
   radeon_gem.c:(.text+0x2b8): relocation truncated to fit: R_OR1K_INSN_REL_26 
against undefined symbol `drm_exec_prepare_obj'
   or1k-linux-ld: radeon_gem.c:(.text+0x360): undefined reference to 
`drm_exec_prepare_obj'
   radeon_gem.c:(.text+0x360): relocation truncated to fit: R_OR1K_INSN_REL_26 
against undefined symbol `drm_exec_prepare_obj'
   or1k-linux-ld: radeon_gem.c:(.text+0x3bc): undefined reference to 
`drm_exec_fini'
   radeon_gem.c:(.text+0x3bc): relocation truncated to fit: R_OR1K_INSN_REL_26 
against undefined symbol `drm_exec_fini'
   or1k-linux-ld: radeon_gem.c:(.text+0x41c): undefined reference to 
`drm_exec_fini'
   radeon_gem.c:(.text+0x41c): relocation truncated to fit: R_OR1K_INSN_REL_26 
against undefined symbol `drm_exec_fini'
   or1k-linux-ld: drivers/gpu/drm/radeon/radeon_cs.o: in function 
`radeon_cs_parser_fini':
>> radeon_cs.c:(.text+0xf98): undefined reference to `drm_exec_fini'
   radeon_cs.c:(.text+0xf98): relocation truncated to fit: R_OR1K_INSN_REL_26 
against undefined symbol `drm_exec_fini'
   or1k-linux-ld: drivers/gpu/drm/radeon/radeon_cs.o: in function 
`radeon_cs_parser_init':
>> radeon_cs.c:(.text+0x119c): undefined reference to `drm_exec_init'
   radeon_cs.c:(.text+0x119c): additional relocation overflows omitted from the 
output

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests


Re: [PATCH v2 1/1] drm/doc: Document DRM device reset expectations

2023-02-28 Thread Rob Clark
On Mon, Feb 27, 2023 at 12:40 PM André Almeida  wrote:
>
> Create a section that specifies how to deal with DRM device resets for
> kernel and userspace drivers.
>
> Signed-off-by: André Almeida 
> ---
>  Documentation/gpu/drm-uapi.rst | 51 ++
>  1 file changed, 51 insertions(+)
>
> diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst
> index 65fb3036a580..3d6c3ed392ea 100644
> --- a/Documentation/gpu/drm-uapi.rst
> +++ b/Documentation/gpu/drm-uapi.rst
> @@ -285,6 +285,57 @@ for GPU1 and GPU2 from different vendors, and a third 
> handler for
>  mmapped regular files. Threads cause additional pain with signal
>  handling as well.
>
> +Device reset
> +============
> +
> +The GPU stack is really complex and is prone to errors, from hardware bugs,
> +faulty applications and everything in the many layers in between. To recover
> +from this kind of state, it is sometimes necessary to reset the GPU. Improper
> +handling of GPU resets can lead to an unstable userspace. This section
> +describes the expected behaviour of DRM drivers in those situations, and of
> +usermode drivers and compositors as well. The end goal is an experience as
> +seamless as possible, with the stack either recovering by itself or resetting
> +to a new stable state.
> +
> +Robustness
> +----------
> +
> +First of all, application robustness APIs, when available, should be used.
> +This allows the application to correctly recover and continue to run after a
> +reset.
> +Apps that don't use them should be promptly killed when the kernel driver
> +detects that they are in a broken state. Specific guidelines for some APIs:
> +
> +- OpenGL: KMD signals the abortion of submitted commands and the UMD should 
> then
> +  react accordingly and abort the application.

I disagree.. what would be the point of GL_EXT_robustness
glGetGraphicsResetStatusEXT() if we are going to abort the application
before it has a chance to call this?

Also, this would break the deqp-egl robustness tests because they
would start crashing ;-)

> +
> +- Vulkan: Assumes that every app is able to deal with 
> ``VK_ERROR_DEVICE_LOST``.
> +  If it doesn't do it right, it's considered a broken application and UMD 
> will
> +  deal with it, aborting it.
> +
> +Kernel mode driver
> +------------------
> +
> +The KMD must be able to detect that something is wrong with the application
> +and that a reset needs to take place to recover the device (e.g. an endless
> +wait). It needs to properly track the broken context and mark it as dead, so
> +any further syscalls to that context are rejected. The other contexts should
> +be preserved when possible, avoiding crashes in the rest of userspace. KMD
> +can ban a file descriptor that keeps causing resets, as it's likely in a
> +broken loop.

syscalls to the context?  Like the one querying the reset status?  :-P

In general I don't think the KMD should block syscalls.  _Maybe_ there
could be some threshold at which point we start blocking things, but I
think that would still cause problems with deqp-egl.

What we should perhaps do is encourage drivers to implement
devcoredump support for logging/reporting GPU crashes.  This would
have the benefit that distro error reporting could be standardized.
And hopefully some actionable bug reports come out of it.

And maybe we could standardize UABI for reporting crashes so a
compositor has a chance to realize an app is crashing and take action.
(But again, how does the compositor know that this isn't intentional,
it would be kinda inconvenient if the compositor kept killing my deqp
runs.)  But for all the rest, nak

BR,
-R


> +
> +User mode driver
> +----------------
> +
> +During a reset, UMD should be aware that rejected syscalls indicate that the
> +context is broken and for robust apps the recovery should happen for the
> +context. Non-robust apps must be terminated.
> +
> +Compositors
> +-----------
> +
> +Compositors should be robust as well to properly deal with their errors.
> +
> +
>  .. _drm_driver_ioctl:
>
>  IOCTL Support on Device Nodes
> --
> 2.39.2
>
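
On the devcoredump suggestion above: the interface is a single call; a minimal
sketch of a driver hang handler (the function name and snapshot contents are
illustrative):

	#include <linux/devcoredump.h>

	static void gpu_report_hang(struct device *dev, void *snapshot, size_t len)
	{
		/* snapshot must be vmalloc()ed; the devcoredump core takes
		 * ownership, exposes it under /sys/class/devcoredump/ and
		 * vfree()s it once userspace reads it or the timeout expires */
		dev_coredumpv(dev, snapshot, len, GFP_KERNEL);
	}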


Re: Common DRM execution context v3

2023-02-28 Thread Danilo Krummrich

Hi Christian,

On 2/28/23 09:33, Christian König wrote:

Hi guys,

third round for those patches. They have been in my queue for nearly a
year now because I couldn't find much time to push into this.

Danilo wants to use this for his GPU VAs tracker work and Arun needs it
for his secure semaphore work, so we should probably get it reviewed
now.


Thanks for the follow up on this series - very much appreciated!

- Danilo



Compared to the last version I've fixed one memory leak found by Danilo
and removed the support for duplicate tracking. Only radeon really needs
that and we can trivially handle it differently there.

Please review and/or comment,
Christian.






Re: [PATCH 1/9] drm: execution context for GEM buffers v3

2023-02-28 Thread Danilo Krummrich

On 2/28/23 09:33, Christian König wrote:

This adds the infrastructure for an execution context for GEM buffers
which is similar to the existinc TTMs execbuf util and intended to replace


"existing"


it in the long term.

The basic functionality is that we abstract the necessary loop to lock
many different GEM buffers with automated deadlock and duplicate handling.

v2: drop xarray and use dynamic resized array instead, the locking
 overhead is unecessary and measureable.


"unecessary", "measurable"


v3: drop duplicate tracking, radeon is really the only one needing that.

Signed-off-by: Christian König 
---
  Documentation/gpu/drm-mm.rst |  12 ++
  drivers/gpu/drm/Kconfig  |   6 +
  drivers/gpu/drm/Makefile |   2 +
  drivers/gpu/drm/drm_exec.c   | 249 +++
  include/drm/drm_exec.h   | 115 
  5 files changed, 384 insertions(+)
  create mode 100644 drivers/gpu/drm/drm_exec.c
  create mode 100644 include/drm/drm_exec.h

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index a79fd3549ff8..a52e6f4117d6 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -493,6 +493,18 @@ DRM Sync Objects
  .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
 :export:
  
+DRM Execution context

+=
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :doc: Overview
+
+.. kernel-doc:: include/drm/drm_exec.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :export:
+
  GPU Scheduler
  =
  
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig

index 17d252dc25e2..84a5fc28c48d 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -200,6 +200,12 @@ config DRM_TTM
  GPU memory types. Will be enabled automatically if a device driver
  uses it.
  
+config DRM_EXEC

+   tristate
+   depends on DRM
+   help
+ Execution context for command submissions
+
  config DRM_BUDDY
tristate
depends on DRM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index ab4460fcd63f..d40defbb0347 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -78,6 +78,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += 
drm_panel_orientation_quirks.o
  #
  # Memory-management helpers
  #
+#
+obj-$(CONFIG_DRM_EXEC) += drm_exec.o
  
  obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
  
diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c

new file mode 100644
index ..df546cc5a227
--- /dev/null
+++ b/drivers/gpu/drm/drm_exec.c
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+#include 
+#include 
+#include 
+
+/**
+ * DOC: Overview
+ *
+ * This component mainly abstracts the retry loop necessary for locking
+ * multiple GEM objects while preparing hardware operations (e.g. command
+ * submissions, page table updates etc..).
+ *
+ * If a contention is detected while locking a GEM object the cleanup procedure
+ * unlocks all previously locked GEM objects and locks the contended one first
+ * before locking any further objects.
+ *
+ * After an object is locked fences slots can optionally be reserved on the
+ * dma_resv object inside the GEM object.
+ *
+ * A typical usage pattern should look like this::
+ *
+ * struct drm_gem_object *obj;
+ * struct drm_exec exec;
+ * unsigned long index;
+ * int ret;
+ *
+ * drm_exec_init(&exec, true);
+ * drm_exec_while_not_all_locked(&exec) {
+ * ret = drm_exec_prepare_obj(&exec, boA, 1);
+ * drm_exec_continue_on_contention(&exec);
+ * if (ret)
+ * goto error;
+ *
+ * ret = drm_exec_lock(&exec, boB, 1);


This function doesn't seem to exist (anymore).


+ * drm_exec_continue_on_contention(&exec);
+ * if (ret)
+ * goto error;
+ * }
+ *
+ * drm_exec_for_each_locked_object(&exec, index, obj) {
+ * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ);
+ * ...
+ * }
+ * drm_exec_fini(&exec);
+ *
+ * See struct drm_exec for more details.
+ */
+
+/* Dummy value used to initially enter the retry loop */
+#define DRM_EXEC_DUMMY (void*)~0
+
+/* Unlock all objects and drop references */
+static void drm_exec_unlock_all(struct drm_exec *exec)
+{
+   struct drm_gem_object *obj;
+   unsigned long index;
+
+   drm_exec_for_each_locked_object(exec, index, obj) {
+   dma_resv_unlock(obj->resv);
+   drm_gem_object_put(obj);
+   }
+
+   if (exec->prelocked) {
+   dma_resv_unlock(exec->prelocked->resv);
+   drm_gem_object_put(exec->prelocked);
+   exec->prelocked = NULL;
+   }


Let's say we try to lock 3 objects A, B and C in chronological order and 
in the first "drm_exec_cleanup() iteration" C is contended. Firstly, we 
lock C in the next iteration. If now A or B is contended, we never set

[PATCH] Revert "drm/amdgpu/display: change pipe policy for DCN 2.1"

2023-02-28 Thread Alex Deucher
This reverts commit fa458eb10dc7218146a84e6d2e072424e64d188a.

The original issue is no longer present even with this commit reverted,
as verified by the original reporter.

Signed-off-by: Alex Deucher 
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1849#note_1759599
---
 drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index 8f9244fe5c86..c10ff621cb1d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -642,7 +642,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.clock_trace = true,
.disable_pplib_clock_request = true,
.min_disp_clk_khz = 10,
-   .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
+   .pipe_split_policy = MPC_SPLIT_DYNAMIC,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
-- 
2.39.2



[PATCH] drm/amdgpu: fix no previous prototype warning

2023-02-28 Thread Kun Liu
add static prefix for vangogh_set_apu_thermal_limit function

Signed-off-by: Kun Liu 
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 016d5621e0b3..24046af60933 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -1597,7 +1597,7 @@ static int vangogh_get_apu_thermal_limit(struct 
smu_context *smu, uint32_t *limi
  0, limit);
 }
 
-int vangogh_set_apu_thermal_limit(struct smu_context *smu, uint32_t limit)
+static int vangogh_set_apu_thermal_limit(struct smu_context *smu, uint32_t 
limit)
 {
return smu_cmn_send_smc_msg_with_param(smu,
  SMU_MSG_SetReducedThermalLimit,
-- 
2.25.1



Re: [PATCH] drm/amdgpu: Stop clearing kiq position during fini

2023-02-28 Thread Yin, ZhenGuo (Chris)

Acked-by: ZhenGuo Yin 

On 2/27/2023 2:45 PM, Yaoyao Lei wrote:

Do not clear the kiq position in RLC_CP_SCHEDULERS so that CP can perform
IDLE-SAVE after VF fini.
Otherwise it could cause a GFX hang if another Win guest is rendering.

Signed-off-by: Yaoyao Lei 
---
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 14 +++---
  1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 6983acc456b2..073f5f23bc3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7285,17 +7285,9 @@ static int gfx_v10_0_hw_fini(void *handle)
  
  	if (amdgpu_sriov_vf(adev)) {

gfx_v10_0_cp_gfx_enable(adev, false);
-   /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
-   if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) {
-   tmp = RREG32_SOC15(GC, 0, 
mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
-   tmp &= 0xff00;
-   WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, 
tmp);
-   } else {
-   tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
-   tmp &= 0xff00;
-   WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
-   }
-
+   /* Do not clear the KIQ position here; clearing it causes a
+* GFX hang when another Win guest is rendering.
+*/
return 0;
}
gfx_v10_0_cp_enable(adev, false);


RE: [PATCH] drm/amdgpu: fix no previous prototype warning

2023-02-28 Thread Quan, Evan
[AMD Official Use Only - General]

Please add the proper tag as instructed. Other than this, the patch is 
Reviewed-by: Evan Quan 
"If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot 
| Link: 
https://lore.kernel.org/oe-kbuild-all/202303010827.c2n0ybgt-...@intel.com/";

BR
Evan
> -Original Message-
> From: Kun Liu 
> Sent: Wednesday, March 1, 2023 10:42 AM
> To: amd-gfx@lists.freedesktop.org; Quan, Evan 
> Cc: Liang, Richard qi ; Deucher, Alexander
> ; Liu, Kun 
> Subject: [PATCH] drm/amdgpu: fix no previous prototype warning
> 
> add static prefix for vangogh_set_apu_thermal_limit function
> 
> Signed-off-by: Kun Liu 
> ---
>  drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> index 016d5621e0b3..24046af60933 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> @@ -1597,7 +1597,7 @@ static int vangogh_get_apu_thermal_limit(struct
> smu_context *smu, uint32_t *limi
> 0, limit);
>  }
> 
> -int vangogh_set_apu_thermal_limit(struct smu_context *smu, uint32_t
> limit)
> +static int vangogh_set_apu_thermal_limit(struct smu_context *smu,
> uint32_t limit)
>  {
>   return smu_cmn_send_smc_msg_with_param(smu,
> 
> SMU_MSG_SetReducedThermalLimit,
> --
> 2.25.1


[PATCH] drm/amdgpu: Support umc node harvest config on umc v8_10

2023-02-28 Thread Candice Li
No need to query error count and error address on harvested umc nodes.
v2: Fix code bug, use active_mask instead of harvest_config
and remove unnecessary argument in LOOP macro.
v3: Leave adev->gmc.num_umc unchanged.

Signed-off-by: Candice Li 
Reviewed-by: Tao Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h   |  7 +--
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c|  1 -
 drivers/gpu/drm/amd/amdgpu/umc_v8_10.h|  4 ++--
 4 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index ea040adb1f150f..aebf3542481ead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -543,6 +543,7 @@ static void amdgpu_discovery_read_from_harvest_table(struct 
amdgpu_device *adev,
struct harvest_table *harvest_info;
u16 offset;
int i;
+   uint32_t umc_harvest_config = 0;
 
bhdr = (struct binary_header *)adev->mman.discovery_bin;
offset = le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset);
@@ -570,12 +571,17 @@ static void 
amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
break;
case UMC_HWID:
+   umc_harvest_config |=
+   1 << 
(le16_to_cpu(harvest_info->list[i].number_instance));
(*umc_harvest_count)++;
break;
default:
break;
}
}
+
+   adev->umc.active_mask = ((1 << adev->umc.node_inst_num) - 1) &
+   ~umc_harvest_config;
 }
 
 /* == */
@@ -1156,8 +1162,10 @@ static int amdgpu_discovery_reg_base_init(struct 
amdgpu_device *adev)
AMDGPU_MAX_SDMA_INSTANCES);
}
 
-   if (le16_to_cpu(ip->hw_id) == UMC_HWID)
+   if (le16_to_cpu(ip->hw_id) == UMC_HWID) {
adev->gmc.num_umc++;
+   adev->umc.node_inst_num++;
+   }
 
for (k = 0; k < num_base_address; k++) {
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index f2bf979af58835..36e19336f3b34e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -42,7 +42,7 @@
 #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) 
LOOP_UMC_CH_INST((ch_inst))
 
 #define LOOP_UMC_NODE_INST(node_inst) \
-   for ((node_inst) = 0; (node_inst) < adev->umc.node_inst_num; 
(node_inst)++)
+   for_each_set_bit((node_inst), &(adev->umc.active_mask), 
adev->umc.node_inst_num)
 
 #define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \
LOOP_UMC_NODE_INST((node_inst)) 
LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
@@ -69,7 +69,7 @@ struct amdgpu_umc {
/* number of umc instance with memory map register access */
uint32_t umc_inst_num;
 
-   /*number of umc node instance with memory map register access*/
+   /* Total number of umc node instance including harvest one */
uint32_t node_inst_num;
 
/* UMC regiser per channel offset */
@@ -82,6 +82,9 @@ struct amdgpu_umc {
 
const struct amdgpu_umc_funcs *funcs;
struct amdgpu_umc_ras *ras;
+
+   /* active mask for umc node instance */
+   unsigned long active_mask;
 };
 
 int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 85e0afc3d4f7f3..af7b3ba1ca0002 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -567,7 +567,6 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device 
*adev)
case IP_VERSION(8, 10, 0):
adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM;
-   adev->umc.node_inst_num = adev->gmc.num_umc;
adev->umc.max_ras_err_cnt_per_query = 
UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET;
adev->umc.retire_unit = UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM;
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h 
b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
index 25eaf4af5fcf4b..c6dfd433fec7bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
@@ -31,9 +31,9 @@
 /* number of umc instance with memory map register access *

[PATCH] drm/amd/pm: Enable ecc_info table support for smu v13_0_10

2023-02-28 Thread Candice Li
Support EccInfoTable which includes umc ras error count and
error address.

Signed-off-by: Candice Li 
Reviewed-by: Evan Quan 
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 75 +++
 1 file changed, 75 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 923a9fb3c8873c..27448ffe60a439 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -46,6 +46,7 @@
 #include "asic_reg/mp/mp_13_0_0_sh_mask.h"
 #include "smu_cmn.h"
 #include "amdgpu_ras.h"
+#include "umc_v8_10.h"
 
 /*
  * DO NOT use these for err/warn/info/debug messages.
@@ -90,6 +91,12 @@
 
 #define DEBUGSMC_MSG_Mode1Reset2
 
+/*
+ * SMU_v13_0_10 supports ECCTABLE since version 80.34.0,
+ * use this to check whether the ECCTABLE feature is supported
+ */
+#define SUPPORT_ECCTABLE_SMU_13_0_10_VERSION 0x00502200
+
 static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] 
= {
MSG_MAP(TestMessage,PPSMC_MSG_TestMessage,  
   1),
MSG_MAP(GetSmuVersion,  PPSMC_MSG_GetSmuVersion,
   1),
@@ -229,6 +236,7 @@ static struct cmn2asic_mapping 
smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
TAB_MAP(ACTIVITY_MONITOR_COEFF),
[SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE},
TAB_MAP(I2C_COMMANDS),
+   TAB_MAP(ECCINFO),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] 
= {
@@ -462,6 +470,8 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
   AMDGPU_GEM_DOMAIN_VRAM);
SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, 
MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+   SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t),
+   PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
 
smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), 
GFP_KERNEL);
if (!smu_table->metrics_table)
@@ -477,8 +487,14 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
if (!smu_table->watermarks_table)
goto err2_out;
 
+   smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, 
GFP_KERNEL);
+   if (!smu_table->ecc_table)
+   goto err3_out;
+
return 0;
 
+err3_out:
+   kfree(smu_table->watermarks_table);
 err2_out:
kfree(smu_table->gpu_metrics_table);
 err1_out:
@@ -2036,6 +2052,64 @@ static int smu_v13_0_0_send_bad_mem_channel_flag(struct 
smu_context *smu,
return ret;
 }
 
+static int smu_v13_0_0_check_ecc_table_support(struct smu_context *smu)
+{
+   struct amdgpu_device *adev = smu->adev;
+   uint32_t if_version = 0xff, smu_version = 0xff;
+   int ret = 0;
+
+   ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version);
+   if (ret)
+   return -EOPNOTSUPP;
+
+   if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)) &&
+   (smu_version >= SUPPORT_ECCTABLE_SMU_13_0_10_VERSION))
+   return ret;
+   else
+   return -EOPNOTSUPP;
+}
+
+static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu,
+   void 
*table)
+{
+   struct smu_table_context *smu_table = &smu->smu_table;
+   struct amdgpu_device *adev = smu->adev;
+   EccInfoTable_t *ecc_table = NULL;
+   struct ecc_info_per_ch *ecc_info_per_channel = NULL;
+   int i, ret = 0;
+   struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table;
+
+   ret = smu_v13_0_0_check_ecc_table_support(smu);
+   if (ret)
+   return ret;
+
+   ret = smu_cmn_update_table(smu,
+   SMU_TABLE_ECCINFO,
+   0,
+   smu_table->ecc_table,
+   false);
+   if (ret) {
+   dev_info(adev->dev, "Failed to export SMU ecc table!\n");
+   return ret;
+   }
+
+   ecc_table = (EccInfoTable_t *)smu_table->ecc_table;
+
+   for (i = 0; i < UMC_V8_10_TOTAL_CHANNEL_NUM(adev); i++) {
+   ecc_info_per_channel = &(eccinfo->ecc[i]);
+   ecc_info_per_channel->ce_count_lo_chip =
+   ecc_table->EccInfo[i].ce_count_lo_chip;
+   ecc_info_per_channel->ce_count_hi_chip =
+   ecc_table->EccInfo[i].ce_count_hi_chip;
+   ecc_info_per_channel->mca_umc_status =
+   ecc_table->EccInfo[i].mca_umc_status;
+   ecc_info_per_channel->mca_umc_addr =
+   ecc_table->EccInfo[i].mca_umc_addr;
+   }
+
+   return ret;
+}
+
 static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
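
On the version gate above: smu_version packs major.minor.patch into its low
three bytes, so SUPPORT_ECCTABLE_SMU_13_0_10_VERSION (0x00502200) decodes to
80.34.0, matching the comment in the patch. A standalone check of the
arithmetic:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t v = 0x00502200;	/* SUPPORT_ECCTABLE_SMU_13_0_10_VERSION */

		printf("%u.%u.%u\n",
		       (v >> 16) & 0xff,	/* 0x50 -> 80 (major) */
		       (v >> 8) & 0xff,		/* 0x22 -> 34 (minor) */
		       v & 0xff);		/* 0x00 ->  0 (patch) */
		return 0;
	}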