[PATCH] drm/amdgpu: set default num_kcq to 2 under sriov

2023-05-04 Thread YuBiao Wang
The number of kernel queues has impact on the latency under sriov
usecase. So to reduce the latency we set the default num_kcq = 2 under
sriov if not set manually.

Signed-off-by: YuBiao Wang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 1311e42ab8e9..d0ad7cb0fa05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -68,6 +68,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
/* enable mcbp for sriov */
amdgpu_mcbp = 1;
 
+   /* Reduce kcq number to 2 to reduce latency */
+   if (amdgpu_num_kcq == -1)
+   amdgpu_num_kcq = 2;
 }
 
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
-- 
2.25.1



[PATCH 4/4] drm/amdgpu: print vmhub id when no VM inv eng found

2023-05-04 Thread Yifan Zhang
to facilitate debugging.

Signed-off-by: Yifan Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index a4ba2c50a5c3..abdc2923cb62 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -561,8 +561,8 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device 
*adev)
 
inv_eng = ffs(vm_inv_engs[vmhub]);
if (!inv_eng) {
-   dev_err(adev->dev, "no VM inv eng for ring %s\n",
-   ring->name);
+   dev_err(adev->dev, "no VM inv eng for ring %s on hub 
%d\n",
+   ring->name, ring->vm_hub);
return -EINVAL;
}
 
-- 
2.37.3



[PATCH 3/4] drm/amdgpu: enable vmhub bitmask on gmc11

2023-05-04 Thread Yifan Zhang
This patch is to enable vmhub bitmask on gmc11.

Signed-off-by: Yifan Zhang 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 90cf79f8ddde..b669409e92a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -362,7 +362,7 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
&queried_pasid);
if (ret && queried_pasid == pasid) {
if (all_hub) {
-   for (i = 0; i < adev->num_vmhubs; i++)
+   for_each_set_bit(i, adev->vmhubs_mask, 
AMDGPU_MAX_VMHUBS)
gmc_v11_0_flush_gpu_tlb(adev, vmid,
i, flush_type);
} else {
@@ -760,7 +760,8 @@ static int gmc_v11_0_sw_init(void *handle)
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
-   adev->num_vmhubs = 2;
+   set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+   set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
/*
 * To fulfill 4-level page support,
 * vm size is 256TB (48bit), maximum size,
-- 
2.37.3



[PATCH 2/4] drm/amdgpu: alloc vm inv engines for every vmhub

2023-05-04 Thread Yifan Zhang
From: Shiwu Zhang 

There are AMDGPU_MAX_VMHUBS of vmhub in maximum and need to init the
vm_inv_engs for all of them.

In this way, the below error can be ruled out.
[  217.317752] amdgpu :02:00.0: amdgpu: no VM inv eng for ring sdma0

Signed-off-by: Shiwu Zhang 
Reviewed-by: Christian Koenig 
Reviewed-by: Le Ma 
Signed-off-by: Yifan Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 8c8a2f37ba33..a4ba2c50a5c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -533,18 +533,19 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 *subject to change when ring number changes
 * Engine 17: Gart flushes
 */
-#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
-#define MMHUB_FREE_VM_INV_ENGS_BITMAP  0x1FFF3
+#define AMDGPU_VMHUB_INV_ENG_BITMAP0x1FFF3
 
 int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
 {
struct amdgpu_ring *ring;
-   unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
-   {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
-   GFXHUB_FREE_VM_INV_ENGS_BITMAP};
+   unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
unsigned i;
unsigned vmhub, inv_eng;
 
+   /* init the vm inv eng for all vmhubs */
+   for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
+   vm_inv_engs[i] = AMDGPU_VMHUB_INV_ENG_BITMAP;
+
if (adev->enable_mes) {
/* reserve engine 5 for firmware */
for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++)
-- 
2.37.3



[PATCH 1/4] drm/amdgpu: add bitmask to iterate vmhubs

2023-05-04 Thread Yifan Zhang
From: Le Ma 

As the layout of VMHUB definition has been changed to cover multiple
XCD/AID case, the original num_vmhubs is not appropriate to do vmhub
iteration any more.

Drop num_vmhubs and introduce vmhubs_mask instead.

v2: switch to the new VMHUB layout
v3: use DECLARE_BITMAP to define vmhubs_mask

Change-Id: I3028c4cb607759253861cb10bf62f2fd13791e03
Signed-off-by: Le Ma 
Reviewed-by: Christian König 
Reviewed-by: Hawking Zhang 
Signed-off-by: Yifan Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  4 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  5 +++--
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 22 +-
 8 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5ded5a90dc68..9ac7ea452dee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -848,7 +848,7 @@ struct amdgpu_device {
dma_addr_t  dummy_page_addr;
struct amdgpu_vm_managervm_manager;
struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
-   unsignednum_vmhubs;
+   DECLARE_BITMAP(vmhubs_mask, AMDGPU_MAX_VMHUBS);
 
/* memory management */
struct amdgpu_mman  mman;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index f0a136d35279..5afbcc390d89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -733,7 +733,7 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device 
*adev,
if (adev->family == AMDGPU_FAMILY_AI) {
int i;
 
-   for (i = 0; i < adev->num_vmhubs; i++)
+   for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
} else {
amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 01cb89ffbd56..6b12f4a75fc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -182,7 +182,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, 
uint64_t offset,
}
mb();
amdgpu_device_flush_hdp(adev, NULL);
-   for (i = 0; i < adev->num_vmhubs; i++)
+   for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 
drm_dev_exit(idx);
@@ -264,7 +264,7 @@ void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
 
mb();
amdgpu_device_flush_hdp(adev, NULL);
-   for (i = 0; i < adev->num_vmhubs; i++)
+   for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index ea2a448147e3..ff96f11c2adf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -460,7 +460,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
&queried_pasid);
if (ret && queried_pasid == pasid) {
if (all_hub) {
-   for (i = 0; i < adev->num_vmhubs; i++)
+   for_each_set_bit(i, adev->vmhubs_mask, 
AMDGPU_MAX_VMHUBS)
gmc_v10_0_flush_gpu_tlb(adev, vmid,
i, flush_type);
} else {
@@ -928,7 +928,8 @@ static int gmc_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
-   adev->num_vmhubs = 2;
+   set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+   set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
/*
 * To fulfill 4-level page support,
 * vm size is 256TB (48bit), maximum size of 
Navi10/Navi14/Navi12,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index b7dad4e67813..aa754c95a0b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -808,7 +808,7 @@ static int gmc_v6_0_sw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   adev->num_vmhubs = 1;
+   set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
 
if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_

[PATCH] drm/amdgpu: Fix vram recover doesn't work after whole GPU reset

2023-05-04 Thread Lin . Cao
v1: Vmbo->shadow is used to back vram bo up when vram lost. So that we
should set shadow as vmbo->shadow to recover vmbo->bo
v2: Modify if(vmbo->shadow) shadow = vmbo->shadow as if(!vmbo->shadow)
continue;

Fixes: e18aaea733da ("drm/amdgpu: move shadow_list to amdgpu_bo_vm")
Signed-off-by: Lin.Cao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 750eaffa81ba..0581b4fec001 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4514,7 +4514,12 @@ static int amdgpu_device_recover_vram(struct 
amdgpu_device *adev)
dev_info(adev->dev, "recover vram bo from shadow start\n");
mutex_lock(&adev->shadow_list_lock);
list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
-   shadow = &vmbo->bo;
+
+   /* If vm is compute context or adev is APU, shadow will be NULL 
*/
+   if (!vmbo->shadow)
+   continue;
+   shadow = vmbo->shadow;
+
/* No need to recover an evicted BO */
if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
-- 
2.25.1



[linux-next:master] BUILD REGRESSION 145e5cddfe8b4bf607510b2dcf630d95f4db420f

2023-05-04 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 145e5cddfe8b4bf607510b2dcf630d95f4db420f  Add linux-next specific 
files for 20230504

Error/Warning reports:

https://lore.kernel.org/oe-kbuild-all/202304102354.q4voxgte-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202304220119.94pw6ysd-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202304230014.ybscpx20-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202304250419.ytcltuhg-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202305042329.gyk53ked-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202305050237.1cq4fbks-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

arch/um/drivers/harddog_user.c:6:10: fatal error: stdio.h: No such file or 
directory
drivers/accel/habanalabs/gaudi/gaudi.c:117:19: warning: unused variable 
'gaudi_irq_name' [-Wunused-const-variable]
drivers/base/regmap/regcache-maple.c:113:23: warning: 'lower_index' is used 
uninitialized [-Wuninitialized]
drivers/base/regmap/regcache-maple.c:113:36: warning: 'lower_last' is used 
uninitialized [-Wuninitialized]
drivers/bluetooth/btnxpuart.c:1332:34: warning: unused variable 
'nxpuart_of_match_table' [-Wunused-const-variable]
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:6339:6: warning: no 
previous prototype for 'amdgpu_dm_connector_funcs_force' [-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:6395:21: warning: 
variable 'count' set but not used [-Wunused-but-set-variable]
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c:494:13: warning: variable 'j' set but 
not used [-Wunused-but-set-variable]
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:48:38: warning: unused variable 
'golden_settings_gc_9_4_3' [-Wunused-const-variable]
drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:62: warning: wrong kernel-doc 
identifier on line:
drivers/gpu/drm/i915/i915_pmu.h:41: warning: This comment starts with '/**', 
but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
drivers/gpu/drm/i915/i915_request.h:176: warning: This comment starts with 
'/**', but isn't a kernel-doc comment. Refer 
Documentation/doc-guide/kernel-doc.rst
drivers/gpu/drm/i915/i915_vma.h:145: warning: expecting prototype for 
i915_vma_offset(). Prototype was for i915_vma_size() instead
drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c:298:6: warning: variable 'ret' is 
uninitialized when used here [-Wuninitialized]
mm/gup.c:2813:14: error: implicit declaration of function 
'folio_fast_pin_allowed'; did you mean 'folio_test_pinned'? 
[-Werror=implicit-function-declaration]
mm/gup.c:2813:7: error: call to undeclared function 'folio_fast_pin_allowed'; 
ISO C99 and later do not support implicit function declarations 
[-Wimplicit-function-declaration]
phy-mtk-hdmi-mt8195.c:(.text+0x186): undefined reference to `__floatundidf'
riscv64-linux-ld: phy-mtk-hdmi-mt8195.c:(.text+0x198): undefined reference to 
`__ltdf2'
riscv64-linux-ld: phy-mtk-hdmi-mt8195.c:(.text+0x1b8): undefined reference to 
`__gedf2'

Unverified Error/Warning (likely false positive, please contact us if 
interested):

drivers/cpufreq/pcc-cpufreq.c: linux/platform_device.h is included more than 
once.
drivers/gpu/host1x/context.c:82 host1x_memory_context_list_init() warn: missing 
error code 'err'
drivers/gpu/host1x/dev.c:417 host1x_iommu_attach() warn: passing zero to 
'ERR_PTR'
fs/ext4/super.c:4722 ext4_check_feature_compatibility() warn: bitwise AND 
condition is false here

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- alpha-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-amdgpu_dm-amdgpu_dm.c:warning:no-previous-prototype-for-amdgpu_dm_connector_funcs_force
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-amdgpu_dm-amdgpu_dm.c:warning:variable-count-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gfx.c:warning:variable-j-set-but-not-used
|-- alpha-randconfig-r003-20230501
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gfx.c:warning:variable-j-set-but-not-used
|-- arc-allyesconfig
|   |-- 
drivers-base-regmap-regcache-maple.c:warning:lower_index-is-used-uninitialized
|   |-- 
drivers-base-regmap-regcache-maple.c:warning:lower_last-is-used-uninitialized
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-amdgpu_dm-amdgpu_dm.c:warning:no-previous-prototype-for-amdgpu_dm_connector_funcs_force
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-amdgpu_dm-amdgpu_dm.c:warning:variable-count-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gfx.c:warning:variable-j-set-but-not-used
|-- arc-buildonly-randconfig-r001-20230430
|   |-- 
drivers-base-regmap-regcache-maple.c:warning:lower_index-is-used-uninitialized
|   `-- 
drivers-base-regmap-regcache-maple.c:warning:lower_last-is-used-uninitialized
|-- arc-buildonly-

Re: [PATCH 2/2] drm/amdgpu: adjust vmhub flush tlb iteration to fit the new GFXHUB/MMHUB layout

2023-05-04 Thread Alex Deucher
Maybe add:
Fixes: dc267018dac1 ("drm/amdgpu: introduce vmhub definition for
multi-partition cases (v3)")
to the series.  With that, the series is:
Reviewed-by: Alex Deucher 

Alex

On Thu, May 4, 2023 at 11:51 AM Yifan Zhang  wrote:
>
> tlb flush has to be changed for the new mmhub layout
>
> Signed-off-by: Yifan Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 16 
>  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c   | 10 +++---
>  2 files changed, 19 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 01cb89ffbd56..2383db399c95 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -160,6 +160,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, 
> uint64_t offset,
> /* Starting from VEGA10, system bit must be 0 to mean invalid. */
> uint64_t flags = 0;
> int idx;
> +   struct amdgpu_vmhub *hub;
>
> if (!adev->gart.ptr)
> return;
> @@ -182,8 +183,11 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, 
> uint64_t offset,
> }
> mb();
> amdgpu_device_flush_hdp(adev, NULL);
> -   for (i = 0; i < adev->num_vmhubs; i++)
> -   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
> +   for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
> +   hub = &adev->vmhub[i];
> +   if (hub->vmhub_funcs)
> +   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
> +   }
>
> drm_dev_exit(idx);
>  }
> @@ -258,14 +262,18 @@ void amdgpu_gart_bind(struct amdgpu_device *adev, 
> uint64_t offset,
>  void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
>  {
> int i;
> +   struct amdgpu_vmhub *hub;
>
> if (!adev->gart.ptr)
> return;
>
> mb();
> amdgpu_device_flush_hdp(adev, NULL);
> -   for (i = 0; i < adev->num_vmhubs; i++)
> -   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
> +   for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
> +   hub = &adev->vmhub[i];
> +   if (hub->vmhub_funcs)
> +   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
> +   }
>  }
>
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> index 90cf79f8ddde..3ee7f5e067fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> @@ -331,6 +331,7 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct 
> amdgpu_device *adev,
> bool ret;
> struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
> struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
> +   struct amdgpu_vmhub *hub;
>
> if (amdgpu_emu_mode == 0 && ring->sched.ready) {
> spin_lock(&adev->gfx.kiq[0].ring_lock);
> @@ -362,9 +363,12 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct 
> amdgpu_device *adev,
> &queried_pasid);
> if (ret && queried_pasid == pasid) {
> if (all_hub) {
> -   for (i = 0; i < adev->num_vmhubs; i++)
> -   gmc_v11_0_flush_gpu_tlb(adev, vmid,
> -   i, flush_type);
> +   for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
> +   hub = &adev->vmhub[i];
> +   if (hub->vmhub_funcs)
> +   gmc_v11_0_flush_gpu_tlb(adev, 
> vmid,
> +   i, 
> flush_type);
> +   }
> } else {
> gmc_v11_0_flush_gpu_tlb(adev, vmid,
> AMDGPU_GFXHUB(0), flush_type);
> --
> 2.37.3
>


[PATCH 2/2] drm/amdgpu: adjust vmhub flush tlb iteration to fit the new GFXHUB/MMHUB layout

2023-05-04 Thread Yifan Zhang
tlb flush has to be changed for the new mmhub layout

Signed-off-by: Yifan Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 16 
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c   | 10 +++---
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 01cb89ffbd56..2383db399c95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -160,6 +160,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, 
uint64_t offset,
/* Starting from VEGA10, system bit must be 0 to mean invalid. */
uint64_t flags = 0;
int idx;
+   struct amdgpu_vmhub *hub;
 
if (!adev->gart.ptr)
return;
@@ -182,8 +183,11 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, 
uint64_t offset,
}
mb();
amdgpu_device_flush_hdp(adev, NULL);
-   for (i = 0; i < adev->num_vmhubs; i++)
-   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+   for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
+   hub = &adev->vmhub[i];
+   if (hub->vmhub_funcs)
+   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+   }
 
drm_dev_exit(idx);
 }
@@ -258,14 +262,18 @@ void amdgpu_gart_bind(struct amdgpu_device *adev, 
uint64_t offset,
 void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
 {
int i;
+   struct amdgpu_vmhub *hub;
 
if (!adev->gart.ptr)
return;
 
mb();
amdgpu_device_flush_hdp(adev, NULL);
-   for (i = 0; i < adev->num_vmhubs; i++)
-   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+   for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
+   hub = &adev->vmhub[i];
+   if (hub->vmhub_funcs)
+   amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+   }
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 90cf79f8ddde..3ee7f5e067fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -331,6 +331,7 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
bool ret;
struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+   struct amdgpu_vmhub *hub;
 
if (amdgpu_emu_mode == 0 && ring->sched.ready) {
spin_lock(&adev->gfx.kiq[0].ring_lock);
@@ -362,9 +363,12 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
&queried_pasid);
if (ret && queried_pasid == pasid) {
if (all_hub) {
-   for (i = 0; i < adev->num_vmhubs; i++)
-   gmc_v11_0_flush_gpu_tlb(adev, vmid,
-   i, flush_type);
+   for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
+   hub = &adev->vmhub[i];
+   if (hub->vmhub_funcs)
+   gmc_v11_0_flush_gpu_tlb(adev, 
vmid,
+   i, flush_type);
+   }
} else {
gmc_v11_0_flush_gpu_tlb(adev, vmid,
AMDGPU_GFXHUB(0), flush_type);
-- 
2.37.3



[PATCH 1/2] drm/amdgpu: change vm_inv_engs array based on new GFXHUB/MMHUB layout

2023-05-04 Thread Yifan Zhang
vm_inv_engs should be changed accordingly.

Signed-off-by: Yifan Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 8c8a2f37ba33..eeaa8d215801 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -539,12 +539,14 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
 {
struct amdgpu_ring *ring;
-   unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
-   {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
-   GFXHUB_FREE_VM_INV_ENGS_BITMAP};
+   unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS];
unsigned i;
unsigned vmhub, inv_eng;
 
+   vm_inv_engs[AMDGPU_GFXHUB(0)] = GFXHUB_FREE_VM_INV_ENGS_BITMAP;
+   vm_inv_engs[AMDGPU_GFXHUB(1)] = GFXHUB_FREE_VM_INV_ENGS_BITMAP;
+   vm_inv_engs[AMDGPU_MMHUB0(0)] = MMHUB_FREE_VM_INV_ENGS_BITMAP;
+
if (adev->enable_mes) {
/* reserve engine 5 for firmware */
for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++)
-- 
2.37.3



Re: [PATCH 01/13] drm: execution context for GEM buffers v4

2023-05-04 Thread Intel



On 5/4/23 13:51, Christian König wrote:

This adds the infrastructure for an execution context for GEM buffers
which is similar to the existing TTMs execbuf util and intended to replace
it in the long term.

The basic functionality is that we abstracts the necessary loop to lock
many different GEM buffers with automated deadlock and duplicate handling.

v2: drop xarray and use dynamic resized array instead, the locking
 overhead is unnecessary and measurable.
v3: drop duplicate tracking, radeon is really the only one needing that.
v4: fixes issues pointed out by Danilo, some typos in comments and a
 helper for lock arrays of GEM objects.

Signed-off-by: Christian König 

...

+/**
+ * struct drm_exec - Execution context
+ */
+struct drm_exec {
+   /**
+* @interruptible: If locks should be taken interruptible
+*/
+   boolinterruptible;
+
+   /**
+* @ticket: WW ticket used for acquiring locks
+*/
+   struct ww_acquire_ctx   ticket;
+
+   /**
+* @num_objects: number of objects locked
+*/
+   unsigned intnum_objects;
+
+   /**
+* @max_objects: maximum objects in array
+*/
+   unsigned intmax_objects;
+
+   /**
+* @objects: array of the locked objects
+*/
+   struct drm_gem_object   **objects;


Hi, Christian. Did you consider using a list here with links embedded in 
gem objects, now that only locked objects are to be put on the list / array.


That should work as only the process owning the lock may access the list 
link. Apart from getting rid of reallocing this is beneficial for the 
more general types of ww transactions that are used by i915 (and to a 
minor extent xe as well, I think).


In those cases we would want to unlock a temporary held object within 
the while_not_all_locked() loop and would then have to search the entire 
array for the correct pointer. Instead one could just remove it from the 
list of locked objects.


Thanks,

Thomas



Re: [PATCH 02/13] drm: add drm_exec selftests v2

2023-05-04 Thread Christian König

Hi Maira,

Am 04.05.23 um 14:07 schrieb Maíra Canal:

Hi Christian,

It would be nice if you use the KUnit macros, instead of pr_info.


yeah this was initially written before the DRM tests moved to KUnit and 
I only quickly converted it over. Going to give this a cleanup.


Thanks,
Christian.



On 5/4/23 08:51, Christian König wrote:

Largely just the initial skeleton.

v2: add array test as well

Signed-off-by: Christian König 
---
  drivers/gpu/drm/Kconfig   |  1 +
  drivers/gpu/drm/tests/Makefile    |  3 +-
  drivers/gpu/drm/tests/drm_exec_test.c | 96 +++
  3 files changed, 99 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/tests/drm_exec_test.c

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 2dc81eb062eb..068e574e234e 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -80,6 +80,7 @@ config DRM_KUNIT_TEST
  select DRM_BUDDY
  select DRM_EXPORT_FOR_TESTS if m
  select DRM_KUNIT_TEST_HELPERS
+    select DRM_EXEC
  default KUNIT_ALL_TESTS
  help
    This builds unit tests for DRM. This option is not useful for
diff --git a/drivers/gpu/drm/tests/Makefile 
b/drivers/gpu/drm/tests/Makefile

index bca726a8f483..ba7baa622675 100644
--- a/drivers/gpu/drm/tests/Makefile
+++ b/drivers/gpu/drm/tests/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
  drm_modes_test.o \
  drm_plane_helper_test.o \
  drm_probe_helper_test.o \
-    drm_rect_test.o
+    drm_rect_test.o    \
+    drm_exec_test.o
    CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
diff --git a/drivers/gpu/drm/tests/drm_exec_test.c 
b/drivers/gpu/drm/tests/drm_exec_test.c

new file mode 100644
index ..26aa13e62d22
--- /dev/null
+++ b/drivers/gpu/drm/tests/drm_exec_test.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#define pr_fmt(fmt) "drm_exec: " fmt
+
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include "../lib/drm_random.h"
+
+static struct drm_device dev;
+
+static void drm_exec_sanitycheck(struct kunit *test)
+{
+    struct drm_exec exec;
+
+    drm_exec_init(&exec, true);
+    drm_exec_fini(&exec);
+    pr_info("%s - ok!\n", __func__);


Here you could use KUNIT_SUCCEED(test).


+}
+
+static void drm_exec_lock1(struct kunit *test)


Is there a reason to call the function drm_exec_lock1 instead of
just drm_exec_lock?


+{
+    struct drm_gem_object gobj = { };
+    struct drm_exec exec;
+    int ret;
+
+    drm_gem_private_object_init(&dev, &gobj, PAGE_SIZE);
+
+    drm_exec_init(&exec, true);
+    drm_exec_while_not_all_locked(&exec) {
+    ret = drm_exec_prepare_obj(&exec, &gobj, 1);
+    drm_exec_continue_on_contention(&exec);
+    if (ret) {
+    drm_exec_fini(&exec);
+    pr_err("%s - err %d!\n", __func__, ret);


Here you could use KUNIT_FAIL. Same for the other function.

Actually, it would be better if you created a function `exit`
associated with the test suite, where you would call drm_exec_fini,
and checked the ret variable with KUNIT_EXPECT_EQ(test, ret, 0) in
the test.


+    return;
+    }
+    }
+    drm_exec_fini(&exec);
+    pr_info("%s - ok!\n", __func__);
+}
+
+static void drm_exec_lock_array(struct kunit *test)
+{
+    struct drm_gem_object gobj1 = { };
+    struct drm_gem_object gobj2 = { };
+    struct drm_gem_object *array[] = { &gobj1, &gobj2 };
+    struct drm_exec exec;
+    int ret;
+
+    drm_gem_private_object_init(&dev, &gobj1, PAGE_SIZE);
+    drm_gem_private_object_init(&dev, &gobj2, PAGE_SIZE);
+
+    drm_exec_init(&exec, true);
+    ret = drm_exec_prepare_array(&exec, array, ARRAY_SIZE(array), 0);
+    if (ret) {
+    drm_exec_fini(&exec);
+    pr_err("%s - err %d!\n", __func__, ret);
+    return;
+    }
+    drm_exec_fini(&exec);
+    pr_info("%s - ok!\n", __func__);
+}
+
+static int drm_exec_suite_init(struct kunit_suite *suite)
+{
+    kunit_info(suite, "Testing DRM exec manager\n");


Isn't this already clear by the name of the test?

Best Regards,
- Maíra Canal


+    return 0;
+}
+
+static struct kunit_case drm_exec_tests[] = {
+    KUNIT_CASE(drm_exec_sanitycheck),
+    KUNIT_CASE(drm_exec_lock1),
+    KUNIT_CASE(drm_exec_lock_array),
+    {}
+};
+
+static struct kunit_suite drm_exec_test_suite = {
+    .name = "drm_exec",
+    .suite_init = drm_exec_suite_init,
+    .test_cases = drm_exec_tests,
+};
+
+kunit_test_suite(drm_exec_test_suite);
+
+MODULE_AUTHOR("AMD");
+MODULE_LICENSE("GPL and additional rights");




2023 X.Org Foundation Election vote results

2023-05-04 Thread Ricardo Garcia
The Board of Directors election and the vote on the By-laws concluded at
14:00 UTC on May 1st 2023 and these are the results:

- We had 75 members this year, of which 55 cast a vote, so the turnout
is 73.3%.

- On the question "Do you accept the proposed By-Law changes to make SFC
the new fiscal sponsor of the X.Org foundation, replacing SPI?" 52 of
the 55 members voted yes (94.5%). Among all 75 members, approval is
69.3% (52/75, over 2/3), so we can consider this change approved using
the current by-law rules.

- On the question "Do you accept the proposed By-Law changes to modify
the special voting quorum requirements to be limited to present (meaning
voting) members?" 48 of the 55 members voted yes (87.3%). Despite this,
48 votes represent only 64% of the members, which means the by-laws
change does not pass.

- In the election of the Directors to the Board of the X.Org Foundation,
the results were that Daniel Vetter, Lyude Paul, Arkadiusz Hiler and
Christopher Michael were elected for two-year terms.

The old full board is: Emma Anholt, Mark Filion, Ricardo Garcia, Samuel
Iglesias Gonsálvez, Manasi D Navare, Lyude Paul, Alyssa Rosenzweig and
Daniel Vetter.

The new full board is: Emma Anholt, Mark Filion, Ricardo Garcia,
Arkadiusz Hiler, Christopher Michael, Lyude Paul, Alyssa Rosenzweig and
Daniel Vetter.

Full election results, sorted by points:

* Daniel Vetter (367 points)
* Lyude Paul (348 points)
* Arkadiusz Hiler (286 points)
* Christopher Michael (263 points)
* Manasi Navare (195 points)
* Uma Shankar (157 points)
* Thomas Adam (105 points)
* William Weeks-Balconi (51 points)

Thanks everyone,
-Ricardo Garcia, on behalf of the X.Org elections committee



Re: [PATCH 02/13] drm: add drm_exec selftests v2

2023-05-04 Thread Maíra Canal

Hi Christian,

It would be nice if you use the KUnit macros, instead of pr_info.

On 5/4/23 08:51, Christian König wrote:

Largely just the initial skeleton.

v2: add array test as well

Signed-off-by: Christian König 
---
  drivers/gpu/drm/Kconfig   |  1 +
  drivers/gpu/drm/tests/Makefile|  3 +-
  drivers/gpu/drm/tests/drm_exec_test.c | 96 +++
  3 files changed, 99 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/tests/drm_exec_test.c

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 2dc81eb062eb..068e574e234e 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -80,6 +80,7 @@ config DRM_KUNIT_TEST
select DRM_BUDDY
select DRM_EXPORT_FOR_TESTS if m
select DRM_KUNIT_TEST_HELPERS
+   select DRM_EXEC
default KUNIT_ALL_TESTS
help
  This builds unit tests for DRM. This option is not useful for
diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
index bca726a8f483..ba7baa622675 100644
--- a/drivers/gpu/drm/tests/Makefile
+++ b/drivers/gpu/drm/tests/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
drm_modes_test.o \
drm_plane_helper_test.o \
drm_probe_helper_test.o \
-   drm_rect_test.o
+   drm_rect_test.o \
+   drm_exec_test.o
  
  CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)

diff --git a/drivers/gpu/drm/tests/drm_exec_test.c 
b/drivers/gpu/drm/tests/drm_exec_test.c
new file mode 100644
index ..26aa13e62d22
--- /dev/null
+++ b/drivers/gpu/drm/tests/drm_exec_test.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#define pr_fmt(fmt) "drm_exec: " fmt
+
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include "../lib/drm_random.h"
+
+static struct drm_device dev;
+
+static void drm_exec_sanitycheck(struct kunit *test)
+{
+   struct drm_exec exec;
+
+   drm_exec_init(&exec, true);
+   drm_exec_fini(&exec);
+   pr_info("%s - ok!\n", __func__);


Here you could use KUNIT_SUCCEED(test).


+}
+
+static void drm_exec_lock1(struct kunit *test)


Is there a reason to call the function drm_exec_lock1 instead of
just drm_exec_lock?


+{
+   struct drm_gem_object gobj = { };
+   struct drm_exec exec;
+   int ret;
+
+   drm_gem_private_object_init(&dev, &gobj, PAGE_SIZE);
+
+   drm_exec_init(&exec, true);
+   drm_exec_while_not_all_locked(&exec) {
+   ret = drm_exec_prepare_obj(&exec, &gobj, 1);
+   drm_exec_continue_on_contention(&exec);
+   if (ret) {
+   drm_exec_fini(&exec);
+   pr_err("%s - err %d!\n", __func__, ret);


Here you could use KUNIT_FAIL. Same for the other function.

Actually, it would be better if you created a function `exit`
associated with the test suite, where you would call drm_exec_fini,
and checked the ret variable with KUNIT_EXPECT_EQ(test, ret, 0) in
the test.


+   return;
+   }
+   }
+   drm_exec_fini(&exec);
+   pr_info("%s - ok!\n", __func__);
+}
+
+static void drm_exec_lock_array(struct kunit *test)
+{
+   struct drm_gem_object gobj1 = { };
+   struct drm_gem_object gobj2 = { };
+   struct drm_gem_object *array[] = { &gobj1, &gobj2 };
+   struct drm_exec exec;
+   int ret;
+
+   drm_gem_private_object_init(&dev, &gobj1, PAGE_SIZE);
+   drm_gem_private_object_init(&dev, &gobj2, PAGE_SIZE);
+
+   drm_exec_init(&exec, true);
+   ret = drm_exec_prepare_array(&exec, array, ARRAY_SIZE(array), 0);
+   if (ret) {
+   drm_exec_fini(&exec);
+   pr_err("%s - err %d!\n", __func__, ret);
+   return;
+   }
+   drm_exec_fini(&exec);
+   pr_info("%s - ok!\n", __func__);
+}
+
+static int drm_exec_suite_init(struct kunit_suite *suite)
+{
+   kunit_info(suite, "Testing DRM exec manager\n");


Isn't this already clear by the name of the test?

Best Regards,
- Maíra Canal


+   return 0;
+}
+
+static struct kunit_case drm_exec_tests[] = {
+   KUNIT_CASE(drm_exec_sanitycheck),
+   KUNIT_CASE(drm_exec_lock1),
+   KUNIT_CASE(drm_exec_lock_array),
+   {}
+};
+
+static struct kunit_suite drm_exec_test_suite = {
+   .name = "drm_exec",
+   .suite_init = drm_exec_suite_init,
+   .test_cases = drm_exec_tests,
+};
+
+kunit_test_suite(drm_exec_test_suite);
+
+MODULE_AUTHOR("AMD");
+MODULE_LICENSE("GPL and additional rights");


[PATCH 12/13] drm/v3d: switch to using drm_exec

2023-05-04 Thread Christian König
Just a straightforward conversion without any optimization.

Only compile tested for now.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/v3d/v3d_gem.c | 43 ---
 1 file changed, 19 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 2e94ce788c71..75880ffc0cf1 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -249,20 +250,16 @@ v3d_invalidate_caches(struct v3d_dev *v3d)
  * to v3d, so we don't attach dma-buf fences to them.
  */
 static int
-v3d_lock_bo_reservations(struct v3d_job *job,
-struct ww_acquire_ctx *acquire_ctx)
+v3d_lock_bo_reservations(struct v3d_job *job, struct drm_exec *exec)
 {
int i, ret;
 
-   ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
+   drm_exec_init(exec, true);
+   ret = drm_exec_prepare_array(exec, job->bo, job->bo_count, 1);
if (ret)
-   return ret;
+   goto fail;
 
for (i = 0; i < job->bo_count; i++) {
-   ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
-   if (ret)
-   goto fail;
-
ret = drm_sched_job_add_implicit_dependencies(&job->base,
  job->bo[i], true);
if (ret)
@@ -272,7 +269,7 @@ v3d_lock_bo_reservations(struct v3d_job *job,
return 0;
 
 fail:
-   drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
+   drm_exec_fini(exec);
return ret;
 }
 
@@ -477,7 +474,7 @@ v3d_push_job(struct v3d_job *job)
 static void
 v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
 struct v3d_job *job,
-struct ww_acquire_ctx *acquire_ctx,
+struct drm_exec *exec,
 u32 out_sync,
 struct v3d_submit_ext *se,
 struct dma_fence *done_fence)
@@ -492,7 +489,7 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file 
*file_priv,
   DMA_RESV_USAGE_WRITE);
}
 
-   drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
+   drm_exec_fini(exec);
 
/* Update the return sync object for the job */
/* If it only supports a single signal semaphore*/
@@ -669,7 +666,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
struct v3d_render_job *render = NULL;
struct v3d_job *clean_job = NULL;
struct v3d_job *last_job;
-   struct ww_acquire_ctx acquire_ctx;
+   struct drm_exec exec;
int ret = 0;
 
trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
@@ -731,7 +728,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
if (ret)
goto fail;
 
-   ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
+   ret = v3d_lock_bo_reservations(last_job, &exec);
if (ret)
goto fail;
 
@@ -775,7 +772,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 
v3d_attach_fences_and_unlock_reservation(file_priv,
 last_job,
-&acquire_ctx,
+&exec,
 args->out_sync,
 &se,
 last_job->done_fence);
@@ -791,8 +788,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 fail_unreserve:
mutex_unlock(&v3d->sched_lock);
 fail_perfmon:
-   drm_gem_unlock_reservations(last_job->bo,
-   last_job->bo_count, &acquire_ctx);
+   drm_exec_fini(&exec);
 fail:
v3d_job_cleanup((void *)bin);
v3d_job_cleanup((void *)render);
@@ -819,7 +815,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
struct drm_v3d_submit_tfu *args = data;
struct v3d_submit_ext se = {0};
struct v3d_tfu_job *job = NULL;
-   struct ww_acquire_ctx acquire_ctx;
+   struct drm_exec exec;
int ret = 0;
 
trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
@@ -870,7 +866,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
job->base.bo[job->base.bo_count] = bo;
}
 
-   ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
+   ret = v3d_lock_bo_reservations(&job->base, &exec);
if (ret)
goto fail;
 
@@ -879,7 +875,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
mutex_unlock(&v3d->sched_lock);
 
v3d_attach_fences_and_unlock_res

[PATCH 10/13] drm/virtgpu: switch to using drm_exec

2023-05-04 Thread Christian König
Just a straightforward conversion without any optimization.

Only compile tested for now.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/virtio/Kconfig   |  1 +
 drivers/gpu/drm/virtio/virtgpu_drv.h |  3 ++-
 drivers/gpu/drm/virtio/virtgpu_gem.c | 29 +++-
 3 files changed, 6 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/virtio/Kconfig b/drivers/gpu/drm/virtio/Kconfig
index ea06ff2aa4b4..a24a1ce5e666 100644
--- a/drivers/gpu/drm/virtio/Kconfig
+++ b/drivers/gpu/drm/virtio/Kconfig
@@ -5,6 +5,7 @@ config DRM_VIRTIO_GPU
select VIRTIO
select DRM_KMS_HELPER
select DRM_GEM_SHMEM_HELPER
+   select DRM_EXEC
select VIRTIO_DMA_SHARED_BUFFER
help
   This is the virtual GPU driver for virtio.  It can be used with
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h 
b/drivers/gpu/drm/virtio/virtgpu_drv.h
index af6ffb696086..c12434222e51 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -116,7 +117,7 @@ struct virtio_gpu_object_vram {
container_of((virtio_gpu_object), struct virtio_gpu_object_vram, base)
 
 struct virtio_gpu_object_array {
-   struct ww_acquire_ctx ticket;
+   struct drm_exec exec;
struct list_head next;
u32 nents, total;
struct drm_gem_object *objs[];
diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c 
b/drivers/gpu/drm/virtio/virtgpu_gem.c
index 7db48d17ee3a..bcab407074f4 100644
--- a/drivers/gpu/drm/virtio/virtgpu_gem.c
+++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
@@ -171,6 +171,7 @@ struct virtio_gpu_object_array *virtio_gpu_array_alloc(u32 
nents)
 
objs->nents = 0;
objs->total = nents;
+   drm_exec_init(&objs->exec, true);
return objs;
 }
 
@@ -214,36 +215,12 @@ void virtio_gpu_array_add_obj(struct 
virtio_gpu_object_array *objs,
 
 int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs)
 {
-   unsigned int i;
-   int ret;
-
-   if (objs->nents == 1) {
-   ret = dma_resv_lock_interruptible(objs->objs[0]->resv, NULL);
-   } else {
-   ret = drm_gem_lock_reservations(objs->objs, objs->nents,
-   &objs->ticket);
-   }
-   if (ret)
-   return ret;
-
-   for (i = 0; i < objs->nents; ++i) {
-   ret = dma_resv_reserve_fences(objs->objs[i]->resv, 1);
-   if (ret) {
-   virtio_gpu_array_unlock_resv(objs);
-   return ret;
-   }
-   }
-   return ret;
+   return drm_exec_prepare_array(&objs->exec, objs->objs, objs->nents, 1);
 }
 
 void virtio_gpu_array_unlock_resv(struct virtio_gpu_object_array *objs)
 {
-   if (objs->nents == 1) {
-   dma_resv_unlock(objs->objs[0]->resv);
-   } else {
-   drm_gem_unlock_reservations(objs->objs, objs->nents,
-   &objs->ticket);
-   }
+   drm_exec_fini(&objs->exec);
 }
 
 void virtio_gpu_array_add_fence(struct virtio_gpu_object_array *objs,
-- 
2.34.1



[PATCH 13/13] drm: remove drm_gem_(un)lock_reservations

2023-05-04 Thread Christian König
Not used any more.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/drm_gem.c  | 78 --
 drivers/gpu/drm/scheduler/sched_main.c |  5 +-
 include/drm/drm_gem.h  |  4 --
 3 files changed, 2 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 1a5a2cd0d4ec..cd411002 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1214,84 +1214,6 @@ void drm_gem_vunmap_unlocked(struct drm_gem_object *obj, 
struct iosys_map *map)
 }
 EXPORT_SYMBOL(drm_gem_vunmap_unlocked);
 
-/**
- * drm_gem_lock_reservations - Sets up the ww context and acquires
- * the lock on an array of GEM objects.
- *
- * Once you've locked your reservations, you'll want to set up space
- * for your shared fences (if applicable), submit your job, then
- * drm_gem_unlock_reservations().
- *
- * @objs: drm_gem_objects to lock
- * @count: Number of objects in @objs
- * @acquire_ctx: struct ww_acquire_ctx that will be initialized as
- * part of tracking this set of locked reservations.
- */
-int
-drm_gem_lock_reservations(struct drm_gem_object **objs, int count,
- struct ww_acquire_ctx *acquire_ctx)
-{
-   int contended = -1;
-   int i, ret;
-
-   ww_acquire_init(acquire_ctx, &reservation_ww_class);
-
-retry:
-   if (contended != -1) {
-   struct drm_gem_object *obj = objs[contended];
-
-   ret = dma_resv_lock_slow_interruptible(obj->resv,
-acquire_ctx);
-   if (ret) {
-   ww_acquire_fini(acquire_ctx);
-   return ret;
-   }
-   }
-
-   for (i = 0; i < count; i++) {
-   if (i == contended)
-   continue;
-
-   ret = dma_resv_lock_interruptible(objs[i]->resv,
-   acquire_ctx);
-   if (ret) {
-   int j;
-
-   for (j = 0; j < i; j++)
-   dma_resv_unlock(objs[j]->resv);
-
-   if (contended != -1 && contended >= i)
-   dma_resv_unlock(objs[contended]->resv);
-
-   if (ret == -EDEADLK) {
-   contended = i;
-   goto retry;
-   }
-
-   ww_acquire_fini(acquire_ctx);
-   return ret;
-   }
-   }
-
-   ww_acquire_done(acquire_ctx);
-
-   return 0;
-}
-EXPORT_SYMBOL(drm_gem_lock_reservations);
-
-void
-drm_gem_unlock_reservations(struct drm_gem_object **objs, int count,
-   struct ww_acquire_ctx *acquire_ctx)
-{
-   int i;
-
-   for (i = 0; i < count; i++)
-   dma_resv_unlock(objs[i]->resv);
-
-   ww_acquire_fini(acquire_ctx);
-}
-EXPORT_SYMBOL(drm_gem_unlock_reservations);
-
 /**
  * drm_gem_lru_init - initialize a LRU
  *
diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index b09cdacfd062..2d8249148926 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -794,9 +794,8 @@ EXPORT_SYMBOL(drm_sched_job_add_resv_dependencies);
  * @write: whether the job might write the object (so we need to depend on
  * shared fences in the reservation object).
  *
- * This should be called after drm_gem_lock_reservations() on your array of
- * GEM objects used in the job but before updating the reservations with your
- * own fences.
+ * This should be called after locking your GEM objects used in the job but
+ * before updating the reservations with your own fences.
  *
  * Returns:
  * 0 on success, or an error on failing to expand the array.
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index b8efd836edef..7e027688a83d 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -476,10 +476,6 @@ int drm_gem_objects_lookup(struct drm_file *filp, void 
__user *bo_handles,
 struct drm_gem_object *drm_gem_object_lookup(struct drm_file *filp, u32 
handle);
 long drm_gem_dma_resv_wait(struct drm_file *filep, u32 handle,
bool wait_all, unsigned long timeout);
-int drm_gem_lock_reservations(struct drm_gem_object **objs, int count,
- struct ww_acquire_ctx *acquire_ctx);
-void drm_gem_unlock_reservations(struct drm_gem_object **objs, int count,
-struct ww_acquire_ctx *acquire_ctx);
 int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
u32 handle, u64 *offset);
 
-- 
2.34.1



[PATCH 11/13] drm/panfrost: switch to using drm_exec

2023-05-04 Thread Christian König
Just a straightforward conversion without any optimization.

Only compile tested for now.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/panfrost/Kconfig|  1 +
 drivers/gpu/drm/panfrost/panfrost_job.c | 11 ++-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig
index e6403a9d66ad..e86a1a2fd8e1 100644
--- a/drivers/gpu/drm/panfrost/Kconfig
+++ b/drivers/gpu/drm/panfrost/Kconfig
@@ -7,6 +7,7 @@ config DRM_PANFROST
depends on !GENERIC_ATOMIC64# for IOMMU_IO_PGTABLE_LPAE
depends on MMU
select DRM_SCHED
+   select DRM_EXEC
select IOMMU_SUPPORT
select IOMMU_IO_PGTABLE_LPAE
select DRM_GEM_SHMEM_HELPER
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c 
b/drivers/gpu/drm/panfrost/panfrost_job.c
index dbc597ab46fb..7086a6044355 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -275,13 +276,13 @@ static void panfrost_attach_object_fences(struct 
drm_gem_object **bos,
 int panfrost_job_push(struct panfrost_job *job)
 {
struct panfrost_device *pfdev = job->pfdev;
-   struct ww_acquire_ctx acquire_ctx;
+   struct drm_exec exec;
int ret = 0;
 
-   ret = drm_gem_lock_reservations(job->bos, job->bo_count,
-   &acquire_ctx);
+   drm_exec_init(&exec, true);
+   ret = drm_exec_prepare_array(&exec, job->bos, job->bo_count, 1);
if (ret)
-   return ret;
+   goto unlock;
 
mutex_lock(&pfdev->sched_lock);
drm_sched_job_arm(&job->base);
@@ -305,7 +306,7 @@ int panfrost_job_push(struct panfrost_job *job)
  job->render_done_fence);
 
 unlock:
-   drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);
+   drm_exec_fini(&exec);
 
return ret;
 }
-- 
2.34.1



[PATCH 09/13] drm/lima: switch to using drm_exec

2023-05-04 Thread Christian König
Just a straightforward conversion without any optimization.

Only compile tested for now.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/lima/Kconfig|  1 +
 drivers/gpu/drm/lima/lima_gem.c | 15 +++
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/lima/Kconfig b/drivers/gpu/drm/lima/Kconfig
index fa1d4f5df31e..1d2871d9ddd2 100644
--- a/drivers/gpu/drm/lima/Kconfig
+++ b/drivers/gpu/drm/lima/Kconfig
@@ -9,6 +9,7 @@ config DRM_LIMA
depends on COMMON_CLK
depends on OF
select DRM_SCHED
+   select DRM_EXEC
select DRM_GEM_SHMEM_HELPER
select PM_DEVFREQ
select DEVFREQ_GOV_SIMPLE_ONDEMAND
diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
index 10252dc11a22..f48c1edff07d 100644
--- a/drivers/gpu/drm/lima/lima_gem.c
+++ b/drivers/gpu/drm/lima/lima_gem.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -292,7 +293,7 @@ static int lima_gem_add_deps(struct drm_file *file, struct 
lima_submit *submit)
 int lima_gem_submit(struct drm_file *file, struct lima_submit *submit)
 {
int i, err = 0;
-   struct ww_acquire_ctx ctx;
+   struct drm_exec exec;
struct lima_drm_priv *priv = to_lima_drm_priv(file);
struct lima_vm *vm = priv->vm;
struct drm_syncobj *out_sync = NULL;
@@ -329,8 +330,9 @@ int lima_gem_submit(struct drm_file *file, struct 
lima_submit *submit)
bos[i] = bo;
}
 
-   err = drm_gem_lock_reservations((struct drm_gem_object **)bos,
-   submit->nr_bos, &ctx);
+   drm_exec_init(&exec, true);
+   err = drm_exec_prepare_array(&exec, (struct drm_gem_object **)bos,
+submit->nr_bos, 0);
if (err)
goto err_out0;
 
@@ -360,9 +362,7 @@ int lima_gem_submit(struct drm_file *file, struct 
lima_submit *submit)
   submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE ?
   DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
}
-
-   drm_gem_unlock_reservations((struct drm_gem_object **)bos,
-   submit->nr_bos, &ctx);
+   drm_exec_fini(&exec);
 
for (i = 0; i < submit->nr_bos; i++)
drm_gem_object_put(&bos[i]->base.base);
@@ -379,8 +379,7 @@ int lima_gem_submit(struct drm_file *file, struct 
lima_submit *submit)
 err_out2:
lima_sched_task_fini(submit->task);
 err_out1:
-   drm_gem_unlock_reservations((struct drm_gem_object **)bos,
-   submit->nr_bos, &ctx);
+   drm_exec_fini(&exec);
 err_out0:
for (i = 0; i < submit->nr_bos; i++) {
if (!bos[i])
-- 
2.34.1



[PATCH 07/13] drm/radeon: switch over to drm_exec

2023-05-04 Thread Christian König
Just a straightforward conversion without any optimization.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/Kconfig |  1 +
 drivers/gpu/drm/radeon/radeon.h|  7 ++--
 drivers/gpu/drm/radeon/radeon_cs.c | 45 +-
 drivers/gpu/drm/radeon/radeon_gem.c| 40 +--
 drivers/gpu/drm/radeon/radeon_object.c | 25 +++---
 drivers/gpu/drm/radeon/radeon_object.h |  2 +-
 drivers/gpu/drm/radeon/radeon_vm.c | 10 +++---
 7 files changed, 67 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig
index e19d77d58810..2d5fb6240cec 100644
--- a/drivers/gpu/drm/radeon/Kconfig
+++ b/drivers/gpu/drm/radeon/Kconfig
@@ -11,6 +11,7 @@ config DRM_RADEON
select DRM_SUBALLOC_HELPER
 select DRM_TTM
select DRM_TTM_HELPER
+   select DRM_EXEC
select SND_HDA_COMPONENT if SND_HDA_CORE
select POWER_SUPPLY
select HWMON
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8afb03bbce29..37a932a5195f 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -75,8 +75,8 @@
 
 #include 
 #include 
-#include 
 
+#include 
 #include 
 #include 
 #include 
@@ -458,7 +458,8 @@ struct radeon_mman {
 
 struct radeon_bo_list {
struct radeon_bo*robj;
-   struct ttm_validate_buffer  tv;
+   struct list_headlist;
+   boolshared;
uint64_tgpu_offset;
unsignedpreferred_domains;
unsignedallowed_domains;
@@ -1031,6 +1032,7 @@ struct radeon_cs_parser {
struct radeon_bo_list   *vm_bos;
struct list_headvalidated;
unsigneddma_reloc_idx;
+   struct drm_exec exec;
/* indices of various chunks */
struct radeon_cs_chunk  *chunk_ib;
struct radeon_cs_chunk  *chunk_relocs;
@@ -1044,7 +1046,6 @@ struct radeon_cs_parser {
u32 cs_flags;
u32 ring;
s32 priority;
-   struct ww_acquire_ctx   ticket;
 };
 
 static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 46a27ebf4588..5c681a44cec7 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -182,11 +182,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser 
*p)
}
}
 
-   p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
-   p->relocs[i].tv.num_shared = !r->write_domain;
-
-   radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
- priority);
+   p->relocs[i].shared = !r->write_domain;
+   radeon_cs_buckets_add(&buckets, &p->relocs[i].list, priority);
}
 
radeon_cs_buckets_get_list(&buckets, &p->validated);
@@ -197,7 +194,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser 
*p)
if (need_mmap_lock)
mmap_read_lock(current->mm);
 
-   r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, 
p->ring);
+   r = radeon_bo_list_validate(p->rdev, &p->exec, &p->validated, p->ring);
 
if (need_mmap_lock)
mmap_read_unlock(current->mm);
@@ -253,12 +250,11 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser 
*p)
struct radeon_bo_list *reloc;
int r;
 
-   list_for_each_entry(reloc, &p->validated, tv.head) {
+   list_for_each_entry(reloc, &p->validated, list) {
struct dma_resv *resv;
 
resv = reloc->robj->tbo.base.resv;
-   r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
-reloc->tv.num_shared);
+   r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, reloc->shared);
if (r)
return r;
}
@@ -275,6 +271,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
s32 priority = 0;
 
INIT_LIST_HEAD(&p->validated);
+   drm_exec_init(&p->exec, true);
 
if (!cs->num_chunks) {
return 0;
@@ -396,8 +393,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
 static int cmp_size_smaller_first(void *priv, const struct list_head *a,
  const struct list_head *b)
 {
-   struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, 
tv.head);
-   struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, 
tv.head);
+   struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, list);
+   struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, list);
 
/* Sort A before B if A is sm

[PATCH 08/13] drm/qxl: switch to using drm_exec

2023-05-04 Thread Christian König
Just a straightforward conversion without any optimization.

Only compile tested for now.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/qxl/Kconfig   |  1 +
 drivers/gpu/drm/qxl/qxl_drv.h |  7 ++--
 drivers/gpu/drm/qxl/qxl_release.c | 67 ---
 3 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/qxl/Kconfig b/drivers/gpu/drm/qxl/Kconfig
index ca3f51c2a8fe..9c8e433be33e 100644
--- a/drivers/gpu/drm/qxl/Kconfig
+++ b/drivers/gpu/drm/qxl/Kconfig
@@ -5,6 +5,7 @@ config DRM_QXL
select DRM_KMS_HELPER
select DRM_TTM
select DRM_TTM_HELPER
+   select DRM_EXEC
select CRC32
help
  QXL virtual GPU for Spice virtualization desktop integration.
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index ea993d7162e8..3e732648b332 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -38,12 +38,12 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
-#include 
 #include 
 
 #include "qxl_dev.h"
@@ -101,7 +101,8 @@ struct qxl_gem {
 };
 
 struct qxl_bo_list {
-   struct ttm_validate_buffer tv;
+   struct qxl_bo   *bo;
+   struct list_headlist;
 };
 
 struct qxl_crtc {
@@ -151,7 +152,7 @@ struct qxl_release {
struct qxl_bo *release_bo;
uint32_t release_offset;
uint32_t surface_release_id;
-   struct ww_acquire_ctx ticket;
+   struct drm_exec exec;
struct list_head bos;
 };
 
diff --git a/drivers/gpu/drm/qxl/qxl_release.c 
b/drivers/gpu/drm/qxl/qxl_release.c
index 368d26da0d6a..da7cd9cd58f9 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -121,13 +121,11 @@ qxl_release_free_list(struct qxl_release *release)
 {
while (!list_empty(&release->bos)) {
struct qxl_bo_list *entry;
-   struct qxl_bo *bo;
 
entry = container_of(release->bos.next,
-struct qxl_bo_list, tv.head);
-   bo = to_qxl_bo(entry->tv.bo);
-   qxl_bo_unref(&bo);
-   list_del(&entry->tv.head);
+struct qxl_bo_list, list);
+   qxl_bo_unref(&entry->bo);
+   list_del(&entry->list);
kfree(entry);
}
release->release_bo = NULL;
@@ -172,8 +170,8 @@ int qxl_release_list_add(struct qxl_release *release, 
struct qxl_bo *bo)
 {
struct qxl_bo_list *entry;
 
-   list_for_each_entry(entry, &release->bos, tv.head) {
-   if (entry->tv.bo == &bo->tbo)
+   list_for_each_entry(entry, &release->bos, list) {
+   if (entry->bo == bo)
return 0;
}
 
@@ -182,9 +180,8 @@ int qxl_release_list_add(struct qxl_release *release, 
struct qxl_bo *bo)
return -ENOMEM;
 
qxl_bo_ref(bo);
-   entry->tv.bo = &bo->tbo;
-   entry->tv.num_shared = 0;
-   list_add_tail(&entry->tv.head, &release->bos);
+   entry->bo = bo;
+   list_add_tail(&entry->list, &release->bos);
return 0;
 }
 
@@ -221,21 +218,27 @@ int qxl_release_reserve_list(struct qxl_release *release, 
bool no_intr)
if (list_is_singular(&release->bos))
return 0;
 
-   ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos,
-!no_intr, NULL);
-   if (ret)
-   return ret;
-
-   list_for_each_entry(entry, &release->bos, tv.head) {
-   struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
-
-   ret = qxl_release_validate_bo(bo);
-   if (ret) {
-   ttm_eu_backoff_reservation(&release->ticket, 
&release->bos);
-   return ret;
+   drm_exec_init(&release->exec, !no_intr);
+   drm_exec_while_not_all_locked(&release->exec) {
+   list_for_each_entry(entry, &release->bos, list) {
+   ret = drm_exec_prepare_obj(&release->exec,
+  &entry->bo->tbo.base,
+  1);
+   drm_exec_break_on_contention(&release->exec);
+   if (ret)
+   goto error;
}
}
+
+   list_for_each_entry(entry, &release->bos, list) {
+   ret = qxl_release_validate_bo(entry->bo);
+   if (ret)
+   goto error;
+   }
return 0;
+error:
+   drm_exec_fini(&release->exec);
+   return ret;
 }
 
 void qxl_release_backoff_reserve_list(struct qxl_release *release)
@@ -245,7 +248,7 @@ void qxl_release_backoff_reserve_list(struct qxl_release 
*release)
if (list_is_singular(&release->bos))
return;
 
-   ttm_eu_backoff_reservation(&release->ticket, &release->bos);
+   drm_exec_fini(&release->

[PATCH 03/13] drm/amdkfd: switch over to using drm_exec v2

2023-05-04 Thread Christian König
Avoids quite a bit of logic and kmalloc overhead.

v2: fix multiple problems pointed out by Felix

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/Kconfig|   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   5 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 302 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  14 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   3 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  |  46 +--
 6 files changed, 161 insertions(+), 210 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig 
b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 12adca8c7819..fcad4ea30a0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -21,6 +21,7 @@ config DRM_AMDGPU
select INTERVAL_TREE
select DRM_BUDDY
select DRM_SUBALLOC_HELPER
+   select DRM_EXEC
# amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
# ACPI_VIDEO's dependencies must also be selected.
select INPUT if ACPI
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 01ba3589b60a..dfb41d56d236 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -25,13 +25,13 @@
 #ifndef AMDGPU_AMDKFD_H_INCLUDED
 #define AMDGPU_AMDKFD_H_INCLUDED
 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
-#include 
 #include "amdgpu_sync.h"
 #include "amdgpu_vm.h"
 
@@ -69,8 +69,7 @@ struct kgd_mem {
struct hmm_range *range;
struct list_head attachments;
/* protected by amdkfd_process_info.lock */
-   struct ttm_validate_buffer validate_list;
-   struct ttm_validate_buffer resv_list;
+   struct list_head validate_list;
uint32_t domain;
unsigned int mapped_to_gpu_memory;
uint64_t va;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 83a83ced2439..75d394bb52b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -27,6 +27,8 @@
 #include 
 #include 
 
+#include 
+
 #include "amdgpu_object.h"
 #include "amdgpu_gem.h"
 #include "amdgpu_vm.h"
@@ -925,28 +927,20 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem 
*mem,
struct amdkfd_process_info *process_info,
bool userptr)
 {
-   struct ttm_validate_buffer *entry = &mem->validate_list;
-   struct amdgpu_bo *bo = mem->bo;
-
-   INIT_LIST_HEAD(&entry->head);
-   entry->num_shared = 1;
-   entry->bo = &bo->tbo;
mutex_lock(&process_info->lock);
if (userptr)
-   list_add_tail(&entry->head, &process_info->userptr_valid_list);
+   list_add_tail(&mem->validate_list,
+ &process_info->userptr_valid_list);
else
-   list_add_tail(&entry->head, &process_info->kfd_bo_list);
+   list_add_tail(&mem->validate_list, &process_info->kfd_bo_list);
mutex_unlock(&process_info->lock);
 }
 
 static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
struct amdkfd_process_info *process_info)
 {
-   struct ttm_validate_buffer *bo_list_entry;
-
-   bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
-   list_del(&bo_list_entry->head);
+   list_del(&mem->validate_list);
mutex_unlock(&process_info->lock);
 }
 
@@ -1033,13 +1027,12 @@ static int init_user_pages(struct kgd_mem *mem, 
uint64_t user_addr,
  * object can track VM updates.
  */
 struct bo_vm_reservation_context {
-   struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
-   unsigned int n_vms; /* Number of VMs reserved   */
-   struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries  */
-   struct ww_acquire_ctx ticket;   /* Reservation ticket   */
-   struct list_head list, duplicates;  /* BO lists */
-   struct amdgpu_sync *sync;   /* Pointer to sync object   */
-   bool reserved;  /* Whether BOs are reserved */
+   /* DRM execution context for the reservation */
+   struct drm_exec exec;
+   /* Number of VMs reserved */
+   unsigned int n_vms;
+   /* Pointer to sync object */
+   struct amdgpu_sync *sync;
 };
 
 enum bo_vm_match {
@@ -1063,35 +1056,24 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 
WARN_ON(!vm);
 
-   ctx->reserved = false;
ctx->n_vms = 1;
ctx->sync = &mem->sync;
-
-   INIT_LIST_HEAD(&ctx->list);
-   INIT_LIST_HEAD(&ctx->duplicates);
-
-   ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
-   if (!ctx->vm_pd)
-   return -ENOMEM;
-
-   ctx->kfd_bo.priority = 0;
- 

[PATCH 05/13] drm/amdgpu: use drm_exec for MES testing

2023-05-04 Thread Christian König
Start using the new component here as well.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 86 +++--
 1 file changed, 39 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index f0f00466b59f..bfa9006600dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -22,6 +22,7 @@
  */
 
 #include 
+#include 
 
 #include "amdgpu_mes.h"
 #include "amdgpu.h"
@@ -1131,34 +1132,29 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device 
*adev,
 struct amdgpu_mes_ctx_data *ctx_data)
 {
struct amdgpu_bo_va *bo_va;
-   struct ww_acquire_ctx ticket;
-   struct list_head list;
-   struct amdgpu_bo_list_entry pd;
-   struct ttm_validate_buffer csa_tv;
struct amdgpu_sync sync;
+   struct drm_exec exec;
int r;
 
amdgpu_sync_create(&sync);
-   INIT_LIST_HEAD(&list);
-   INIT_LIST_HEAD(&csa_tv.head);
 
-   csa_tv.bo = &ctx_data->meta_data_obj->tbo;
-   csa_tv.num_shared = 1;
-
-   list_add(&csa_tv.head, &list);
-   amdgpu_vm_get_pd_bo(vm, &list, &pd);
-
-   r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
-   if (r) {
-   DRM_ERROR("failed to reserve meta data BO: err=%d\n", r);
-   return r;
+   drm_exec_init(&exec, false);
+   drm_exec_while_not_all_locked(&exec) {
+   r = drm_exec_prepare_obj(&exec,
+&ctx_data->meta_data_obj->tbo.base,
+0);
+   if (likely(!r))
+   r = amdgpu_vm_lock_pd(vm, &exec);
+   drm_exec_continue_on_contention(&exec);
+if (unlikely(r))
+   goto error_fini_exec;
}
 
bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);
if (!bo_va) {
-   ttm_eu_backoff_reservation(&ticket, &list);
DRM_ERROR("failed to create bo_va for meta data BO\n");
-   return -ENOMEM;
+   r = -ENOMEM;
+   goto error_fini_exec;
}
 
r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0,
@@ -1168,33 +1164,35 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device 
*adev,
 
if (r) {
DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r);
-   goto error;
+   goto error_del_bo_va;
}
 
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r) {
DRM_ERROR("failed to do vm_bo_update on meta data\n");
-   goto error;
+   goto error_del_bo_va;
}
amdgpu_sync_fence(&sync, bo_va->last_pt_update);
 
r = amdgpu_vm_update_pdes(adev, vm, false);
if (r) {
DRM_ERROR("failed to update pdes on meta data\n");
-   goto error;
+   goto error_del_bo_va;
}
amdgpu_sync_fence(&sync, vm->last_update);
 
amdgpu_sync_wait(&sync, false);
-   ttm_eu_backoff_reservation(&ticket, &list);
+   drm_exec_fini(&exec);
 
amdgpu_sync_free(&sync);
ctx_data->meta_data_va = bo_va;
return 0;
 
-error:
+error_del_bo_va:
amdgpu_vm_bo_del(adev, bo_va);
-   ttm_eu_backoff_reservation(&ticket, &list);
+
+error_fini_exec:
+   drm_exec_fini(&exec);
amdgpu_sync_free(&sync);
return r;
 }
@@ -1205,34 +1203,28 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device 
*adev,
struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
struct amdgpu_bo *bo = ctx_data->meta_data_obj;
struct amdgpu_vm *vm = bo_va->base.vm;
-   struct amdgpu_bo_list_entry vm_pd;
-   struct list_head list, duplicates;
-   struct dma_fence *fence = NULL;
-   struct ttm_validate_buffer tv;
-   struct ww_acquire_ctx ticket;
-   long r = 0;
-
-   INIT_LIST_HEAD(&list);
-   INIT_LIST_HEAD(&duplicates);
-
-   tv.bo = &bo->tbo;
-   tv.num_shared = 2;
-   list_add(&tv.head, &list);
-
-   amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
-
-   r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
-   if (r) {
-   dev_err(adev->dev, "leaking bo va because "
-   "we fail to reserve bo (%ld)\n", r);
-   return r;
+   struct dma_fence *fence;
+   struct drm_exec exec;
+   long r;
+
+   drm_exec_init(&exec, false);
+   drm_exec_while_not_all_locked(&exec) {
+   r = drm_exec_prepare_obj(&exec,
+&ctx_data->meta_data_obj->tbo.base,
+0);
+   if (likely(!r))
+   r = amdgpu_vm_lock_pd(vm, &exec);
+   drm_exec_continue_on_contention(&exec);
+if (unlikely(r))
+

[PATCH 06/13] drm/amdgpu: use the new drm_exec object for CS v2

2023-05-04 Thread Christian König
Use the new component here as well and remove the old handling.

v2: drop duplicate handling

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c |  71 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |   5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 210 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h  |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  22 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   3 -
 7 files changed, 115 insertions(+), 204 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 02b827785e39..eba3e4f01ea6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -53,7 +53,6 @@
 
 #include 
 #include 
-#include 
 
 #include 
 #include 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 252a876b0725..b6298e901cbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -28,6 +28,7 @@
  *Christian König 
  */
 
+#include 
 #include 
 
 #include "amdgpu.h"
@@ -50,13 +51,20 @@ static void amdgpu_bo_list_free(struct kref *ref)
   refcount);
struct amdgpu_bo_list_entry *e;
 
-   amdgpu_bo_list_for_each_entry(e, list) {
-   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+   amdgpu_bo_list_for_each_entry(e, list)
+   amdgpu_bo_unref(&e->bo);
+   call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
+}
 
-   amdgpu_bo_unref(&bo);
-   }
+static int amdgpu_bo_list_entry_cmp(const void *_a, const void *_b)
+{
+   const struct amdgpu_bo_list_entry *a = _a, *b = _b;
 
-   call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
+   if (a->priority > b->priority)
+   return 1;
+   if (a->priority < b->priority)
+   return -1;
+   return 0;
 }
 
 int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
@@ -118,7 +126,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, 
struct drm_file *filp,
 
entry->priority = min(info[i].bo_priority,
  AMDGPU_BO_LIST_MAX_PRIORITY);
-   entry->tv.bo = &bo->tbo;
+   entry->bo = bo;
 
if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
list->gds_obj = bo;
@@ -133,6 +141,8 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, 
struct drm_file *filp,
 
list->first_userptr = first_userptr;
list->num_entries = num_entries;
+   sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry),
+amdgpu_bo_list_entry_cmp, NULL);
 
trace_amdgpu_cs_bo_status(list->num_entries, total_size);
 
@@ -141,16 +151,10 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, 
struct drm_file *filp,
return 0;
 
 error_free:
-   for (i = 0; i < last_entry; ++i) {
-   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
-
-   amdgpu_bo_unref(&bo);
-   }
-   for (i = first_userptr; i < num_entries; ++i) {
-   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
-
-   amdgpu_bo_unref(&bo);
-   }
+   for (i = 0; i < last_entry; ++i)
+   amdgpu_bo_unref(&array[i].bo);
+   for (i = first_userptr; i < num_entries; ++i)
+   amdgpu_bo_unref(&array[i].bo);
kvfree(list);
return r;
 
@@ -182,41 +186,6 @@ int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
return -ENOENT;
 }
 
-void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
-struct list_head *validated)
-{
-   /* This is based on the bucket sort with O(n) time complexity.
-* An item with priority "i" is added to bucket[i]. The lists are then
-* concatenated in descending order.
-*/
-   struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS];
-   struct amdgpu_bo_list_entry *e;
-   unsigned i;
-
-   for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
-   INIT_LIST_HEAD(&bucket[i]);
-
-   /* Since buffers which appear sooner in the relocation list are
-* likely to be used more often than buffers which appear later
-* in the list, the sort mustn't change the ordering of buffers
-* with the same priority, i.e. it must be stable.
-*/
-   amdgpu_bo_list_for_each_entry(e, list) {
-   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-   unsigned priority = e->priority;
-
-   if (!bo->parent)
-   list_add_tail(&e->tv.head, &bucket[priority]);
-
-   e->user_pages = NULL;
-   e->range = NULL;
-   }
-
-   /* Connect the sorted buckets in the output list. */
-   for (i =

[PATCH 04/13] drm/amdgpu: use drm_exec for GEM and CSA handling

2023-05-04 Thread Christian König
Start using the new component here as well.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 42 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 77 +++--
 2 files changed, 53 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index c6d4d41c4393..ea434c8de047 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -22,6 +22,8 @@
  * * Author: monk@amd.com
  */
 
+#include 
+
 #include "amdgpu.h"
 
 uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
@@ -65,31 +67,25 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
  struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
  uint64_t csa_addr, uint32_t size)
 {
-   struct ww_acquire_ctx ticket;
-   struct list_head list;
-   struct amdgpu_bo_list_entry pd;
-   struct ttm_validate_buffer csa_tv;
+   struct drm_exec exec;
int r;
 
-   INIT_LIST_HEAD(&list);
-   INIT_LIST_HEAD(&csa_tv.head);
-   csa_tv.bo = &bo->tbo;
-   csa_tv.num_shared = 1;
-
-   list_add(&csa_tv.head, &list);
-   amdgpu_vm_get_pd_bo(vm, &list, &pd);
-
-   r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
-   if (r) {
-   DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
-   return r;
+   drm_exec_init(&exec, true);
+   drm_exec_while_not_all_locked(&exec) {
+   r = amdgpu_vm_lock_pd(vm, &exec);
+   if (likely(!r))
+   r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 0);
+   drm_exec_continue_on_contention(&exec);
+   if (unlikely(r)) {
+   DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+   goto error;
+   }
}
 
*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
if (!*bo_va) {
-   ttm_eu_backoff_reservation(&ticket, &list);
-   DRM_ERROR("failed to create bo_va for static CSA\n");
-   return -ENOMEM;
+   r = -ENOMEM;
+   goto error;
}
 
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
@@ -99,10 +95,10 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
if (r) {
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
amdgpu_vm_bo_del(adev, *bo_va);
-   ttm_eu_backoff_reservation(&ticket, &list);
-   return r;
+   goto error;
}
 
-   ttm_eu_backoff_reservation(&ticket, &list);
-   return 0;
+error:
+   drm_exec_fini(&exec);
+   return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 863cb668e000..c5f74f241366 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -33,6 +33,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -197,29 +198,23 @@ static void amdgpu_gem_object_close(struct drm_gem_object 
*obj,
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
 
-   struct amdgpu_bo_list_entry vm_pd;
-   struct list_head list, duplicates;
struct dma_fence *fence = NULL;
-   struct ttm_validate_buffer tv;
-   struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
+   struct drm_exec exec;
long r;
 
-   INIT_LIST_HEAD(&list);
-   INIT_LIST_HEAD(&duplicates);
-
-   tv.bo = &bo->tbo;
-   tv.num_shared = 2;
-   list_add(&tv.head, &list);
-
-   amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
-
-   r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
-   if (r) {
-   dev_err(adev->dev, "leaking bo va because "
-   "we fail to reserve bo (%ld)\n", r);
-   return;
+   drm_exec_init(&exec, false);
+   drm_exec_while_not_all_locked(&exec) {
+   r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 0);
+   if (likely(!r))
+   r = amdgpu_vm_lock_pd(vm, &exec);
+   drm_exec_continue_on_contention(&exec);
+   if (unlikely(r)) {
+   dev_err(adev->dev, "leaking bo va (%ld)\n", r);
+   goto out_unlock;
+   }
}
+
bo_va = amdgpu_vm_bo_find(vm, bo);
if (!bo_va || --bo_va->ref_count)
goto out_unlock;
@@ -229,6 +224,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object 
*obj,
goto out_unlock;
 
r = amdgpu_vm_clear_freed(adev, vm, &fence);
+   if (unlikely(r < 0))
+   dev_err(adev->dev, "failed to clear page "
+   "tables on GEM object close (%ld)\n", r);
if (r || !fence)
  

[PATCH 02/13] drm: add drm_exec selftests v2

2023-05-04 Thread Christian König
Largely just the initial skeleton.

v2: add array test as well

Signed-off-by: Christian König 
---
 drivers/gpu/drm/Kconfig   |  1 +
 drivers/gpu/drm/tests/Makefile|  3 +-
 drivers/gpu/drm/tests/drm_exec_test.c | 96 +++
 3 files changed, 99 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tests/drm_exec_test.c

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 2dc81eb062eb..068e574e234e 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -80,6 +80,7 @@ config DRM_KUNIT_TEST
select DRM_BUDDY
select DRM_EXPORT_FOR_TESTS if m
select DRM_KUNIT_TEST_HELPERS
+   select DRM_EXEC
default KUNIT_ALL_TESTS
help
  This builds unit tests for DRM. This option is not useful for
diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
index bca726a8f483..ba7baa622675 100644
--- a/drivers/gpu/drm/tests/Makefile
+++ b/drivers/gpu/drm/tests/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
drm_modes_test.o \
drm_plane_helper_test.o \
drm_probe_helper_test.o \
-   drm_rect_test.o
+   drm_rect_test.o \
+   drm_exec_test.o
 
 CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
diff --git a/drivers/gpu/drm/tests/drm_exec_test.c 
b/drivers/gpu/drm/tests/drm_exec_test.c
new file mode 100644
index ..26aa13e62d22
--- /dev/null
+++ b/drivers/gpu/drm/tests/drm_exec_test.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#define pr_fmt(fmt) "drm_exec: " fmt
+
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include "../lib/drm_random.h"
+
+static struct drm_device dev;
+
+static void drm_exec_sanitycheck(struct kunit *test)
+{
+   struct drm_exec exec;
+
+   drm_exec_init(&exec, true);
+   drm_exec_fini(&exec);
+   pr_info("%s - ok!\n", __func__);
+}
+
+static void drm_exec_lock1(struct kunit *test)
+{
+   struct drm_gem_object gobj = { };
+   struct drm_exec exec;
+   int ret;
+
+   drm_gem_private_object_init(&dev, &gobj, PAGE_SIZE);
+
+   drm_exec_init(&exec, true);
+   drm_exec_while_not_all_locked(&exec) {
+   ret = drm_exec_prepare_obj(&exec, &gobj, 1);
+   drm_exec_continue_on_contention(&exec);
+   if (ret) {
+   drm_exec_fini(&exec);
+   pr_err("%s - err %d!\n", __func__, ret);
+   return;
+   }
+   }
+   drm_exec_fini(&exec);
+   pr_info("%s - ok!\n", __func__);
+}
+
+static void drm_exec_lock_array(struct kunit *test)
+{
+   struct drm_gem_object gobj1 = { };
+   struct drm_gem_object gobj2 = { };
+   struct drm_gem_object *array[] = { &gobj1, &gobj2 };
+   struct drm_exec exec;
+   int ret;
+
+   drm_gem_private_object_init(&dev, &gobj1, PAGE_SIZE);
+   drm_gem_private_object_init(&dev, &gobj2, PAGE_SIZE);
+
+   drm_exec_init(&exec, true);
+   ret = drm_exec_prepare_array(&exec, array, ARRAY_SIZE(array), 0);
+   if (ret) {
+   drm_exec_fini(&exec);
+   pr_err("%s - err %d!\n", __func__, ret);
+   return;
+   }
+   drm_exec_fini(&exec);
+   pr_info("%s - ok!\n", __func__);
+}
+
+static int drm_exec_suite_init(struct kunit_suite *suite)
+{
+   kunit_info(suite, "Testing DRM exec manager\n");
+   return 0;
+}
+
+static struct kunit_case drm_exec_tests[] = {
+   KUNIT_CASE(drm_exec_sanitycheck),
+   KUNIT_CASE(drm_exec_lock1),
+   KUNIT_CASE(drm_exec_lock_array),
+   {}
+};
+
+static struct kunit_suite drm_exec_test_suite = {
+   .name = "drm_exec",
+   .suite_init = drm_exec_suite_init,
+   .test_cases = drm_exec_tests,
+};
+
+kunit_test_suite(drm_exec_test_suite);
+
+MODULE_AUTHOR("AMD");
+MODULE_LICENSE("GPL and additional rights");
-- 
2.34.1



[PATCH 01/13] drm: execution context for GEM buffers v4

2023-05-04 Thread Christian König
This adds the infrastructure for an execution context for GEM buffers
which is similar to the existing TTMs execbuf util and intended to replace
it in the long term.

The basic functionality is that we abstracts the necessary loop to lock
many different GEM buffers with automated deadlock and duplicate handling.

v2: drop xarray and use dynamic resized array instead, the locking
overhead is unnecessary and measurable.
v3: drop duplicate tracking, radeon is really the only one needing that.
v4: fixes issues pointed out by Danilo, some typos in comments and a
helper for lock arrays of GEM objects.

Signed-off-by: Christian König 
---
 Documentation/gpu/drm-mm.rst |  12 ++
 drivers/gpu/drm/Kconfig  |   6 +
 drivers/gpu/drm/Makefile |   2 +
 drivers/gpu/drm/drm_exec.c   | 278 +++
 include/drm/drm_exec.h   | 119 +++
 5 files changed, 417 insertions(+)
 create mode 100644 drivers/gpu/drm/drm_exec.c
 create mode 100644 include/drm/drm_exec.h

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index a79fd3549ff8..a52e6f4117d6 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -493,6 +493,18 @@ DRM Sync Objects
 .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
:export:
 
+DRM Execution context
+=
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :doc: Overview
+
+.. kernel-doc:: include/drm/drm_exec.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :export:
+
 GPU Scheduler
 =
 
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index ba3fb04bb691..2dc81eb062eb 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -201,6 +201,12 @@ config DRM_TTM
  GPU memory types. Will be enabled automatically if a device driver
  uses it.
 
+config DRM_EXEC
+   tristate
+   depends on DRM
+   help
+ Execution context for command submissions
+
 config DRM_BUDDY
tristate
depends on DRM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index a33257d2bc7f..9c6446eb3c83 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -78,6 +78,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += 
drm_panel_orientation_quirks.o
 #
 # Memory-management helpers
 #
+#
+obj-$(CONFIG_DRM_EXEC) += drm_exec.o
 
 obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
 
diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
new file mode 100644
index ..18071bff20f4
--- /dev/null
+++ b/drivers/gpu/drm/drm_exec.c
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+#include 
+#include 
+#include 
+
+/**
+ * DOC: Overview
+ *
+ * This component mainly abstracts the retry loop necessary for locking
+ * multiple GEM objects while preparing hardware operations (e.g. command
+ * submissions, page table updates etc..).
+ *
+ * If a contention is detected while locking a GEM object the cleanup procedure
+ * unlocks all previously locked GEM objects and locks the contended one first
+ * before locking any further objects.
+ *
+ * After an object is locked fences slots can optionally be reserved on the
+ * dma_resv object inside the GEM object.
+ *
+ * A typical usage pattern should look like this::
+ *
+ * struct drm_gem_object *obj;
+ * struct drm_exec exec;
+ * unsigned long index;
+ * int ret;
+ *
+ * drm_exec_init(&exec, true);
+ * drm_exec_while_not_all_locked(&exec) {
+ * ret = drm_exec_prepare_obj(&exec, boA, 1);
+ * drm_exec_continue_on_contention(&exec);
+ * if (ret)
+ * goto error;
+ *
+ * ret = drm_exec_prepare_obj(&exec, boB, 1);
+ * drm_exec_continue_on_contention(&exec);
+ * if (ret)
+ * goto error;
+ * }
+ *
+ * drm_exec_for_each_locked_object(&exec, index, obj) {
+ * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ);
+ * ...
+ * }
+ * drm_exec_fini(&exec);
+ *
+ * See struct dma_exec for more details.
+ */
+
+/* Dummy value used to initially enter the retry loop */
+#define DRM_EXEC_DUMMY (void*)~0
+
+/* Unlock all objects and drop references */
+static void drm_exec_unlock_all(struct drm_exec *exec)
+{
+   struct drm_gem_object *obj;
+   unsigned long index;
+
+   drm_exec_for_each_locked_object(exec, index, obj) {
+   dma_resv_unlock(obj->resv);
+   drm_gem_object_put(obj);
+   }
+
+   drm_gem_object_put(exec->prelocked);
+   exec->prelocked = NULL;
+}
+
+/**
+ * drm_exec_init - initialize a drm_exec object
+ * @exec: the drm_exec object to initialize
+ * @interruptible: if locks should be acquired interruptible
+ *
+ * Initialize the object and make sure that we can track locked objects.
+ */
+void drm_exec_init(struct drm_exec *exec, bool interruptible)
+{
+   exec->interruptible = interruptible;
+   exec->objects 

Common DRM execution context v4

2023-05-04 Thread Christian König
Hi guys,

so well known patch set by now. I've tried to address all review
comments and extended the set to also replace
drm_gem_lock_reservations() as suggested by Thomas.

I won't have much time to work on this in the next few weeks, so feel
free to pick up this work and commit it when you need it.

Regards,
Christian.