Re: [PATCH] drm/amdgpu: update the vm invalidation engine layout V2

2018-12-13 Thread Kuehling, Felix
Reviewed-by: Felix Kuehling 

Thanks,
  Felix


From: amd-gfx  on behalf of Evan Quan 

Sent: Thursday, December 13, 2018 9:59:30 PM
To: amd-gfx@lists.freedesktop.org
Cc: Kuehling, Felix; Quan, Evan
Subject: [PATCH] drm/amdgpu: update the vm invalidation engine layout V2

We need new invalidation engine layout due to new SDMA page
queues added.

V2: fix coding style and add correct return value

Change-Id: I2f3861689bffb9828c9eae744a7a0de4963ac2b6
Signed-off-by: Evan Quan 
Reviewed-by: Christian König 
Reviewed-by: Oak Zeng 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 53 ---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h | 10 +
 2 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index dc4dadc3575c..ce8d41514e0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -719,37 +719,46 @@ static bool gmc_v9_0_keep_stolen_memory(struct 
amdgpu_device *adev)
}
 }

-static int gmc_v9_0_late_init(void *handle)
+static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)
 {
-   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   /*
-* The latest engine allocation on gfx9 is:
-* Engine 0, 1: idle
-* Engine 2, 3: firmware
-* Engine 4~13: amdgpu ring, subject to change when ring number changes
-* Engine 14~15: idle
-* Engine 16: kfd tlb invalidation
-* Engine 17: Gart flushes
-*/
-   unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
+   struct amdgpu_ring *ring;
+   unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
+   {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP};
unsigned i;
-   int r;
+   unsigned vmhub, inv_eng;

-   if (!gmc_v9_0_keep_stolen_memory(adev))
-   amdgpu_bo_late_init(adev);
+   for (i = 0; i < adev->num_rings; ++i) {
+   ring = adev->rings[i];
+   vmhub = ring->funcs->vmhub;
+
+   inv_eng = ffs(vm_inv_engs[vmhub]);
+   if (!inv_eng) {
+   dev_err(adev->dev, "no VM inv eng for ring %s\n",
+   ring->name);
+   return -EINVAL;
+   }

-   for(i = 0; i < adev->num_rings; ++i) {
-   struct amdgpu_ring *ring = adev->rings[i];
-   unsigned vmhub = ring->funcs->vmhub;
+   ring->vm_inv_eng = inv_eng - 1;
+   change_bit(inv_eng - 1, (unsigned long *)(&vm_inv_engs[vmhub]));

-   ring->vm_inv_eng = vm_inv_eng[vmhub]++;
dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
 ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
}

-   /* Engine 16 is used for KFD and 17 for GART flushes */
-   for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
-   BUG_ON(vm_inv_eng[i] > 16);
+   return 0;
+}
+
+static int gmc_v9_0_late_init(void *handle)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   int r;
+
+   if (!gmc_v9_0_keep_stolen_memory(adev))
+   amdgpu_bo_late_init(adev);
+
+   r = gmc_v9_0_allocate_vm_inv_eng(adev);
+   if (r)
+   return r;

if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) {
r = gmc_v9_0_ecc_available(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
index b030ca5ea107..5c8deac65580 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
@@ -24,6 +24,16 @@
 #ifndef __GMC_V9_0_H__
 #define __GMC_V9_0_H__

+   /*
+* The latest engine allocation on gfx9 is:
+* Engine 2, 3: firmware
+* Engine 0, 1, 4~16: amdgpu ring,
+*subject to change when ring number changes
+* Engine 17: Gart flushes
+*/
+#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
+#define MMHUB_FREE_VM_INV_ENGS_BITMAP  0x1FFF3
+
 extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;
 extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block;

--
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/4] drm/amdgpu: update the vm invalidation engine layout

2018-12-13 Thread Quan, Evan
Addressed this in the new patch which was just sent out.

Thanks,
Evan
> -----Original Message-----
> From: Kuehling, Felix
> Sent: 2018年12月14日 1:09
> To: amd-gfx@lists.freedesktop.org; Quan, Evan ;
> Deucher, Alexander ; Zeng, Oak
> ; Koenig, Christian 
> Subject: Re: [PATCH 2/4] drm/amdgpu: update the vm invalidation engine
> layout
> 
> Some nit-picks inline.
> 
> On 2018-12-12 11:09 p.m., Evan Quan wrote:
> > We need new invalidation engine layout due to new SDMA page queues
> > added.
> >
> > Change-Id: I2f3861689bffb9828c9eae744a7a0de4963ac2b6
> > Signed-off-by: Evan Quan 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 47
> > ++-  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
> |
> > 10 ++
> >  2 files changed, 35 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > index dc4dadc3575c..092a4d111557 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> > @@ -719,37 +719,40 @@ static bool
> gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)
> > }
> >  }
> >
> > -static int gmc_v9_0_late_init(void *handle)
> > +static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)
> 
> The function returns int. But only ever returns 0 and the caller ignores the
> return value.
> 
> 
> >  {
> > -   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> > -   /*
> > -* The latest engine allocation on gfx9 is:
> > -* Engine 0, 1: idle
> > -* Engine 2, 3: firmware
> > -* Engine 4~13: amdgpu ring, subject to change when ring number
> changes
> > -* Engine 14~15: idle
> > -* Engine 16: kfd tlb invalidation
> > -* Engine 17: Gart flushes
> > -*/
> > -   unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
> > +   struct amdgpu_ring *ring;
> > +   unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
> > +   {GFXHUB_FREE_VM_INV_ENGS_BITMAP,
> MMHUB_FREE_VM_INV_ENGS_BITMAP};
> > unsigned i;
> > -   int r;
> > +   unsigned vmhub, inv_eng;
> >
> > -   if (!gmc_v9_0_keep_stolen_memory(adev))
> > -   amdgpu_bo_late_init(adev);
> > +   for (i = 0; i < adev->num_rings; ++i) {
> > +   ring = adev->rings[i];
> > +   vmhub = ring->funcs->vmhub;
> > +
> > +   inv_eng = ffs(vm_inv_engs[vmhub]);
> > +   BUG_ON(!inv_eng);
> 
> Adding new BUG_ONs is discouraged. checkpatch.pl always warns about
> these. Errors that can be handled more gracefully shouldn't use a BUG_ON.
> E.g. use a WARN_ON and return an error code.
> 
> 
> >
> > -   for(i = 0; i < adev->num_rings; ++i) {
> > -   struct amdgpu_ring *ring = adev->rings[i];
> > -   unsigned vmhub = ring->funcs->vmhub;
> > +   ring->vm_inv_eng = inv_eng - 1;
> > +   change_bit(inv_eng - 1, (unsigned long
> *)(&vm_inv_engs[vmhub]));
> >
> > -   ring->vm_inv_eng = vm_inv_eng[vmhub]++;
> > dev_info(adev->dev, "ring %s uses VM inv eng %u on
> hub %u\n",
> >  ring->name, ring->vm_inv_eng, ring->funcs-
> >vmhub);
> > }
> >
> > -   /* Engine 16 is used for KFD and 17 for GART flushes */
> > -   for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
> > -   BUG_ON(vm_inv_eng[i] > 16);
> > +   return 0;
> 
> This is the only return statement. Maybe this could be a void function.
> Unless you decide to turn the BUG_ON into a WARN with an error return.
> 
> 
> > +}
> > +
> > +static int gmc_v9_0_late_init(void *handle) {
> > +   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> > +   int r;
> > +
> > +   if (!gmc_v9_0_keep_stolen_memory(adev))
> > +   amdgpu_bo_late_init(adev);
> > +
> > +   gmc_v9_0_allocate_vm_inv_eng(adev);
> 
> You're ignoring the return value. Either, make the function void, or handle
> potential error returns.
> 
> Regards,
>   Felix
> 
> 
> >
> > if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) {
> > r = gmc_v9_0_ecc_available(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
> > index b030ca5ea107..5c8deac65580 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
> > @@ -24,6 +24,16 @@
> >  #ifndef __GMC_V9_0_H__
> >  #define __GMC_V9_0_H__
> >
> > +   /*
> > +* The latest engine allocation on gfx9 is:
> > +* Engine 2, 3: firmware
> > +* Engine 0, 1, 4~16: amdgpu ring,
> > +*subject to change when ring number changes
> > +* Engine 17: Gart flushes
> > +*/
> > +#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
> > +#define MMHUB_FREE_VM_INV_ENGS_BITMAP  0x1FFF3
> > +
> >  extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;  extern const
> > struct amdgpu_ip_block_version gmc_v9_0_ip_block;
> >
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: update the vm invalidation engine layout V2

2018-12-13 Thread Evan Quan
We need new invalidation engine layout due to new SDMA page
queues added.

V2: fix coding style and add correct return value

Change-Id: I2f3861689bffb9828c9eae744a7a0de4963ac2b6
Signed-off-by: Evan Quan 
Reviewed-by: Christian König 
Reviewed-by: Oak Zeng 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 53 ---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h | 10 +
 2 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index dc4dadc3575c..ce8d41514e0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -719,37 +719,46 @@ static bool gmc_v9_0_keep_stolen_memory(struct 
amdgpu_device *adev)
}
 }
 
-static int gmc_v9_0_late_init(void *handle)
+static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)
 {
-   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   /*
-* The latest engine allocation on gfx9 is:
-* Engine 0, 1: idle
-* Engine 2, 3: firmware
-* Engine 4~13: amdgpu ring, subject to change when ring number changes
-* Engine 14~15: idle
-* Engine 16: kfd tlb invalidation
-* Engine 17: Gart flushes
-*/
-   unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
+   struct amdgpu_ring *ring;
+   unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
+   {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP};
unsigned i;
-   int r;
+   unsigned vmhub, inv_eng;
 
-   if (!gmc_v9_0_keep_stolen_memory(adev))
-   amdgpu_bo_late_init(adev);
+   for (i = 0; i < adev->num_rings; ++i) {
+   ring = adev->rings[i];
+   vmhub = ring->funcs->vmhub;
+
+   inv_eng = ffs(vm_inv_engs[vmhub]);
+   if (!inv_eng) {
+   dev_err(adev->dev, "no VM inv eng for ring %s\n",
+   ring->name);
+   return -EINVAL;
+   }
 
-   for(i = 0; i < adev->num_rings; ++i) {
-   struct amdgpu_ring *ring = adev->rings[i];
-   unsigned vmhub = ring->funcs->vmhub;
+   ring->vm_inv_eng = inv_eng - 1;
+   change_bit(inv_eng - 1, (unsigned long *)(&vm_inv_engs[vmhub]));
 
-   ring->vm_inv_eng = vm_inv_eng[vmhub]++;
dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
 ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
}
 
-   /* Engine 16 is used for KFD and 17 for GART flushes */
-   for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
-   BUG_ON(vm_inv_eng[i] > 16);
+   return 0;
+}
+
+static int gmc_v9_0_late_init(void *handle)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   int r;
+
+   if (!gmc_v9_0_keep_stolen_memory(adev))
+   amdgpu_bo_late_init(adev);
+
+   r = gmc_v9_0_allocate_vm_inv_eng(adev);
+   if (r)
+   return r;
 
if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) {
r = gmc_v9_0_ecc_available(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
index b030ca5ea107..5c8deac65580 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
@@ -24,6 +24,16 @@
 #ifndef __GMC_V9_0_H__
 #define __GMC_V9_0_H__
 
+   /*
+* The latest engine allocation on gfx9 is:
+* Engine 2, 3: firmware
+* Engine 0, 1, 4~16: amdgpu ring,
+*subject to change when ring number changes
+* Engine 17: Gart flushes
+*/
+#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
+#define MMHUB_FREE_VM_INV_ENGS_BITMAP  0x1FFF3
+
 extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;
 extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block;
 
-- 
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[WIP PATCH 12/15] drm/dp_mst: Add some atomic state iterator macros

2018-12-13 Thread Lyude Paul
Changes since v6:
 - Move EXPORT_SYMBOL() for drm_dp_mst_topology_state_funcs to this
   commit
 - Document __drm_dp_mst_state_iter_get() and note that it shouldn't be
   called directly

Signed-off-by: Lyude Paul 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/drm_dp_mst_topology.c |  5 +-
 include/drm/drm_dp_mst_helper.h   | 96 +++
 2 files changed, 99 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
b/drivers/gpu/drm/drm_dp_mst_topology.c
index 93f08bfd2ab3..8d94c8943ac7 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -3415,10 +3415,11 @@ static void drm_dp_mst_destroy_state(struct 
drm_private_obj *obj,
kfree(mst_state);
 }
 
-static const struct drm_private_state_funcs mst_state_funcs = {
+const struct drm_private_state_funcs drm_dp_mst_topology_state_funcs = {
.atomic_duplicate_state = drm_dp_mst_duplicate_state,
.atomic_destroy_state = drm_dp_mst_destroy_state,
 };
+EXPORT_SYMBOL(drm_dp_mst_topology_state_funcs);
 
 /**
  * drm_atomic_get_mst_topology_state: get MST topology state
@@ -3502,7 +3503,7 @@ int drm_dp_mst_topology_mgr_init(struct 
drm_dp_mst_topology_mgr *mgr,
 
drm_atomic_private_obj_init(dev, &mgr->base,
&mst_state->base,
-   &mst_state_funcs);
+   &drm_dp_mst_topology_state_funcs);
 
return 0;
 }
diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
index 50643a39765d..b8a8d75410d0 100644
--- a/include/drm/drm_dp_mst_helper.h
+++ b/include/drm/drm_dp_mst_helper.h
@@ -637,4 +637,100 @@ int drm_dp_send_power_updown_phy(struct 
drm_dp_mst_topology_mgr *mgr,
 void drm_dp_mst_get_port_malloc(struct drm_dp_mst_port *port);
 void drm_dp_mst_put_port_malloc(struct drm_dp_mst_port *port);
 
+extern const struct drm_private_state_funcs drm_dp_mst_topology_state_funcs;
+
+/**
+ * __drm_dp_mst_state_iter_get - private atomic state iterator function for
+ * macro-internal use
+ * @state: &struct drm_atomic_state pointer
+ * @mgr: pointer to the &struct drm_dp_mst_topology_mgr iteration cursor
+ * @old_state: optional pointer to the old &struct drm_dp_mst_topology_state
+ * iteration cursor
+ * @new_state: optional pointer to the new &struct drm_dp_mst_topology_state
+ * iteration cursor
+ * @i: int iteration cursor, for macro-internal use
+ *
+ * Used by for_each_oldnew_mst_mgr_in_state(),
+ * for_each_old_mst_mgr_in_state(), and for_each_new_mst_mgr_in_state(). Don't
+ * call this directly.
+ *
+ * Returns:
+ * True if the current &struct drm_private_obj is a &struct
+ * drm_dp_mst_topology_mgr, false otherwise.
+ */
+static inline bool
+__drm_dp_mst_state_iter_get(struct drm_atomic_state *state,
+   struct drm_dp_mst_topology_mgr **mgr,
+   struct drm_dp_mst_topology_state **old_state,
+   struct drm_dp_mst_topology_state **new_state,
+   int i)
+{
+   struct __drm_private_objs_state *objs_state = &state->private_objs[i];
+
+   if (objs_state->ptr->funcs != &drm_dp_mst_topology_state_funcs)
+   return false;
+
+   *mgr = to_dp_mst_topology_mgr(objs_state->ptr);
+   if (old_state)
+   *old_state = to_dp_mst_topology_state(objs_state->old_state);
+   if (new_state)
+   *new_state = to_dp_mst_topology_state(objs_state->new_state);
+
+   return true;
+}
+
+/**
+ * for_each_oldnew_mst_mgr_in_state - iterate over all DP MST topology
+ * managers in an atomic update
+ * @__state: &struct drm_atomic_state pointer
+ * @mgr: &struct drm_dp_mst_topology_mgr iteration cursor
+ * @old_state: &struct drm_dp_mst_topology_state iteration cursor for the old
+ * state
+ * @new_state: &struct drm_dp_mst_topology_state iteration cursor for the new
+ * state
+ * @__i: int iteration cursor, for macro-internal use
+ *
+ * This iterates over all DRM DP MST topology managers in an atomic update,
+ * tracking both old and new state. This is useful in places where the state
+ * delta needs to be considered, for example in atomic check functions.
+ */
+#define for_each_oldnew_mst_mgr_in_state(__state, mgr, old_state, new_state, 
__i) \
+   for ((__i) = 0; (__i) < (__state)->num_private_objs; (__i)++) \
+   for_each_if(__drm_dp_mst_state_iter_get((__state), &(mgr), 
&(old_state), &(new_state), (__i)))
+
+/**
+ * for_each_old_mst_mgr_in_state - iterate over all DP MST topology managers
+ * in an atomic update
+ * @__state: &struct drm_atomic_state pointer
+ * @mgr: &struct drm_dp_mst_topology_mgr iteration cursor
+ * @old_state: &struct drm_dp_mst_topology_state iteration cursor for the old
+ * state
+ * @__i: int iteration cursor, for macro-internal use
+ *
+ * This iterates over all DRM DP MST topology managers in an atomic update,
+ * tracking only the old state. This is useful i

[WIP PATCH 13/15] drm/dp_mst: Start tracking per-port VCPI allocations

2018-12-13 Thread Lyude Paul
There has been a TODO waiting for quite a long time in
drm_dp_mst_topology.c:

/* We cannot rely on port->vcpi.num_slots to update
 * topology_state->avail_slots as the port may not exist if the parent
 * branch device was unplugged. This should be fixed by tracking
 * per-port slot allocation in drm_dp_mst_topology_state instead of
 * depending on the caller to tell us how many slots to release.
 */

That's not the only reason we should fix this: forcing the driver to
track the VCPI allocations throughout a state's atomic check is
error prone, because it means that extra care has to be taken with the
order that drm_dp_atomic_find_vcpi_slots() and
drm_dp_atomic_release_vcpi_slots() are called in in order to ensure
idempotency. Currently the only driver actually using these helpers,
i915, doesn't even do this correctly: multiple ->best_encoder() checks
with i915's current implementation would not be idempotent and would
over-allocate VCPI slots, something I learned trying to implement
fallback retraining in MST.

So: simplify this whole mess, and teach drm_dp_atomic_find_vcpi_slots()
and drm_dp_atomic_release_vcpi_slots() to track the VCPI allocations for
each port. This allows us to ensure idempotency without having to rely
on the driver as much. Additionally: the driver doesn't need to do any
kind of VCPI slot tracking anymore if it doesn't need it for it's own
internal state.

Additionally; this adds a new drm_dp_mst_atomic_check() helper which
must be used by atomic drivers to perform validity checks for the new
VCPI allocations incurred by a state.

Also: update the documentation and make it more obvious that these
/must/ be called by /all/ atomic drivers supporting MST.

Changes since v6:
 - Keep a kref to all of the ports we have allocations on. This required
   a good bit of changing to when we call drm_dp_find_vcpi_slots(),
   mainly that we need to ensure that we only redo VCPI allocations on
   actual mode or CRTC changes, not crtc_state->active changes.
   Additionally, we no longer take the registration of the DRM connector
   for each port into account because so long as we have a kref to the
   port in the new or previous atomic state, the connector will stay
   registered.
 - Use the small changes to drm_dp_put_port() to add even more error
   checking to make misusage of the helpers more obvious. I added this
   after having to chase down various use-after-free conditions that
   started popping up from the new helpers so no one else has to
   troubleshoot that.
 - Move some accidental DRM_DEBUG_KMS() calls to DRM_DEBUG_ATOMIC()
 - Update documentation again, note that find/release() should both not be
   called on the same port in a single atomic check phase (but multiple
   calls to one or the other is OK)

Changes since v4:
 - Don't skip the atomic checks for VCPI allocations if no new VCPI
   allocations happen in a state. This makes the next change I'm about
   to list here a lot easier to implement.
 - Don't ignore VCPI allocations on destroyed ports, instead ensure that
   when ports are destroyed and still have VCPI allocations in the
   topology state, the only state changes allowed are releasing said
   ports' VCPI. This prevents a state with a mix of VCPI allocations
   from destroyed ports, and allocations from valid ports.

Changes since v3:
 - Don't release VCPI allocations in the topology state immediately in
   drm_dp_atomic_release_vcpi_slots(), instead mark them as 0 and skip
   over them in drm_dp_mst_duplicate_state(). This makes it so
   drm_dp_atomic_release_vcpi_slots() is still idempotent while also
   throwing warnings if the driver messes up it's book keeping and tries
   to release VCPI slots on a port that doesn't have any pre-existing
   VCPI allocation - danvet
 - Change mst_state/state in some debugging messages to "mst state"

Changes since v2:
 - Use kmemdup() for duplicating MST state - danvet
 - Move port validation out of duplicate state callback - danvet
 - Handle looping through MST topology states in
   drm_dp_mst_atomic_check() so the driver doesn't have to do it
 - Fix documentation in drm_dp_atomic_find_vcpi_slots()
 - Move the atomic check for each individual topology state into it's
   own function, reduces indenting
 - Don't consider "stale" MST ports when calculating the bandwidth
   requirements. This is needed because originally we relied on the
   state duplication functions to prune any stale ports from the new
   state, which would prevent us from incorrectly considering their
   bandwidth requirements alongside legitimate new payloads.
 - Add function references in drm_dp_atomic_release_vcpi_slots() - danvet
 - Annotate atomic VCPI and atomic check functions with __must_check
   - danvet

Changes since v1:
 - Don't use the now-removed ->atomic_check() for private objects hook,
   just give drivers a function to call themselves

Signed-off-by: Lyude Paul 
Cc: Daniel Vetter 
---
 drivers/gpu/dr

[WIP PATCH 15/15] drm/nouveau: Use atomic VCPI helpers for MST

2018-12-13 Thread Lyude Paul
Currently, nouveau uses the yolo method of setting up MST displays: it
uses the old VCPI helpers (drm_dp_find_vcpi_slots()) for computing the
display configuration. These helpers don't take care to make sure they
take a reference to the mstb port that they're checking, and
additionally don't actually check whether or not the topology still has
enough bandwidth to provide the VCPI tokens required.

So, drop usage of the old helpers and move entirely over to the atomic
helpers.

Changes since v5:
 - Update nv50_msto_atomic_check() and nv50_mstc_atomic_check() to the
   new requirements for drm_dp_atomic_find_vcpi_slots() and
   drm_dp_atomic_release_vcpi_slots()

Signed-off-by: Lyude Paul 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/nouveau/dispnv50/disp.c | 52 ++---
 1 file changed, 46 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c 
b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 67f7bf97e5d9..df696008d205 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -762,16 +762,22 @@ nv50_msto_atomic_check(struct drm_encoder *encoder,
   struct drm_crtc_state *crtc_state,
   struct drm_connector_state *conn_state)
 {
-   struct nv50_mstc *mstc = nv50_mstc(conn_state->connector);
+   struct drm_atomic_state *state = crtc_state->state;
+   struct drm_connector *connector = conn_state->connector;
+   struct nv50_mstc *mstc = nv50_mstc(connector);
struct nv50_mstm *mstm = mstc->mstm;
-   int bpp = conn_state->connector->display_info.bpc * 3;
+   int bpp = connector->display_info.bpc * 3;
int slots;
 
-   mstc->pbn = drm_dp_calc_pbn_mode(crtc_state->adjusted_mode.clock, bpp);
+   mstc->pbn = drm_dp_calc_pbn_mode(crtc_state->adjusted_mode.clock,
+bpp);
 
-   slots = drm_dp_find_vcpi_slots(&mstm->mgr, mstc->pbn);
-   if (slots < 0)
-   return slots;
+   if (crtc_state->connectors_changed || crtc_state->mode_changed) {
+   slots = drm_dp_atomic_find_vcpi_slots(state, &mstm->mgr,
+ mstc->port, mstc->pbn);
+   if (slots < 0)
+   return slots;
+   }
 
return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state,
   mstc->native);
@@ -934,12 +940,42 @@ nv50_mstc_get_modes(struct drm_connector *connector)
return ret;
 }
 
+static int
+nv50_mstc_atomic_check(struct drm_connector *connector,
+  struct drm_connector_state *new_conn_state)
+{
+   struct drm_atomic_state *state = new_conn_state->state;
+   struct nv50_mstc *mstc = nv50_mstc(connector);
+   struct drm_dp_mst_topology_mgr *mgr = &mstc->mstm->mgr;
+   struct drm_connector_state *old_conn_state =
+   drm_atomic_get_old_connector_state(state, connector);
+   struct drm_crtc_state *old_crtc_state;
+   struct drm_crtc *new_crtc = new_conn_state->crtc,
+   *old_crtc = old_conn_state->crtc;
+
+   if (!old_crtc)
+   return 0;
+
+   old_crtc_state = drm_atomic_get_old_crtc_state(state, old_crtc);
+   if (!old_crtc_state || !old_crtc_state->enable)
+   return 0;
+
+   if (new_crtc)
+   return 0;
+
+   /* This connector will be left without an enabled CRTC, so its VCPI
+* must be released here
+*/
+   return drm_dp_atomic_release_vcpi_slots(state, mgr, mstc->port);
+}
+
 static const struct drm_connector_helper_funcs
 nv50_mstc_help = {
.get_modes = nv50_mstc_get_modes,
.mode_valid = nv50_mstc_mode_valid,
.best_encoder = nv50_mstc_best_encoder,
.atomic_best_encoder = nv50_mstc_atomic_best_encoder,
+   .atomic_check = nv50_mstc_atomic_check,
 };
 
 static enum drm_connector_status
@@ -2121,6 +2157,10 @@ nv50_disp_atomic_check(struct drm_device *dev, struct 
drm_atomic_state *state)
return ret;
}
 
+   ret = drm_dp_mst_atomic_check(state);
+   if (ret)
+   return ret;
+
return 0;
 }
 
-- 
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[WIP PATCH 09/15] drm/nouveau: Fix potential use-after-frees for MSTCs

2018-12-13 Thread Lyude Paul
Now that we finally have a sane way to keep port allocations around, use
it to fix the potential unchecked ->port accesses that nouveau makes by
making sure we keep the mst port allocated for as long as it's
drm_connector is accessible.

Additionally, now that we've guaranteed that mstc->port is allocated for
as long as we keep mstc around we can remove the connector registration
checks for codepaths which release payloads, allowing us to release
payloads on active topologies properly. These registration checks were
only required before in order to avoid situations where mstc->port could
technically be pointing at freed memory.

Signed-off-by: Lyude Paul 
---
 drivers/gpu/drm/nouveau/dispnv50/disp.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c 
b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 0f7d72518604..982054bbcc8b 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -964,7 +964,11 @@ static void
 nv50_mstc_destroy(struct drm_connector *connector)
 {
struct nv50_mstc *mstc = nv50_mstc(connector);
+
drm_connector_cleanup(&mstc->connector);
+   if (mstc->port)
+   drm_dp_mst_put_port_malloc(mstc->port);
+
kfree(mstc);
 }
 
@@ -1012,6 +1016,7 @@ nv50_mstc_new(struct nv50_mstm *mstm, struct 
drm_dp_mst_port *port,
drm_object_attach_property(&mstc->connector.base, 
dev->mode_config.path_property, 0);
drm_object_attach_property(&mstc->connector.base, 
dev->mode_config.tile_property, 0);
drm_connector_set_path_property(&mstc->connector, path);
+   drm_dp_mst_get_port_malloc(port);
return 0;
 }
 
@@ -1077,6 +1082,7 @@ nv50_mstm_destroy_connector(struct 
drm_dp_mst_topology_mgr *mgr,
drm_fb_helper_remove_one_connector(&drm->fbcon->helper, 
&mstc->connector);
 
drm_modeset_lock(&drm->dev->mode_config.connection_mutex, NULL);
+   drm_dp_mst_put_port_malloc(mstc->port);
mstc->port = NULL;
drm_modeset_unlock(&drm->dev->mode_config.connection_mutex);
 
-- 
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[WIP PATCH 14/15] drm/dp_mst: Check payload count in drm_dp_mst_atomic_check()

2018-12-13 Thread Lyude Paul
It occurred to me that we never actually check this! So let's start
doing that.

Signed-off-by: Lyude Paul 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
b/drivers/gpu/drm/drm_dp_mst_topology.c
index b9374c981a5b..ebffb834f5d6 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -3538,7 +3538,7 @@ drm_dp_mst_atomic_check_topology_state(struct 
drm_dp_mst_topology_mgr *mgr,
   struct drm_dp_mst_topology_state 
*mst_state)
 {
struct drm_dp_vcpi_allocation *vcpi;
-   int avail_slots = 63, ret;
+   int avail_slots = 63, payload_count = 0, ret;
 
/* There's no possible scenario where releasing VCPI or keeping it the
 * same would make the state invalid
@@ -3575,6 +3575,13 @@ drm_dp_mst_atomic_check_topology_state(struct 
drm_dp_mst_topology_mgr *mgr,
goto port_fail;
}
 
+   if (++payload_count > mgr->max_payloads) {
+   DRM_DEBUG_ATOMIC("[MST MGR:%p] state %p has too many 
payloads (max=%d)\n",
+mgr, mst_state, mgr->max_payloads);
+   ret = -EINVAL;
+   goto port_fail;
+   }
+
drm_dp_mst_topology_put_port(vcpi->port);
}
DRM_DEBUG_ATOMIC("[MST MGR:%p] mst state %p VCPI avail=%d used=%d\n",
-- 
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[WIP PATCH 08/15] drm/nouveau: Remove unnecessary VCPI checks in nv50_msto_cleanup()

2018-12-13 Thread Lyude Paul
There is no need to look at the port's VCPI allocation before calling
drm_dp_mst_deallocate_vcpi(), as we already have msto->disabled to let
us avoid cleaning up an msto more then once. The DP MST core will never
call drm_dp_mst_deallocate_vcpi() on it's own, which is presumably what
these checks are meant to protect against.

More importantly though, we're about to stop clearing mstc->port in the
next commit, which means if we could potentially hit a use-after-free
error if we tried to check mstc->port->vcpi here. So to make life easier
for anyone who bisects this code in the future, use msto->disabled
instead to check whether or not we need to deallocate VCPI instead.

Signed-off-by: Lyude Paul 
---
 drivers/gpu/drm/nouveau/dispnv50/disp.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c 
b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 641252208e67..0f7d72518604 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -704,14 +704,17 @@ nv50_msto_cleanup(struct nv50_msto *msto)
struct nv50_mstc *mstc = msto->mstc;
struct nv50_mstm *mstm = mstc->mstm;
 
+   if (!msto->disabled)
+   return;
+
NV_ATOMIC(drm, "%s: msto cleanup\n", msto->encoder.name);
-   if (mstc->port && mstc->port->vcpi.vcpi > 0 && !nv50_msto_payload(msto))
+
+   if (mstc->port)
drm_dp_mst_deallocate_vcpi(&mstm->mgr, mstc->port);
-   if (msto->disabled) {
-   msto->mstc = NULL;
-   msto->head = NULL;
-   msto->disabled = false;
-   }
+
+   msto->mstc = NULL;
+   msto->head = NULL;
+   msto->disabled = false;
 }
 
 static void
-- 
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[WIP PATCH 07/15] drm/nouveau: Remove bogus cleanup in nv50_mstm_add_connector()

2018-12-13 Thread Lyude Paul
Trying to destroy the connector using mstc->connector.funcs->destroy()
if connector initialization fails is wrong: there is no possible
codepath in nv50_mstc_new where nv50_mstm_add_connector() would return
<0 and mstc would be non-NULL.

Signed-off-by: Lyude Paul 
---
 drivers/gpu/drm/nouveau/dispnv50/disp.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c 
b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 26af45785939..641252208e67 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -1099,11 +1099,8 @@ nv50_mstm_add_connector(struct drm_dp_mst_topology_mgr 
*mgr,
int ret;
 
ret = nv50_mstc_new(mstm, port, path, &mstc);
-   if (ret) {
-   if (mstc)
-   mstc->connector.funcs->destroy(&mstc->connector);
+   if (ret)
return NULL;
-   }
 
return &mstc->connector;
 }
-- 
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[WIP PATCH 10/15] drm/nouveau: Stop unsetting mstc->port, use malloc refs

2018-12-13 Thread Lyude Paul
Same as we did for i915, but for nouveau this time. Additionally, we
grab a malloc reference to the port that lasts for the entire lifetime
of nv50_mstc, which gives us the guarantee that mstc->port will always
point to valid memory for as long as the mstc stays around.

Signed-off-by: Lyude Paul 
---
 drivers/gpu/drm/nouveau/dispnv50/disp.c | 18 +-
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c 
b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 982054bbcc8b..157d208d37b5 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -709,8 +709,7 @@ nv50_msto_cleanup(struct nv50_msto *msto)
 
NV_ATOMIC(drm, "%s: msto cleanup\n", msto->encoder.name);
 
-   if (mstc->port)
-   drm_dp_mst_deallocate_vcpi(&mstm->mgr, mstc->port);
+   drm_dp_mst_deallocate_vcpi(&mstm->mgr, mstc->port);
 
msto->mstc = NULL;
msto->head = NULL;
@@ -735,7 +734,7 @@ nv50_msto_prepare(struct nv50_msto *msto)
};
 
NV_ATOMIC(drm, "%s: msto prepare\n", msto->encoder.name);
-   if (mstc->port && mstc->port->vcpi.vcpi > 0) {
+   if (mstc->port->vcpi.vcpi > 0) {
struct drm_dp_payload *payload = nv50_msto_payload(msto);
if (payload) {
args.vcpi.start_slot = payload->start_slot;
@@ -832,8 +831,7 @@ nv50_msto_disable(struct drm_encoder *encoder)
struct nv50_mstc *mstc = msto->mstc;
struct nv50_mstm *mstm = mstc->mstm;
 
-   if (mstc->port)
-   drm_dp_mst_reset_vcpi_slots(&mstm->mgr, mstc->port);
+   drm_dp_mst_reset_vcpi_slots(&mstm->mgr, mstc->port);
 
mstm->outp->update(mstm->outp, msto->head->base.index, NULL, 0, 0);
mstm->modified = true;
@@ -945,7 +943,7 @@ nv50_mstc_detect(struct drm_connector *connector, bool 
force)
enum drm_connector_status conn_status;
int ret;
 
-   if (!mstc->port)
+   if (drm_connector_is_unregistered(connector))
return connector_status_disconnected;
 
ret = pm_runtime_get_sync(connector->dev->dev);
@@ -966,8 +964,7 @@ nv50_mstc_destroy(struct drm_connector *connector)
struct nv50_mstc *mstc = nv50_mstc(connector);
 
drm_connector_cleanup(&mstc->connector);
-   if (mstc->port)
-   drm_dp_mst_put_port_malloc(mstc->port);
+   drm_dp_mst_put_port_malloc(mstc->port);
 
kfree(mstc);
 }
@@ -1081,11 +1078,6 @@ nv50_mstm_destroy_connector(struct 
drm_dp_mst_topology_mgr *mgr,
 
drm_fb_helper_remove_one_connector(&drm->fbcon->helper, 
&mstc->connector);
 
-   drm_modeset_lock(&drm->dev->mode_config.connection_mutex, NULL);
-   drm_dp_mst_put_port_malloc(mstc->port);
-   mstc->port = NULL;
-   drm_modeset_unlock(&drm->dev->mode_config.connection_mutex);
-
drm_connector_put(&mstc->connector);
 }
 
-- 
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[WIP PATCH 02/15] drm/dp_mst: Refactor drm_dp_update_payload_part1()

2018-12-13 Thread Lyude Paul
There should be no functional changes here

Signed-off-by: Lyude Paul 
Cc: Juston Li 
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 71 ---
 1 file changed, 42 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
b/drivers/gpu/drm/drm_dp_mst_topology.c
index 9b1b5c9b1fa0..2ab16c9e6243 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1879,39 +1879,48 @@ int drm_dp_update_payload_part1(struct 
drm_dp_mst_topology_mgr *mgr)
 
mutex_lock(&mgr->payload_lock);
for (i = 0; i < mgr->max_payloads; i++) {
+   struct drm_dp_vcpi *vcpi = mgr->proposed_vcpis[i];
+   struct drm_dp_payload *payload = &mgr->payloads[i];
+
/* solve the current payloads - compare to the hw ones
   - update the hw view */
req_payload.start_slot = cur_slots;
-   if (mgr->proposed_vcpis[i]) {
-   port = container_of(mgr->proposed_vcpis[i], struct 
drm_dp_mst_port, vcpi);
+   if (vcpi) {
+   port = container_of(vcpi, struct drm_dp_mst_port,
+   vcpi);
port = drm_dp_get_validated_port_ref(mgr, port);
if (!port) {
mutex_unlock(&mgr->payload_lock);
return -EINVAL;
}
-   req_payload.num_slots = 
mgr->proposed_vcpis[i]->num_slots;
-   req_payload.vcpi = mgr->proposed_vcpis[i]->vcpi;
+   req_payload.num_slots = vcpi->num_slots;
+   req_payload.vcpi = vcpi->vcpi;
} else {
port = NULL;
req_payload.num_slots = 0;
}
 
-   mgr->payloads[i].start_slot = req_payload.start_slot;
+   payload->start_slot = req_payload.start_slot;
/* work out what is required to happen with this payload */
-   if (mgr->payloads[i].num_slots != req_payload.num_slots) {
+   if (payload->num_slots != req_payload.num_slots) {
 
/* need to push an update for this payload */
if (req_payload.num_slots) {
-   drm_dp_create_payload_step1(mgr, 
mgr->proposed_vcpis[i]->vcpi, &req_payload);
-   mgr->payloads[i].num_slots = 
req_payload.num_slots;
-   mgr->payloads[i].vcpi = req_payload.vcpi;
-   } else if (mgr->payloads[i].num_slots) {
-   mgr->payloads[i].num_slots = 0;
-   drm_dp_destroy_payload_step1(mgr, port, 
mgr->payloads[i].vcpi, &mgr->payloads[i]);
-   req_payload.payload_state = 
mgr->payloads[i].payload_state;
-   mgr->payloads[i].start_slot = 0;
+   drm_dp_create_payload_step1(mgr, vcpi->vcpi,
+   &req_payload);
+   payload->num_slots = req_payload.num_slots;
+   payload->vcpi = req_payload.vcpi;
+
+   } else if (payload->num_slots) {
+   payload->num_slots = 0;
+   drm_dp_destroy_payload_step1(mgr, port,
+payload->vcpi,
+payload);
+   req_payload.payload_state =
+   payload->payload_state;
+   payload->start_slot = 0;
}
-   mgr->payloads[i].payload_state = 
req_payload.payload_state;
+   payload->payload_state = req_payload.payload_state;
}
cur_slots += req_payload.num_slots;
 
@@ -1920,22 +1929,26 @@ int drm_dp_update_payload_part1(struct 
drm_dp_mst_topology_mgr *mgr)
}
 
for (i = 0; i < mgr->max_payloads; i++) {
-   if (mgr->payloads[i].payload_state == DP_PAYLOAD_DELETE_LOCAL) {
-   DRM_DEBUG_KMS("removing payload %d\n", i);
-   for (j = i; j < mgr->max_payloads - 1; j++) {
-   memcpy(&mgr->payloads[j], &mgr->payloads[j + 
1], sizeof(struct drm_dp_payload));
-   mgr->proposed_vcpis[j] = mgr->proposed_vcpis[j 
+ 1];
-   if (mgr->proposed_vcpis[j] && 
mgr->proposed_vcpis[j]->num_slots) {
-   set_bit(j + 1, &mgr->payload_mask);
-   } else {
-   clear_bit(j + 1, &mgr->payload_mask);
-   

[WIP PATCH 06/15] drm/i915: Keep malloc references to MST ports

2018-12-13 Thread Lyude Paul
So that the ports stay around until we've destroyed the connectors, in
order to ensure that we don't pass an invalid pointer to any MST helpers
once we introduce the new MST VCPI helpers.

Signed-off-by: Lyude Paul 
---
 drivers/gpu/drm/i915/intel_connector.c | 4 
 drivers/gpu/drm/i915/intel_dp_mst.c| 2 ++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_connector.c 
b/drivers/gpu/drm/i915/intel_connector.c
index 18e370f607bc..37d2c644f4b8 100644
--- a/drivers/gpu/drm/i915/intel_connector.c
+++ b/drivers/gpu/drm/i915/intel_connector.c
@@ -95,6 +95,10 @@ void intel_connector_destroy(struct drm_connector *connector)
intel_panel_fini(&intel_connector->panel);
 
drm_connector_cleanup(connector);
+
+   if (intel_connector->port)
+   drm_dp_mst_put_port_malloc(intel_connector->port);
+
kfree(connector);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c 
b/drivers/gpu/drm/i915/intel_dp_mst.c
index f05427b74e34..4d6ced34d465 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -484,6 +484,8 @@ static struct drm_connector 
*intel_dp_add_mst_connector(struct drm_dp_mst_topolo
if (ret)
goto err;
 
+   drm_dp_mst_get_port_malloc(port);
+
return connector;
 
 err:
-- 
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[WIP PATCH 11/15] drm/nouveau: Grab payload lock in nv50_msto_payload()

2018-12-13 Thread Lyude Paul
Going through the currently programmed payloads isn't safe without
holding mgr->payload_lock, so actually do that and warn if anyone tries
calling nv50_msto_payload() in the future without grabbing the right
locks.

Signed-off-by: Lyude Paul 
---
 drivers/gpu/drm/nouveau/dispnv50/disp.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c 
b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 157d208d37b5..67f7bf97e5d9 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -680,6 +680,8 @@ nv50_msto_payload(struct nv50_msto *msto)
struct nv50_mstm *mstm = mstc->mstm;
int vcpi = mstc->port->vcpi.vcpi, i;
 
+   WARN_ON(!mutex_is_locked(&mstm->mgr.payload_lock));
+
NV_ATOMIC(drm, "%s: vcpi %d\n", msto->encoder.name, vcpi);
for (i = 0; i < mstm->mgr.max_payloads; i++) {
struct drm_dp_payload *payload = &mstm->mgr.payloads[i];
@@ -733,6 +735,8 @@ nv50_msto_prepare(struct nv50_msto *msto)
   (0x0100 << msto->head->base.index),
};
 
+   mutex_lock(&mstm->mgr.payload_lock);
+
NV_ATOMIC(drm, "%s: msto prepare\n", msto->encoder.name);
if (mstc->port->vcpi.vcpi > 0) {
struct drm_dp_payload *payload = nv50_msto_payload(msto);
@@ -748,7 +752,9 @@ nv50_msto_prepare(struct nv50_msto *msto)
  msto->encoder.name, msto->head->base.base.name,
  args.vcpi.start_slot, args.vcpi.num_slots,
  args.vcpi.pbn, args.vcpi.aligned_pbn);
+
nvif_mthd(&drm->display->disp.object, 0, &args, sizeof(args));
+   mutex_unlock(&mstm->mgr.payload_lock);
 }
 
 static int
-- 
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[WIP PATCH 05/15] drm/dp_mst: Fix payload deallocation on hotplugs using malloc refs

2018-12-13 Thread Lyude Paul
Up until now, freeing payloads on remote MST hubs that just had ports
removed has almost never worked because we've been relying on port
validation in order to stop us from accessing ports that have already
been freed from memory, but ports which need their payloads released due
to being removed will never be a valid part of the topology after
they've been removed.

Since we've introduced malloc refs, we can replace all of the validation
logic in payload helpers which are used for deallocation with some
well-placed malloc krefs. This ensures that regardless of whether or not
the ports are still valid and in the topology, any port which has an
allocated payload will remain allocated in memory until its payloads
have been removed - finally allowing us to actually release said
payloads correctly.

Signed-off-by: Lyude Paul 
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 54 +++
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
b/drivers/gpu/drm/drm_dp_mst_topology.c
index ae9d019af9f2..93f08bfd2ab3 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1989,10 +1989,6 @@ static int drm_dp_payload_send_msg(struct 
drm_dp_mst_topology_mgr *mgr,
u8 sinks[DRM_DP_MAX_SDP_STREAMS];
int i;
 
-   port = drm_dp_mst_topology_get_port_validated(mgr, port);
-   if (!port)
-   return -EINVAL;
-
port_num = port->port_num;
mstb = drm_dp_mst_topology_get_mstb_validated(mgr, port->parent);
if (!mstb) {
@@ -2000,10 +1996,8 @@ static int drm_dp_payload_send_msg(struct 
drm_dp_mst_topology_mgr *mgr,
   port->parent,
   &port_num);
 
-   if (!mstb) {
-   drm_dp_mst_topology_put_port(port);
+   if (!mstb)
return -EINVAL;
-   }
}
 
txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL);
@@ -2032,7 +2026,6 @@ static int drm_dp_payload_send_msg(struct 
drm_dp_mst_topology_mgr *mgr,
kfree(txmsg);
 fail_put:
drm_dp_mst_topology_put_mstb(mstb);
-   drm_dp_mst_topology_put_port(port);
return ret;
 }
 
@@ -2137,15 +2130,16 @@ static int drm_dp_destroy_payload_step2(struct 
drm_dp_mst_topology_mgr *mgr,
  */
 int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr)
 {
-   int i, j;
-   int cur_slots = 1;
struct drm_dp_payload req_payload;
struct drm_dp_mst_port *port;
+   int i, j;
+   int cur_slots = 1;
 
mutex_lock(&mgr->payload_lock);
for (i = 0; i < mgr->max_payloads; i++) {
struct drm_dp_vcpi *vcpi = mgr->proposed_vcpis[i];
struct drm_dp_payload *payload = &mgr->payloads[i];
+   bool put_port = false;
 
/* solve the current payloads - compare to the hw ones
   - update the hw view */
@@ -2153,12 +2147,20 @@ int drm_dp_update_payload_part1(struct 
drm_dp_mst_topology_mgr *mgr)
if (vcpi) {
port = container_of(vcpi, struct drm_dp_mst_port,
vcpi);
-   port = drm_dp_mst_topology_get_port_validated(mgr,
- port);
-   if (!port) {
-   mutex_unlock(&mgr->payload_lock);
-   return -EINVAL;
+
+   /* Validated ports don't matter if we're releasing
+* VCPI
+*/
+   if (vcpi->num_slots) {
+   port = drm_dp_mst_topology_get_port_validated(
+   mgr, port);
+   if (!port) {
+   mutex_unlock(&mgr->payload_lock);
+   return -EINVAL;
+   }
+   put_port = true;
}
+
req_payload.num_slots = vcpi->num_slots;
req_payload.vcpi = vcpi->vcpi;
} else {
@@ -2190,7 +2192,7 @@ int drm_dp_update_payload_part1(struct 
drm_dp_mst_topology_mgr *mgr)
}
cur_slots += req_payload.num_slots;
 
-   if (port)
+   if (put_port)
drm_dp_mst_topology_put_port(port);
}
 
@@ -3005,6 +3007,8 @@ bool drm_dp_mst_allocate_vcpi(struct 
drm_dp_mst_topology_mgr *mgr,
DRM_DEBUG_KMS("initing vcpi for pbn=%d slots=%d\n",
  pbn, port->vcpi.num_slots);
 
+   /* Keep port allocated until it's payload has been removed */
+   drm_dp_mst_get_port_malloc(port);
drm_dp_mst_topology_put_port(port);

[WIP PATCH 04/15] drm/dp_mst: Stop releasing VCPI when removing ports from topology

2018-12-13 Thread Lyude Paul
This has never actually worked, and isn't needed anyway: the driver's
always going to try to deallocate VCPI when it tears down the display
that the VCPI belongs to.

Signed-off-by: Lyude Paul 
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
b/drivers/gpu/drm/drm_dp_mst_topology.c
index c196fb580beb..ae9d019af9f2 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1084,8 +1084,6 @@ static void drm_dp_destroy_port(struct kref *kref)
struct drm_dp_mst_topology_mgr *mgr = port->mgr;
 
if (!port->input) {
-   port->vcpi.num_slots = 0;
-
kfree(port->cached_edid);
 
/*
@@ -3381,12 +3379,6 @@ static void drm_dp_destroy_connector_work(struct 
work_struct *work)
drm_dp_port_teardown_pdt(port, port->pdt);
port->pdt = DP_PEER_DEVICE_NONE;
 
-   if (!port->input && port->vcpi.vcpi > 0) {
-   drm_dp_mst_reset_vcpi_slots(mgr, port);
-   drm_dp_update_payload_part1(mgr);
-   drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
-   }
-
drm_dp_mst_put_port_malloc(port);
send_hotplug = true;
}
-- 
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[WIP PATCH 00/15] MST refcounting/atomic helpers cleanup

2018-12-13 Thread Lyude Paul
This is a WIP version of the series I've been working on for a while now
to get all of the atomic DRM drivers in the tree to use the atomic MST
helpers, and to make the atomic MST helpers actually idempotent. Turns
out it's a lot more difficult to do that without also fixing how port
and branch device refcounting works so that it actually makes sense,
since the current upstream implementation requires a ton of magic in the
atomic helpers to work around properly and in many situations just plain
doesn't work as intended.

This patch series is starting to get bigger, and since there's still a
few bits here and there regarding the new refcount implementation that I
haven't quite decided on yet I figured I should get an opinion from
everyone else.

Currently I've got a couple of thoughts on how I could improve this
further:

* Get rid of drm_dp_mst_get_*_validated() entirely - I'm 90% sure that
  with the new refcounting scheme we might not actually need port
  validation at all anymore, assuming we make the use of malloc references
  in all of the DRM drivers. Either way, I don't think validation was ever
  actually a concept that worked: without malloc references, the port or
  branch device that's being passed to drm_dp_mst_get_*_validated()
could be freed which also in turn means that the stale pointer
could in theory have gotten reused for a new port and thus cause us to
  consider a freed port validated.
* Get rid of drm_dp_mst_get_vcpi_slots() - with malloc references, I
  don't think there's any use for this either
* Get rid of drm_dp_mst_reset_vcpi_slots() - I think the only time this
  function ever made sense was with port validation? Honestly, I wonder
  if we ever needed this at all...

Note: I haven't applied some of the comments from the reviews for the
series this is based off of:

drm/dp_mst: Improve VCPI helpers, use in nouveau
https://patchwork.freedesktop.org/series/51414/

This is just getting put on the ML so I can get some feedback on this.

Lyude Paul (15):
  drm/dp_mst: Remove bogus conditional in drm_dp_update_payload_part1()
  drm/dp_mst: Refactor drm_dp_update_payload_part1()
  drm/dp_mst: Introduce new refcounting scheme for mstbs and ports
  drm/dp_mst: Stop releasing VCPI when removing ports from topology
  drm/dp_mst: Fix payload deallocation on hotplugs using malloc refs
  drm/i915: Keep malloc references to MST ports
  drm/nouveau: Remove bogus cleanup in nv50_mstm_add_connector()
  drm/nouveau: Remove unnecessary VCPI checks in nv50_msto_cleanup()
  drm/nouveau: Fix potential use-after-frees for MSTCs
  drm/nouveau: Stop unsetting mstc->port, use malloc refs
  drm/nouveau: Grab payload lock in nv50_msto_payload()
  drm/dp_mst: Add some atomic state iterator macros
  drm/dp_mst: Start tracking per-port VCPI allocations
  drm/dp_mst: Check payload count in drm_dp_mst_atomic_check()
  drm/nouveau: Use atomic VCPI helpers for MST

 .../gpu/dp-mst/topology-figure-1.dot  |  31 +
 .../gpu/dp-mst/topology-figure-2.dot  |  37 +
 .../gpu/dp-mst/topology-figure-3.dot  |  40 +
 Documentation/gpu/drm-kms-helpers.rst | 125 ++-
 drivers/gpu/drm/drm_dp_mst_topology.c | 910 ++
 drivers/gpu/drm/i915/intel_connector.c|   4 +
 drivers/gpu/drm/i915/intel_display.c  |   4 +
 drivers/gpu/drm/i915/intel_dp_mst.c   |  66 +-
 drivers/gpu/drm/nouveau/dispnv50/disp.c   |  94 +-
 include/drm/drm_dp_mst_helper.h   | 139 ++-
 10 files changed, 1178 insertions(+), 272 deletions(-)
 create mode 100644 Documentation/gpu/dp-mst/topology-figure-1.dot
 create mode 100644 Documentation/gpu/dp-mst/topology-figure-2.dot
 create mode 100644 Documentation/gpu/dp-mst/topology-figure-3.dot

-- 
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[WIP PATCH 03/15] drm/dp_mst: Introduce new refcounting scheme for mstbs and ports

2018-12-13 Thread Lyude Paul
The current way of handling refcounting in the DP MST helpers is really
confusing and probably just plain wrong because it's been hacked up many
times over the years without anyone actually going over the code and
seeing if things could be simplified.

To the best of my understanding, the current scheme works like this:
drm_dp_mst_port and drm_dp_mst_branch both have a single refcount. When
this refcount hits 0 for either of the two, they're removed from the
topology state, but not immediately freed. Both ports and branch devices
will reinitialize their kref once it's hit 0 before actually destroying
themselves. The intended purpose behind this is so that we can avoid
problems like not being able to free a remote payload that might still
be active, due to us having removed all of the port/branch device
structures in memory, as per:

91a25e463130 ("drm/dp/mst: deallocate payload on port destruction")

Which may have worked, but then it caused use-after-free errors. Being
new to MST at the time, I tried fixing it;

263efde31f97 ("drm/dp/mst: Get validated port ref in 
drm_dp_update_payload_part1()")

But, that was broken: both drm_dp_mst_port and drm_dp_mst_branch structs
are validated in almost every DP MST helper function. Simply put, this
means we go through the topology and try to see if the given
drm_dp_mst_branch or drm_dp_mst_port is still attached to something
before trying to use it in order to avoid dereferencing freed memory
(something that has happened a LOT in the past with this library).
Because of this it doesn't actually matter whether or not we keep
the ports and branches around in memory as that's not enough, because
any function that validates the branches and ports passed to it will
still reject them anyway since they're no longer in the topology
structure. So, use-after-free errors were fixed but payload deallocation
was completely broken.

Two years later, AMD informed me about this issue and I attempted to
come up with a temporary fix, pending a long-overdue cleanup of this
library:

c54c7374ff44 ("drm/dp_mst: Skip validating ports during destruction, just ref")

But then that introduced use-after-free errors, so I quickly reverted
it:

9765635b3075 ("Revert "drm/dp_mst: Skip validating ports during destruction, 
just ref"")

And in the process, learned that there is just no simple fix for this:
the design is just broken. Unfortunately, the usage of these helpers is
quite broken as well. Some drivers like i915 have been smart enough to
avoid accessing any kind of information from MST port structures, but
others like nouveau have assumed, understandably so, that
drm_dp_mst_port structures are normal and can just be accessed at any
time without worrying about use-after-free errors.

After a lot of discussion, me and Daniel Vetter came up with a better
idea to replace all of this.

To summarize, since this is documented far more indepth in the
documentation this patch introduces, we make it so that drm_dp_mst_port
and drm_dp_mst_branch structures have two different classes of
refcounts: topology_kref, and malloc_kref. topology_kref corresponds to
the lifetime of the given drm_dp_mst_port or drm_dp_mst_branch in its
given topology. Once it hits zero, any associated connectors are removed
and the branch or port can no longer be validated. malloc_kref
corresponds to the lifetime of the memory allocation for the actual
structure, and will always be non-zero so long as the topology_kref is
non-zero. This gives us a way to allow callers to hold onto port and
branch device structures past their topology lifetime, and dramatically
simplifies the lifetimes of both structures. This also finally fixes the
port deallocation problem, properly.

Additionally: since this now means that we can keep ports and branch
devices allocated in memory for however long we need, we no longer need
a significant amount of the port validation that we currently do.

Additionally, there is one last scenario that this fixes, which couldn't
have been fixed properly beforehand:

- CPU1 unrefs port from topology (refcount 1->0)
- CPU2 refs port in topology(refcount 0->1)

Since we now can guarantee memory safety for ports and branches
as-needed, we also can make our main reference counting functions fix
this problem by using kref_get_unless_zero() internally so that topology
refcounts can only ever reach 0 once.

Signed-off-by: Lyude Paul 
Cc: Daniel Vetter 
Cc: David Airlie 
Cc: Jerry Zuo 
Cc: Harry Wentland 
Cc: Juston Li 
---
 .../gpu/dp-mst/topology-figure-1.dot  |  31 ++
 .../gpu/dp-mst/topology-figure-2.dot  |  37 ++
 .../gpu/dp-mst/topology-figure-3.dot  |  40 ++
 Documentation/gpu/drm-kms-helpers.rst | 125 -
 drivers/gpu/drm/drm_dp_mst_topology.c | 512 +-
 include/drm/drm_dp_mst_helper.h   |  19 +-
 6 files changed, 637 insertions(+), 127 deletions(-)
 create mode 100644 Documentation/gpu/dp-mst/topology-figure-1.dot
 create mode 100644 Do

[WIP PATCH 01/15] drm/dp_mst: Remove bogus conditional in drm_dp_update_payload_part1()

2018-12-13 Thread Lyude Paul
There's no reason we need this, it's just confusing looking.

Signed-off-by: Lyude Paul 
Cc: Juston Li 
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
b/drivers/gpu/drm/drm_dp_mst_topology.c
index ad0fb6d003be..9b1b5c9b1fa0 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1896,9 +1896,7 @@ int drm_dp_update_payload_part1(struct 
drm_dp_mst_topology_mgr *mgr)
req_payload.num_slots = 0;
}
 
-   if (mgr->payloads[i].start_slot != req_payload.start_slot) {
-   mgr->payloads[i].start_slot = req_payload.start_slot;
-   }
+   mgr->payloads[i].start_slot = req_payload.start_slot;
/* work out what is required to happen with this payload */
if (mgr->payloads[i].num_slots != req_payload.num_slots) {
 
-- 
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6

2018-12-13 Thread Yang, Philip
I thought to change the filename to amdgpu_hmm.c/h, then change the data 
structure,
variables, function name from amdgpu_mn_* to amdgpu_hmm_*, seems too 
many changes,
so I gave up that change.

Philip

On 2018-12-07 7:00 a.m., Zhou, David(ChunMing) wrote:
> Even you should rename amdgpu_mn.c/h to amdgpu_hmm.c/h.
>
> -David
>
>> -Original Message-
>> From: amd-gfx  On Behalf Of Yang,
>> Philip
>> Sent: Friday, December 07, 2018 5:03 AM
>> To: amd-gfx@lists.freedesktop.org
>> Cc: Yang, Philip 
>> Subject: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu
>> notifier v6
>>
>> Replace our MMU notifier with
>> hmm_mirror_ops.sync_cpu_device_pagetables
>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a
>> dependency in DRM_AMDGPU_USERPTR Kconfig.
>>
>> It supports both KFD userptr and gfx userptr paths.
>>
>> The depdent HMM patchset from Jérôme Glisse are all merged into 4.20.0
>> kernel now.
>>
>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>> Signed-off-by: Philip Yang 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Kconfig |   6 +-
>>   drivers/gpu/drm/amd/amdgpu/Makefile|   2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 122 ++-
>> --
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>>   4 files changed, 55 insertions(+), 77 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> index 9221e5489069..960a63355705 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK  config
>> DRM_AMDGPU_USERPTR
>>  bool "Always enable userptr write support"
>>  depends on DRM_AMDGPU
>> -select MMU_NOTIFIER
>> +select HMM_MIRROR
>>  help
>> -  This option selects CONFIG_MMU_NOTIFIER if it isn't already
>> -  selected to enabled full userptr support.
>> +  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>> +  isn't already selected to enabled full userptr support.
>>
>>   config DRM_AMDGPU_GART_DEBUGFS
>>  bool "Allow GART access through debugfs"
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index f76bcb9c45e4..675efc850ff4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -172,7 +172,7 @@ endif
>>   amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>   amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>   amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>> +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
>>
>>   include $(FULL_AMD_PATH)/powerplay/Makefile
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index e55508b39496..5d518d2bb9be 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -45,7 +45,7 @@
>>
>>   #include 
>>   #include 
>> -#include 
>> +#include 
>>   #include 
>>   #include 
>>   #include 
>> @@ -58,7 +58,6 @@
>>*
>>* @adev: amdgpu device pointer
>>* @mm: process address space
>> - * @mn: MMU notifier structure
>>* @type: type of MMU notifier
>>* @work: destruction work item
>>* @node: hash table node to find structure by adev and mn @@ -66,6 +65,7
>> @@
>>* @objects: interval tree containing amdgpu_mn_nodes
>>* @read_lock: mutex for recursive locking of @lock
>>* @recursion: depth of recursion
>> + * @mirror: HMM mirror function support
>>*
>>* Data for each amdgpu device and process address space.
>>*/
>> @@ -73,7 +73,6 @@ struct amdgpu_mn {
>>  /* constant after initialisation */
>>  struct amdgpu_device*adev;
>>  struct mm_struct*mm;
>> -struct mmu_notifier mn;
>>  enum amdgpu_mn_type type;
>>
>>  /* only used on destruction */
>> @@ -87,6 +86,9 @@ struct amdgpu_mn {
>>  struct rb_root_cached   objects;
>>  struct mutexread_lock;
>>  atomic_trecursion;
>> +
>> +/* HMM mirror */
>> +struct hmm_mirror   mirror;
>>   };
>>
>>   /**
>> @@ -103,7 +105,7 @@ struct amdgpu_mn_node {  };
>>
>>   /**
>> - * amdgpu_mn_destroy - destroy the MMU notifier
>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>*
>>* @work: previously sheduled work item
>>*
>> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct
>> *work)
>>  }
>>  up_write(&amn->lock);
>>  mutex_unlock(&adev->mn_lock);
>> -mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>> +
>> +hmm_mirror_unregister(&amn->mirror);
>>  kfree(amn);
>>   }
>>
>>   /**
>> - * amdgpu_mn_release - callback to notify about mm destruction
>> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
>>*
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> + * @mirror: the HMM mirror (mm) this callback is about
>>*
>> - * Shedule a work item to

[PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v4

2018-12-13 Thread Yang, Philip
Use HMM helper function hmm_vma_fault() to get physical pages backing
userptr and start CPU page table update track of those pages. Then use
hmm_vma_range_done() to check if those pages are updated before
amdgpu_cs_submit for gfx or before user queues are resumed for kfd.

If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart
from scratch, for kfd, restore worker is rescheduled to retry.

HMM simplifies the CPU page table concurrent update check, so remove
guptasklock, mmu_invalidations, last_set_pages fields from
amdgpu_ttm_tt struct.

HMM does not pin the page (increase page ref count), so remove related
operations like release_pages(), put_page(), mark_page_dirty().

Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67
Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   1 -
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  95 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c   |   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 185 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c|  27 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 166 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|   1 -
 11 files changed, 208 insertions(+), 294 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 70429f7aa9a8..717791d4fa45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -62,7 +62,6 @@ struct kgd_mem {
 
atomic_t invalid;
struct amdkfd_process_info *process_info;
-   struct page **user_pages;
 
struct amdgpu_sync sync;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index be1ab43473c6..2897793600f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct 
mm_struct *mm,
goto out;
}
 
-   /* If no restore worker is running concurrently, user_pages
-* should not be allocated
-*/
-   WARN(mem->user_pages, "Leaking user_pages array");
-
-   mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
-  sizeof(struct page *),
-  GFP_KERNEL | __GFP_ZERO);
-   if (!mem->user_pages) {
-   pr_err("%s: Failed to allocate pages array\n", __func__);
-   ret = -ENOMEM;
-   goto unregister_out;
-   }
-
-   ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+   ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
-   goto free_out;
+   goto unregister_out;
}
 
-   amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
-
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
pr_err("%s: Failed to reserve BO\n", __func__);
@@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct 
mm_struct *mm,
amdgpu_bo_unreserve(bo);
 
 release_out:
-   if (ret)
-   release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-free_out:
-   kvfree(mem->user_pages);
-   mem->user_pages = NULL;
+   amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 unregister_out:
if (ret)
amdgpu_mn_unregister(bo);
@@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
-   ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
@@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
-   ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
i = 0;
@@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
 
-   /* Free user pages if necessary */
-   if (mem->user_pages) {
-   pr_debug("%s: Freeing user_pages array\n", __func__);
-   if (mem->user_pages[0])
-   release_pages(mem->user_pages,
-   mem->bo->tbo.ttm->num_pages);
-   kvfree(mem->user_pages);
-   }
-
ret = reserve_bo_and_c

Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates

2018-12-13 Thread Kazlauskas, Nicholas
On 12/13/18 2:21 PM, Kazlauskas, Nicholas wrote:
> On 12/13/18 11:01 AM, Kazlauskas, Nicholas wrote:
>> On 12/13/18 10:48 AM, Michel Dänzer wrote:
>>> On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:
 [Why]
 Legacy cursor plane updates from drm helpers go through the full
 atomic codepath. A high volume of cursor updates through this slow
 code path can cause subsequent page-flips to skip vblank intervals
 since each individual update is slow.

 This problem is particularly noticeable for the compton compositor.

 [How]
 A fast path for cursor plane updates is added by using DRM asynchronous
 commit support provided by async_check and async_update. These don't do
 a full state/flip_done dependency stall and they don't block other
 commit work.

 However, DC still expects itself to be single-threaded for anything
 that can issue register writes. Screen corruption or hangs can occur
 if write sequences overlap. Every call that potentially perform
 register writes needs to be guarded for asynchronous updates to work.
 The dc_lock mutex was added for this.

 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175

 Cc: Leo Li 
 Cc: Harry Wentland 
 Signed-off-by: Nicholas Kazlauskas 
>>>
>>> Looks like this change introduced (or at least exposed) a reference
>>> counting bug resulting in use-after-free when Xorg shuts down[0]. See
>>> the attached dmesg excerpt (note that I wrapped the !bo->pin_count check
>>> in amdgpu_bo_unpin in WARN_ON_ONCE).
>>>
>>>
>>> [0] Only with
>>> https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4
>>> , i.e. alternating between two BOs for the HW cursor, instead of always
>>> using the same one.
>>>
>>>
>>
>> Thanks for the heads up, I don't think I had that patch in my
>> xf86-video-amdgpu when testing the desktop stack.
>>
>> The async atomic helper does the:
>>
>> drm_atomic_helper_prepare_planes
>> drm_atomic_helper_async_commit
>> drm_atomic_helper_cleanup_planes
>>
>> ...sequence correctly from what I can tell, so maybe it's something with
>> dm_plane_helper_prepare_fb or dm_plane_helper_cleanup_fb itself.
>>
>> One case where unref could be called (not following a ref) is during
>> drm_atomic_helper_prepare_planes - if prepare_fb fails then cleanup_fb
>> gets called regardless, and we only ref the fb if prepare_fb is in the
>> success path.
>>
>> Nicholas Kazlauskas
>> ___
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
> 
> The prepare_fb/cleanup_fb calls are actually fine since cleanup_fb only
> gets called on planes that had prepare_fb succeed in all cases as far as
> I can tell.
> 
> I think the bug here might be forgetting to set the plane->state to the
> new_state. The cleanup fb callback decides whether to call it on the old
> plane state or new plane state depending on if the commit was aborted or
> not. I think every fast plane update might be treated as aborted in this
> case.
> 
> Nicholas Kazlauskas
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 

This is a bug with DRM, actually.

Typically for a regular atomic commit the prepare_fb callback is called 
for the new_plane_state and cleanup_fb is called for the old_plane_state 
at the end of commit tail.

However, for asynchronous commits this isn't the same - prepare_fb is 
called for new_plane_state and cleanup_fb is then immediately called 
after also for the new_plane_state.

Looking at your stack trace I can see that this is exactly what causes 
the use-after-free.

The CRTC has changed, so it's stuck in the slow path (commit_tail is in 
the trace). However, the plane->state->fb has already been unpinned and 
unreferenced. But the plane->state->fb is *not* NULL from the previous fast 
update, so when it gets to cleaning up the planes and tries to free the 
old_plane_state, it unpins and unrefs the bo a second time.

Then a new fast cursor update comes along (and the fb hasn't changed) so 
it tries to prepare_fb on the same freed bo.

Nicholas Kazlauskas
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v3

2018-12-13 Thread Yang, Philip
On 2018-12-10 7:12 p.m., Kuehling, Felix wrote:
> This is a nice improvement from the last version. I still see some
> potential problems. See inline ...
>
> I'm skipping over the CS and GEM parts. I hope Christian can review
> those parts.
>
> On 2018-12-06 4:02 p.m., Yang, Philip wrote:
>> Use HMM helper function hmm_vma_fault() to get physical pages backing
>> userptr and start CPU page table update track of those pages. Then use
>> hmm_vma_range_done() to check if those pages are updated before
>> amdgpu_cs_submit for gfx or before user queues are resumed for kfd.
>>
>> If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart
>> from scratch, for kfd, restore worker is rescheduled to retry.
>>
>> HMM simplify the CPU page table concurrent update check, so remove
>> guptasklock, mmu_invalidations, last_set_pages fields from
>> amdgpu_ttm_tt struct.
>>
>> HMM does not pin the page (increase page ref count), so remove related
>> operations like release_pages(), put_page(), mark_page_dirty().
>>
>> Change-Id: Id2d3130378b44a774e0d77156102a20a203b5036
>> Signed-off-by: Philip Yang 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   1 -
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  81 ++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c   |   2 -
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   3 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 188 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  14 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c|  34 +++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|   7 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 160 ++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |   4 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|   1 -
>>   11 files changed, 206 insertions(+), 289 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index bcf587b4ba98..b492dd9e541a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -62,7 +62,6 @@ struct kgd_mem {
>>   
>>  atomic_t invalid;
>>  struct amdkfd_process_info *process_info;
>> -struct page **user_pages;
>>   
>>  struct amdgpu_sync sync;
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index b29ef088fa14..75833b40507b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -580,28 +580,12 @@ static int init_user_pages(struct kgd_mem *mem, struct 
>> mm_struct *mm,
>>  goto out;
>>  }
>>   
>> -/* If no restore worker is running concurrently, user_pages
>> - * should not be allocated
>> - */
>> -WARN(mem->user_pages, "Leaking user_pages array");
>> -
>> -mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
>> -   sizeof(struct page *),
>> -   GFP_KERNEL | __GFP_ZERO);
>> -if (!mem->user_pages) {
>> -pr_err("%s: Failed to allocate pages array\n", __func__);
>> -ret = -ENOMEM;
>> -goto unregister_out;
>> -}
>> -
>> -ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
>> +ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
>>  if (ret) {
>>  pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
>> -goto free_out;
>> +goto unregister_out;
>>  }
>>   
>> -amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
>> -
>>  ret = amdgpu_bo_reserve(bo, true);
>>  if (ret) {
>>  pr_err("%s: Failed to reserve BO\n", __func__);
>> @@ -614,11 +598,7 @@ static int init_user_pages(struct kgd_mem *mem, struct 
>> mm_struct *mm,
>>  amdgpu_bo_unreserve(bo);
>>   
>>   release_out:
>> -if (ret)
>> -release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
>> -free_out:
>> -kvfree(mem->user_pages);
>> -mem->user_pages = NULL;
>> +amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>>   unregister_out:
>>  if (ret)
>>  amdgpu_mn_unregister(bo);
>> @@ -677,7 +657,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
>>  ctx->kfd_bo.priority = 0;
>>  ctx->kfd_bo.tv.bo = &bo->tbo;
>>  ctx->kfd_bo.tv.num_shared = 1;
>> -ctx->kfd_bo.user_pages = NULL;
>>  list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>>   
>>  amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
>> @@ -741,7 +720,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
>>  ctx->kfd_bo.priority = 0;
>>  ctx->kfd_bo.tv.bo = &bo->tbo;
>>  ctx->kfd_bo.tv.num_shared = 1;
>> -ctx->kfd_bo.user_pages = NULL;
>>  list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>>   
>>  i = 0;
>> @@ -1329,15 +1307,6 @@ int amdgpu_amdkfd_gpuvm_fr

Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates

2018-12-13 Thread Kazlauskas, Nicholas
On 12/13/18 11:01 AM, Kazlauskas, Nicholas wrote:
> On 12/13/18 10:48 AM, Michel Dänzer wrote:
>> On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:
>>> [Why]
>>> Legacy cursor plane updates from drm helpers go through the full
>>> atomic codepath. A high volume of cursor updates through this slow
>>> code path can cause subsequent page-flips to skip vblank intervals
>>> since each individual update is slow.
>>>
>>> This problem is particularly noticeable for the compton compositor.
>>>
>>> [How]
>>> A fast path for cursor plane updates is added by using DRM asynchronous
>>> commit support provided by async_check and async_update. These don't do
>>> a full state/flip_done dependency stall and they don't block other
>>> commit work.
>>>
>>> However, DC still expects itself to be single-threaded for anything
>>> that can issue register writes. Screen corruption or hangs can occur
>>> if write sequences overlap. Every call that potentially perform
>>> register writes needs to be guarded for asynchronous updates to work.
>>> The dc_lock mutex was added for this.
>>>
>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>>
>>> Cc: Leo Li 
>>> Cc: Harry Wentland 
>>> Signed-off-by: Nicholas Kazlauskas 
>>
>> Looks like this change introduced (or at least exposed) a reference
>> counting bug resulting in use-after-free when Xorg shuts down[0]. See
>> the attached dmesg excerpt (note that I wrapped the !bo->pin_count check
>> in amdgpu_bo_unpin in WARN_ON_ONCE).
>>
>>
>> [0] Only with
>> https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4
>> , i.e. alternating between two BOs for the HW cursor, instead of always
>> using the same one.
>>
>>
> 
> Thanks for the heads up, I don't think I had that patch in my
> xf86-video-amdgpu when testing the desktop stack.
> 
> The async atomic helper does the:
> 
> drm_atomic_helper_prepare_planes
> drm_atomic_helper_async_commit
> drm_atomic_helper_cleanup_planes
> 
> ...sequence correctly from what I can tell, so maybe it's something with
> dm_plane_helper_prepare_fb or dm_plane_helper_cleanup_fb itself.
> 
> One case where unref could be called (not following a ref) is during
> drm_atomic_helper_prepare_planes - if prepare_fb fails then cleanup_fb
> gets called regardless, and we only ref the fb if prepare_fb is in the
> success path.
> 
> Nicholas Kazlauskas
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 

The prepare_fb/cleanup_fb calls are actually fine since cleanup_fb only 
gets called on planes that had prepare_fb succeed in all cases as far as 
I can tell.

I think the bug here might be forgetting to set the plane->state to the 
new_state. The cleanup fb callback decides whether to call it on the old 
plane state or new plane state depending on if the commit was aborted or 
not. I think every fast plane update might be treated as aborted in this 
case.

Nicholas Kazlauskas
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Intel-gfx] [PATCH 06/10] drm/syncobj: use the timeline point in drm_syncobj_find_fence v3

2018-12-13 Thread Koenig, Christian
Am 13.12.18 um 18:26 schrieb Daniel Vetter:
>>> Code sharing just because the code looks similar is imo a really
>>> bad idea, when the semantics are entirely different (that was also the
>>> reason behind not reusing all the cpu event stuff for dma_fence, they're
>>> not normal cpu events).
>> Ok, the last sentence is what I don't understand.
>>
>> What exactly is the semantic difference between the dma_fence_wait and
>> the wait_event interface?
>>
>> I mean the wait_event interface was introduced to prevent drivers from
>> openly coding an event interface and getting it wrong all the time.
>>
>> So a good part of the bugs we have seen around waiting for dma-fences
>> are exactly why wait_event was invented in the first place.
>>
>> The only big thing I can see missing in the wait_event interface is
>> waiting for many events at the same time, but that should be a rather
>> easy addition.
> So this bikeshed was years ago, maybe I should type a patch to
> document it, but as far as I remember the big difference is:
>
> - wait_event and friends generally Just Work. It can go wrong of
> course, but the usual pattern is that the waker-side does and
> uncoditional wake_up_all, and hence all the waiter needs to do is add
> themselves to the waiter list.
>
> - dma_buf otoh is entirely different: We wanted to support all kinds
> fo signalling modes, including having interrupts disabled by default
> (not sure whether we actually achieve this still with all the cpu-side
> scheduling the big drivers do). Which means the waker does not
> unconditionally call wake_up_all, at least not timeline, and waiters
> need to call dma_fence_enable_signalling before they can add
> themselves to the waiter list and call schedule().

Well that is not something I'm questioning because we really need this 
behavior as well.

But all of this can be perfectly implemented on top of wake_up_all.

> The other bit difference is how you check for the classic wakeup races
> where the event happens between when you checked for it and when you
> go to sleep. Because hw is involved, the rules are again a bit
> different, and their different between drivers because hw is
> incoherent/broken in all kinds of ways. So there's also really tricky
> things going on between adding the waiter to the waiter list and
> dma_fence_enable_signalling. For pure cpu events you can ignore this
> and bake the few necessary barriers into the various macros, dma_fence
> needs more.

Ah, yes I think I know what you mean with that and I also consider this 
a bad idea as well.

Only very few drivers actually need this behavior and the ones who do 
should be perfectly able to implement this inside the driver code.

The crux is that leaking this behavior into the dma-fence made it 
unnecessarily complicated and results in quite a bit of unnecessary 
irq_work and delayed_work usage.

I will take a look at this over the holidays. Shouldn't be too hard to 
fix and it actually has some value in addition to being just a nice cleanup.

Regards,
Christian.

>
> Adding Maarten, maybe there was more. I definitely remember huge&very
> long discussions about all this.
> -Daniel

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2 6/6] drm/amdgpu/vcn:Remove bit 31 for scratch2 to indicate the WA is active

2018-12-13 Thread Zhu, James
Remove bit 31 of scratch2, which is used to indicate that the hardware bug workaround is active.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 564ed94..eb6b783 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -279,7 +279,7 @@ static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device 
*adev,
 
ring = &adev->vcn.ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
-  RREG32_SOC15(UVD, 0, 
mmUVD_SCRATCH2));
+  RREG32_SOC15(UVD, 0, 
mmUVD_SCRATCH2) & 0x7FFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
   
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
   
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
@@ -339,7 +339,7 @@ static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device 
*adev,
 
ring = &adev->vcn.ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
-  RREG32_SOC15(UVD, 0, 
mmUVD_SCRATCH2));
+  RREG32_SOC15(UVD, 0, 
mmUVD_SCRATCH2) & 0x7FFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
   
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
   
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Intel-gfx] [PATCH 06/10] drm/syncobj: use the timeline point in drm_syncobj_find_fence v3

2018-12-13 Thread Daniel Vetter
On Thu, Dec 13, 2018 at 5:47 PM Koenig, Christian
 wrote:
>
> Am 13.12.18 um 17:01 schrieb Daniel Vetter:
> > On Thu, Dec 13, 2018 at 12:24:57PM +, Koenig, Christian wrote:
> >> Am 13.12.18 um 13:21 schrieb Chris Wilson:
> >>> Quoting Koenig, Christian (2018-12-13 12:11:10)
>  Am 13.12.18 um 12:37 schrieb Chris Wilson:
> > Quoting Chunming Zhou (2018-12-11 10:34:45)
> >> From: Christian König 
> >>
> >> Implement finding the right timeline point in drm_syncobj_find_fence.
> >>
> >> v2: return -EINVAL when the point is not submitted yet.
> >> v3: fix reference counting bug, add flags handling as well
> >>
> >> Signed-off-by: Christian König 
> >> ---
> >> drivers/gpu/drm/drm_syncobj.c | 43 
> >> ---
> >> 1 file changed, 40 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/drm_syncobj.c 
> >> b/drivers/gpu/drm/drm_syncobj.c
> >> index 76ce13dafc4d..d964b348ecba 100644
> >> --- a/drivers/gpu/drm/drm_syncobj.c
> >> +++ b/drivers/gpu/drm/drm_syncobj.c
> >> @@ -231,16 +231,53 @@ int drm_syncobj_find_fence(struct drm_file 
> >> *file_private,
> >>   struct dma_fence **fence)
> >> {
> >>struct drm_syncobj *syncobj = 
> >> drm_syncobj_find(file_private, handle);
> >> -   int ret = 0;
> >> +   struct syncobj_wait_entry wait;
> >> +   int ret;
> >>
> >>if (!syncobj)
> >>return -ENOENT;
> >>
> >>*fence = drm_syncobj_fence_get(syncobj);
> >> -   if (!*fence) {
> >> +   drm_syncobj_put(syncobj);
> >> +
> >> +   if (*fence) {
> >> +   ret = dma_fence_chain_find_seqno(fence, point);
> >> +   if (!ret)
> >> +   return 0;
> >> +   dma_fence_put(*fence);
> >> +   } else {
> >>ret = -EINVAL;
> >>}
> >> -   drm_syncobj_put(syncobj);
> >> +
> >> +   if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
> >> +   return ret;
> >> +
> >> +   memset(&wait, 0, sizeof(wait));
> >> +   wait.task = current;
> >> +   wait.point = point;
> >> +   drm_syncobj_fence_add_wait(syncobj, &wait);
> >> +
> >> +   do {
> >> +   set_current_state(TASK_INTERRUPTIBLE);
> >> +   if (wait.fence) {
> >> +   ret = 0;
> >> +   break;
> >> +   }
> >> +
> >> +   if (signal_pending(current)) {
> >> +   ret = -ERESTARTSYS;
> >> +   break;
> >> +   }
> >> +
> >> +   schedule();
> >> +   } while (1);
> > I've previously used a dma_fence_proxy so that we could do nonblocking
> > waits on future submits. That would be preferrable (a requirement for
> > our stupid BKL-driven code).
>  That is exactly what I would definitely NAK.
> 
>  I would rather say we should come up with a wait_multiple_events() macro
>  and completely nuke the custom implementation of this in:
>  1. dma_fence_default_wait and dma_fence_wait_any_timeout
>  2. the radeon fence implementation
>  3. the nouveau fence implementation
>  4. the syncobj code
> 
>  Cause all of them do exactly the same. The dma_fence implementation
>  unfortunately came up with a custom event handling mechanism instead of
>  extending the core Linux wait_event() system.
> >>> I don't want a blocking wait at all.
> >> Ok I wasn't clear enough :) That is exactly what I would NAK!
> >>
> >> The wait must be blocking or otherwise you would allow wait-before-signal.
> > Well the current implementation is pulling a rather big trick on readers
> > in this regard: It looks like a dma_fence, it's implemented as one even,
> > heck you even open-code a dma_fence_wait here.
> >
> > Except the semantics are completely different.
> >
> > So aside from the discussion whether we really want to fully chain them I
> > think it just doesn't make sense to implement the "wait for fence submit"
> > as a dma_fence wait. And I'd outright remove that part from the uapi, and
> > force the wait. The current radv/anv plans I discussed with Jason was that
> > we'd have a separate submit thread, and hence unconditionally blocking
> > until the fence has materialized is the right thing to do. Even allowing
> > that option, either through a flag, or making these things look like
> > dma_fences (they are _not_) just tricks folks into misunderstanding what's
> > going on.
>
> Good, that sounds strongly like something I can agree on as well.
>
> > Code sharing just because the code looks similar is imo a really
> > bad idea, when the semantics are entirely different

Re: [PATCH 2/4] drm/amdgpu: update the vm invalidation engine layout

2018-12-13 Thread Kuehling, Felix
Some nit-picks inline.

On 2018-12-12 11:09 p.m., Evan Quan wrote:
> We need new invalidation engine layout due to new SDMA page
> queues added.
>
> Change-Id: I2f3861689bffb9828c9eae744a7a0de4963ac2b6
> Signed-off-by: Evan Quan 
> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 47 ++-
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h | 10 ++
>  2 files changed, 35 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index dc4dadc3575c..092a4d111557 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -719,37 +719,40 @@ static bool gmc_v9_0_keep_stolen_memory(struct 
> amdgpu_device *adev)
>   }
>  }
>  
> -static int gmc_v9_0_late_init(void *handle)
> +static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)

The function returns int. But only ever returns 0 and the caller ignores
the return value.


>  {
> - struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> - /*
> -  * The latest engine allocation on gfx9 is:
> -  * Engine 0, 1: idle
> -  * Engine 2, 3: firmware
> -  * Engine 4~13: amdgpu ring, subject to change when ring number changes
> -  * Engine 14~15: idle
> -  * Engine 16: kfd tlb invalidation
> -  * Engine 17: Gart flushes
> -  */
> - unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
> + struct amdgpu_ring *ring;
> + unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
> + {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP};
>   unsigned i;
> - int r;
> + unsigned vmhub, inv_eng;
>  
> - if (!gmc_v9_0_keep_stolen_memory(adev))
> - amdgpu_bo_late_init(adev);
> + for (i = 0; i < adev->num_rings; ++i) {
> + ring = adev->rings[i];
> + vmhub = ring->funcs->vmhub;
> +
> + inv_eng = ffs(vm_inv_engs[vmhub]);
> + BUG_ON(!inv_eng);

Adding new BUG_ONs is discouraged. checkpatch.pl always warns about
these. Errors that can be handled more gracefully shouldn't use a
BUG_ON. E.g. use a WARN_ON and return an error code.


>  
> - for(i = 0; i < adev->num_rings; ++i) {
> - struct amdgpu_ring *ring = adev->rings[i];
> - unsigned vmhub = ring->funcs->vmhub;
> + ring->vm_inv_eng = inv_eng - 1;
> + change_bit(inv_eng - 1, (unsigned long *)(&vm_inv_engs[vmhub]));
>  
> - ring->vm_inv_eng = vm_inv_eng[vmhub]++;
>   dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
>ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
>   }
>  
> - /* Engine 16 is used for KFD and 17 for GART flushes */
> - for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
> - BUG_ON(vm_inv_eng[i] > 16);
> + return 0;

This is the only return statement. Maybe this could be a void function.
Unless you decide to turn the BUG_ON into a WARN with an error return.


> +}
> +
> +static int gmc_v9_0_late_init(void *handle)
> +{
> + struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> + int r;
> +
> + if (!gmc_v9_0_keep_stolen_memory(adev))
> + amdgpu_bo_late_init(adev);
> +
> + gmc_v9_0_allocate_vm_inv_eng(adev);

You're ignoring the return value. Either, make the function void, or
handle potential error returns.

Regards,
  Felix


>  
>   if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) {
>   r = gmc_v9_0_ecc_available(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
> index b030ca5ea107..5c8deac65580 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
> @@ -24,6 +24,16 @@
>  #ifndef __GMC_V9_0_H__
>  #define __GMC_V9_0_H__
>  
> + /*
> +  * The latest engine allocation on gfx9 is:
> +  * Engine 2, 3: firmware
> +  * Engine 0, 1, 4~16: amdgpu ring,
> +  *subject to change when ring number changes
> +  * Engine 17: Gart flushes
> +  */
> +#define GFXHUB_FREE_VM_INV_ENGS_BITMAP   0x1FFF3
> +#define MMHUB_FREE_VM_INV_ENGS_BITMAP0x1FFF3
> +
>  extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;
>  extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block;
>  
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Intel-gfx] [PATCH 06/10] drm/syncobj: use the timeline point in drm_syncobj_find_fence v3

2018-12-13 Thread Koenig, Christian
Am 13.12.18 um 17:01 schrieb Daniel Vetter:
> On Thu, Dec 13, 2018 at 12:24:57PM +, Koenig, Christian wrote:
>> Am 13.12.18 um 13:21 schrieb Chris Wilson:
>>> Quoting Koenig, Christian (2018-12-13 12:11:10)
 Am 13.12.18 um 12:37 schrieb Chris Wilson:
> Quoting Chunming Zhou (2018-12-11 10:34:45)
>> From: Christian König 
>>
>> Implement finding the right timeline point in drm_syncobj_find_fence.
>>
>> v2: return -EINVAL when the point is not submitted yet.
>> v3: fix reference counting bug, add flags handling as well
>>
>> Signed-off-by: Christian König 
>> ---
>> drivers/gpu/drm/drm_syncobj.c | 43 
>> ---
>> 1 file changed, 40 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_syncobj.c 
>> b/drivers/gpu/drm/drm_syncobj.c
>> index 76ce13dafc4d..d964b348ecba 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -231,16 +231,53 @@ int drm_syncobj_find_fence(struct drm_file 
>> *file_private,
>>   struct dma_fence **fence)
>> {
>>struct drm_syncobj *syncobj = drm_syncobj_find(file_private, 
>> handle);
>> -   int ret = 0;
>> +   struct syncobj_wait_entry wait;
>> +   int ret;
>> 
>>if (!syncobj)
>>return -ENOENT;
>> 
>>*fence = drm_syncobj_fence_get(syncobj);
>> -   if (!*fence) {
>> +   drm_syncobj_put(syncobj);
>> +
>> +   if (*fence) {
>> +   ret = dma_fence_chain_find_seqno(fence, point);
>> +   if (!ret)
>> +   return 0;
>> +   dma_fence_put(*fence);
>> +   } else {
>>ret = -EINVAL;
>>}
>> -   drm_syncobj_put(syncobj);
>> +
>> +   if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
>> +   return ret;
>> +
>> +   memset(&wait, 0, sizeof(wait));
>> +   wait.task = current;
>> +   wait.point = point;
>> +   drm_syncobj_fence_add_wait(syncobj, &wait);
>> +
>> +   do {
>> +   set_current_state(TASK_INTERRUPTIBLE);
>> +   if (wait.fence) {
>> +   ret = 0;
>> +   break;
>> +   }
>> +
>> +   if (signal_pending(current)) {
>> +   ret = -ERESTARTSYS;
>> +   break;
>> +   }
>> +
>> +   schedule();
>> +   } while (1);
> I've previously used a dma_fence_proxy so that we could do nonblocking
> waits on future submits. That would be preferrable (a requirement for
> our stupid BKL-driven code).
 That is exactly what I would definitely NAK.

 I would rather say we should come up with a wait_multiple_events() macro
 and completely nuke the custom implementation of this in:
 1. dma_fence_default_wait and dma_fence_wait_any_timeout
 2. the radeon fence implementation
 3. the nouveau fence implementation
 4. the syncobj code

 Cause all of them do exactly the same. The dma_fence implementation
 unfortunately came up with a custom event handling mechanism instead of
 extending the core Linux wait_event() system.
>>> I don't want a blocking wait at all.
>> Ok I wasn't clear enough :) That is exactly what I would NAK!
>>
>> The wait must be blocking or otherwise you would allow wait-before-signal.
> Well the current implementation is pulling a rather big trick on readers
> in this regard: It looks like a dma_fence, it's implemented as one even,
> heck you even open-code a dma_fence_wait here.
>
> Except the semantics are completely different.
>
> So aside from the discussion whether we really want to fully chain them I
> think it just doesn't make sense to implement the "wait for fence submit"
> as a dma_fence wait. And I'd outright remove that part from the uapi, and
> force the wait. The current radv/anv plans I discussed with Jason was that
> we'd have a separate submit thread, and hence unconditionally blocking
> until the fence has materialized is the right thing to do. Even allowing
> that option, either through a flag, or making these things look like
> dma_fences (they are _not_) just tricks folks into misunderstanding what's
> going on.

Good, that sounds strongly like something I can agree on as well.

> Code sharing just because the code looks similar is imo a really
> bad idea, when the semantics are entirely different (that was also the
> reason behind not reusing all the cpu event stuff for dma_fence, they're
> not normal cpu events).

Ok, the last sentence is what I don't understand.

What exactly is the semantic difference between the dma_fence_wait and 
the wait_event interface?


RE: [PATCH 2/4] drm/amdgpu: update the vm invalidation engine layout

2018-12-13 Thread Zeng, Oak
Reviewed-by: Oak Zeng 

Regards,
Oak

-Original Message-
From: Evan Quan  
Sent: Wednesday, December 12, 2018 11:10 PM
To: amd-gfx@lists.freedesktop.org
Cc: Koenig, Christian ; Zeng, Oak ; 
Deucher, Alexander ; Quan, Evan 
Subject: [PATCH 2/4] drm/amdgpu: update the vm invalidation engine layout

We need new invalidation engine layout due to new SDMA page queues added.

Change-Id: I2f3861689bffb9828c9eae744a7a0de4963ac2b6
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 47 ++-  
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h | 10 ++
 2 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index dc4dadc3575c..092a4d111557 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -719,37 +719,40 @@ static bool gmc_v9_0_keep_stolen_memory(struct 
amdgpu_device *adev)
}
 }
 
-static int gmc_v9_0_late_init(void *handle)
+static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)
 {
-   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   /*
-* The latest engine allocation on gfx9 is:
-* Engine 0, 1: idle
-* Engine 2, 3: firmware
-* Engine 4~13: amdgpu ring, subject to change when ring number changes
-* Engine 14~15: idle
-* Engine 16: kfd tlb invalidation
-* Engine 17: Gart flushes
-*/
-   unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
+   struct amdgpu_ring *ring;
+   unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
+   {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP};
unsigned i;
-   int r;
+   unsigned vmhub, inv_eng;
 
-   if (!gmc_v9_0_keep_stolen_memory(adev))
-   amdgpu_bo_late_init(adev);
+   for (i = 0; i < adev->num_rings; ++i) {
+   ring = adev->rings[i];
+   vmhub = ring->funcs->vmhub;
+
+   inv_eng = ffs(vm_inv_engs[vmhub]);
+   BUG_ON(!inv_eng);
 
-   for(i = 0; i < adev->num_rings; ++i) {
-   struct amdgpu_ring *ring = adev->rings[i];
-   unsigned vmhub = ring->funcs->vmhub;
+   ring->vm_inv_eng = inv_eng - 1;
+   change_bit(inv_eng - 1, (unsigned long *)(&vm_inv_engs[vmhub]));
 
-   ring->vm_inv_eng = vm_inv_eng[vmhub]++;
dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
 ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
}
 
-   /* Engine 16 is used for KFD and 17 for GART flushes */
-   for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
-   BUG_ON(vm_inv_eng[i] > 16);
+   return 0;
+}
+
+static int gmc_v9_0_late_init(void *handle) {
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   int r;
+
+   if (!gmc_v9_0_keep_stolen_memory(adev))
+   amdgpu_bo_late_init(adev);
+
+   gmc_v9_0_allocate_vm_inv_eng(adev);
 
if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) {
r = gmc_v9_0_ecc_available(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
index b030ca5ea107..5c8deac65580 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
@@ -24,6 +24,16 @@
 #ifndef __GMC_V9_0_H__
 #define __GMC_V9_0_H__
 
+   /*
+* The latest engine allocation on gfx9 is:
+* Engine 2, 3: firmware
+* Engine 0, 1, 4~16: amdgpu ring,
+*subject to change when ring number changes
+* Engine 17: Gart flushes
+*/
+#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
+#define MMHUB_FREE_VM_INV_ENGS_BITMAP  0x1FFF3
+
 extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;  extern const struct 
amdgpu_ip_block_version gmc_v9_0_ip_block;
 
--
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: WARN once if amdgpu_bo_unpin is called for an unpinned BO

2018-12-13 Thread Deucher, Alexander
Reviewed-by: Alex Deucher 


From: amd-gfx  on behalf of Michel 
Dänzer 
Sent: Thursday, December 13, 2018 11:06:35 AM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH] drm/amdgpu: WARN once if amdgpu_bo_unpin is called for an 
unpinned BO

From: Michel Dänzer 

It indicates a pin/unpin imbalance bug somewhere. While the bug isn't
necessarily in the call chain hitting this, it's at least one part
involved.

Signed-off-by: Michel Dänzer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index fd271f9746a2..728e15e5d68a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -912,7 +912,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
 struct ttm_operation_ctx ctx = { false, false };
 int r, i;

-   if (!bo->pin_count) {
+   if (WARN_ON_ONCE(!bo->pin_count)) {
 dev_warn(adev->dev, "%p unpin not necessary\n", bo);
 return 0;
 }
--
2.20.0.rc2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/4] drm/amdgpu: increase the MAX ring number

2018-12-13 Thread Zeng, Oak
Reviewed-by: Oak Zeng 

Regards,
Oak

-Original Message-
From: amd-gfx  On Behalf Of Evan Quan
Sent: Wednesday, December 12, 2018 11:10 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Zeng, Oak 
; Quan, Evan ; Koenig, Christian 

Subject: [PATCH 1/4] drm/amdgpu: increase the MAX ring number

As two more SDMA page queue rings are added on Vega20.

Change-Id: I8a3d8fbc924f7c24aaebf17b3f329a4a38fe5a56
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4b31c96a2a2c..fe657788024f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -31,7 +31,7 @@
 #endif
 
 /* max number of rings */
-#define AMDGPU_MAX_RINGS   21
+#define AMDGPU_MAX_RINGS   23
 #define AMDGPU_MAX_GFX_RINGS   1
 #define AMDGPU_MAX_COMPUTE_RINGS   8
 #define AMDGPU_MAX_VCE_RINGS   3
-- 
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: WARN once if amdgpu_bo_unpin is called for an unpinned BO

2018-12-13 Thread Michel Dänzer
From: Michel Dänzer 

It indicates a pin/unpin imbalance bug somewhere. While the bug isn't
necessarily in the call chain hitting this, it's at least one part
involved.

Signed-off-by: Michel Dänzer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index fd271f9746a2..728e15e5d68a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -912,7 +912,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
struct ttm_operation_ctx ctx = { false, false };
int r, i;
 
-   if (!bo->pin_count) {
+   if (WARN_ON_ONCE(!bo->pin_count)) {
dev_warn(adev->dev, "%p unpin not necessary\n", bo);
return 0;
}
-- 
2.20.0.rc2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Intel-gfx] [PATCH 06/10] drm/syncobj: use the timeline point in drm_syncobj_find_fence v3

2018-12-13 Thread Daniel Vetter
On Thu, Dec 13, 2018 at 12:24:57PM +, Koenig, Christian wrote:
> Am 13.12.18 um 13:21 schrieb Chris Wilson:
> > Quoting Koenig, Christian (2018-12-13 12:11:10)
> >> Am 13.12.18 um 12:37 schrieb Chris Wilson:
> >>> Quoting Chunming Zhou (2018-12-11 10:34:45)
>  From: Christian König 
> 
>  Implement finding the right timeline point in drm_syncobj_find_fence.
> 
>  v2: return -EINVAL when the point is not submitted yet.
>  v3: fix reference counting bug, add flags handling as well
> 
>  Signed-off-by: Christian König 
>  ---
> drivers/gpu/drm/drm_syncobj.c | 43 ---
> 1 file changed, 40 insertions(+), 3 deletions(-)
> 
>  diff --git a/drivers/gpu/drm/drm_syncobj.c 
>  b/drivers/gpu/drm/drm_syncobj.c
>  index 76ce13dafc4d..d964b348ecba 100644
>  --- a/drivers/gpu/drm/drm_syncobj.c
>  +++ b/drivers/gpu/drm/drm_syncobj.c
>  @@ -231,16 +231,53 @@ int drm_syncobj_find_fence(struct drm_file 
>  *file_private,
>   struct dma_fence **fence)
> {
>    struct drm_syncobj *syncobj = drm_syncobj_find(file_private, 
>  handle);
>  -   int ret = 0;
>  +   struct syncobj_wait_entry wait;
>  +   int ret;
> 
>    if (!syncobj)
>    return -ENOENT;
> 
>    *fence = drm_syncobj_fence_get(syncobj);
>  -   if (!*fence) {
>  +   drm_syncobj_put(syncobj);
>  +
>  +   if (*fence) {
>  +   ret = dma_fence_chain_find_seqno(fence, point);
>  +   if (!ret)
>  +   return 0;
>  +   dma_fence_put(*fence);
>  +   } else {
>    ret = -EINVAL;
>    }
>  -   drm_syncobj_put(syncobj);
>  +
>  +   if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
>  +   return ret;
>  +
>  +   memset(&wait, 0, sizeof(wait));
>  +   wait.task = current;
>  +   wait.point = point;
>  +   drm_syncobj_fence_add_wait(syncobj, &wait);
>  +
>  +   do {
>  +   set_current_state(TASK_INTERRUPTIBLE);
>  +   if (wait.fence) {
>  +   ret = 0;
>  +   break;
>  +   }
>  +
>  +   if (signal_pending(current)) {
>  +   ret = -ERESTARTSYS;
>  +   break;
>  +   }
>  +
>  +   schedule();
>  +   } while (1);
> >>> I've previously used a dma_fence_proxy so that we could do nonblocking
> >>> waits on future submits. That would be preferrable (a requirement for
> >>> our stupid BKL-driven code).
> >> That is exactly what I would definitely NAK.
> >>
> >> I would rather say we should come up with a wait_multiple_events() macro
> >> and completely nuke the custom implementation of this in:
> >> 1. dma_fence_default_wait and dma_fence_wait_any_timeout
> >> 2. the radeon fence implementation
> >> 3. the nouveau fence implementation
> >> 4. the syncobj code
> >>
> >> Cause all of them do exactly the same. The dma_fence implementation
> >> unfortunately came up with a custom event handling mechanism instead of
> >> extending the core Linux wait_event() system.
> > I don't want a blocking wait at all.
> 
> Ok I wasn't clear enough :) That is exactly what I would NAK!
> 
> The wait must be blocking or otherwise you would allow wait-before-signal.

Well the current implementation is pulling a rather big trick on readers
in this regard: It looks like a dma_fence, it's implemented as one even,
heck you even open-code a dma_fence_wait here.

Except the semantics are completely different.

So aside from the discussion whether we really want to fully chain them I
think it just doesn't make sense to implement the "wait for fence submit"
as a dma_fence wait. And I'd outright remove that part from the uapi, and
force the wait. The current radv/anv plans I discussed with Jason was that
we'd have a separate submit thread, and hence unconditionally blocking
until the fence has materialized is the right thing to do. Even allowing
that option, either through a flag, or making these things look like
dma_fences (they are _not_) just tricks folks into misunderstanding what's
going on. Code sharing just because the code looks similar is imo a really
bad idea, when the semantics are entirely different (that was also the
reason behind not reusing all the cpu event stuff for dma_fence, they're
not normal cpu events).
-Daniel

> 
> Christian.
> 
> > -Chris
> 
> ___
> Intel-gfx mailing list
> intel-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_

Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates

2018-12-13 Thread Kazlauskas, Nicholas
On 12/13/18 10:48 AM, Michel Dänzer wrote:
> On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:
>> [Why]
>> Legacy cursor plane updates from drm helpers go through the full
>> atomic codepath. A high volume of cursor updates through this slow
>> code path can cause subsequent page-flips to skip vblank intervals
>> since each individual update is slow.
>>
>> This problem is particularly noticeable for the compton compositor.
>>
>> [How]
>> A fast path for cursor plane updates is added by using DRM asynchronous
>> commit support provided by async_check and async_update. These don't do
>> a full state/flip_done dependency stall and they don't block other
>> commit work.
>>
>> However, DC still expects itself to be single-threaded for anything
>> that can issue register writes. Screen corruption or hangs can occur
>> if write sequences overlap. Every call that potentially perform
>> register writes needs to be guarded for asynchronous updates to work.
>> The dc_lock mutex was added for this.
>>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>
>> Cc: Leo Li 
>> Cc: Harry Wentland 
>> Signed-off-by: Nicholas Kazlauskas 
> 
> Looks like this change introduced (or at least exposed) a reference
> counting bug resulting in use-after-free when Xorg shuts down[0]. See
> the attached dmesg excerpt (note that I wrapped the !bo->pin_count check
> in amdgpu_bo_unpin in WARN_ON_ONCE).
> 
> 
> [0] Only with
> https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4
> , i.e. alternating between two BOs for the HW cursor, instead of always
> using the same one.
> 
> 

Thanks for the heads up, I don't think I had that patch in my 
xf86-video-amdgpu when testing the desktop stack.

The async atomic helper does the:

drm_atomic_helper_prepare_planes
drm_atomic_helper_async_commit
drm_atomic_helper_cleanup_planes

...sequence correctly from what I can tell, so maybe it's something with
dm_plane_helper_prepare_fb or dm_plane_helper_cleanup_fb itself.

One case where unref could be called (not following a ref) is during 
drm_atomic_helper_prepare_planes - if prepare_fb fails then cleanup_fb 
gets called regardless, and we only ref the fb if prepare_fb is in the 
success path.

Nicholas Kazlauskas
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates

2018-12-13 Thread Michel Dänzer
On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:
> [Why]
> Legacy cursor plane updates from drm helpers go through the full
> atomic codepath. A high volume of cursor updates through this slow
> code path can cause subsequent page-flips to skip vblank intervals
> since each individual update is slow.
> 
> This problem is particularly noticeable for the compton compositor.
> 
> [How]
> A fast path for cursor plane updates is added by using DRM asynchronous
> commit support provided by async_check and async_update. These don't do
> a full state/flip_done dependency stall and they don't block other
> commit work.
> 
> However, DC still expects itself to be single-threaded for anything
> that can issue register writes. Screen corruption or hangs can occur
> if write sequences overlap. Every call that potentially perform
> register writes needs to be guarded for asynchronous updates to work.
> The dc_lock mutex was added for this.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
> 
> Cc: Leo Li 
> Cc: Harry Wentland 
> Signed-off-by: Nicholas Kazlauskas 

Looks like this change introduced (or at least exposed) a reference
counting bug resulting in use-after-free when Xorg shuts down[0]. See
the attached dmesg excerpt (note that I wrapped the !bo->pin_count check
in amdgpu_bo_unpin in WARN_ON_ONCE).


[0] Only with
https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4
, i.e. alternating between two BOs for the HW cursor, instead of always
using the same one.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
Dec 13 16:35:07 kaveri kernel: [   52.603334] WARNING: CPU: 13 PID: 2010 at drivers/gpu/drm//amd/amdgpu/amdgpu_object.c:915 amdgpu_bo_unpin+0x24e/0x340 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.603454] Modules linked in: fuse(E) ipt_MASQUERADE(E) nf_conntrack_netlink(E) xfrm_user(E) xfrm_algo(E) nft_counter(E) nft_chain_nat_ipv4(E) nf_nat_ipv4(E) xt_addrtype(E) nft_compat(E) nf_tables(E) nfnetlink(E) xt_conntrack(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) libcrc32c(E) br_netfilter(E) bridge(E) stp(E) llc(E) overlay(E) lz4(E) lz4_compress(E) cpufreq_powersave(E) cpufreq_userspace(E) cpufreq_conservative(E) binfmt_misc(E) amdgpu(OE) nls_ascii(E) nls_cp437(E) vfat(E) fat(E) chash(OE) gpu_sched(OE) edac_mce_amd(E) kvm(E) irqbypass(E) crct10dif_pclmul(E) crc32_pclmul(E) ghash_clmulni_intel(E) radeon(OE) snd_hda_codec_realtek(E) snd_hda_codec_generic(E) snd_hda_codec_hdmi(E) ttm(OE) wmi_bmof(E) snd_hda_intel(E) drm_kms_helper(OE) aesni_intel(E) snd_hda_codec(E) efi_pstore(E) aes_x86_64(E) snd_hda_core(E) crypto_simd(E) snd_hwdep(E) r8169(E) cryptd(E) drm(OE) glue_helper(E) pcspkr(E) efivars(E) sg(E) k10temp(E) snd_pcm(E) i2c_algo_bit(E) libphy(E)
Dec 13 16:35:07 kaveri kernel: [   52.603509]  fb_sys_fops(E) syscopyarea(E) ccp(E) snd_timer(E) sysfillrect(E) sp5100_tco(E) sysimgblt(E) snd(E) soundcore(E) rng_core(E) i2c_piix4(E) wmi(E) pcc_cpufreq(E) button(E) acpi_cpufreq(E) tcp_bbr(E) sch_fq(E) nct6775(E) hwmon_vid(E) sunrpc(E) efivarfs(E) ip_tables(E) x_tables(E) autofs4(E) ext4(E) crc32c_generic(E) crc16(E) mbcache(E) jbd2(E) fscrypto(E) dm_mod(E) raid10(E) raid1(E) raid0(E) multipath(E) linear(E) md_mod(E) sd_mod(E) evdev(E) hid_generic(E) usbhid(E) hid(E) ahci(E) libahci(E) xhci_pci(E) libata(E) xhci_hcd(E) crc32c_intel(E) scsi_mod(E) usbcore(E) gpio_amdpt(E) gpio_generic(E)
Dec 13 16:35:07 kaveri kernel: [   52.603555] CPU: 13 PID: 2010 Comm: Xorg Tainted: G   OE 4.20.0-rc3+ #118
Dec 13 16:35:07 kaveri kernel: [   52.603559] Hardware name: Micro-Star International Co., Ltd. MS-7A34/B350 TOMAHAWK (MS-7A34), BIOS 1.80 09/13/2017
Dec 13 16:35:07 kaveri kernel: [   52.603628] RIP: 0010:amdgpu_bo_unpin+0x24e/0x340 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.603633] Code: 3c 11 00 0f 85 ff 00 00 00 49 8b bd 28 d4 ff ff 4c 89 e2 48 c7 c6 00 9a eb c2 89 04 24 e8 6a d4 5a ca 8b 04 24 e9 a9 fe ff ff <0f> 0b 49 8d bd 28 d4 ff ff 48 b8 00 00 00 00 00 fc ff df 48 89 fa
Dec 13 16:35:07 kaveri kernel: [   52.603636] RSP: 0018:8883d403f3d8 EFLAGS: 00010246
Dec 13 16:35:07 kaveri kernel: [   52.603641] RAX:  RBX: 11107a807e96 RCX: 8883c445e650
Dec 13 16:35:07 kaveri kernel: [   52.603645] RDX: 11107888bd37 RSI:  RDI: 8883c445e9b8
Dec 13 16:35:07 kaveri kernel: [   52.603648] RBP: 8883c445e9e8 R08: fbfff83643a7 R09: fbfff83643a6
Dec 13 16:35:07 kaveri kernel: [   52.603652] R10: fbfff83643a6 R11: c1b21d33 R12: 8883c445e600
Dec 13 16:35:07 kaveri kernel: [   52.603655] R13: 8883b1de2bd8 R14: 11107a807e7e R15: 8883b1de2bd8
Dec 13 16:35:07 kaveri kernel: [   52.603659] FS:  7fc8b8929940() GS:8883ee14() knlGS:
Dec 13 16:35:07 kaveri kernel: [   52.603663] CS:  0010 DS:  ES:  CR0: 00

Re: [igt-dev] [PATCH i-g-t] igt/amdgpu_amd_prime: Bail if we fail to create more contexts

2018-12-13 Thread Chris Wilson
Quoting Chris Wilson (2018-12-13 15:36:43)
> Quoting Antonio Argenziano (2018-12-13 15:28:00)
> > 
> > 
> > On 13/12/18 03:57, Chris Wilson wrote:
> > > amdgpu has started to report out of space after creating a few contexts.
> > > This is not the scope of this test as here we just verifying that fences
> > > created in amd can be imported and used for synchronisation by i915 and
> > > for that we just need at least one context created!
> > > 
> > > References: https://bugs.freedesktop.org/show_bug.cgi?id=109049
> > > Signed-off-by: Chris Wilson 
> > 
> > LGTM.
> > 
> > Reviewed-by: Antonio Argenziano 
> > 
> > > ---
> > >   tests/amdgpu/amd_prime.c | 5 +++--
> > >   1 file changed, 3 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/tests/amdgpu/amd_prime.c b/tests/amdgpu/amd_prime.c
> > > index bda0ce83d..518c88963 100644
> > > --- a/tests/amdgpu/amd_prime.c
> > > +++ b/tests/amdgpu/amd_prime.c
> > > @@ -354,8 +354,8 @@ static void amd_to_i915(int i915, int amd, 
> > > amdgpu_device_handle device)
> > 
> > doesn't i915_to_amd have the same issue?
> 
> Only if you manage to run out of swap in 2s or used gen11. We don't like
> to mention the feature improvements.

Actually, in i915 we use asynchronous destruction of contexts and so
immediately release the resources and can reallocate as required. But
amdgpu uses synchronous context destruction and so we have to hold on to
the context even though we only want to import the fence into i915.

If we run out of contexts here with i915 (even on icl), it's a kernel
bug.
-Chris
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH i-g-t] igt/amdgpu_amd_prime: Bail if we fail to create more contexts

2018-12-13 Thread Alex Deucher
On Thu, Dec 13, 2018 at 6:57 AM Chris Wilson  wrote:
>
> amdgpu has started to report out of space after creating a few contexts.
> This is not the scope of this test as here we just verifying that fences
> created in amd can be imported and used for synchronisation by i915 and
> for that we just need at least one context created!
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=109049
> Signed-off-by: Chris Wilson 

Acked-by: Alex Deucher 

> ---
>  tests/amdgpu/amd_prime.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/tests/amdgpu/amd_prime.c b/tests/amdgpu/amd_prime.c
> index bda0ce83d..518c88963 100644
> --- a/tests/amdgpu/amd_prime.c
> +++ b/tests/amdgpu/amd_prime.c
> @@ -354,8 +354,8 @@ static void amd_to_i915(int i915, int amd, 
> amdgpu_device_handle device)
> contexts = realloc(contexts, size * 
> sizeof(*contexts));
> }
>
> -   r = amdgpu_cs_ctx_create(device, &contexts[count]);
> -   igt_assert_eq(r, 0);
> +   if (amdgpu_cs_ctx_create(device, &contexts[count]))
> +   break;
>
> r = amdgpu_cs_submit(contexts[count], 0, &ibs_request, 1);
> igt_assert_eq(r, 0);
> @@ -364,6 +364,7 @@ static void amd_to_i915(int i915, int amd, 
> amdgpu_device_handle device)
> }
>
> igt_info("Reservation width = %ld\n", count);
> +   igt_require(count);
>
> amdgpu_bo_export(ib_result_handle,
>  amdgpu_bo_handle_type_dma_buf_fd,
> --
> 2.20.0
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [igt-dev] [PATCH i-g-t] igt/amdgpu_amd_prime: Bail if we fail to create more contexts

2018-12-13 Thread Chris Wilson
Quoting Antonio Argenziano (2018-12-13 15:28:00)
> 
> 
> On 13/12/18 03:57, Chris Wilson wrote:
> > amdgpu has started to report out of space after creating a few contexts.
> > This is not the scope of this test as here we just verifying that fences
> > created in amd can be imported and used for synchronisation by i915 and
> > for that we just need at least one context created!
> > 
> > References: https://bugs.freedesktop.org/show_bug.cgi?id=109049
> > Signed-off-by: Chris Wilson 
> 
> LGTM.
> 
> Reviewed-by: Antonio Argenziano 
> 
> > ---
> >   tests/amdgpu/amd_prime.c | 5 +++--
> >   1 file changed, 3 insertions(+), 2 deletions(-)
> > 
> > diff --git a/tests/amdgpu/amd_prime.c b/tests/amdgpu/amd_prime.c
> > index bda0ce83d..518c88963 100644
> > --- a/tests/amdgpu/amd_prime.c
> > +++ b/tests/amdgpu/amd_prime.c
> > @@ -354,8 +354,8 @@ static void amd_to_i915(int i915, int amd, 
> > amdgpu_device_handle device)
> 
> doesn't i915_to_amd have the same issue?

Only if you manage to run out of swap in 2s or used gen11. We don't like
to mention the feature improvements.
-Chris
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [igt-dev] [PATCH i-g-t] igt/amdgpu_amd_prime: Bail if we fail to create more contexts

2018-12-13 Thread Antonio Argenziano



On 13/12/18 03:57, Chris Wilson wrote:

amdgpu has started to report out of space after creating a few contexts.
This is not the scope of this test as here we just verifying that fences
created in amd can be imported and used for synchronisation by i915 and
for that we just need at least one context created!

References: https://bugs.freedesktop.org/show_bug.cgi?id=109049
Signed-off-by: Chris Wilson 


LGTM.

Reviewed-by: Antonio Argenziano 


---
  tests/amdgpu/amd_prime.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/amdgpu/amd_prime.c b/tests/amdgpu/amd_prime.c
index bda0ce83d..518c88963 100644
--- a/tests/amdgpu/amd_prime.c
+++ b/tests/amdgpu/amd_prime.c
@@ -354,8 +354,8 @@ static void amd_to_i915(int i915, int amd, 
amdgpu_device_handle device)


doesn't i915_to_amd have the same issue?

Antonio


contexts = realloc(contexts, size * sizeof(*contexts));
}
  
-		r = amdgpu_cs_ctx_create(device, &contexts[count]);

-   igt_assert_eq(r, 0);
+   if (amdgpu_cs_ctx_create(device, &contexts[count]))
+   break;
  
  		r = amdgpu_cs_submit(contexts[count], 0, &ibs_request, 1);

igt_assert_eq(r, 0);
@@ -364,6 +364,7 @@ static void amd_to_i915(int i915, int amd, 
amdgpu_device_handle device)
}
  
  	igt_info("Reservation width = %ld\n", count);

+   igt_require(count);
  
  	amdgpu_bo_export(ib_result_handle,

 amdgpu_bo_handle_type_dma_buf_fd,


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/display: fix compiler warnings about wm variable

2018-12-13 Thread Huaisheng Ye
From: Huaisheng Ye 

There are compiler warnings within functions 'dcn10_log_hubbub_state’
and 'dcn10_get_hubbub_state’. This patch avoids the compiler reports
the following warning when building amdgpu.ko.

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hw_sequencer.c: In 
function ‘dcn10_log_hubbub_state’:
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hw_sequencer.c:94:9: 
warning: missing braces around initializer [-Wmissing-braces]
  struct dcn_hubbub_wm wm = {0};
 ^
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hw_sequencer.c:94:9: 
warning: (near initialization for ‘wm.sets’) [-Wmissing-braces]

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hw_sequencer_debug.c: In 
function ‘dcn10_get_hubbub_state’:
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hw_sequencer_debug.c:74:9: 
warning: missing braces around initializer [-Wmissing-braces]
  struct dcn_hubbub_wm wm = {0};
 ^
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hw_sequencer_debug.c:74:9: 
warning: (near initialization for ‘wm.sets’) [-Wmissing-braces]

Signed-off-by: Huaisheng Ye 
---
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c   | 3 ++-
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 193184a..e96933a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -91,9 +91,10 @@ static void log_mpc_crc(struct dc *dc,
 void dcn10_log_hubbub_state(struct dc *dc, struct dc_log_buffer_ctx *log_ctx)
 {
struct dc_context *dc_ctx = dc->ctx;
-   struct dcn_hubbub_wm wm = {0};
+   struct dcn_hubbub_wm wm;
int i;
 
+   memset(&wm, 0, sizeof(struct dcn_hubbub_wm));
hubbub1_wm_read_state(dc->res_pool->hubbub, &wm);
 
DTN_INFO("HUBBUB WM:  data_urgent  pte_meta_urgent"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
index 6415890..f5610ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
@@ -71,7 +71,7 @@ static unsigned int snprintf_count(char *pBuf, unsigned int 
bufSize, char *fmt,
 static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned 
int bufSize)
 {
struct dc_context *dc_ctx = dc->ctx;
-   struct dcn_hubbub_wm wm = {0};
+   struct dcn_hubbub_wm wm;
int i;
 
unsigned int chars_printed = 0;
@@ -80,6 +80,7 @@ static unsigned int dcn10_get_hubbub_state(struct dc *dc, 
char *pBuf, unsigned i
const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clock_inKhz / 
1000;
static const unsigned int frac = 1000;
 
+   memset(&wm, 0, sizeof(struct dcn_hubbub_wm));
hubbub1_wm_read_state(dc->res_pool->hubbub, &wm);
 
chars_printed = snprintf_count(pBuf, remaining_buffer, 
"wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_chanage\n");
-- 
1.8.3.1



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [pull] amdgpu drm-next-4.21

2018-12-13 Thread Alex Deucher
On Wed, Dec 12, 2018 at 7:08 PM Dave Airlie  wrote:
>
> On Thu, 13 Dec 2018 at 07:13, Alex Deucher  wrote:
> >
> > Hi Dave,
> >
> > Updates for 4.21:
> > - Powerplay updates for newer polaris variants
> > - Add cursor plane update fast path
> > - Enable gpu reset by default on CI parts
> > - Fix config with KFD/HSA not enabled
> > - Misc bug fixes
> >
>
> Either this or the previous one broke the etnaviv build, I see two
> reports on the list of this breakage from the kbuild robot but no
> mention of it, can you guys either stick some arm cross compiles in
> your build paths or keep an eye on the kbuild robot emails for your
> branches.
>
> I created my own fix and pushed it in the merge commit.

Sorry about that.  I did see it, but I mixed it up with another issue
on drm-misc which was already fixed and thought it was a result of the
last drm-next merge into my tree.  I'll pay closer attention next
time.

Alex

>
> Dave.
>
> > The following changes since commit 22666cc1481ae3814d9c7718418cc4a3aa7d90c3:
> >
> >   drm/amdgpu: move IV prescreening into the GMC code (2018-12-07 18:14:26 
> > -0500)
> >
> > are available in the git repository at:
> >
> >   git://people.freedesktop.org/~agd5f/linux drm-next-4.21
> >
> > for you to fetch changes up to 674e78acae0dfb4beb56132e41cbae5b60f7d662:
> >
> >   drm/amd/display: Add fast path for cursor plane updates (2018-12-12 
> > 15:32:10 -0500)
> >
> > 
> > Alex Deucher (1):
> >   drm/amdgpu/powerplay: Add special avfs cases for some polaris asics 
> > (v3)
> >
> > Andrey Grodzovsky (1):
> >   drm/amdgpu: Enable GPU recovery by default for CI
> >
> > Kuehling, Felix (1):
> >   drm/amdgpu: Fix stub function name
> >
> > Nicholas Kazlauskas (4):
> >   Revert "drm/amd/display: Set RMX_ASPECT as default"
> >   drm/amd/display: Fix unintialized max_bpc state values
> >   drm/amd/display: Fix duplicating scaling/underscan connector state
> >   drm/amd/display: Add fast path for cursor plane updates
> >
> > Rex Zhu (1):
> >   drm/amdgpu: Limit vm max ctx number to 4096
> >
> > Tiecheng Zhou (1):
> >   drm/amdgpu: bypass RLC init under sriov for Tonga (v2)
> >
> > YueHaibing (1):
> >   drm/amdgpu: remove set but not used variable 'grbm_soft_reset'
> >
> >  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  2 +-
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c|  2 +-
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +
> >  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 11 +--
> >  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  | 79 
> > --
> >  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h  |  8 +++
> >  .../drm/amd/powerplay/smumgr/polaris10_smumgr.c| 54 +++
> >  8 files changed, 147 insertions(+), 12 deletions(-)
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: unify Vega20 PSP SOS firmwares for A0 and A1

2018-12-13 Thread Deucher, Alexander
Acked-by: Alex Deucher 


From: amd-gfx  on behalf of Evan Quan 

Sent: Thursday, December 13, 2018 1:14:51 AM
To: amd-gfx@lists.freedesktop.org
Cc: Quan, Evan
Subject: [PATCH] drm/amdgpu: unify Vega20 PSP SOS firmwares for A0 and A1

The new PSP SOS firmware can support both A0 and A1.

Change-Id: I9bf85eb77b183a4403667c77e291e32689aed0af
---
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 61cf2f6954e7..f3f5d4dd4631 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -34,14 +34,11 @@
 #include "nbio/nbio_7_4_offset.h"

 MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
-MODULE_FIRMWARE("amdgpu/vega20_sos_old.bin");
 MODULE_FIRMWARE("amdgpu/vega20_ta.bin");

 /* address block */
 #define smnMP1_FIRMWARE_FLAGS   0x3010024

-#define VEGA20_BL_VERSION_VAR_NEW 0xA1
-
 static int
 psp_v11_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type 
*type)
 {
@@ -104,7 +101,6 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
 int err = 0;
 const struct psp_firmware_header_v1_0 *sos_hdr;
 const struct ta_firmware_header_v1_0 *ta_hdr;
-   uint32_t bl_version;

 DRM_DEBUG("\n");

@@ -116,13 +112,7 @@ static int psp_v11_0_init_microcode(struct psp_context 
*psp)
 BUG();
 }

-   bl_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_100);
-   bl_version = (bl_version & 0xFF) >> 16;
-
-   if (bl_version == VEGA20_BL_VERSION_VAR_NEW)
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", 
chip_name);
-   else
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos_old.bin", 
chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name);
 err = request_firmware(&adev->psp.sos_fw, fw_name, adev->dev);
 if (err)
 goto out;
--
2.19.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Intel-gfx] [PATCH 06/10] drm/syncobj: use the timeline point in drm_syncobj_find_fence v3

2018-12-13 Thread Koenig, Christian
Am 13.12.18 um 13:21 schrieb Chris Wilson:
> Quoting Koenig, Christian (2018-12-13 12:11:10)
>> Am 13.12.18 um 12:37 schrieb Chris Wilson:
>>> Quoting Chunming Zhou (2018-12-11 10:34:45)
 From: Christian König 

 Implement finding the right timeline point in drm_syncobj_find_fence.

 v2: return -EINVAL when the point is not submitted yet.
 v3: fix reference counting bug, add flags handling as well

 Signed-off-by: Christian König 
 ---
drivers/gpu/drm/drm_syncobj.c | 43 ---
1 file changed, 40 insertions(+), 3 deletions(-)

 diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
 index 76ce13dafc4d..d964b348ecba 100644
 --- a/drivers/gpu/drm/drm_syncobj.c
 +++ b/drivers/gpu/drm/drm_syncobj.c
 @@ -231,16 +231,53 @@ int drm_syncobj_find_fence(struct drm_file 
 *file_private,
  struct dma_fence **fence)
{
   struct drm_syncobj *syncobj = drm_syncobj_find(file_private, 
 handle);
 -   int ret = 0;
 +   struct syncobj_wait_entry wait;
 +   int ret;

   if (!syncobj)
   return -ENOENT;

   *fence = drm_syncobj_fence_get(syncobj);
 -   if (!*fence) {
 +   drm_syncobj_put(syncobj);
 +
 +   if (*fence) {
 +   ret = dma_fence_chain_find_seqno(fence, point);
 +   if (!ret)
 +   return 0;
 +   dma_fence_put(*fence);
 +   } else {
   ret = -EINVAL;
   }
 -   drm_syncobj_put(syncobj);
 +
 +   if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
 +   return ret;
 +
 +   memset(&wait, 0, sizeof(wait));
 +   wait.task = current;
 +   wait.point = point;
 +   drm_syncobj_fence_add_wait(syncobj, &wait);
 +
 +   do {
 +   set_current_state(TASK_INTERRUPTIBLE);
 +   if (wait.fence) {
 +   ret = 0;
 +   break;
 +   }
 +
 +   if (signal_pending(current)) {
 +   ret = -ERESTARTSYS;
 +   break;
 +   }
 +
 +   schedule();
 +   } while (1);
>>> I've previously used a dma_fence_proxy so that we could do nonblocking
>>> waits on future submits. That would be preferable (a requirement for
>>> our stupid BKL-driven code).
>> That is exactly what I would definitely NAK.
>>
>> I would rather say we should come up with a wait_multiple_events() macro
>> and completely nuke the custom implementation of this in:
>> 1. dma_fence_default_wait and dma_fence_wait_any_timeout
>> 2. the radeon fence implementation
>> 3. the nouveau fence implementation
>> 4. the syncobj code
>>
>> Cause all of them do exactly the same. The dma_fence implementation
>> unfortunately came up with a custom event handling mechanism instead of
>> extending the core Linux wait_event() system.
> I don't want a blocking wait at all.

Ok I wasn't clear enough :) That is exactly what I would NAK!

The wait must be blocking or otherwise you would allow wait-before-signal.

Christian.

> -Chris

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Intel-gfx] [PATCH 06/10] drm/syncobj: use the timeline point in drm_syncobj_find_fence v3

2018-12-13 Thread Chris Wilson
Quoting Koenig, Christian (2018-12-13 12:11:10)
> Am 13.12.18 um 12:37 schrieb Chris Wilson:
> > Quoting Chunming Zhou (2018-12-11 10:34:45)
> >> From: Christian König 
> >>
> >> Implement finding the right timeline point in drm_syncobj_find_fence.
> >>
> >> v2: return -EINVAL when the point is not submitted yet.
> >> v3: fix reference counting bug, add flags handling as well
> >>
> >> Signed-off-by: Christian König 
> >> ---
> >>   drivers/gpu/drm/drm_syncobj.c | 43 ---
> >>   1 file changed, 40 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> >> index 76ce13dafc4d..d964b348ecba 100644
> >> --- a/drivers/gpu/drm/drm_syncobj.c
> >> +++ b/drivers/gpu/drm/drm_syncobj.c
> >> @@ -231,16 +231,53 @@ int drm_syncobj_find_fence(struct drm_file 
> >> *file_private,
> >> struct dma_fence **fence)
> >>   {
> >>  struct drm_syncobj *syncobj = drm_syncobj_find(file_private, 
> >> handle);
> >> -   int ret = 0;
> >> +   struct syncobj_wait_entry wait;
> >> +   int ret;
> >>   
> >>  if (!syncobj)
> >>  return -ENOENT;
> >>   
> >>  *fence = drm_syncobj_fence_get(syncobj);
> >> -   if (!*fence) {
> >> +   drm_syncobj_put(syncobj);
> >> +
> >> +   if (*fence) {
> >> +   ret = dma_fence_chain_find_seqno(fence, point);
> >> +   if (!ret)
> >> +   return 0;
> >> +   dma_fence_put(*fence);
> >> +   } else {
> >>  ret = -EINVAL;
> >>  }
> >> -   drm_syncobj_put(syncobj);
> >> +
> >> +   if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
> >> +   return ret;
> >> +
> >> +   memset(&wait, 0, sizeof(wait));
> >> +   wait.task = current;
> >> +   wait.point = point;
> >> +   drm_syncobj_fence_add_wait(syncobj, &wait);
> >> +
> >> +   do {
> >> +   set_current_state(TASK_INTERRUPTIBLE);
> >> +   if (wait.fence) {
> >> +   ret = 0;
> >> +   break;
> >> +   }
> >> +
> >> +   if (signal_pending(current)) {
> >> +   ret = -ERESTARTSYS;
> >> +   break;
> >> +   }
> >> +
> >> +   schedule();
> >> +   } while (1);
> > I've previously used a dma_fence_proxy so that we could do nonblocking
> > waits on future submits. That would be preferrable (a requirement for
> > our stupid BKL-driven code).
> 
> That is exactly what I would definitely NAK.
> 
> I would rather say we should come up with a wait_multiple_events() macro 
> and completely nuke the custom implementation of this in:
> 1. dma_fence_default_wait and dma_fence_wait_any_timeout
> 2. the radeon fence implementation
> 3. the nouveau fence implementation
> 4. the syncobj code
> 
> Cause all of them do exactly the same. The dma_fence implementation 
> unfortunately came up with a custom event handling mechanism instead of 
> extending the core Linux wait_event() system.

I don't want a blocking wait at all.
-Chris
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Intel-gfx] [PATCH 06/10] drm/syncobj: use the timeline point in drm_syncobj_find_fence v3

2018-12-13 Thread Koenig, Christian
Am 13.12.18 um 12:37 schrieb Chris Wilson:
> Quoting Chunming Zhou (2018-12-11 10:34:45)
>> From: Christian König 
>>
>> Implement finding the right timeline point in drm_syncobj_find_fence.
>>
>> v2: return -EINVAL when the point is not submitted yet.
>> v3: fix reference counting bug, add flags handling as well
>>
>> Signed-off-by: Christian König 
>> ---
>>   drivers/gpu/drm/drm_syncobj.c | 43 ---
>>   1 file changed, 40 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>> index 76ce13dafc4d..d964b348ecba 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -231,16 +231,53 @@ int drm_syncobj_find_fence(struct drm_file 
>> *file_private,
>> struct dma_fence **fence)
>>   {
>>  struct drm_syncobj *syncobj = drm_syncobj_find(file_private, 
>> handle);
>> -   int ret = 0;
>> +   struct syncobj_wait_entry wait;
>> +   int ret;
>>   
>>  if (!syncobj)
>>  return -ENOENT;
>>   
>>  *fence = drm_syncobj_fence_get(syncobj);
>> -   if (!*fence) {
>> +   drm_syncobj_put(syncobj);
>> +
>> +   if (*fence) {
>> +   ret = dma_fence_chain_find_seqno(fence, point);
>> +   if (!ret)
>> +   return 0;
>> +   dma_fence_put(*fence);
>> +   } else {
>>  ret = -EINVAL;
>>  }
>> -   drm_syncobj_put(syncobj);
>> +
>> +   if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
>> +   return ret;
>> +
>> +   memset(&wait, 0, sizeof(wait));
>> +   wait.task = current;
>> +   wait.point = point;
>> +   drm_syncobj_fence_add_wait(syncobj, &wait);
>> +
>> +   do {
>> +   set_current_state(TASK_INTERRUPTIBLE);
>> +   if (wait.fence) {
>> +   ret = 0;
>> +   break;
>> +   }
>> +
>> +   if (signal_pending(current)) {
>> +   ret = -ERESTARTSYS;
>> +   break;
>> +   }
>> +
>> +   schedule();
>> +   } while (1);
> I've previously used a dma_fence_proxy so that we could do nonblocking
> waits on future submits. That would be preferable (a requirement for
> our stupid BKL-driven code).

That is exactly what I would definitely NAK.

I would rather say we should come up with a wait_multiple_events() macro 
and completely nuke the custom implementation of this in:
1. dma_fence_default_wait and dma_fence_wait_any_timeout
2. the radeon fence implementation
3. the nouveau fence implementation
4. the syncobj code

Cause all of them do exactly the same. The dma_fence implementation 
unfortunately came up with a custom event handling mechanism instead of 
extending the core Linux wait_event() system.

This in turn led to a lot of duplicated handling.

Christian.

> -Chris

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH i-g-t] igt/amdgpu_amd_prime: Bail if we fail to create more contexts

2018-12-13 Thread Chris Wilson
amdgpu has started to report out of space after creating a few contexts.
This is not the scope of this test as here we just verifying that fences
created in amd can be imported and used for synchronisation by i915 and
for that we just need at least one context created!

References: https://bugs.freedesktop.org/show_bug.cgi?id=109049
Signed-off-by: Chris Wilson 
---
 tests/amdgpu/amd_prime.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/amdgpu/amd_prime.c b/tests/amdgpu/amd_prime.c
index bda0ce83d..518c88963 100644
--- a/tests/amdgpu/amd_prime.c
+++ b/tests/amdgpu/amd_prime.c
@@ -354,8 +354,8 @@ static void amd_to_i915(int i915, int amd, 
amdgpu_device_handle device)
contexts = realloc(contexts, size * sizeof(*contexts));
}
 
-   r = amdgpu_cs_ctx_create(device, &contexts[count]);
-   igt_assert_eq(r, 0);
+   if (amdgpu_cs_ctx_create(device, &contexts[count]))
+   break;
 
r = amdgpu_cs_submit(contexts[count], 0, &ibs_request, 1);
igt_assert_eq(r, 0);
@@ -364,6 +364,7 @@ static void amd_to_i915(int i915, int amd, 
amdgpu_device_handle device)
}
 
igt_info("Reservation width = %ld\n", count);
+   igt_require(count);
 
amdgpu_bo_export(ib_result_handle,
 amdgpu_bo_handle_type_dma_buf_fd,
-- 
2.20.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Intel-gfx] [PATCH 06/10] drm/syncobj: use the timeline point in drm_syncobj_find_fence v3

2018-12-13 Thread Chris Wilson
Quoting Chunming Zhou (2018-12-11 10:34:45)
> From: Christian König 
> 
> Implement finding the right timeline point in drm_syncobj_find_fence.
> 
> v2: return -EINVAL when the point is not submitted yet.
> v3: fix reference counting bug, add flags handling as well
> 
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/drm_syncobj.c | 43 ---
>  1 file changed, 40 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 76ce13dafc4d..d964b348ecba 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -231,16 +231,53 @@ int drm_syncobj_find_fence(struct drm_file 
> *file_private,
>struct dma_fence **fence)
>  {
> struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
> -   int ret = 0;
> +   struct syncobj_wait_entry wait;
> +   int ret;
>  
> if (!syncobj)
> return -ENOENT;
>  
> *fence = drm_syncobj_fence_get(syncobj);
> -   if (!*fence) {
> +   drm_syncobj_put(syncobj);
> +
> +   if (*fence) {
> +   ret = dma_fence_chain_find_seqno(fence, point);
> +   if (!ret)
> +   return 0;
> +   dma_fence_put(*fence);
> +   } else {
> ret = -EINVAL;
> }
> -   drm_syncobj_put(syncobj);
> +
> +   if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
> +   return ret;
> +
> +   memset(&wait, 0, sizeof(wait));
> +   wait.task = current;
> +   wait.point = point;
> +   drm_syncobj_fence_add_wait(syncobj, &wait);

> +
> +   do {
> +   set_current_state(TASK_INTERRUPTIBLE);
> +   if (wait.fence) {
> +   ret = 0;
> +   break;
> +   }
> +
> +   if (signal_pending(current)) {
> +   ret = -ERESTARTSYS;
> +   break;
> +   }
> +
> +   schedule();
> +   } while (1);

I've previously used a dma_fence_proxy so that we could do nonblocking
waits on future submits. That would be preferable (a requirement for
our stupid BKL-driven code).
-Chris
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [Intel-gfx] [PATCH 01/10] dma-buf: add new dma_fence_chain container v4

2018-12-13 Thread Chris Wilson
Quoting Chunming Zhou (2018-12-11 10:34:40)
> From: Christian König 
> 
> Lockless container implementation similar to a dma_fence_array, but with
> only two elements per node and automatic garbage collection.
> 
> v2: properly document dma_fence_chain_for_each, add 
> dma_fence_chain_find_seqno,
> drop prev reference during garbage collection if it's not a chain fence.
> v3: use head and iterator for dma_fence_chain_for_each
> v4: fix reference count in dma_fence_chain_enable_signaling
> 
> Signed-off-by: Christian König 
> ---
>  drivers/dma-buf/Makefile  |   3 +-
>  drivers/dma-buf/dma-fence-chain.c | 241 ++
>  include/linux/dma-fence-chain.h   |  81 ++
>  3 files changed, 324 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/dma-buf/dma-fence-chain.c
>  create mode 100644 include/linux/dma-fence-chain.h
> 
> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
> index 0913a6ccab5a..1f006e083eb9 100644
> --- a/drivers/dma-buf/Makefile
> +++ b/drivers/dma-buf/Makefile
> @@ -1,4 +1,5 @@
> -obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o seqno-fence.o
> +obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
> +reservation.o seqno-fence.o
>  obj-$(CONFIG_SYNC_FILE)+= sync_file.o
>  obj-$(CONFIG_SW_SYNC)  += sw_sync.o sync_debug.o
>  obj-$(CONFIG_UDMABUF)  += udmabuf.o
> diff --git a/drivers/dma-buf/dma-fence-chain.c 
> b/drivers/dma-buf/dma-fence-chain.c
> new file mode 100644
> index ..0c5e3c902fa0
> --- /dev/null
> +++ b/drivers/dma-buf/dma-fence-chain.c
> @@ -0,0 +1,241 @@
> +/*

SPDX-License-Identifier: GPL-2.0

> + * fence-chain: chain fences together in a timeline
> + *
> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
> + * Authors:
> + * Christian König 
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published 
> by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but 
> WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#include 
> +
> +static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
> +
> +/**
> + * dma_fence_chain_get_prev - use RCU to get a reference to the previous 
> fence
> + * @chain: chain node to get the previous node from
> + *
> + * Use dma_fence_get_rcu_safe to get a reference to the previous fence of the
> + * chain node.
> + */
> +static struct dma_fence *dma_fence_chain_get_prev(struct dma_fence_chain 
> *chain)
> +{
> +   struct dma_fence *prev;
> +
> +   rcu_read_lock();
> +   prev = dma_fence_get_rcu_safe(&chain->prev);
> +   rcu_read_unlock();
> +   return prev;
> +}
> +
> +/**
> + * dma_fence_chain_walk - chain walking function
> + * @fence: current chain node
> + *
> + * Walk the chain to the next node. Returns the next fence or NULL if we are 
> at
> + * the end of the chain. Garbage collects chain nodes which are already
> + * signaled.
> + */
> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
> +{
> +   struct dma_fence_chain *chain, *prev_chain;
> +   struct dma_fence *prev, *replacement, *tmp;
> +
> +   chain = to_dma_fence_chain(fence);
> +   if (!chain) {
> +   dma_fence_put(fence);
> +   return NULL;
> +   }
> +
> +   while ((prev = dma_fence_chain_get_prev(chain))) {
> +
> +   prev_chain = to_dma_fence_chain(prev);
> +   if (prev_chain) {
> +   if (!dma_fence_is_signaled(prev_chain->fence))
> +   break;
> +
> +   replacement = dma_fence_chain_get_prev(prev_chain);
> +   } else {
> +   if (!dma_fence_is_signaled(prev))
> +   break;
> +
> +   replacement = NULL;
> +   }
> +
> +   tmp = cmpxchg(&chain->prev, prev, replacement);
> +   if (tmp == prev)
> +   dma_fence_put(tmp);
> +   else
> +   dma_fence_put(replacement);

if (cmpxchg(&chain->prev, prev, replacement) == prev)
dma_fence_put(prev)
else
dma_fence_put(replacement);

would be a little easier for me to read.

> +   dma_fence_put(prev);
> +   }
> +
> +   dma_fence_put(fence);

Putting the caller's fence caught me by surprise.

I'd be tempted to do

> +
> +/**
> + * dma_fence_chain_for_each - iterate over all fences in chain
> + * @iter: current fence
> + * @head: starting point
> + *
> + * Iterate over all fences in the chain. We keep a reference to the current
> + * fence while inside the loop which must be dropped when breaking out.
> + */
> +#define 

Re: [PATCH 1/4] drm/amdgpu: increase the MAX ring number

2018-12-13 Thread Koenig, Christian
Am 13.12.18 um 05:09 schrieb Evan Quan:
> As two more SDMA page queue rings are added on Vega20.
>
> Change-Id: I8a3d8fbc924f7c24aaebf17b3f329a4a38fe5a56
> Signed-off-by: Evan Quan 

Reviewed-by: Christian König  for this one.

And Acked-by: Christian König  for patch #3 
and #4.

Regards,
Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 4b31c96a2a2c..fe657788024f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -31,7 +31,7 @@
>   #endif
>   
>   /* max number of rings */
> -#define AMDGPU_MAX_RINGS 21
> +#define AMDGPU_MAX_RINGS 23
>   #define AMDGPU_MAX_GFX_RINGS1
>   #define AMDGPU_MAX_COMPUTE_RINGS8
>   #define AMDGPU_MAX_VCE_RINGS3

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/4] drm/amdgpu: update the vm invalidation engine layout

2018-12-13 Thread Koenig, Christian
Am 13.12.18 um 05:09 schrieb Evan Quan:
> We need new invalidation engine layout due to new SDMA page
> queues added.
>
> Change-Id: I2f3861689bffb9828c9eae744a7a0de4963ac2b6
> Signed-off-by: Evan Quan 

Wow, that looks really nice. Patch is Reviewed-by: Christian König 


> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 47 ++-
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h | 10 ++
>   2 files changed, 35 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index dc4dadc3575c..092a4d111557 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -719,37 +719,40 @@ static bool gmc_v9_0_keep_stolen_memory(struct 
> amdgpu_device *adev)
>   }
>   }
>   
> -static int gmc_v9_0_late_init(void *handle)
> +static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)
>   {
> - struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> - /*
> -  * The latest engine allocation on gfx9 is:
> -  * Engine 0, 1: idle
> -  * Engine 2, 3: firmware
> -  * Engine 4~13: amdgpu ring, subject to change when ring number changes
> -  * Engine 14~15: idle
> -  * Engine 16: kfd tlb invalidation
> -  * Engine 17: Gart flushes
> -  */
> - unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
> + struct amdgpu_ring *ring;
> + unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
> + {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP};
>   unsigned i;
> - int r;
> + unsigned vmhub, inv_eng;
>   
> - if (!gmc_v9_0_keep_stolen_memory(adev))
> - amdgpu_bo_late_init(adev);
> + for (i = 0; i < adev->num_rings; ++i) {
> + ring = adev->rings[i];
> + vmhub = ring->funcs->vmhub;
> +
> + inv_eng = ffs(vm_inv_engs[vmhub]);
> + BUG_ON(!inv_eng);
>   
> - for(i = 0; i < adev->num_rings; ++i) {
> - struct amdgpu_ring *ring = adev->rings[i];
> - unsigned vmhub = ring->funcs->vmhub;
> + ring->vm_inv_eng = inv_eng - 1;
> + change_bit(inv_eng - 1, (unsigned long *)(&vm_inv_engs[vmhub]));
>   
> - ring->vm_inv_eng = vm_inv_eng[vmhub]++;
>   dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
>ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
>   }
>   
> - /* Engine 16 is used for KFD and 17 for GART flushes */
> - for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
> - BUG_ON(vm_inv_eng[i] > 16);
> + return 0;
> +}
> +
> +static int gmc_v9_0_late_init(void *handle)
> +{
> + struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> + int r;
> +
> + if (!gmc_v9_0_keep_stolen_memory(adev))
> + amdgpu_bo_late_init(adev);
> +
> + gmc_v9_0_allocate_vm_inv_eng(adev);
>   
>   if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) {
>   r = gmc_v9_0_ecc_available(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
> index b030ca5ea107..5c8deac65580 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
> @@ -24,6 +24,16 @@
>   #ifndef __GMC_V9_0_H__
>   #define __GMC_V9_0_H__
>   
> + /*
> +  * The latest engine allocation on gfx9 is:
> +  * Engine 2, 3: firmware
> +  * Engine 0, 1, 4~16: amdgpu ring,
> +  *subject to change when ring number changes
> +  * Engine 17: Gart flushes
> +  */
> +#define GFXHUB_FREE_VM_INV_ENGS_BITMAP   0x1FFF3
> +#define MMHUB_FREE_VM_INV_ENGS_BITMAP0x1FFF3
> +
>   extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;
>   extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block;
>   

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx