[PATCH 7/9] drm/syncobj: add transition ioctls between binary and timeline v2

2019-03-19 Thread Chunming Zhou
we need to import/export timeline point.

v2: unify to one transfer ioctl

Signed-off-by: Chunming Zhou 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/drm_internal.h |  2 +
 drivers/gpu/drm/drm_ioctl.c|  2 +
 drivers/gpu/drm/drm_syncobj.c  | 74 ++
 include/uapi/drm/drm.h | 10 +
 4 files changed, 88 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 695179bb88dc..dd11ae5f1eef 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -180,6 +180,8 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, 
void *data,
   struct drm_file *file_private);
 int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
   struct drm_file *file_private);
+int drm_syncobj_transfer_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file_private);
 int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
   struct drm_file *file_private);
 int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 7a534c184e52..92b3b7b2fd81 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -686,6 +686,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, 
drm_syncobj_fd_to_handle_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TRANSFER, drm_syncobj_transfer_ioctl,
+ DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, 
drm_syncobj_timeline_wait_ioctl,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 087fd4e7eaf3..ee2d66e047e7 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -679,6 +679,80 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, 
void *data,
&args->handle);
 }
 
+static int drm_syncobj_transfer_to_timeline(struct drm_file *file_private,
+   struct drm_syncobj_transfer *args)
+{
+   struct drm_syncobj *timeline_syncobj = NULL;
+   struct dma_fence *fence;
+   struct dma_fence_chain *chain;
+   int ret;
+
+   timeline_syncobj = drm_syncobj_find(file_private, args->dst_handle);
+   if (!timeline_syncobj) {
+   return -ENOENT;
+   }
+   ret = drm_syncobj_find_fence(file_private, args->src_handle,
+args->src_point, args->flags,
+&fence);
+   if (ret)
+   goto err;
+   chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+   if (!chain) {
+   ret = -ENOMEM;
+   goto err1;
+   }
+   drm_syncobj_add_point(timeline_syncobj, chain, fence, args->dst_point);
+err1:
+   dma_fence_put(fence);
+err:
+   drm_syncobj_put(timeline_syncobj);
+
+   return ret;
+}
+
+static int
+drm_syncobj_transfer_to_binary(struct drm_file *file_private,
+  struct drm_syncobj_transfer *args)
+{
+   struct drm_syncobj *binary_syncobj = NULL;
+   struct dma_fence *fence;
+   int ret;
+
+   binary_syncobj = drm_syncobj_find(file_private, args->dst_handle);
+   if (!binary_syncobj)
+   return -ENOENT;
+   ret = drm_syncobj_find_fence(file_private, args->src_handle,
+args->src_point, args->flags, &fence);
+   if (ret)
+   goto err;
+   drm_syncobj_replace_fence(binary_syncobj, fence);
+   dma_fence_put(fence);
+err:
+   drm_syncobj_put(binary_syncobj);
+
+   return ret;
+}
+int
+drm_syncobj_transfer_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file_private)
+{
+   struct drm_syncobj_transfer *args = data;
+   int ret;
+
+   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   return -ENODEV;
+
+   if (args->pad)
+   return -EINVAL;
+
+   if (args->dst_point)
+   ret = drm_syncobj_transfer_to_timeline(file_private, args);
+   else
+   ret = drm_syncobj_transfer_to_binary(file_private, args);
+
+   return ret;
+}
+
 static void syncobj_wait_fence_func(struct dma_fence *fence,
struct dma_fence_cb *cb)
 {
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index c62be0840ba5..e8d0d6b51875 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -735,6 +735,15 @@ struct drm_syncobj_handle {
__u32 pad;
 };
 
+struc

[PATCH 6/9] drm/amdgpu: add timeline support in amdgpu CS v3

2019-03-19 Thread Chunming Zhou
syncobj wait/signal operations are appended in command submission.
v2: separate to two kinds in/out_deps functions
v3: fix checking for timeline syncobj

Signed-off-by: Chunming Zhou 
Cc: Tobias Hector 
Cc: Jason Ekstrand 
Cc: Dave Airlie 
Cc: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 152 +
 include/uapi/drm/amdgpu_drm.h  |   8 ++
 3 files changed, 144 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8d0d7f3dd5fb..deec2c796253 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -433,6 +433,12 @@ struct amdgpu_cs_chunk {
void*kdata;
 };
 
+struct amdgpu_cs_post_dep {
+   struct drm_syncobj *syncobj;
+   struct dma_fence_chain *chain;
+   u64 point;
+};
+
 struct amdgpu_cs_parser {
struct amdgpu_device*adev;
struct drm_file *filp;
@@ -462,8 +468,8 @@ struct amdgpu_cs_parser {
/* user fence */
struct amdgpu_bo_list_entry uf_entry;
 
-   unsigned num_post_dep_syncobjs;
-   struct drm_syncobj **post_dep_syncobjs;
+   unsignednum_post_deps;
+   struct amdgpu_cs_post_dep   *post_deps;
 };
 
 static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 52a5e4fdc95b..2f6239b6be6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -215,6 +215,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
break;
 
default:
@@ -804,9 +806,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser 
*parser, int error,
ttm_eu_backoff_reservation(&parser->ticket,
   &parser->validated);
 
-   for (i = 0; i < parser->num_post_dep_syncobjs; i++)
-   drm_syncobj_put(parser->post_dep_syncobjs[i]);
-   kfree(parser->post_dep_syncobjs);
+   for (i = 0; i < parser->num_post_deps; i++) {
+   drm_syncobj_put(parser->post_deps[i].syncobj);
+   kfree(parser->post_deps[i].chain);
+   }
+   kfree(parser->post_deps);
 
dma_fence_put(parser->fence);
 
@@ -1117,13 +1121,18 @@ static int amdgpu_cs_process_fence_dep(struct 
amdgpu_cs_parser *p,
 }
 
 static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
-uint32_t handle)
+uint32_t handle, u64 point,
+u64 flags)
 {
-   int r;
struct dma_fence *fence;
-   r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence);
-   if (r)
+   int r;
+
+   r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+   if (r) {
+   DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
+ handle, point, r);
return r;
+   }
 
r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
dma_fence_put(fence);
@@ -1134,46 +1143,118 @@ static int 
amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
 static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk)
 {
+   struct drm_amdgpu_cs_chunk_sem *deps;
unsigned num_deps;
int i, r;
-   struct drm_amdgpu_cs_chunk_sem *deps;
 
deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_sem);
+   for (i = 0; i < num_deps; ++i) {
+   r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
+ 0, 0);
+   if (r)
+   return r;
+   }
+
+   return 0;
+}
+
 
+static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser 
*p,
+struct amdgpu_cs_chunk 
*chunk)
+{
+   struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+   unsigned num_deps;
+   int i, r;
+
+   syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+   num_deps = chunk->length_dw * 4 /
+   sizeof(struct drm_amdgpu_cs_chunk_syncobj);
for (i = 0; i < num_deps; ++i) {
-   r = amdgpu_syncobj_lookup_and_ad

[PATCH 9/9] drm/amdgpu: update version for timeline syncobj support in amdgpu

2019-03-19 Thread Chunming Zhou
Signed-off-by: Chunming Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8a0732088640..4d8db87048d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -74,9 +74,10 @@
  * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
  * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
  * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.
+ * - 3.31.0 - Add syncobj timeline support to AMDGPU_CS.
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   30
+#define KMS_DRIVER_MINOR   31
 #define KMS_DRIVER_PATCHLEVEL  0
 
 int amdgpu_vram_limit = 0;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/9] drm/syncobj: add support for timeline point wait v8

2019-03-19 Thread Chunming Zhou
The points array is a one-to-one match with the syncobjs array.
v2:
add separate ioctl for timeline point wait, otherwise it would break uapi.
v3:
userspace can specify two kinds of waits:
a. Wait for time point to be completed.
b. and wait for time point to become available
v4:
rebase
v5:
add comment for xxx_WAIT_AVAILABLE
v6: rebase and rework on new container
v7: drop _WAIT_COMPLETED, it is the default anyway
v8: correctly handle garbage collected fences

Signed-off-by: Chunming Zhou 
Signed-off-by: Christian König 
Cc: Tobias Hector 
Cc: Jason Ekstrand 
Cc: Dave Airlie 
Cc: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/drm_internal.h |   2 +
 drivers/gpu/drm/drm_ioctl.c|   2 +
 drivers/gpu/drm/drm_syncobj.c  | 153 ++---
 include/uapi/drm/drm.h |  15 
 4 files changed, 143 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 251d67e04c2d..331ac6225b58 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -182,6 +182,8 @@ int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, 
void *data,
   struct drm_file *file_private);
 int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
   struct drm_file *file_private);
+int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file_private);
 int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 687943df58e1..c984654646fa 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -688,6 +688,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, 
drm_syncobj_timeline_wait_ioctl,
+ DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 19a9ce638119..eae51978cda4 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -61,6 +61,7 @@ struct syncobj_wait_entry {
struct task_struct *task;
struct dma_fence *fence;
struct dma_fence_cb fence_cb;
+   u64point;
 };
 
 static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
@@ -95,6 +96,8 @@ EXPORT_SYMBOL(drm_syncobj_find);
 static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj,
   struct syncobj_wait_entry *wait)
 {
+   struct dma_fence *fence;
+
if (wait->fence)
return;
 
@@ -103,11 +106,15 @@ static void drm_syncobj_fence_add_wait(struct drm_syncobj 
*syncobj,
 * have the lock, try one more time just to be sure we don't add a
 * callback when a fence has already been set.
 */
-   if (syncobj->fence)
-   wait->fence = dma_fence_get(
-   rcu_dereference_protected(syncobj->fence, 1));
-   else
+   fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1));
+   if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) {
+   dma_fence_put(fence);
list_add_tail(&wait->node, &syncobj->cb_list);
+   } else if (!fence) {
+   wait->fence = dma_fence_get_stub();
+   } else {
+   wait->fence = fence;
+   }
spin_unlock(&syncobj->lock);
 }
 
@@ -149,10 +156,8 @@ void drm_syncobj_add_point(struct drm_syncobj *syncobj,
dma_fence_chain_init(chain, prev, fence, point);
rcu_assign_pointer(syncobj->fence, &chain->base);
 
-   list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
-   list_del_init(&cur->node);
+   list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
syncobj_wait_syncobj_func(syncobj, cur);
-   }
spin_unlock(&syncobj->lock);
 
/* Walk the chain once to trigger garbage collection */
@@ -184,10 +189,8 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
rcu_assign_pointer(syncobj->fence, fence);
 
if (fence != old_fence) {
-   list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
-   list_del_init(&cur->node);
+   list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
syncobj_wait_syncobj_func(syncobj, cur);
-   }
}
 
spin

[PATCH 8/9] drm/syncobj: add timeline signal ioctl for syncobj v4

2019-03-19 Thread Chunming Zhou
v2: individually allocate the chain array, since chain nodes are freed independently.
v3: all existing points must already be signaled before the CPU performs a signal
operation, so add a check condition for that.
v4: remove v3 change and add checking to prevent out-of-order

Signed-off-by: Chunming Zhou 
Cc: Tobias Hector 
Cc: Jason Ekstrand 
Cc: Dave Airlie 
Cc: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/drm_internal.h |  2 +
 drivers/gpu/drm/drm_ioctl.c|  2 +
 drivers/gpu/drm/drm_syncobj.c  | 93 ++
 include/uapi/drm/drm.h |  1 +
 4 files changed, 98 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index dd11ae5f1eef..d9a483a5fce0 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void 
*data,
struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_private);
+int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_private);
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private);
 
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 92b3b7b2fd81..d337f161909c 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, 
drm_syncobj_timeline_signal_ioctl,
+ DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, 
DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index ee2d66e047e7..a3702c75fd1e 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1183,6 +1183,99 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void 
*data,
return ret;
 }
 
+int
+drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_private)
+{
+   struct drm_syncobj_timeline_array *args = data;
+   struct drm_syncobj **syncobjs;
+   struct dma_fence_chain **chains;
+   uint64_t *points;
+   uint32_t i, j, timeline_count = 0;
+   int ret;
+
+   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   return -EOPNOTSUPP;
+
+   if (args->pad != 0)
+   return -EINVAL;
+
+   if (args->count_handles == 0)
+   return -EINVAL;
+
+   ret = drm_syncobj_array_find(file_private,
+u64_to_user_ptr(args->handles),
+args->count_handles,
+&syncobjs);
+   if (ret < 0)
+   return ret;
+
+   points = kmalloc_array(args->count_handles, sizeof(*points),
+  GFP_KERNEL);
+   if (!points) {
+   ret = -ENOMEM;
+   goto out;
+   }
+   if (!u64_to_user_ptr(args->points)) {
+   memset(points, 0, args->count_handles * sizeof(uint64_t));
+   } else if (copy_from_user(points, u64_to_user_ptr(args->points),
+ sizeof(uint64_t) * args->count_handles)) {
+   ret = -EFAULT;
+   goto err_points;
+   }
+
+   for (i = 0; i < args->count_handles; i++) {
+   struct dma_fence_chain *chain;
+   struct dma_fence *fence;
+
+   fence = drm_syncobj_fence_get(syncobjs[i]);
+   chain = to_dma_fence_chain(fence);
+   if (chain) {
+   if (points[i] <= fence->seqno) {
+   DRM_ERROR("signal point canot be 
out-of-order!\n");
+   ret = -EPERM;
+   goto err_points;
+   }
+   }
+   if (points[i])
+   timeline_count++;
+   }
+
+   chains = kmalloc_array(timeline_count, sizeof(void *), GFP_KERNEL);
+   if (!chains) {
+   ret = -ENOMEM;
+   goto err_points;
+   }
+   for (i = 0; i < timeline_count; i++) {
+   chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+   if (!chains[i]) {
+   for (j = 0; j < i; j++)
+   kfree(chains[j]);
+   ret = -ENOME

[PATCH 5/9] drm/syncobj: use the timeline point in drm_syncobj_find_fence v4

2019-03-19 Thread Chunming Zhou
From: Christian König 

Implement finding the right timeline point in drm_syncobj_find_fence.

v2: return -EINVAL when the point is not submitted yet.
v3: fix reference counting bug, add flags handling as well
v4: add timeout for find fence

Signed-off-by: Christian König 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/drm_syncobj.c | 50 ---
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 0e62a793c8dd..087fd4e7eaf3 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -213,6 +213,8 @@ static void drm_syncobj_assign_null_handle(struct 
drm_syncobj *syncobj)
dma_fence_put(fence);
 }
 
+/* 5s default for wait submission */
+#define DRM_SYNCOBJ_WAIT_FOR_SUBMIT_TIMEOUT 50ULL
 /**
  * drm_syncobj_find_fence - lookup and reference the fence in a sync object
  * @file_private: drm file private pointer
@@ -233,16 +235,58 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
   struct dma_fence **fence)
 {
struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
-   int ret = 0;
+   struct syncobj_wait_entry wait;
+   u64 timeout = nsecs_to_jiffies64(DRM_SYNCOBJ_WAIT_FOR_SUBMIT_TIMEOUT);
+   int ret;
 
if (!syncobj)
return -ENOENT;
 
*fence = drm_syncobj_fence_get(syncobj);
-   if (!*fence) {
+   drm_syncobj_put(syncobj);
+
+   if (*fence) {
+   ret = dma_fence_chain_find_seqno(fence, point);
+   if (!ret)
+   return 0;
+   dma_fence_put(*fence);
+   } else {
ret = -EINVAL;
}
-   drm_syncobj_put(syncobj);
+
+   if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
+   return ret;
+
+   memset(&wait, 0, sizeof(wait));
+   wait.task = current;
+   wait.point = point;
+   drm_syncobj_fence_add_wait(syncobj, &wait);
+
+   do {
+   set_current_state(TASK_INTERRUPTIBLE);
+   if (wait.fence) {
+   ret = 0;
+   break;
+   }
+if (timeout == 0) {
+ret = -ETIME;
+break;
+}
+
+   if (signal_pending(current)) {
+   ret = -ERESTARTSYS;
+   break;
+   }
+
+timeout = schedule_timeout(timeout);
+   } while (1);
+
+   __set_current_state(TASK_RUNNING);
+   *fence = wait.fence;
+
+   if (wait.node.next)
+   drm_syncobj_remove_wait(syncobj, &wait);
+
return ret;
 }
 EXPORT_SYMBOL(drm_syncobj_find_fence);
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 4/9] drm/syncobj: add timeline payload query ioctl v6

2019-03-19 Thread Chunming Zhou
user mode can query timeline payload.
v2: check return value of copy_to_user
v3: handle querying entry by entry
v4: rebase on new chain container, simplify interface
v5: query last signaled timeline point, not last point.
v6: add unordered point check

Signed-off-by: Chunming Zhou 
Cc: Tobias Hector 
Cc: Jason Ekstrand 
Cc: Dave Airlie 
Cc: Chris Wilson 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/drm_internal.h |  2 ++
 drivers/gpu/drm/drm_ioctl.c|  2 ++
 drivers/gpu/drm/drm_syncobj.c  | 62 ++
 include/uapi/drm/drm.h | 10 ++
 4 files changed, 76 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 331ac6225b58..695179bb88dc 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -188,6 +188,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void 
*data,
struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_private);
+int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file_private);
 
 /* drm_framebuffer.c */
 void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index c984654646fa..7a534c184e52 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -694,6 +694,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
+ DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, 
DRM_UNLOCKED),
DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, 
drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED),
DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, 
DRM_MASTER|DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index eae51978cda4..0e62a793c8dd 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1064,3 +1064,65 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void 
*data,
 
return ret;
 }
+
+int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file_private)
+{
+   struct drm_syncobj_timeline_array *args = data;
+   struct drm_syncobj **syncobjs;
+   uint64_t __user *points = u64_to_user_ptr(args->points);
+   uint32_t i;
+   int ret;
+
+   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   return -ENODEV;
+
+   if (args->pad != 0)
+   return -EINVAL;
+
+   if (args->count_handles == 0)
+   return -EINVAL;
+
+   ret = drm_syncobj_array_find(file_private,
+u64_to_user_ptr(args->handles),
+args->count_handles,
+&syncobjs);
+   if (ret < 0)
+   return ret;
+
+   for (i = 0; i < args->count_handles; i++) {
+   struct dma_fence_chain *chain;
+   struct dma_fence *fence;
+   uint64_t point;
+
+   fence = drm_syncobj_fence_get(syncobjs[i]);
+   chain = to_dma_fence_chain(fence);
+   if (chain) {
+   struct dma_fence *iter, *last_signaled = NULL;
+
+   dma_fence_chain_for_each(iter, fence) {
+   if (!iter)
+   break;
+   dma_fence_put(last_signaled);
+   last_signaled = dma_fence_get(iter);
+   if 
(!to_dma_fence_chain(last_signaled)->prev_seqno)
+   /* It is most likely that timeline has
+* unorder points. */
+   break;
+   }
+   point = dma_fence_is_signaled(last_signaled) ?
+   last_signaled->seqno :
+   to_dma_fence_chain(last_signaled)->prev_seqno;
+   dma_fence_put(last_signaled);
+   } else {
+   point = 0;
+   }
+   ret = copy_to_user(&points[i], &point, sizeof(uint64_t));
+   ret = ret ? -EFAULT : 0;
+   if (ret)
+   break;
+   }
+   drm_syncobj_array_free(syncobjs, args->count_handles);
+
+   return ret;
+}
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 44ebcdd9bd1d..c62be0840ba5 100644
--- a/include/uapi/drm/d

[PATCH 1/9] dma-buf: add new dma_fence_chain container v6

2019-03-19 Thread Chunming Zhou
From: Christian König 

Lockless container implementation similar to a dma_fence_array, but with
only two elements per node and automatic garbage collection.

v2: properly document dma_fence_chain_for_each, add dma_fence_chain_find_seqno,
drop prev reference during garbage collection if it's not a chain fence.
v3: use head and iterator for dma_fence_chain_for_each
v4: fix reference count in dma_fence_chain_enable_signaling
v5: fix iteration when walking each chain node
v6: add __rcu for member 'prev' of struct chain node

Signed-off-by: Christian König 
Cc: Lionel Landwerlin 
---
 drivers/dma-buf/Makefile  |   3 +-
 drivers/dma-buf/dma-fence-chain.c | 241 ++
 include/linux/dma-fence-chain.h   |  81 ++
 3 files changed, 324 insertions(+), 1 deletion(-)
 create mode 100644 drivers/dma-buf/dma-fence-chain.c
 create mode 100644 include/linux/dma-fence-chain.h

diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 0913a6ccab5a..1f006e083eb9 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,4 +1,5 @@
-obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o seqno-fence.o
+obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
+reservation.o seqno-fence.o
 obj-$(CONFIG_SYNC_FILE)+= sync_file.o
 obj-$(CONFIG_SW_SYNC)  += sw_sync.o sync_debug.o
 obj-$(CONFIG_UDMABUF)  += udmabuf.o
diff --git a/drivers/dma-buf/dma-fence-chain.c 
b/drivers/dma-buf/dma-fence-chain.c
new file mode 100644
index ..0c5e3c902fa0
--- /dev/null
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -0,0 +1,241 @@
+/*
+ * fence-chain: chain fences together in a timeline
+ *
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ * Authors:
+ * Christian König 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include 
+
+static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
+
+/**
+ * dma_fence_chain_get_prev - use RCU to get a reference to the previous fence
+ * @chain: chain node to get the previous node from
+ *
+ * Use dma_fence_get_rcu_safe to get a reference to the previous fence of the
+ * chain node.
+ */
+static struct dma_fence *dma_fence_chain_get_prev(struct dma_fence_chain 
*chain)
+{
+   struct dma_fence *prev;
+
+   rcu_read_lock();
+   prev = dma_fence_get_rcu_safe(&chain->prev);
+   rcu_read_unlock();
+   return prev;
+}
+
+/**
+ * dma_fence_chain_walk - chain walking function
+ * @fence: current chain node
+ *
+ * Walk the chain to the next node. Returns the next fence or NULL if we are at
+ * the end of the chain. Garbage collects chain nodes which are already
+ * signaled.
+ */
+struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
+{
+   struct dma_fence_chain *chain, *prev_chain;
+   struct dma_fence *prev, *replacement, *tmp;
+
+   chain = to_dma_fence_chain(fence);
+   if (!chain) {
+   dma_fence_put(fence);
+   return NULL;
+   }
+
+   while ((prev = dma_fence_chain_get_prev(chain))) {
+
+   prev_chain = to_dma_fence_chain(prev);
+   if (prev_chain) {
+   if (!dma_fence_is_signaled(prev_chain->fence))
+   break;
+
+   replacement = dma_fence_chain_get_prev(prev_chain);
+   } else {
+   if (!dma_fence_is_signaled(prev))
+   break;
+
+   replacement = NULL;
+   }
+
+   tmp = cmpxchg(&chain->prev, prev, replacement);
+   if (tmp == prev)
+   dma_fence_put(tmp);
+   else
+   dma_fence_put(replacement);
+   dma_fence_put(prev);
+   }
+
+   dma_fence_put(fence);
+   return prev;
+}
+EXPORT_SYMBOL(dma_fence_chain_walk);
+
+/**
+ * dma_fence_chain_find_seqno - find fence chain node by seqno
+ * @pfence: pointer to the chain node where to start
+ * @seqno: the sequence number to search for
+ *
+ * Advance the fence pointer to the chain node which will signal this sequence
+ * number. If no sequence number is provided then this is a no-op.
+ *
+ * Returns EINVAL if the fence is not a chain node or the sequence number has
+ * not yet advanced far enough.
+ */
+int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
+{
+   struct dma_fence_chain *chain;
+
+   if (!seqno)
+   return 0;
+
+   chain = to_dma_fence_chain(*pfence);
+   if (!chain || chain->base.seqno < 

[PATCH 2/9] drm/syncobj: add new drm_syncobj_add_point interface v4

2019-03-19 Thread Chunming Zhou
From: Christian König 

Use the dma_fence_chain object to create a timeline of fence objects
instead of just replacing the existing fence.

v2: rebase and cleanup
v3: fix garbage collection parameters
v4: add unordered point check, print a warning calltrace

Signed-off-by: Christian König 
Cc: Lionel Landwerlin 
---
 drivers/gpu/drm/drm_syncobj.c | 39 +++
 include/drm/drm_syncobj.h |  5 +
 2 files changed, 44 insertions(+)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 5329e66598c6..19a9ce638119 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -122,6 +122,45 @@ static void drm_syncobj_remove_wait(struct drm_syncobj 
*syncobj,
spin_unlock(&syncobj->lock);
 }
 
+/**
+ * drm_syncobj_add_point - add new timeline point to the syncobj
+ * @syncobj: sync object to add timeline point do
+ * @chain: chain node to use to add the point
+ * @fence: fence to encapsulate in the chain node
+ * @point: sequence number to use for the point
+ *
+ * Add the chain node as new timeline point to the syncobj.
+ */
+void drm_syncobj_add_point(struct drm_syncobj *syncobj,
+  struct dma_fence_chain *chain,
+  struct dma_fence *fence,
+  uint64_t point)
+{
+   struct syncobj_wait_entry *cur, *tmp;
+   struct dma_fence *prev;
+
+   dma_fence_get(fence);
+
+   spin_lock(&syncobj->lock);
+
+   prev = drm_syncobj_fence_get(syncobj);
+   /* You are adding an unorder point to timeline, which could cause 
payload returned from query_ioctl is 0! */
+   WARN_ON_ONCE(prev && prev->seqno >= point);
+   dma_fence_chain_init(chain, prev, fence, point);
+   rcu_assign_pointer(syncobj->fence, &chain->base);
+
+   list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
+   list_del_init(&cur->node);
+   syncobj_wait_syncobj_func(syncobj, cur);
+   }
+   spin_unlock(&syncobj->lock);
+
+   /* Walk the chain once to trigger garbage collection */
+   dma_fence_chain_for_each(fence, prev);
+   dma_fence_put(prev);
+}
+EXPORT_SYMBOL(drm_syncobj_add_point);
+
 /**
  * drm_syncobj_replace_fence - replace fence in a sync object.
  * @syncobj: Sync object to replace fence in
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index 0311c9fdbd2f..6cf7243a1dc5 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -27,6 +27,7 @@
 #define __DRM_SYNCOBJ_H__
 
 #include 
+#include 
 
 struct drm_file;
 
@@ -112,6 +113,10 @@ drm_syncobj_fence_get(struct drm_syncobj *syncobj)
 
 struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
 u32 handle);
+void drm_syncobj_add_point(struct drm_syncobj *syncobj,
+  struct dma_fence_chain *chain,
+  struct dma_fence *fence,
+  uint64_t point);
 void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
   struct dma_fence *fence);
 int drm_syncobj_find_fence(struct drm_file *file_private,
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 8/8] drm/amdgpu: use the new VM backend for clears

2019-03-19 Thread zhoucm1

patch#2 and patch#4 are Acked-by: Chunming Zhou 

patch#1, #3, #5~#8 are Reviewed-by: Chunming Zhou 



On 2019年03月19日 20:44, Christian König wrote:

And remove the existing code when it is unused.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 89 +-
  1 file changed, 32 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 729da1c486cd..af1a7020c3ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -711,11 +711,9 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
  {
struct ttm_operation_ctx ctx = { true, false };
unsigned level = adev->vm_manager.root_level;
+   struct amdgpu_vm_update_params params;
struct amdgpu_bo *ancestor = bo;
-   struct dma_fence *fence = NULL;
unsigned entries, ats_entries;
-   struct amdgpu_ring *ring;
-   struct amdgpu_job *job;
uint64_t addr;
int r;
  
@@ -750,8 +748,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,

}
}
  
-	ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);

-
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return r;
@@ -772,60 +768,45 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
  
  	}
  
-	r = amdgpu_job_alloc_with_ib(adev, 64, &job);

+   memset(¶ms, 0, sizeof(params));
+   params.adev = adev;
+   params.vm = vm;
+
+   r = vm->update_funcs->prepare(¶ms, AMDGPU_FENCE_OWNER_KFD, NULL);
if (r)
return r;
  
-	do {

-   addr = amdgpu_bo_gpu_offset(bo);
-   if (ats_entries) {
-   uint64_t ats_value;
-
-   ats_value = AMDGPU_PTE_DEFAULT_ATC;
-   if (level != AMDGPU_VM_PTB)
-   ats_value |= AMDGPU_PDE_PTE;
-
-   amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
- ats_entries, 0, ats_value);
-   addr += ats_entries * 8;
-   }
-
-   if (entries) {
-   uint64_t value = 0;
-
-   /* Workaround for fault priority problem on GMC9 */
-   if (level == AMDGPU_VM_PTB &&
-   adev->asic_type >= CHIP_VEGA10)
-   value = AMDGPU_PTE_EXECUTABLE;
-
-   amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
- entries, 0, value);
-   }
+   addr = 0;
+   if (ats_entries) {
+   uint64_t ats_value;
  
-		bo = bo->shadow;

-   } while (bo);
+   ats_value = AMDGPU_PTE_DEFAULT_ATC;
+   if (level != AMDGPU_VM_PTB)
+   ats_value |= AMDGPU_PDE_PTE;
  
-	amdgpu_ring_pad_ib(ring, &job->ibs[0]);

+   r = vm->update_funcs->update(¶ms, bo, addr, 0, ats_entries,
+0, ats_value);
+   if (r)
+   return r;
  
-	WARN_ON(job->ibs[0].length_dw > 64);

-   r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
-AMDGPU_FENCE_OWNER_KFD, false);
-   if (r)
-   goto error_free;
+   addr += ats_entries * 8;
+   }
  
-	r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED,

- &fence);
-   if (r)
-   goto error_free;
+   if (entries) {
+   uint64_t value = 0;
  
-	amdgpu_bo_fence(vm->root.base.bo, fence, true);

-   dma_fence_put(fence);
+   /* Workaround for fault priority problem on GMC9 */
+   if (level == AMDGPU_VM_PTB &&
+   adev->asic_type >= CHIP_VEGA10)
+   value = AMDGPU_PTE_EXECUTABLE;
  
-	return 0;

+   r = vm->update_funcs->update(¶ms, bo, addr, 0, entries,
+0, value);
+   if (r)
+   return r;
+   }
  
-error_free:

-   amdgpu_job_free(job);
-   return r;
+   return vm->update_funcs->commit(¶ms, NULL);
  }
  
  /**

@@ -913,7 +894,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
if (r)
goto error_free_pt;
  
-	return 1;

+   return 0;
  
  error_free_pt:

amdgpu_bo_unref(&pt->shadow);
@@ -1421,12 +1402,10 @@ static int amdgpu_vm_update_ptes(struct 
amdgpu_vm_update_params *params,
unsigned shift, parent_shift, mask;
uint64_t incr, entry_end, pe_start;
struct amdgpu_bo *pt;
-   bool need_to_sync;
  
  		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);

-   if (r < 0)
+   if (r)
   

Re: [PATCH 8/9] drm/syncobj: add timeline signal ioctl for syncobj v3

2019-03-19 Thread zhoucm1



On 2019年03月19日 19:54, Lionel Landwerlin wrote:

On 15/03/2019 12:09, Chunming Zhou wrote:
v2: individually allocate chain array, since chain node is free 
independently.
v3: all existing points must be already signaled before cpu perform 
signal operation,

 so add check condition for that.

Signed-off-by: Chunming Zhou 
---
  drivers/gpu/drm/drm_internal.h |   2 +
  drivers/gpu/drm/drm_ioctl.c    |   2 +
  drivers/gpu/drm/drm_syncobj.c  | 103 +
  include/uapi/drm/drm.h |   1 +
  4 files changed, 108 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h 
b/drivers/gpu/drm/drm_internal.h

index dd11ae5f1eef..d9a483a5fce0 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device 
*dev, void *data,

  struct drm_file *file_private);
  int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
   struct drm_file *file_private);
+int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void 
*data,

+  struct drm_file *file_private);
  int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
  struct drm_file *file_private);
  diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 92b3b7b2fd81..d337f161909c 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
    DRM_UNLOCKED|DRM_RENDER_ALLOW),
  DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
    DRM_UNLOCKED|DRM_RENDER_ALLOW),
+    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, 
drm_syncobj_timeline_signal_ioctl,

+  DRM_UNLOCKED|DRM_RENDER_ALLOW),
  DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
    DRM_UNLOCKED|DRM_RENDER_ALLOW),
  DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, 
drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/drm_syncobj.c 
b/drivers/gpu/drm/drm_syncobj.c

index 306c7b7e2770..eaeb038f97d7 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1183,6 +1183,109 @@ drm_syncobj_signal_ioctl(struct drm_device 
*dev, void *data,

  return ret;
  }
  +int
+drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+  struct drm_file *file_private)
+{
+    struct drm_syncobj_timeline_array *args = data;
+    struct drm_syncobj **syncobjs;
+    struct dma_fence_chain **chains;
+    uint64_t *points;
+    uint32_t i, j, timeline_count = 0;
+    int ret;
+
+    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+    return -EOPNOTSUPP;
+
+    if (args->pad != 0)
+    return -EINVAL;
+
+    if (args->count_handles == 0)
+    return -EINVAL;
+
+    ret = drm_syncobj_array_find(file_private,
+ u64_to_user_ptr(args->handles),
+ args->count_handles,
+ &syncobjs);
+    if (ret < 0)
+    return ret;
+
+    for (i = 0; i < args->count_handles; i++) {
+    struct dma_fence_chain *chain;
+    struct dma_fence *fence;
+
+    fence = drm_syncobj_fence_get(syncobjs[i]);
+    chain = to_dma_fence_chain(fence);
+    if (chain) {
+    struct dma_fence *iter;
+
+    dma_fence_chain_for_each(iter, fence) {
+    if (!iter)
+    break;
+    if (!dma_fence_is_signaled(iter)) {
+    dma_fence_put(iter);
+    DRM_ERROR("Client must guarantee all existing 
timeline points signaled before performing host signal operation!");

+    ret = -EPERM;
+    goto out;



Sorry if I'm failing to remember whether we discussed this before.


Signaling a point from the host should be fine even if the previous 
points in the timeline are not signaled.

ok, will remove that checking.



After all this can happen on the device side as well (out of order 
signaling).



I thought the thing we didn't want is out of order submission.

Just checking the last chain node seqno against the host signal point 
should be enough.



What about simply returning -EPERM, we can warn the application from 
userspace?

OK, will add that.





+    }
+    }
+    }
+    }
+
+    points = kmalloc_array(args->count_handles, sizeof(*points),
+   GFP_KERNEL);
+    if (!points) {
+    ret = -ENOMEM;
+    goto out;
+    }
+    if (!u64_to_user_ptr(args->points)) {
+    memset(points, 0, args->count_handles * sizeof(uint64_t));
+    } else if (copy_from_user(points, u64_to_user_ptr(args->points),
+  sizeof(uint64_t) * args->count_handles)) {
+    ret = -EFAULT;
+    goto err_points;
+    }
+
+
+    for (i = 

[PATCH libdrm] tests/amdgpu: minor fix for dispatch/draw test

2019-03-19 Thread Cui, Flora
1. clear cmd buffer
2. make amdgpu_memcpy_dispatch_test static
3. tab/space fix

Change-Id: Idf55f8881f66458b585092eccb55b6042520e4ad
Signed-off-by: Flora Cui 
---
 tests/amdgpu/basic_tests.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
index a364f67..2d47269 100644
--- a/tests/amdgpu/basic_tests.c
+++ b/tests/amdgpu/basic_tests.c
@@ -2177,6 +2177,7 @@ static void 
amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
&bo_cmd, (void **)&ptr_cmd,
&mc_address_cmd, &va_cmd);
CU_ASSERT_EQUAL(r, 0);
+   memset(ptr_cmd, 0, bo_cmd_size);
 
r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
AMDGPU_GEM_DOMAIN_VRAM, 0,
@@ -2227,7 +2228,7 @@ static void 
amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
ptr_cmd[i++] = 1;
 
while (i & 7)
-   ptr_cmd[i++] =  0x1000; /* type3 nop packet */
+   ptr_cmd[i++] = 0x1000; /* type3 nop packet */
 
resources[0] = bo_dst;
resources[1] = bo_shader;
@@ -2283,9 +2284,9 @@ static void 
amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
CU_ASSERT_EQUAL(r, 0);
 }
 
-void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
-uint32_t ip_type,
-uint32_t ring)
+static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
+   uint32_t ip_type,
+   uint32_t ring)
 {
amdgpu_context_handle context_handle;
amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
@@ -2313,6 +2314,7 @@ void amdgpu_memcpy_dispatch_test(amdgpu_device_handle 
device_handle,
&bo_cmd, (void **)&ptr_cmd,
&mc_address_cmd, &va_cmd);
CU_ASSERT_EQUAL(r, 0);
+   memset(ptr_cmd, 0, bo_cmd_size);
 
r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
AMDGPU_GEM_DOMAIN_VRAM, 0,
@@ -2371,7 +2373,7 @@ void amdgpu_memcpy_dispatch_test(amdgpu_device_handle 
device_handle,
ptr_cmd[i++] = 1;
 
while (i & 7)
-   ptr_cmd[i++] =  0x1000; /* type3 nop packet */
+   ptr_cmd[i++] = 0x1000; /* type3 nop packet */
 
resources[0] = bo_shader;
resources[1] = bo_src;
@@ -2799,7 +2801,8 @@ void amdgpu_memset_draw(amdgpu_device_handle 
device_handle,
AMDGPU_GEM_DOMAIN_GTT, 0,
&bo_cmd, (void **)&ptr_cmd,
&mc_address_cmd, &va_cmd);
-CU_ASSERT_EQUAL(r, 0);
+   CU_ASSERT_EQUAL(r, 0);
+   memset(ptr_cmd, 0, bo_cmd_size);
 
r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
AMDGPU_GEM_DOMAIN_VRAM, 0,
@@ -2828,7 +2831,7 @@ void amdgpu_memset_draw(amdgpu_device_handle 
device_handle,
i += amdgpu_draw_draw(ptr_cmd + i);
 
while (i & 7)
-   ptr_cmd[i++] =  0x1000; /* type3 nop packet */
+   ptr_cmd[i++] = 0x1000; /* type3 nop packet */
 
resources[0] = bo_dst;
resources[1] = bo_shader_ps;
@@ -2952,6 +2955,7 @@ static void amdgpu_memcpy_draw(amdgpu_device_handle 
device_handle,
&bo_cmd, (void **)&ptr_cmd,
&mc_address_cmd, &va_cmd);
CU_ASSERT_EQUAL(r, 0);
+   memset(ptr_cmd, 0, bo_cmd_size);
 
r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
AMDGPU_GEM_DOMAIN_VRAM, 0,
@@ -2999,7 +3003,7 @@ static void amdgpu_memcpy_draw(amdgpu_device_handle 
device_handle,
i += amdgpu_draw_draw(ptr_cmd + i);
 
while (i & 7)
-   ptr_cmd[i++] =  0x1000; /* type3 nop packet */
+   ptr_cmd[i++] = 0x1000; /* type3 nop packet */
 
resources[0] = bo_dst;
resources[1] = bo_src;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: Clang warning in drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

2019-03-19 Thread Pan, Xinhui
these two enumerated types are the same for now. both of them might change in the 
future.

I have not used clang, but would .block_id = (int)head->block fix your 
warning? If such a change is acceptable, I can make one then.

Thanks
xinhui


-Original Message-
From: Nathan Chancellor  
Sent: 2019年3月20日 8:54
To: Deucher, Alexander ; Koenig, Christian 
; Zhou, David(ChunMing) ; Pan, 
Xinhui 
Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; 
linux-ker...@vger.kernel.org; clang-built-li...@googlegroups.com
Subject: Clang warning in drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Hi all,

The introduction of this file in commit dbd249c24427 ("drm/amdgpu: add 
amdgpu_ras.c to support ras (v2)") introduces the following Clang
warnings:

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:544:23: warning: implicit conversion 
from enumeration type 'enum amdgpu_ras_block' to different enumeration type 
'enum ta_ras_block' [-Wenum-conversion]
.block_id =  head->block,
 ~~^
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:545:24: warning: implicit conversion 
from enumeration type 'enum amdgpu_ras_error_type' to different enumeration 
type 'enum ta_ras_error_type' [-Wenum-conversion]
.error_type = head->type,
  ~~^~~~
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:549:23: warning: implicit conversion 
from enumeration type 'enum amdgpu_ras_block' to different enumeration type 
'enum ta_ras_block' [-Wenum-conversion]
.block_id =  head->block,
 ~~^
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:550:24: warning: implicit conversion 
from enumeration type 'enum amdgpu_ras_error_type' to different enumeration 
type 'enum ta_ras_error_type' [-Wenum-conversion]
.error_type = head->type,
  ~~^~~~
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:650:26: warning: implicit conversion 
from enumeration type 'enum amdgpu_ras_block' to different enumeration type 
'enum ta_ras_block' [-Wenum-conversion]
.block_id = info->head.block,
~~~^
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:651:35: warning: implicit conversion 
from enumeration type 'enum amdgpu_ras_error_type' to different enumeration 
type 'enum ta_ras_error_type' [-Wenum-conversion]
.inject_error_type = info->head.type,
 ~~~^~~~
6 warnings generated.

Normally, I would sent a fix for this myself but I am not entirely sure why 
these two enumerated types exist when one would do since they have the same 
values minus the prefix. In fact, the ta_ras_{block,error_type} values are 
never used aside from being defined. Some clarification would be appreciated.

Thank you,
Nathan
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: randr: Virtual monitor not present with MST display

2019-03-19 Thread Wentland, Harry


On 2019-03-18 5:58 p.m., Paul Menzel wrote:
> 
> Dear Harry,
> 

... snip ...

>>
>> Michel, do you know if this is supposed to work with
>> xf86-video-amdgpu? When I've tried it before I didn't have any luck but
>> didn't have time to look into it.
> 
> Sorry, what is your question. With the commit from the merge request applied 
> it works here. Also, the commit was added to the master branch in the mean 
> time.
> 

Ah, of course. Somehow I thought this was still targetting the modesetting 
driver and didn't notice it's on video-amdgpu.

Apologies for the confusion.

Harry

> 
> Kind regards,
> 
> Paul
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: Potential NULL pointer dereference in radeon_ttm_tt_populate

2019-03-19 Thread Shaobo He

> See here:
> #if IS_ENABLED(CONFIG_AGP)
>  if (rdev->flags & RADEON_IS_AGP) {
>  return ttm_agp_tt_populate(ttm, ctx);
>  }
> #endif
>

This code appears to be after the potential location of NULL pointer dereference 
that I pointed out. Please see,


```
if (slave && ttm->sg) {
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
   gtt->ttm.dma_address,ttm->num_pages);
ttm->state = tt_unbound;
return 0;
}
```
The argument expression `gtt->ttm.dma_address` is the potentially offending code 
if `gtt` is NULL.


Shaobo
On 3/19/19 12:28 PM, Christian König wrote:

... or the backend methods is not `radeon_backend_func`.

That's the case when it is an AGP backend.


Moreover, could you point out the check of such case before the offending code?


See here:
#if IS_ENABLED(CONFIG_AGP)
     if (rdev->flags & RADEON_IS_AGP) {
     return ttm_agp_tt_populate(ttm, ctx);
     }
#endif



Meaning the check of whether `ttm` is an AGP ttm?

Well exactly that's the point, we never check that the ttm structure is an AGP 
ttm.

We check if the device is an AGP device and if that's the case then the ttm 
structure must be an AGP structure as well.


Regards,
Christian.


Am 19.03.19 um 17:46 schrieb Shaobo He:

Hi Christian,

Thank you very much for your reply. I'm a little confused here so I really 
appreciate if you could clarify it more.


For example, I don't understand why the condition of function 
`radeon_ttm_tt_to_gtt` returning NULL is the argument being an AGP ttm. Based 
on its definition, it returns NULL when the argument is NULL or the backend 
methods is not `radeon_backend_func`. Is there any correlation that I missed 
here?


Moreover, could you point out the check of such case before the offending 
code? Meaning the check of whether `ttm` is an AGP ttm?


Best,
Shaobo
On 2019/3/19 3:16, Christian König wrote:

Hi Shaobo,

that question came up a couple of times now. And the answer is: No, there 
can't be a NULL pointer dereference.


The function radeon_ttm_tt_to_gtt returns NULL only when it is an AGP ttm 
structure, and that case is checked right before the offending code.


Unfortunately I don't see how an automated code checker should ever be able 
to figure that out by itself.


Regards,
Christian.

Am 18.03.19 um 21:58 schrieb Shaobo He:

Hello everyone,

My name is Shaobo He and I am a graduate student at University of Utah. I am 
using a static analysis tool to search for null pointer dereferences and 
came across a potentially invalid memory access in the file 
drivers/gpu/drm/radeon/radeon_ttm.c: in function `radeon_ttm_tt_populate`, 
function `radeon_ttm_tt_to_gtt` can return a NULL pointer which is 
dereferenced by the call to `drm_prime_sg_to_page_addr_arrays`.


Please let me know if it makes sense. I am looking forward to your reply.

Best,
Shaobo
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"

2019-03-19 Thread Liu, Shaoyun
As I understand it, if we want to implement the logic in the bo_add/rmv 
functions, the following conditions all need to be true for a valid 
XGMI request.

1. Check that the adev and the bo_adev are different.
     This is correct in the ROCm implementation 
(amdgpu_amdkfd_gpuvm_map_memory_to_gpu passes in the correct adev it wants 
to map to). For a GEM bo (amdgpu_gem_object_open/close), the adev is 
taken from the bo directly, so it is always the same. Do we need extra adev info, 
and how? Or do we not need to care about XGMI for the graphics side?

2. Check that bo->preferred_domains is AMDGPU_GEM_DOMAIN_VRAM.
     But as Felix mentioned, this domain can be changed by 
AMDGPU_GEM_OP_SET_PLACEMENT — how do we handle this?

Can you explain a little bit more how to handle the eviction with the 
flag in the bo_va structure, as you mentioned? Do you mean we disable 
eviction for a bo with an XGMI request?

Regards
shaoyun.liu


On 2019-03-19 12:09 p.m., Koenig, Christian wrote:
> Am 19.03.19 um 16:42 schrieb Liu, Shaoyun:
>> Thanks Felix .
>>
>> We did consider to put the  logic into bo_add/bo_rmv, but Felix pointed
>> out the  object can be migrate from FB to system memory after allocation
>> .
> Yeah, I also considered that and that is actually a really good argument.
>
> But even in this case you still only need the flag in the bo_va
> structure, not the mapping.
>
>>     I also think of put the  logic inside amdgpu_vm_bo_update_mapping ,
>> but seems that function prefer to take the  dma address already been
>> calculated (change that will cause a huge code reorganize) . That's the
>> reason I put the logic before calling into  amdgpu_vm_bo_update_mapping .
> Both places won't work correctly. The problem is simply that you haven't
> considered what happens when the VM is destroyed.
>
> In this case we should not unmap the XGMI mapping, but rather just throw
> away the page tables completely.
>
> Additional to that the mapping code is often interrupted because the
> calling process receives a signal. So it can happen that all of that is
> called multiple times. The is_xgmi variable prevents that, but that's
> really not straight forward.
>
> Se the PRT handling for how complicated that gets when you attach
> something like this to the mapping. IIRC we have 3 different code paths
> how a PRT mapping can end up being destroyed including a delayed destroy
> handler and callback.
>
>> As the race condition  you described sounds reasonable.  let me think
>> how to fix it .
> Two possible code pattern usually used for this:
>
> A) You have a lock protecting both the counter as well as the operation:
>
> lock();
> if (increment_and_test_counter())
>       power_on()
> unlock();
>
> lock()
> if (decrement_and_test_counter())
>       power_off();
> unlock();
>
> B) The counter is an atomic and you have a lock protecting the operation:
>
> if (atomic_inc_return() == 1) {
>       lock();
>       power_on();
>       unlock();
> }
>
> if (atomic_dec_return() == 0) {
>       lock();
>       if (double_check_atomic_for_race())
>           power_off();
>       unlock();
> }
>
> The later is more efficient, but also more work to implement.
>
> Either way I suggest to move the actual increment/decrement into the
> xgmi code and only have the signalling in the VM code.
>
> Regards,
> Christian.
>
>> Regards
>>
>> shaoyun.liu
>>
>> On 2019-03-19 9:10 a.m., Kuehling, Felix wrote:
>>> On 3/19/2019 8:49 AM, Christian König wrote:
 Yeah, all that is perfectly fine.

 The problem is Shaoyun didn't put this into the mapping code, but
 rather into the VM state machine. So this won't work at all (the
 counter and increment/decrement unbalanced and multiple times).
>>> We tried to consider all the possible ways that this could go wrong.
>>> Basically, every time a mapping is updated, we update the is_xgmi state
>>> and update the counter if it changed. Have you seen the counter become
>>> unbalanced?
>>>
>>>
 The correct place to add this is amdgpu_vm_bo_add/amdgpu_vm_bo_rmv.
>>> I think we considered that. The problem is that a BO can be migrated
>>> between bo_add and bo_rmv. I found that even bo->preferred_domain can
>>> change with AMDGPU_GEM_OP_SET_PLACEMENT. So you can't reliably know
>>> whether to increment your counter, and your counter can become
>>> unbalanced if a migration or AMDGPU_GEM_OP_SET_PLACEMENT happens between
>>> bo_add and bo_rmv.
>>>
>>> Therefore we're trying to check for XGMI mappings every time the mapping
>>> changes and keep track of the state in amdgpu_bo_va_mapping.
>>>
>>>
 Additional to that the approach with the counter doesn't work because
 you don't have a lock protecting the hw update itself. E.g. while
 powering down you can add a mapping which needs to power it up again
 and so powering down and powering up race with each other.
>>> That's a good point.
>>>
>>> Regards,
>>>   Felix
>>>
>>>
 Regards,
 Christian.

 Am 19.03.19 um 13:42 schrieb Kueh

Re: Potential NULL pointer dereference in radeon_ttm_tt_populate

2019-03-19 Thread Christian König

... or the backend methods is not `radeon_backend_func`.

That's the case when it is an AGP backend.

Moreover, could you point out the check of such case before the 
offending code?


See here:
#if IS_ENABLED(CONFIG_AGP)
    if (rdev->flags & RADEON_IS_AGP) {
    return ttm_agp_tt_populate(ttm, ctx);
    }
#endif



Meaning the check of whether `ttm` is an AGP ttm?
Well exactly that's the point, we never check that the ttm structure is 
an AGP ttm.


We check if the device is an AGP device and if that's the case then the 
ttm structure must be an AGP structure as well.


Regards,
Christian.


Am 19.03.19 um 17:46 schrieb Shaobo He:

Hi Christian,

Thank you very much for your reply. I'm a little confused here so I 
really appreciate if you could clarify it more.


For example, I don't understand why the condition of function 
`radeon_ttm_tt_to_gtt` returning NULL is the argument being an AGP 
ttm. Based on its definition, it returns NULL when the argument is 
NULL or the backend methods is not `radeon_backend_func`. Is there any 
correlation that I missed here?


Moreover, could you point out the check of such case before the 
offending code? Meaning the check of whether `ttm` is an AGP ttm?


Best,
Shaobo
On 2019/3/19 3:16, Christian König wrote:

Hi Shaobo,

that question came up a couple of times now. And the answer is: No, 
there can't be a NULL pointer dereference.


The function radeon_ttm_tt_to_gtt returns NULL only when it is an AGP 
ttm structure, and that case is checked right before the offending code.


Unfortunately I don't see how an automated code checker should ever 
be able to figure that out by itself.


Regards,
Christian.

Am 18.03.19 um 21:58 schrieb Shaobo He:

Hello everyone,

My name is Shaobo He and I am a graduate student at University of 
Utah. I am using a static analysis tool to search for null pointer 
dereferences and came across a potentially invalid memory access in 
the file drivers/gpu/drm/radeon/radeon_ttm.c: in function 
`radeon_ttm_tt_populate`, function `radeon_ttm_tt_to_gtt` can return 
a NULL pointer which is dereferenced by the call to 
`drm_prime_sg_to_page_addr_arrays`.


Please let me know if it makes sense. I am looking forward to your 
reply.


Best,
Shaobo
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: Potential NULL pointer dereference in radeon_ttm_tt_populate

2019-03-19 Thread Shaobo He

Hi Christian,

Thank you very much for your reply. I'm a little confused here, so I would really 
appreciate it if you could clarify it more.


For example, I don't understand why the condition of function 
`radeon_ttm_tt_to_gtt` returning NULL is the argument being an AGP ttm. Based on 
its definition, it returns NULL when the argument is NULL or the backend methods 
is not `radeon_backend_func`. Is there any correlation that I missed here?


Moreover, could you point out the check of such case before the offending code? 
Meaning the check of whether `ttm` is an AGP ttm?


Best,
Shaobo
On 2019/3/19 3:16, Christian König wrote:

Hi Shaobo,

that question came up a couple of times now. And the answer is: No, there can't 
be a NULL pointer dereference.


The function radeon_ttm_tt_to_gtt returns NULL only when it is an AGP ttm 
structure, and that case is checked right before the offending code.


Unfortunately I don't see how an automated code checker should ever be able to 
figure that out by itself.


Regards,
Christian.

Am 18.03.19 um 21:58 schrieb Shaobo He:

Hello everyone,

My name is Shaobo He and I am a graduate student at University of Utah. I am 
using a static analysis tool to search for null pointer dereferences and came 
across a potentially invalid memory access in the file 
drivers/gpu/drm/radeon/radeon_ttm.c: in function `radeon_ttm_tt_populate`, 
function `radeon_ttm_tt_to_gtt` can return a NULL pointer which is 
dereferenced by the call to `drm_prime_sg_to_page_addr_arrays`.


Please let me know if it makes sense. I am looking forward to your reply.

Best,
Shaobo
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: Potential NULL pointer dereference in atombios_get_encoder_mode

2019-03-19 Thread Shaobo He



On 2019/3/18 14:15, Shaobo He wrote:

Hello everyone,

My name is Shaobo He and I am a graduate student at University of Utah. I am 
using a static analysis tool to search for null pointer dereferences and came 
across a potentially invalid memory accesses in the file 
drivers/gpu/drm/radeon/atombios_encoders.c: in function 
`atombios_get_encoder_mode`, function `radeon_get_connector_for_encoder_init` 
can return a NULL pointer. It seems the only way this can happen is if there is 
no connector attached to the device of the encoder, or if the connectors are 
all invalid. I was wondering if it can happen in the call context.


Please let me know if it makes sense. I am looking forward to your reply.

Best,
Shaobo

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[ANNOUNCE] xf86-video-amdgpu 19.0.1

2019-03-19 Thread Michel Dänzer

I'm pleased to announce the 19.0.1 release of xf86-video-amdgpu, the
Xorg driver for AMD Radeon GPUs supported by the amdgpu kernel driver.
This release supports xserver versions 1.13-1.20.

This is a minor follow-up release with only one change:

* Add support for RandR output tile properties, allowing monitors using
  DisplayPort Multi Stream Transport tiling to work better out of the
  box.

Thanks to everybody who contributed to this release in any way!


Dave Airlie (1):
  modesetting: add tile property support

Michel Dänzer (1):
  Bump version for the 19.0.1 release

git tag: xf86-video-amdgpu-19.0.1

https://xorg.freedesktop.org/archive/individual/driver/xf86-video-amdgpu-19.0.1.tar.bz2
MD5:  f3b33958e99c896084f12cd48f7ba007  xf86-video-amdgpu-19.0.1.tar.bz2
SHA1: 2952d2a4e25a6e27cfbd49c11727400eb80f7fe0  xf86-video-amdgpu-19.0.1.tar.bz2
SHA256: aaa197196aadcb12e93e10a2aa03c9aedc9ba7b27c2643a8ef620d41e2d1c6d5  
xf86-video-amdgpu-19.0.1.tar.bz2
SHA512: 
dda04e3ccee140354f59e04895cf50cd7dc06a105c40ce35069613c5b5fbe083cab6833fd293d96334f64dde1a0c4af6154d473594a144f37404188ae7f37dd5
  xf86-video-amdgpu-19.0.1.tar.bz2
PGP:  
https://xorg.freedesktop.org/archive/individual/driver/xf86-video-amdgpu-19.0.1.tar.bz2.sig

https://xorg.freedesktop.org/archive/individual/driver/xf86-video-amdgpu-19.0.1.tar.gz
MD5:  4827bddcc4022937b8f581e958ca7a04  xf86-video-amdgpu-19.0.1.tar.gz
SHA1: 48f53d14407a0baf605fbe8ad65774cf1ff37a52  xf86-video-amdgpu-19.0.1.tar.gz
SHA256: 1ad8d92a47453360fdaf07c80ac03e7bb8c1548f0cec4c2aa0166aa9ddd26dff  
xf86-video-amdgpu-19.0.1.tar.gz
SHA512: 
34e45c1d64d034053508dde3409591130ef643ebab62d4fbfa63bdfd71c9909d48fddd92941986714196c5280193fbbe54fb705f96ddd0424e86aa43258a90a4
  xf86-video-amdgpu-19.0.1.tar.gz
PGP:  
https://xorg.freedesktop.org/archive/individual/driver/xf86-video-amdgpu-19.0.1.tar.gz.sig


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer





signature.asc
Description: OpenPGP digital signature
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[ANNOUNCE] xf86-video-ati 19.0.1

2019-03-19 Thread Michel Dänzer

I'm pleased to announce the 19.0.1 release of xf86-video-ati, the Xorg
driver for ATI/AMD Radeon GPUs supported by the radeon kernel driver.
This release supports xserver versions 1.13-1.20.

This is a bug-fix release, with only three changes:

* Fixes for two regressions which crept into the 19.0.0 release.
* Add support for RandR output tile properties, allowing monitors using
  DisplayPort Multi Stream Transport tiling to work better out of the
  box. (Note that DP MST support in the radeon kernel driver is still
  experimental and disabled by default)

Thanks to everybody who contributed to this release in any way!


Dave Airlie (1):
  modesetting: add tile property support

Michel Dänzer (3):
  Revert "glamor: Avoid glamor_create_pixmap for pixmaps backing windows"
  Use radeon_finish in drmmode_crtc_scanout_update
  Bump version for 19.0.1 release

git tag: xf86-video-ati-19.0.1

https://xorg.freedesktop.org/archive/individual/driver/xf86-video-ati-19.0.1.tar.bz2
MD5:  47eccf71823206ade9629cba69de7ef6  xf86-video-ati-19.0.1.tar.bz2
SHA1: e8899c2d237381d9278429a1427e02fcba1d5174  xf86-video-ati-19.0.1.tar.bz2
SHA256: 5cb6015d8664546ad1311bc9c363d7bc41ebf60e7046ceb44dd38e5b707961b0  
xf86-video-ati-19.0.1.tar.bz2
SHA512: 
e04c5395e3a49d81b8f7a4b0e11fe8c9ebd17af056a4eab4541873796dce05b103c93fb185f3a00873010df0655cd7311e6d27e177aeb7345c4c8017bbd1eb17
  xf86-video-ati-19.0.1.tar.bz2
PGP:  
https://xorg.freedesktop.org/archive/individual/driver/xf86-video-ati-19.0.1.tar.bz2.sig

https://xorg.freedesktop.org/archive/individual/driver/xf86-video-ati-19.0.1.tar.gz
MD5:  2135e3681e177d5f62efb04633702b43  xf86-video-ati-19.0.1.tar.gz
SHA1: ad024e197c681cbfc927835e6d41b49bc5de9d2f  xf86-video-ati-19.0.1.tar.gz
SHA256: b32172e8a70e08af46d1c67395bf55022b4c98c4097462e75efedf2deff874a9  
xf86-video-ati-19.0.1.tar.gz
SHA512: 
ec7e0114a4e781da29aaf2085f21cfe3abac6ac253016494c483a5cb70ecf6b09f225b990e26126772ef367c90468a74dc621be86172b39c6d5b8a83d84fea3f
  xf86-video-ati-19.0.1.tar.gz
PGP:  
https://xorg.freedesktop.org/archive/individual/driver/xf86-video-ati-19.0.1.tar.gz.sig


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer





signature.asc
Description: OpenPGP digital signature
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"

2019-03-19 Thread Koenig, Christian
Am 19.03.19 um 16:42 schrieb Liu, Shaoyun:
> Thanks Felix .
>
> We did consider to put the  logic into bo_add/bo_rmv, but Felix pointed
> out the  object can be migrate from FB to system memory after allocation
> .

Yeah, I also considered that and that is actually a really good argument.

But even in this case you still only need the flag in the bo_va 
structure, not the mapping.

>    I also think of put the  logic inside amdgpu_vm_bo_update_mapping ,
> but seems that function prefer to take the  dma address already been
> calculated (change that will cause a huge code reorganize) . That's the
> reason I put the logic before calling into  amdgpu_vm_bo_update_mapping .

Both places won't work correctly. The problem is simply that you haven't 
considered what happens when the VM is destroyed.

In this case we should not unmap the XGMI mapping, but rather just throw 
away the page tables completely.

Additional to that the mapping code is often interrupted because the 
calling process receives a signal. So it can happen that all of that is 
called multiple times. The is_xgmi variable prevents that, but that's 
really not straightforward.

See the PRT handling for how complicated that gets when you attach 
something like this to the mapping. IIRC we have 3 different code paths 
how a PRT mapping can end up being destroyed including a delayed destroy 
handler and callback.

>
> As the race condition  you described sounds reasonable.  let me think
> how to fix it .

Two possible code pattern usually used for this:

A) You have a lock protecting both the counter as well as the operation:

lock();
if (increment_and_test_counter())
     power_on()
unlock();

lock()
if (decrement_and_test_counter())
     power_off();
unlock();

B) The counter is an atomic and you have a lock protecting the operation:

if (atomic_inc_return() == 1) {
     lock();
     power_on();
     unlock();
}

if (atomic_dec_return() == 0) {
     lock();
     if (double_check_atomic_for_race())
         power_off();
     unlock();
}

The later is more efficient, but also more work to implement.

Either way I suggest to move the actual increment/decrement into the 
xgmi code and only have the signalling in the VM code.

Regards,
Christian.

>
> Regards
>
> shaoyun.liu
>
> On 2019-03-19 9:10 a.m., Kuehling, Felix wrote:
>> On 3/19/2019 8:49 AM, Christian König wrote:
>>> Yeah, all that is perfectly fine.
>>>
>>> The problem is Shaoyun didn't put this into the mapping code, but
>>> rather into the VM state machine. So this won't work at all (the
>>> counter and increment/decrement unbalanced and multiple times).
>> We tried to consider all the possible ways that this could go wrong.
>> Basically, every time a mapping is updated, we update the is_xgmi state
>> and update the counter if it changed. Have you seen the counter become
>> unbalanced?
>>
>>
>>> The correct place to add this is amdgpu_vm_bo_add/amdgpu_vm_bo_rmv.
>> I think we considered that. The problem is that a BO can be migrated
>> between bo_add and bo_rmv. I found that even bo->preferred_domain can
>> change with AMDGPU_GEM_OP_SET_PLACEMENT. So you can't reliably know
>> whether to increment your counter, and your counter can become
>> unbalanced if a migration or AMDGPU_GEM_OP_SET_PLACEMENT happens between
>> bo_add and bo_rmv.
>>
>> Therefore we're trying to check for XGMI mappings every time the mapping
>> changes and keep track of the state in amdgpu_bo_va_mapping.
>>
>>
>>> Additional to that the approach with the counter doesn't work because
>>> you don't have a lock protecting the hw update itself. E.g. while
>>> powering down you can add a mapping which needs to power it up again
>>> and so powering down and powering up race with each other.
>> That's a good point.
>>
>> Regards,
>>  Felix
>>
>>
>>> Regards,
>>> Christian.
>>>
>>> Am 19.03.19 um 13:42 schrieb Kuehling, Felix:
 We discussed a few different approaches before settling on this one.

 Maybe it needs some more background. XGMI links are quite power hungry.
 Being able to power them down improves performance for power-limited
 workloads that don't need XGMI. In machine learning, pretty much all
 workloads are power limited on our GPUs, so this is not just a
 theoretical thing. The problem is, how do you know whether you need
 XGMI? You need to know whether there are P2P memory mappings involving
 XGMI. So the natural place to put that is in the memory mapping code.

 If you do spot a race condition in the code, let's talk about how to
 fix it.

 Regards,
       Felix

 On 3/19/2019 8:07 AM, Christian König wrote:
> This reverts commit c9115f8904eef0f880d3b4f8306f553b1bb1c532.
>
> Adding this to the mapping is complete nonsense and the whole
> implementation looks racy. This patch wasn't thoughtfully reviewed
> and should be reverted for now.
>
> Signed-off-by: Christian König 
> ---
>      drivers/gpu/drm/amd/

Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"

2019-03-19 Thread Liu, Shaoyun
Thanks Felix .

We did consider putting the logic into bo_add/bo_rmv, but Felix pointed 
out the object can be migrated from FB to system memory after allocation. 
I also thought of putting the logic inside amdgpu_vm_bo_update_mapping, 
but it seems that function prefers to take the dma address already 
calculated (changing that would cause a huge code reorganization). That's the 
reason I put the logic before calling into amdgpu_vm_bo_update_mapping.

The race condition you described sounds reasonable. Let me think about 
how to fix it.

Regards

shaoyun.liu

On 2019-03-19 9:10 a.m., Kuehling, Felix wrote:
> On 3/19/2019 8:49 AM, Christian König wrote:
>> Yeah, all that is perfectly fine.
>>
>> The problem is Shaoyun didn't put this into the mapping code, but
>> rather into the VM state machine. So this won't work at all (the
>> counter and increment/decrement unbalanced and multiple times).
> We tried to consider all the possible ways that this could go wrong.
> Basically, every time a mapping is updated, we update the is_xgmi state
> and update the counter if it changed. Have you seen the counter become
> unbalanced?
>
>
>> The correct place to add this is amdgpu_vm_bo_add/amdgpu_vm_bo_rmv.
> I think we considered that. The problem is that a BO can be migrated
> between bo_add and bo_rmv. I found that even bo->preferred_domain can
> change with AMDGPU_GEM_OP_SET_PLACEMENT. So you can't reliably know
> whether to increment your counter, and your counter can become
> unbalanced if a migration or AMDGPU_GEM_OP_SET_PLACEMENT happens between
> bo_add and bo_rmv.
>
> Therefore we're trying to check for XGMI mappings every time the mapping
> changes and keep track of the state in amdgpu_bo_va_mapping.
>
>
>> Additional to that the approach with the counter doesn't work because
>> you don't have a lock protecting the hw update itself. E.g. while
>> powering down you can add a mapping which needs to power it up again
>> and so powering down and powering up race with each other.
> That's a good point.
>
> Regards,
>     Felix
>
>
>> Regards,
>> Christian.
>>
>> Am 19.03.19 um 13:42 schrieb Kuehling, Felix:
>>> We discussed a few different approaches before settling on this one.
>>>
>>> Maybe it needs some more background. XGMI links are quite power hungry.
>>> Being able to power them down improves performance for power-limited
>>> workloads that don't need XGMI. In machine learning, pretty much all
>>> workloads are power limited on our GPUs, so this is not just a
>>> theoretical thing. The problem is, how do you know whether you need
>>> XGMI? You need to know whether there are P2P memory mappings involving
>>> XGMI. So the natural place to put that is in the memory mapping code.
>>>
>>> If you do spot a race condition in the code, let's talk about how to
>>> fix it.
>>>
>>> Regards,
>>>      Felix
>>>
>>> On 3/19/2019 8:07 AM, Christian König wrote:
 This reverts commit c9115f8904eef0f880d3b4f8306f553b1bb1c532.

 Adding this to the mapping is complete nonsense and the whole
 implementation looks racy. This patch wasn't thoughtfully reviewed
 and should be reverted for now.

 Signed-off-by: Christian König 
 ---
     drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  3 ---
     drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ---
     drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  1 -
     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 29
 +-
     drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 15 ---
     drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 --
     6 files changed, 1 insertion(+), 52 deletions(-)

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
 b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
 index b5720c1610e1..1db192150532 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
 @@ -931,9 +931,6 @@ struct amdgpu_device {
     int asic_reset_res;
     struct work_struct    xgmi_reset_work;
     -    /* counter of mapped memory through xgmi */
 -    atomic_t    xgmi_map_counter;
 -
     bool    in_baco_reset;
     };
     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
 index 964a4d3f1f43..206583707124 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
 @@ -2018,9 +2018,6 @@ static void
 amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
     r = amdgpu_device_enable_mgpu_fan_boost();
     if (r)
     DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
 -
 -    /*set to low pstate by default */
 -    amdgpu_xgmi_set_pstate(adev, 0);
     }
        static void amdgpu_device_delay_enable_gfx_off(struct
 work_struct *work)
 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
 b/dr

Re: [PATCH] drm/amd/amdgpu: fix incorrect translation about the PCIe MLW info

2019-03-19 Thread Alex Deucher
On Tue, Mar 19, 2019 at 12:26 AM Chengming Gui  wrote:
>
> Max Link Width's full mask is 0x3f,
> and it's highest bit express X16.
>
> Signed-off-by: Chengming Gui 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++--
>  1 file changed, 2 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 964a4d3..435f0d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3763,15 +3763,6 @@ static void amdgpu_device_get_pcie_info(struct 
> amdgpu_device *adev)
> } else {
> switch (platform_link_width) {
> case PCIE_LNK_X32:
> -   adev->pm.pcie_mlw_mask = 
> (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
> - 
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
> - 
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
> - 
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
> - 
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
> - 
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
> - 
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
> -   break;
> -   case PCIE_LNK_X16:
> adev->pm.pcie_mlw_mask = 
> (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
>   
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
>   
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
> @@ -3779,13 +3770,14 @@ static void amdgpu_device_get_pcie_info(struct 
> amdgpu_device *adev)
>   
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
>   
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
> break;
> -   case PCIE_LNK_X12:
> +   case PCIE_LNK_X16:

Not sure I understand this change or the one below.  If we have a x16
link, why don't you want CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 set?

Alex

> adev->pm.pcie_mlw_mask = 
> (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
>   
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
>   
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
>   
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
>   
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
> break;
> +   case PCIE_LNK_X12:
> case PCIE_LNK_X8:
> adev->pm.pcie_mlw_mask = 
> (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
>   
> CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
> --
> 2.7.4
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: add one rlc version into gfxoff blacklist

2019-03-19 Thread Alex Deucher
On Tue, Mar 19, 2019 at 9:31 AM Huang Rui  wrote:
>
> RLC #53815 ucode has the noise issue on 4k playback while gfxoff enabled.
>
> Signed-off-by: Huang Rui 

Acked-by: Alex Deucher 


> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 6b48d4c..3765d97 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -589,6 +589,7 @@ static void gfx_v9_0_check_if_need_gfxoff(struct 
> amdgpu_device *adev)
> if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
> break;
> if ((adev->gfx.rlc_fw_version < 531) ||
> +   (adev->gfx.rlc_fw_version == 53815) ||
> (adev->gfx.rlc_feature_version < 1) ||
> !adev->gfx.rlc.is_rlc_v2_1)
> adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
> --
> 2.7.4
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: Radeon init fail and resulting cascade of UBSAN errors

2019-03-19 Thread Meelis Roos

I just put a Radeon 9600XT into an Athlon XP machine. I do not know if the card
is good or not, but 2D dumb framebufer works fine with radeon module. However,
init of the acceleration engine fails. I compiled my own kernel (v5.0 from git)
with UBSAN (no other debug yet) and got a cascade of UBSAN warnings and two 
other
errors on bootup.


Meanwhile I have concluded that the card must have been weakly inserted or 
something
like that - after reseating, the init failure has not reappeared.


A "kobject (b8a6a825): tried to init an initialized object", some UBSAN warnings
from ttm and finally a kernel read fault.

Whether or not the initial problem is hardware or software problem, the rest
seem to be our error handling bugs.


But this still holds - after init failure, things seem to break.



Full dmesg:

[    0.00] Linux version 5.0.0 (mroos@kt600) (gcc version 8.3.0 (Debian 
8.3.0-2)) #2 Tue Mar 12 18:48:05 EET 2019
[    0.00] x86/fpu: x87 FPU will use FXSAVE
[    0.00] BIOS-provided physical RAM map:
[    0.00] BIOS-e820: [mem 0x-0x0009fbff] usable
[    0.00] BIOS-e820: [mem 0x0009fc00-0x0009] reserved
[    0.00] BIOS-e820: [mem 0x000ce000-0x000d3fff] reserved
[    0.00] BIOS-e820: [mem 0x000f-0x000f] reserved
[    0.00] BIOS-e820: [mem 0x0010-0x7ffe] usable
[    0.00] BIOS-e820: [mem 0x7fff-0x7fff7fff] ACPI data
[    0.00] BIOS-e820: [mem 0x7fff8000-0x7fff] ACPI NVS
[    0.00] BIOS-e820: [mem 0xfec0-0xfec00fff] reserved
[    0.00] BIOS-e820: [mem 0xfee0-0xfee00fff] reserved
[    0.00] BIOS-e820: [mem 0xfff8-0x] reserved
[    0.00] Notice: NX (Execute Disable) protection missing in CPU!
[    0.00] Legacy DMI 2.3 present.
[    0.00] DMI:  K7VT6-C /K7VT6-C , BIOS P1.50 06/15/2006
[    0.00] tsc: Fast TSC calibration using PIT
[    0.00] tsc: Detected 1798.193 MHz processor
[    0.006124] e820: update [mem 0x-0x0fff] usable ==> reserved
[    0.006128] e820: remove [mem 0x000a-0x000f] usable
[    0.006138] last_pfn = 0x7fff0 max_arch_pfn = 0x10
[    0.006147] MTRR default type: uncachable
[    0.006149] MTRR fixed ranges enabled:
[    0.006151]   0-9 write-back
[    0.006153]   A-B uncachable
[    0.006154]   C-C7FFF write-protect
[    0.006155]   C8000-E uncachable
[    0.006156]   F-F write-protect
[    0.006157] MTRR variable ranges enabled:
[    0.006160]   0 base 0 mask F8000 write-back
[    0.006161]   1 disabled
[    0.006161]   2 disabled
[    0.006162]   3 disabled
[    0.006163]   4 disabled
[    0.006164]   5 base 0E000 mask FF000 write-combining
[    0.006165]   6 disabled
[    0.006165]   7 disabled
[    0.006536] x86/PAT: Configuration [0-7]: WB  WC  UC- UC  WB  WP  UC- WT
[    0.006896] check: Scanning 1 areas for low memory corruption
[    0.006906] initial memory mapped: [mem 0x-0x1fbf]
[    0.006915] Base memory trampoline at [(ptrval)] 9b000 size 16384
[    0.006963] BRK [0x1f85d000, 0x1f85dfff] PGTABLE
[    0.006999] ACPI: Early table checksum verification disabled
[    0.010441] ACPI: RSDP 0x000FA920 14 (v00 AMI   )
[    0.010448] ACPI: RSDT 0x7FFF 2C (v01 AMIINT VIA_K7   
0010 MSFT 0097)
[    0.010461] ACPI: FACP 0x7FFF0030 81 (v01 AMIINT VIA_K7   
0011 MSFT 0097)
[    0.010477] ACPI: DSDT 0x7FFF0120 00324C (v01 VIA    K7VT4    
1000 INTL 02002024)
[    0.010484] ACPI: FACS 0x7FFF8000 40
[    0.010489] ACPI: APIC 0x7FFF00C0 54 (v01 AMIINT VIA_K7   
0009 MSFT 0097)
[    0.010506] ACPI: Local APIC address 0xfee0
[    0.010513] 1163MB HIGHMEM available.
[    0.010518] 883MB LOWMEM available.
[    0.010519]   mapped low ram: 0 - 373fe000
[    0.010521]   low ram: 0 - 373fe000
[    0.010524] BRK [0x1f85e000, 0x1f85efff] PGTABLE
[    0.011606] Zone ranges:
[    0.011610]   DMA  [mem 0x1000-0x00ff]
[    0.011615]   Normal   [mem 0x0100-0x373fdfff]
[    0.011618]   HighMem  [mem 0x373fe000-0x7ffe]
[    0.011620] Movable zone start for each node
[    0.011622] Early memory node ranges
[    0.011624]   node   0: [mem 0x1000-0x0009efff]
[    0.011627]   node   0: [mem 0x0010-0x7ffe]
[    0.011638] Zeroed struct page in unavailable ranges: 98 pages
[    0.011641] Initmem setup node 0 [mem 0x1000-0x7ffe]
[    0.011648] On node 0 totalpages: 524174
[    0.011650]   DMA zone: 32 pages used for memmap
[    0.011651]   DMA zone: 0 pages reserved
[    0.011653]   DMA zone: 3998 pages, LIFO batch:0
[    0.011993]   Normal zone: 1736 pages used for memmap
[    0.011994]   Normal zone: 06 pages, LIFO

5.1-rc1: radeon WARNING: CPU: 1 PID: 160 at drivers/gpu/drm/drm_fourcc.c:278 drm_format_info+0x74/0xe0 [drm]

2019-03-19 Thread Meelis Roos

I am seeing the following warning on at least 2 different servers with onboard 
radeon graphics.
It was not there with 5.0.

[4.764838] [drm] radeon kernel modesetting enabled.
[4.765334] radeon :01:01.0: vgaarb: deactivate vga console
[4.767746] Console: switching to colour dummy device 80x25
[4.768285] [drm] initializing kernel modesetting (RV100 0x1002:0x515E 
0x1014:0x0305 0x02).
[4.768492] radeon :01:01.0: VRAM: 128M 0xD000 - 
0xD7FF (16M used)
[4.768498] radeon :01:01.0: GTT: 512M 0xB000 - 
0xCFFF
[4.769756] [drm] Detected VRAM RAM=128M, BAR=128M
[4.769760] [drm] RAM width 16bits DDR
[4.771231] [TTM] Zone  kernel: Available graphics memory: 2023308 kiB
[4.771238] [TTM] Initializing pool allocator
[4.771247] [TTM] Initializing DMA pool allocator
[4.771282] [drm] radeon: 16M of VRAM memory ready
[4.771285] [drm] radeon: 512M of GTT memory ready.
[4.771308] [drm] GART: num cpu pages 131072, num gpu pages 131072
[4.792356] [drm] PCI GART of 512M enabled (table at 0xBBA0).
[4.792476] radeon :01:01.0: WB disabled
[4.792486] radeon :01:01.0: fence driver on ring 0 use gpu addr 
0xb000 and cpu addr 0x(ptrval)
[4.792492] [drm] Supports vblank timestamp caching Rev 2 (21.10.2013).
[4.792495] [drm] Driver supports precise vblank timestamp query.
[4.792518] [drm] radeon: irq initialized.
[4.792537] [drm] Loading R100 Microcode
[4.809842] [drm] radeon: ring at 0xB0001000
[4.809886] [drm] ring test succeeded in 1 usecs
[4.810559] [drm] ib test succeeded in 0 usecs
[4.811424] [drm] No valid Ext TMDS info found in BIOS
[4.811472] [drm] Radeon Display Connectors
[4.811476] [drm] Connector 0:
[4.811479] [drm]   VGA-1
[4.811482] [drm]   DDC: 0x60 0x60 0x60 0x60 0x60 0x60 0x60 0x60
[4.811485] [drm]   Encoders:
[4.811488] [drm] CRT1: INTERNAL_DAC1
[4.811491] [drm] Connector 1:
[4.811494] [drm]   DVI-D-1
[4.811496] [drm]   HPD2
[4.811499] [drm]   DDC: 0x64 0x64 0x64 0x64 0x64 0x64 0x64 0x64
[4.811502] [drm]   Encoders:
[4.811504] [drm] DFP2: INTERNAL_DVO1
[4.857555] [drm] requested bpp 16, scaled depth down to 0
[4.857643] WARNING: CPU: 1 PID: 160 at drivers/gpu/drm/drm_fourcc.c:278 
drm_format_info+0x74/0xe0 [drm]
[4.857659] Modules linked in: radeon(+) coretemp kvm_intel ses kvm 
i2c_algo_bit drm_kms_helper syscopyarea sysfillrect iTCO_wdt sysimgblt 
fb_sys_fops iTCO_vendor_support ttm bnx2 enclosure ipmi_ssif drm lpc_ich 
irqbypass drm_panel_orientation_quirks scsi_transport_sas i2c_i801 evdev 
psmouse ibmpex mfd_core ibmaem pcspkr ata_piix ipmi_devintf libata 
ipmi_msghandler i5000_edac i5k_amb button rng_core ip_tables x_tables autofs4
[4.857698] CPU: 1 PID: 160 Comm: systemd-udevd Not tainted 5.1.0-rc1 #11
[4.857702] Hardware name: IBM IBM System x3550 -[7978E2G]-/System Planar, 
BIOS -[GFE149BUS-1.17]- 02/14/2011
[4.857717] RIP: 0010:drm_format_info+0x74/0xe0 [drm]
[4.857722] Code: 4a 8d 14 33 48 89 d8 48 81 fa f0 05 00 00 77 41 45 39 ef 75 d3 
49 83 fc 4c 77 5b 48 83 c4 08 5b 5d 41 5c 41 5d 41 5e 41 5f c3 <0f> 0b 48 83 c4 
08 31 c0 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 cf
[4.857729] RSP: 0018:a5e4002278b8 EFLAGS: 00010246
[4.857733] RAX: c03e99bc RBX: c03e99d0 RCX: c0412600
[4.857736] RDX: 05e0 RSI:  RDI: 
[4.857740] RBP: 004c R08: c04e6a60 R09: 
[4.857743] R10: 0002 R11:  R12: 004b
[4.857747] R13: 36313050 R14: 3fc16c24 R15: 
[4.857751] FS:  7fb1a5852d40() GS:9c1f3fd0() 
knlGS:
[4.857755] CS:  0010 DS:  ES:  CR0: 80050033
[4.857759] CR2: 5619ca47603c CR3: 000139abc000 CR4: 06e0
[4.857762] Call Trace:
[4.857783]  drm_format_plane_cpp+0xa/0x40 [drm]
[4.857946]  radeonfb_create+0x74/0x4a0 [radeon]
[4.857960]  ? printk+0x53/0x6a
[4.857978]  __drm_fb_helper_initial_config_and_unlock+0x2d6/0x510 
[drm_kms_helper]
[4.858026]  radeon_fbdev_init+0x106/0x130 [radeon]
[4.858075]  radeon_modeset_init.cold.16+0x2a2/0xac5 [radeon]
[4.858116]  radeon_driver_load_kms+0xa0/0x1e0 [radeon]
[4.858129]  drm_dev_register+0x10c/0x1a0 [drm]
[4.858144]  drm_get_pci_dev+0x90/0x190 [drm]
[4.858152]  pci_device_probe+0xa1/0x110
[4.858159]  really_probe+0xe6/0x2a0
[4.858163]  driver_probe_device+0x59/0xe0
[4.858167]  device_driver_attach+0x4b/0x50
[4.858170]  __driver_attach+0x46/0xb0
[4.858174]  ? device_driver_attach+0x50/0x50
[4.858178]  bus_for_each_dev+0x72/0xb0
[4.858183]  bus_add_driver+0xff/0x1f0
[4.858187]  ? 0xc0791000
[4.858191]  driver_register+0x56/0xe0
[4.858194]  ? 

Re: [PATCH 3/4] drm/amd/display: In VRR mode, do DRM core vblank handling at end of vblank.

2019-03-19 Thread Kazlauskas, Nicholas
On 3/19/19 9:23 AM, Kazlauskas, Nicholas wrote:
> On 3/18/19 1:19 PM, Mario Kleiner wrote:
>> In VRR mode, proper vblank/pageflip timestamps can only be computed
>> after the display scanout position has left front-porch. Therefore
>> delay calls to drm_crtc_handle_vblank(), and thereby calls to
>> drm_update_vblank_count() and pageflip event delivery, to after the
>> end of front-porch when in VRR mode.
>>
>> We add a new vupdate irq, which triggers at the end of the vupdate
>> interval, ie. at the end of vblank, and calls the core vblank handler
>> function. The new irq handler is not executed in standard non-VRR
>> mode, so vblank handling for fixed refresh rate mode is identical
>> to the past implementation.
>>
>> Signed-off-by: Mario Kleiner 

Looks I lost some of my comments I wanted to send in my last email. Just 
a few nitpicks (including some things Paul mentioned).

Also meant to CC Harry on this as well.

>> ---
>>drivers/gpu/drm/amd/amdgpu/amdgpu.h|   1 +
>>drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  | 129 
>> -
>>drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h  |   9 ++
>>.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c  |  22 
>>.../amd/display/dc/irq/dce110/irq_service_dce110.c |   7 +-
>>.../amd/display/dc/irq/dce120/irq_service_dce120.c |   7 +-
>>.../amd/display/dc/irq/dce80/irq_service_dce80.c   |   6 +-
>>.../amd/display/dc/irq/dcn10/irq_service_dcn10.c   |  40 +--
>>8 files changed, 205 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index f88761a..64167dd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -827,6 +827,7 @@ struct amdgpu_device {
>>  /* For pre-DCE11. DCE11 and later are in "struct amdgpu_device->dm" */
>>  struct work_struct  hotplug_work;
>>  struct amdgpu_irq_src   crtc_irq;
>> +struct amdgpu_irq_src   vupdate_irq;
>>  struct amdgpu_irq_src   pageflip_irq;
>>  struct amdgpu_irq_src   hpd_irq;
>>
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> index 85e4f87..555d9e9f 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> @@ -315,6 +315,32 @@ static void dm_pflip_high_irq(void *interrupt_params)
>>  drm_crtc_vblank_put(&amdgpu_crtc->base);
>>}
>>
>> +static void dm_vupdate_high_irq(void *interrupt_params)
>> +{
>> +struct common_irq_params *irq_params = interrupt_params;
>> +struct amdgpu_device *adev = irq_params->adev;
>> +struct amdgpu_crtc *acrtc;
>> +struct dm_crtc_state *acrtc_state;
>> +
>> +acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - 
>> IRQ_TYPE_VUPDATE);
>> +
>> +if (acrtc) {
>> +acrtc_state = to_dm_crtc_state(acrtc->base.state);
>> +
>> +DRM_DEBUG_DRIVER("crtc:%d, vupdate-vrr:%d\n", acrtc->crtc_id,
>> + amdgpu_dm_vrr_active(acrtc_state));
>> +
>> +/* Core vblank handling is done here after end of front-porch in
>> + * vrr mode, as vblank timestamping will give valid results
>> + * while now done after front-porch. This will also deliver
>> + * page-flip completion events that have been queued to us
>> + * if a pageflip happened inside front-porch.
>> + */
>> +if (amdgpu_dm_vrr_active(acrtc_state))
>> +drm_crtc_handle_vblank(&acrtc->base)
> I was thinking that 3 and 4 might have to be merged, but VRR pflip
> timestamping seems to be the same as it was before (off by a line or
> two) since it's not handled here yet. This seems to fix vblank events
> and timestamping at least.
> 
>> +}
>> +}
>> +
>>static void dm_crtc_high_irq(void *interrupt_params)
>>{
>>  struct common_irq_params *irq_params = interrupt_params;
>> @@ -325,11 +351,24 @@ static void dm_crtc_high_irq(void *interrupt_params)
>>  acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - 
>> IRQ_TYPE_VBLANK);
>>
>>  if (acrtc) {
>> -drm_crtc_handle_vblank(&acrtc->base);
>> -amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
>> -
>>  acrtc_state = to_dm_crtc_state(acrtc->base.state);
>>
>> +DRM_DEBUG_DRIVER("crtc:%d, vupdate-vrr:%d\n", acrtc->crtc_id,
>> + amdgpu_dm_vrr_active(acrtc_state));
>> +
>> +/* Core vblank handling at start of front-porch is only possible
>> + * in non-vrr mode, as only there vblank timestamping will give
>> + * valid results while done in front-porch. Otherwise defer it
>> + * to dm_vupdate_high_irq after end of front-porch.
>> + */
>> +if (!amdgpu_dm_vrr_active

[PATCH] drm/amdgpu: add one rlc version into gfxoff blacklist

2019-03-19 Thread Huang Rui
RLC #53815 ucode has the noise issue on 4k playback while gfxoff enabled.

Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6b48d4c..3765d97 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -589,6 +589,7 @@ static void gfx_v9_0_check_if_need_gfxoff(struct 
amdgpu_device *adev)
if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
break;
if ((adev->gfx.rlc_fw_version < 531) ||
+   (adev->gfx.rlc_fw_version == 53815) ||
(adev->gfx.rlc_feature_version < 1) ||
!adev->gfx.rlc.is_rlc_v2_1)
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 3/4] drm/amd/display: In VRR mode, do DRM core vblank handling at end of vblank.

2019-03-19 Thread Kazlauskas, Nicholas
On 3/18/19 1:19 PM, Mario Kleiner wrote:
> In VRR mode, proper vblank/pageflip timestamps can only be computed
> after the display scanout position has left front-porch. Therefore
> delay calls to drm_crtc_handle_vblank(), and thereby calls to
> drm_update_vblank_count() and pageflip event delivery, to after the
> end of front-porch when in VRR mode.
> 
> We add a new vupdate irq, which triggers at the end of the vupdate
> interval, ie. at the end of vblank, and calls the core vblank handler
> function. The new irq handler is not executed in standard non-VRR
> mode, so vblank handling for fixed refresh rate mode is identical
> to the past implementation.
> 
> Signed-off-by: Mario Kleiner 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h|   1 +
>   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  | 129 
> -
>   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h  |   9 ++
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c  |  22 
>   .../amd/display/dc/irq/dce110/irq_service_dce110.c |   7 +-
>   .../amd/display/dc/irq/dce120/irq_service_dce120.c |   7 +-
>   .../amd/display/dc/irq/dce80/irq_service_dce80.c   |   6 +-
>   .../amd/display/dc/irq/dcn10/irq_service_dcn10.c   |  40 +--
>   8 files changed, 205 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index f88761a..64167dd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -827,6 +827,7 @@ struct amdgpu_device {
>   /* For pre-DCE11. DCE11 and later are in "struct amdgpu_device->dm" */
>   struct work_struct  hotplug_work;
>   struct amdgpu_irq_src   crtc_irq;
> + struct amdgpu_irq_src   vupdate_irq;
>   struct amdgpu_irq_src   pageflip_irq;
>   struct amdgpu_irq_src   hpd_irq;
>   
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 85e4f87..555d9e9f 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -315,6 +315,32 @@ static void dm_pflip_high_irq(void *interrupt_params)
>   drm_crtc_vblank_put(&amdgpu_crtc->base);
>   }
>   
> +static void dm_vupdate_high_irq(void *interrupt_params)
> +{
> + struct common_irq_params *irq_params = interrupt_params;
> + struct amdgpu_device *adev = irq_params->adev;
> + struct amdgpu_crtc *acrtc;
> + struct dm_crtc_state *acrtc_state;
> +
> + acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - 
> IRQ_TYPE_VUPDATE);
> +
> + if (acrtc) {
> + acrtc_state = to_dm_crtc_state(acrtc->base.state);
> +
> + DRM_DEBUG_DRIVER("crtc:%d, vupdate-vrr:%d\n", acrtc->crtc_id,
> +  amdgpu_dm_vrr_active(acrtc_state));
> +
> + /* Core vblank handling is done here after end of front-porch in
> +  * vrr mode, as vblank timestamping will give valid results
> +  * while now done after front-porch. This will also deliver
> +  * page-flip completion events that have been queued to us
> +  * if a pageflip happened inside front-porch.
> +  */
> + if (amdgpu_dm_vrr_active(acrtc_state))
> + drm_crtc_handle_vblank(&acrtc->base)
I was thinking that 3 and 4 might have to be merged, but VRR pflip 
timestamping seems to be the same as it was before (off by a line or 
two) since it's not handled here yet. This seems to fix vblank events 
and timestamping at least.

> + }
> +}
> +
>   static void dm_crtc_high_irq(void *interrupt_params)
>   {
>   struct common_irq_params *irq_params = interrupt_params;
> @@ -325,11 +351,24 @@ static void dm_crtc_high_irq(void *interrupt_params)
>   acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - 
> IRQ_TYPE_VBLANK);
>   
>   if (acrtc) {
> - drm_crtc_handle_vblank(&acrtc->base);
> - amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
> -
>   acrtc_state = to_dm_crtc_state(acrtc->base.state);
>   
> + DRM_DEBUG_DRIVER("crtc:%d, vupdate-vrr:%d\n", acrtc->crtc_id,
> +  amdgpu_dm_vrr_active(acrtc_state));
> +
> + /* Core vblank handling at start of front-porch is only possible
> +  * in non-vrr mode, as only there vblank timestamping will give
> +  * valid results while done in front-porch. Otherwise defer it
> +  * to dm_vupdate_high_irq after end of front-porch.
> +  */
> + if (!amdgpu_dm_vrr_active(acrtc_state))
> + drm_crtc_handle_vblank(&acrtc->base);
> +
> + /* Following stuff must happen at start of vblank, for crc
> +  * computation and below-the-range btr support in vrr mode.
> +  */
> + amdgpu_dm_crtc_handle_crc_irq(&

Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"

2019-03-19 Thread Kuehling, Felix
On 3/19/2019 8:49 AM, Christian König wrote:
> Yeah, all that is perfectly fine.
>
> The problem is Shaoyun didn't put this into the mapping code, but 
> rather into the VM state machine. So this won't work at all (the 
> counter and increment/decrement unbalanced and multiple times).

We tried to consider all the possible ways that this could go wrong. 
Basically, every time a mapping is updated, we update the is_xgmi state 
and update the counter if it changed. Have you seen the counter become 
unbalanced?


>
> The correct place to add this is amdgpu_vm_bo_add/amdgpu_vm_bo_rmv.

I think we considered that. The problem is that a BO can be migrated 
between bo_add and bo_rmv. I found that even bo->preferred_domain can 
change with AMDGPU_GEM_OP_SET_PLACEMENT. So you can't reliably know 
whether to increment your counter, and your counter can become 
unbalanced if a migration or AMDGPU_GEM_OP_SET_PLACEMENT happens between 
bo_add and bo_rmv.

Therefore we're trying to check for XGMI mappings every time the mapping 
changes and keep track of the state in amdgpu_bo_va_mapping.


>
> Additional to that the approach with the counter doesn't work because 
> you don't have a lock protecting the hw update itself. E.g. while 
> powering down you can add a mapping which needs to power it up again 
> and so powering down and powering up race with each other.

That's a good point.

Regards,
   Felix


>
> Regards,
> Christian.
>
> Am 19.03.19 um 13:42 schrieb Kuehling, Felix:
>> We discussed a few different approaches before settling on this one.
>>
>> Maybe it needs some more background. XGMI links are quite power hungry.
>> Being able to power them down improves performance for power-limited
>> workloads that don't need XGMI. In machine learning, pretty much all
>> workloads are power limited on our GPUs, so this is not just a
>> theoretical thing. The problem is, how do you know whether you need
>> XGMI? You need to know whether there are P2P memory mappings involving
>> XGMI. So the natural place to put that is in the memory mapping code.
>>
>> If you do spot a race condition in the code, let's talk about how to 
>> fix it.
>>
>> Regards,
>>     Felix
>>
>> On 3/19/2019 8:07 AM, Christian König wrote:
>>> This reverts commit c9115f8904eef0f880d3b4f8306f553b1bb1c532.
>>>
>>> Adding this to the mapping is complete nonsense and the whole
>>> implementation looks racy. This patch wasn't thoughtfully reviewed
>>> and should be reverted for now.
>>>
>>> Signed-off-by: Christian König 
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  3 ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  1 -
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 29 
>>> +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 15 ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 --
>>>    6 files changed, 1 insertion(+), 52 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index b5720c1610e1..1db192150532 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -931,9 +931,6 @@ struct amdgpu_device {
>>>    int asic_reset_res;
>>>    struct work_struct    xgmi_reset_work;
>>>    -    /* counter of mapped memory through xgmi */
>>> -    atomic_t    xgmi_map_counter;
>>> -
>>>    bool    in_baco_reset;
>>>    };
>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 964a4d3f1f43..206583707124 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -2018,9 +2018,6 @@ static void 
>>> amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
>>>    r = amdgpu_device_enable_mgpu_fan_boost();
>>>    if (r)
>>>    DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
>>> -
>>> -    /*set to low pstate by default */
>>> -    amdgpu_xgmi_set_pstate(adev, 0);
>>>    }
>>>       static void amdgpu_device_delay_enable_gfx_off(struct 
>>> work_struct *work)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> index 6f176bbe4cf2..220a6a7b1bc1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> @@ -54,7 +54,6 @@ struct amdgpu_bo_va_mapping {
>>>    uint64_t    __subtree_last;
>>>    uint64_t    offset;
>>>    uint64_t    flags;
>>> -    bool    is_xgmi;
>>>    };
>>>       /* User space allocated BO in a VM */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index c5230a9fb7f6..c8f0e4ca05fb 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -34,7 

Re: [PATCH 4/4] drm/amd/display: Make pageflip event delivery compatible with VRR.

2019-03-19 Thread Kazlauskas, Nicholas
On 3/18/19 1:19 PM, Mario Kleiner wrote:
> We want vblank counts and timestamps of flip completion as sent
> in pageflip completion events to be consistent with the vblank
> count and timestamp of the vblank of flip completion, like in non
> VRR mode.
> 
> In VRR mode, drm_update_vblank_count() - and thereby vblank
> count and timestamp updates - must be delayed until after the
> end of front-porch of each vblank, as it is only safe to
> calculate vblank timestamps outside of the front-porch, when
> we actually know when the vblank will end or has ended.
> 
> The function drm_update_vblank_count() which updates timestamps
> and counts gets called by drm_crtc_accurate_vblank_count() or by
> drm_crtc_handle_vblank().
> 
> Therefore we must make sure that pageflip events for a completed
> flip are only sent out after drm_crtc_accurate_vblank_count() or
> drm_crtc_handle_vblank() is executed, after end of front-porch
> for the vblank of flip completion.
> 
> Two cases:
> 
> a) Pageflip irq handler executes inside front-porch:
> In this case we must defer sending pageflip events until
> drm_crtc_handle_vblank() executes after end of front-porch,
> and thereby calculates proper vblank count and timestamp.
> Iow. the pflip irq handler must just arm a pageflip event
> to be sent out by drm_crtc_handle_vblank() later on.
> 
> b) Pageflip irq handler executes after end of front-porch, e.g.,
> after flip completion in back-porch or due to a massively
> delayed handler invocation into the active scanout of the new
> frame. In this case we can call drm_crtc_accurate_vblank_count()
> to safely force calculation of a proper vblank count and
> timestamp, and must send the pageflip completion event
> ourselves from the pageflip irq handler.
> 
> This is the same behaviour as needed for standard fixed refresh
> rate mode.
> 
> To decide from within pageflip handler if we are in case a) or b),
> we check the current scanout position against the boundary of
> front-porch. In non-VRR mode we just do what we did in the past.
> 
> Signed-off-by: Mario Kleiner 

Reviewed-by: Nicholas Kazlauskas 

This patch looks fine to me for the most part, but it's still pending on 
the other patches in the series.

> ---
>   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 68 
> ++-
>   1 file changed, 55 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 555d9e9f..499284d 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -263,6 +263,10 @@ static void dm_pflip_high_irq(void *interrupt_params)
>   struct common_irq_params *irq_params = interrupt_params;
>   struct amdgpu_device *adev = irq_params->adev;
>   unsigned long flags;
> + struct drm_pending_vblank_event *e;
> + struct dm_crtc_state *acrtc_state;
> + uint32_t vpos, hpos, v_blank_start, v_blank_end;
> + bool vrr_active;
>   
>   amdgpu_crtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - 
> IRQ_TYPE_PFLIP);
>   
> @@ -285,18 +289,57 @@ static void dm_pflip_high_irq(void *interrupt_params)
>   return;
>   }
>   
> - /* Update to correct count(s) if racing with vblank irq */
> - drm_crtc_accurate_vblank_count(&amdgpu_crtc->base);
> + /* page flip completed. */
> + e = amdgpu_crtc->event;
> + amdgpu_crtc->event = NULL;
>   
> - /* wake up userspace */
> - if (amdgpu_crtc->event) {
> - drm_crtc_send_vblank_event(&amdgpu_crtc->base, 
> amdgpu_crtc->event);
> + if (!e)
> + WARN_ON(1);
>   
> - /* page flip completed. clean up */
> - amdgpu_crtc->event = NULL;
> + acrtc_state = to_dm_crtc_state(amdgpu_crtc->base.state);
> + vrr_active = amdgpu_dm_vrr_active(acrtc_state);
> +
> + /* Fixed refresh rate, or VRR scanout position outside front-porch? */
> + if (!vrr_active ||
> + !dc_stream_get_scanoutpos(acrtc_state->stream, &v_blank_start,
> +   &v_blank_end, &hpos, &vpos) ||
> + (vpos < v_blank_start)) {
> + /* Update to correct count and vblank timestamp if racing with
> +  * vblank irq. This also updates to the correct vblank timestamp
> +  * even in VRR mode, as scanout is past the front-porch atm.
> +  */
> + drm_crtc_accurate_vblank_count(&amdgpu_crtc->base);
>   
> - } else
> - WARN_ON(1);
> + /* Wake up userspace by sending the pageflip event with proper
> +  * count and timestamp of vblank of flip completion.
> +  */
> + if (e) {
> + drm_crtc_send_vblank_event(&amdgpu_crtc->base, e);
> +
> + /* Event sent, so done with vblank for this flip */
> + drm_crtc_

Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"

2019-03-19 Thread Christian König

Yeah, all that is perfectly fine.

The problem is Shaoyun didn't put this into the mapping code, but rather 
into the VM state machine. So this won't work at all (the counter gets 
incremented/decremented unbalanced and multiple times).


The correct place to add this is amdgpu_vm_bo_add/amdgpu_vm_bo_rmv.

Additional to that the approach with the counter doesn't work because 
you don't have a lock protecting the hw update itself. E.g. while 
powering down you can add a mapping which needs to power it up again and 
so powering down and powering up race with each other.


Regards,
Christian.

Am 19.03.19 um 13:42 schrieb Kuehling, Felix:

We discussed a few different approaches before settling on this one.

Maybe it needs some more background. XGMI links are quite power hungry.
Being able to power them down improves performance for power-limited
workloads that don't need XGMI. In machine learning, pretty much all
workloads are power limited on our GPUs, so this is not just a
theoretical thing. The problem is, how do you know whether you need
XGMI? You need to know whether there are P2P memory mappings involving
XGMI. So the natural place to put that is in the memory mapping code.

If you do spot a race condition in the code, let's talk about how to fix it.

Regards,
    Felix

On 3/19/2019 8:07 AM, Christian König wrote:

This reverts commit c9115f8904eef0f880d3b4f8306f553b1bb1c532.

Adding this to the mapping is complete nonsense and the whole
implementation looks racy. This patch wasn't thoughtfully reviewed
and should be reverted for now.

Signed-off-by: Christian König 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h|  3 ---
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ---
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  1 -
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 29 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 15 ---
   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 --
   6 files changed, 1 insertion(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b5720c1610e1..1db192150532 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -931,9 +931,6 @@ struct amdgpu_device {
int asic_reset_res;
struct work_struct  xgmi_reset_work;
   
-	/* counter of mapped memory through xgmi */

-   atomic_txgmi_map_counter;
-
boolin_baco_reset;
   };
   
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 964a4d3f1f43..206583707124 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2018,9 +2018,6 @@ static void 
amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
r = amdgpu_device_enable_mgpu_fan_boost();
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
-
-   /*set to low pstate by default */
-   amdgpu_xgmi_set_pstate(adev, 0);
   }
   
   static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 6f176bbe4cf2..220a6a7b1bc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -54,7 +54,6 @@ struct amdgpu_bo_va_mapping {
uint64_t__subtree_last;
uint64_toffset;
uint64_tflags;
-   boolis_xgmi;
   };
   
   /* User space allocated BO in a VM */

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c5230a9fb7f6..c8f0e4ca05fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -34,7 +34,6 @@
   #include "amdgpu_trace.h"
   #include "amdgpu_amdkfd.h"
   #include "amdgpu_gmc.h"
-#include "amdgpu_xgmi.h"
   
   /**

* DOC: GPUVM
@@ -2022,9 +2021,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct ttm_mem_reg *mem;
struct drm_mm_node *nodes;
struct dma_fence *exclusive, **last_update;
-   struct amdgpu_device *bo_adev = adev;
-   bool is_xgmi = false;
uint64_t flags;
+   struct amdgpu_device *bo_adev = adev;
int r;
   
   	if (clear || !bo) {

@@ -2046,10 +2044,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
if (bo) {
flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
-   if (adev != bo_adev &&
-   adev->gmc.xgmi.hive_id &&
-   adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id)
-   is_xgmi = true;
} else {
flags = 0x0;
}
@@ -2068,19 +2062,6 @@ int amdgpu_vm_bo_updat

[PATCH 8/8] drm/amdgpu: use the new VM backend for clears

2019-03-19 Thread Christian König
And remove the existing code when it is unused.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 89 +-
 1 file changed, 32 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 729da1c486cd..af1a7020c3ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -711,11 +711,9 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 {
struct ttm_operation_ctx ctx = { true, false };
unsigned level = adev->vm_manager.root_level;
+   struct amdgpu_vm_update_params params;
struct amdgpu_bo *ancestor = bo;
-   struct dma_fence *fence = NULL;
unsigned entries, ats_entries;
-   struct amdgpu_ring *ring;
-   struct amdgpu_job *job;
uint64_t addr;
int r;
 
@@ -750,8 +748,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
}
}
 
-   ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
-
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return r;
@@ -772,60 +768,45 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 
}
 
-   r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+   memset(¶ms, 0, sizeof(params));
+   params.adev = adev;
+   params.vm = vm;
+
+   r = vm->update_funcs->prepare(¶ms, AMDGPU_FENCE_OWNER_KFD, NULL);
if (r)
return r;
 
-   do {
-   addr = amdgpu_bo_gpu_offset(bo);
-   if (ats_entries) {
-   uint64_t ats_value;
-
-   ats_value = AMDGPU_PTE_DEFAULT_ATC;
-   if (level != AMDGPU_VM_PTB)
-   ats_value |= AMDGPU_PDE_PTE;
-
-   amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
- ats_entries, 0, ats_value);
-   addr += ats_entries * 8;
-   }
-
-   if (entries) {
-   uint64_t value = 0;
-
-   /* Workaround for fault priority problem on GMC9 */
-   if (level == AMDGPU_VM_PTB &&
-   adev->asic_type >= CHIP_VEGA10)
-   value = AMDGPU_PTE_EXECUTABLE;
-
-   amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
- entries, 0, value);
-   }
+   addr = 0;
+   if (ats_entries) {
+   uint64_t ats_value;
 
-   bo = bo->shadow;
-   } while (bo);
+   ats_value = AMDGPU_PTE_DEFAULT_ATC;
+   if (level != AMDGPU_VM_PTB)
+   ats_value |= AMDGPU_PDE_PTE;
 
-   amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+   r = vm->update_funcs->update(¶ms, bo, addr, 0, ats_entries,
+0, ats_value);
+   if (r)
+   return r;
 
-   WARN_ON(job->ibs[0].length_dw > 64);
-   r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
-AMDGPU_FENCE_OWNER_KFD, false);
-   if (r)
-   goto error_free;
+   addr += ats_entries * 8;
+   }
 
-   r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED,
- &fence);
-   if (r)
-   goto error_free;
+   if (entries) {
+   uint64_t value = 0;
 
-   amdgpu_bo_fence(vm->root.base.bo, fence, true);
-   dma_fence_put(fence);
+   /* Workaround for fault priority problem on GMC9 */
+   if (level == AMDGPU_VM_PTB &&
+   adev->asic_type >= CHIP_VEGA10)
+   value = AMDGPU_PTE_EXECUTABLE;
 
-   return 0;
+   r = vm->update_funcs->update(¶ms, bo, addr, 0, entries,
+0, value);
+   if (r)
+   return r;
+   }
 
-error_free:
-   amdgpu_job_free(job);
-   return r;
+   return vm->update_funcs->commit(¶ms, NULL);
 }
 
 /**
@@ -913,7 +894,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
if (r)
goto error_free_pt;
 
-   return 1;
+   return 0;
 
 error_free_pt:
amdgpu_bo_unref(&pt->shadow);
@@ -1421,12 +1402,10 @@ static int amdgpu_vm_update_ptes(struct 
amdgpu_vm_update_params *params,
unsigned shift, parent_shift, mask;
uint64_t incr, entry_end, pe_start;
struct amdgpu_bo *pt;
-   bool need_to_sync;
 
r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
-   if (r < 0)
+   if (r)
return r;
-   need_to_sync = (r && params->vm->use_cpu_for_update);
 
 

[PATCH 5/8] drm/amdgpu: new VM update backends

2019-03-19 Thread Christian König
Split out all functions for SDMA and CPU based page table
updates into separate backends.

This way we can keep most of the complexity of those from the
core VM code.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/Makefile |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  30 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  | 116 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 248 
 5 files changed, 401 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 6039944abb71..7d539ba6400d 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -53,7 +53,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
-   amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o
+   amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
+   amdgpu_vm_sdma.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e83ad0548801..86e12acb8493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1243,7 +1243,7 @@ static void amdgpu_vm_do_copy_ptes(struct 
amdgpu_vm_update_params *params,
  * Returns:
  * The pointer for the page table entry.
  */
-static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
+uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
 {
uint64_t result;
 
@@ -2975,6 +2975,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
 vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update && 
!amdgpu_gmc_vram_full_visible(&adev->gmc)),
  "CPU update of VM recommended only for large BAR system\n");
+
+   if (vm->use_cpu_for_update)
+   vm->update_funcs = &amdgpu_vm_cpu_funcs;
+   else
+   vm->update_funcs = &amdgpu_vm_sdma_funcs;
vm->last_update = NULL;
 
amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 6df4d9e382ac..a99b4caba13c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -203,11 +203,21 @@ struct amdgpu_vm_update_params {
 */
uint64_t src;
 
+   /**
+* @job: job to used for hw submission
+*/
+   struct amdgpu_job *job;
+
/**
 * @ib: indirect buffer to fill with commands
 */
struct amdgpu_ib *ib;
 
+   /**
+* @num_dw_left: number of dw left for the IB
+*/
+   unsigned int num_dw_left;
+
/**
 * @func: Function which actually does the update
 */
@@ -217,6 +227,17 @@ struct amdgpu_vm_update_params {
 uint64_t flags);
 };
 
+struct amdgpu_vm_update_funcs {
+
+   int (*prepare)(struct amdgpu_vm_update_params *p, void * owner,
+  struct dma_fence *exclusive);
+   int (*update)(struct amdgpu_vm_update_params *p,
+ struct amdgpu_bo *bo, uint64_t pe, uint64_t addr,
+ unsigned count, uint32_t incr, uint64_t flags);
+   int (*commit)(struct amdgpu_vm_update_params *p,
+ struct dma_fence **fence);
+};
+
 struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached   va;
@@ -252,7 +273,10 @@ struct amdgpu_vm {
struct amdgpu_vmid  *reserved_vmid[AMDGPU_MAX_VMHUBS];
 
/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
-   booluse_cpu_for_update;
+   booluse_cpu_for_update;
+
+   /* Functions to use for VM table updates */
+   const struct amdgpu_vm_update_funcs *update_funcs;
 
/* Flag to indicate ATS support from PTE for GFX9 */
boolpte_support_ats;
@@ -319,6 +343,9 @@ struct amdgpu_vm_manager {
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) 
((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), 
(incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) 
((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), 
(incr), (flags)))
 
+extern const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs;
+extern const struct amdgpu

[PATCH 6/8] drm/amdgpu: use the new VM backend for PDEs

2019-03-19 Thread Christian König
And remove the existing code when it is unused.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 +-
 1 file changed, 14 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 86e12acb8493..735a32b8a596 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1320,10 +1320,10 @@ static void amdgpu_vm_update_func(struct 
amdgpu_vm_update_params *params,
  *
  * Makes sure the requested entry in parent is up to date.
  */
-static void amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params,
-struct amdgpu_vm *vm,
-struct amdgpu_vm_pt *parent,
-struct amdgpu_vm_pt *entry)
+static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params,
+   struct amdgpu_vm *vm,
+   struct amdgpu_vm_pt *parent,
+   struct amdgpu_vm_pt *entry)
 {
struct amdgpu_bo *bo = parent->base.bo, *pbo;
uint64_t pde, pt, flags;
@@ -1335,7 +1335,7 @@ static void amdgpu_vm_update_pde(struct 
amdgpu_vm_update_params *params,
level += params->adev->vm_manager.root_level;
amdgpu_gmc_get_pde_for_bo(entry->base.bo, level, &pt, &flags);
pde = (entry - parent->entries) * 8;
-   amdgpu_vm_update_func(params, bo, pde, pt, 1, 0, flags);
+   return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags);
 }
 
 /*
@@ -1372,33 +1372,18 @@ int amdgpu_vm_update_directories(struct amdgpu_device 
*adev,
 struct amdgpu_vm *vm)
 {
struct amdgpu_vm_update_params params;
-   struct amdgpu_job *job;
-   unsigned ndw = 0;
-   int r = 0;
+   int r;
 
if (list_empty(&vm->relocated))
return 0;
 
-restart:
memset(¶ms, 0, sizeof(params));
params.adev = adev;
+   params.vm = vm;
 
-   if (vm->use_cpu_for_update) {
-   r = amdgpu_bo_sync_wait(vm->root.base.bo,
-   AMDGPU_FENCE_OWNER_VM, true);
-   if (unlikely(r))
-   return r;
-
-   params.func = amdgpu_vm_cpu_set_ptes;
-   } else {
-   ndw = 512;
-   r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
-   if (r)
-   return r;
-
-   params.ib = &job->ibs[0];
-   params.func = amdgpu_vm_do_set_ptes;
-   }
+   r = vm->update_funcs->prepare(¶ms, AMDGPU_FENCE_OWNER_VM, NULL);
+   if (r)
+   return r;
 
while (!list_empty(&vm->relocated)) {
struct amdgpu_vm_pt *pt, *entry;
@@ -1411,49 +1396,18 @@ int amdgpu_vm_update_directories(struct amdgpu_device 
*adev,
if (!pt)
continue;
 
-   amdgpu_vm_update_pde(¶ms, vm, pt, entry);
-
-   if (!vm->use_cpu_for_update &&
-   (ndw - params.ib->length_dw) < 32)
-   break;
-   }
-
-   if (vm->use_cpu_for_update) {
-   /* Flush HDP */
-   mb();
-   amdgpu_asic_flush_hdp(adev, NULL);
-   } else if (params.ib->length_dw == 0) {
-   amdgpu_job_free(job);
-   } else {
-   struct amdgpu_bo *root = vm->root.base.bo;
-   struct amdgpu_ring *ring;
-   struct dma_fence *fence;
-
-   ring = container_of(vm->entity.rq->sched, struct amdgpu_ring,
-   sched);
-
-   amdgpu_ring_pad_ib(ring, params.ib);
-   amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
-AMDGPU_FENCE_OWNER_VM, false);
-   WARN_ON(params.ib->length_dw > ndw);
-   r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM,
- &fence);
+   r = amdgpu_vm_update_pde(¶ms, vm, pt, entry);
if (r)
goto error;
-
-   amdgpu_bo_fence(root, fence, true);
-   dma_fence_put(vm->last_update);
-   vm->last_update = fence;
}
 
-   if (!list_empty(&vm->relocated))
-   goto restart;
-
+   r = vm->update_funcs->commit(¶ms, &vm->last_update);
+   if (r)
+   goto error;
return 0;
 
 error:
amdgpu_vm_invalidate_pds(adev, vm);
-   amdgpu_job_free(job);
return r;
 }
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 7/8] drm/amdgpu: use the new VM backend for PTEs

2019-03-19 Thread Christian König
And remove the existing code when it is unused.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 229 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  13 --
 2 files changed, 6 insertions(+), 236 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 735a32b8a596..729da1c486cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1171,66 +1171,6 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm 
*vm,
return NULL;
 }
 
-/**
- * amdgpu_vm_do_set_ptes - helper to call the right asic function
- *
- * @params: see amdgpu_vm_update_params definition
- * @bo: PD/PT to update
- * @pe: addr of the page entry
- * @addr: dst addr to write into pe
- * @count: number of page entries to update
- * @incr: increase next addr by incr bytes
- * @flags: hw access flags
- *
- * Traces the parameters and calls the right asic functions
- * to setup the page table using the DMA.
- */
-static void amdgpu_vm_do_set_ptes(struct amdgpu_vm_update_params *params,
- struct amdgpu_bo *bo,
- uint64_t pe, uint64_t addr,
- unsigned count, uint32_t incr,
- uint64_t flags)
-{
-   pe += amdgpu_bo_gpu_offset(bo);
-   trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
-
-   if (count < 3) {
-   amdgpu_vm_write_pte(params->adev, params->ib, pe,
-   addr | flags, count, incr);
-
-   } else {
-   amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
- count, incr, flags);
-   }
-}
-
-/**
- * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
- *
- * @params: see amdgpu_vm_update_params definition
- * @bo: PD/PT to update
- * @pe: addr of the page entry
- * @addr: dst addr to write into pe
- * @count: number of page entries to update
- * @incr: increase next addr by incr bytes
- * @flags: hw access flags
- *
- * Traces the parameters and calls the DMA function to copy the PTEs.
- */
-static void amdgpu_vm_do_copy_ptes(struct amdgpu_vm_update_params *params,
-  struct amdgpu_bo *bo,
-  uint64_t pe, uint64_t addr,
-  unsigned count, uint32_t incr,
-  uint64_t flags)
-{
-   uint64_t src = (params->src + (addr >> 12) * 8);
-
-   pe += amdgpu_bo_gpu_offset(bo);
-   trace_amdgpu_vm_copy_ptes(pe, src, count);
-
-   amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
-}
-
 /**
  * amdgpu_vm_map_gart - Resolve gart mapping of addr
  *
@@ -1258,58 +1198,6 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t 
*pages_addr, uint64_t addr)
return result;
 }
 
-/**
- * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
- *
- * @params: see amdgpu_vm_update_params definition
- * @bo: PD/PT to update
- * @pe: kmap addr of the page entry
- * @addr: dst addr to write into pe
- * @count: number of page entries to update
- * @incr: increase next addr by incr bytes
- * @flags: hw access flags
- *
- * Write count number of PT/PD entries directly.
- */
-static void amdgpu_vm_cpu_set_ptes(struct amdgpu_vm_update_params *params,
-  struct amdgpu_bo *bo,
-  uint64_t pe, uint64_t addr,
-  unsigned count, uint32_t incr,
-  uint64_t flags)
-{
-   unsigned int i;
-   uint64_t value;
-
-   pe += (unsigned long)amdgpu_bo_kptr(bo);
-
-   trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
-
-   for (i = 0; i < count; i++) {
-   value = params->pages_addr ?
-   amdgpu_vm_map_gart(params->pages_addr, addr) :
-   addr;
-   amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
-  i, value, flags);
-   addr += incr;
-   }
-}
-
-/**
- * amdgpu_vm_update_func - helper to call update function
- *
- * Calls the update function for both the given BO as well as its shadow.
- */
-static void amdgpu_vm_update_func(struct amdgpu_vm_update_params *params,
- struct amdgpu_bo *bo,
- uint64_t pe, uint64_t addr,
- unsigned count, uint32_t incr,
- uint64_t flags)
-{
-   if (bo->shadow)
-   params->func(params, bo->shadow, pe, addr, count, incr, flags);
-   params->func(params, bo, pe, addr, count, incr, flags);
-}
-
 /*
  * amdgpu_vm_update_pde - update a single level in the hierarchy
  *
@@ -1435,7 +1323,8 @@ static void amdgpu_vm_update_flags(struct 
amdgpu_vm_update_params *params,
flags |

[PATCH 3/8] drm/amdgpu: move and rename amdgpu_pte_update_params

2019-03-19 Thread Christian König
Move the update parameter into the VM header and rename them.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 75 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 45 
 2 files changed, 60 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2d61d74d32a8..560658406e8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -65,51 +65,6 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, 
uint64_t, __subtree_last,
 #undef START
 #undef LAST
 
-/**
- * struct amdgpu_pte_update_params - Local structure
- *
- * Encapsulate some VM table update parameters to reduce
- * the number of function parameters
- *
- */
-struct amdgpu_pte_update_params {
-
-   /**
-* @adev: amdgpu device we do this update for
-*/
-   struct amdgpu_device *adev;
-
-   /**
-* @vm: optional amdgpu_vm we do this update for
-*/
-   struct amdgpu_vm *vm;
-
-   /**
-* @pages_addr:
-*
-* DMA addresses to use for mapping
-*/
-   dma_addr_t *pages_addr;
-
-   /**
-* @src: address where to copy page table entries from
-*/
-   uint64_t src;
-
-   /**
-* @ib: indirect buffer to fill with commands
-*/
-   struct amdgpu_ib *ib;
-
-   /**
-* @func: Function which actually does the update
-*/
-   void (*func)(struct amdgpu_pte_update_params *params,
-struct amdgpu_bo *bo, uint64_t pe,
-uint64_t addr, unsigned count, uint32_t incr,
-uint64_t flags);
-};
-
 /**
  * struct amdgpu_prt_cb - Helper to disable partial resident texture feature 
from a fence callback
  */
@@ -1219,7 +1174,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm 
*vm,
 /**
  * amdgpu_vm_do_set_ptes - helper to call the right asic function
  *
- * @params: see amdgpu_pte_update_params definition
+ * @params: see amdgpu_vm_update_params definition
  * @bo: PD/PT to update
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
@@ -1230,7 +1185,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm 
*vm,
  * Traces the parameters and calls the right asic functions
  * to setup the page table using the DMA.
  */
-static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
+static void amdgpu_vm_do_set_ptes(struct amdgpu_vm_update_params *params,
  struct amdgpu_bo *bo,
  uint64_t pe, uint64_t addr,
  unsigned count, uint32_t incr,
@@ -1252,7 +1207,7 @@ static void amdgpu_vm_do_set_ptes(struct 
amdgpu_pte_update_params *params,
 /**
  * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
  *
- * @params: see amdgpu_pte_update_params definition
+ * @params: see amdgpu_vm_update_params definition
  * @bo: PD/PT to update
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
@@ -1262,7 +1217,7 @@ static void amdgpu_vm_do_set_ptes(struct 
amdgpu_pte_update_params *params,
  *
  * Traces the parameters and calls the DMA function to copy the PTEs.
  */
-static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
+static void amdgpu_vm_do_copy_ptes(struct amdgpu_vm_update_params *params,
   struct amdgpu_bo *bo,
   uint64_t pe, uint64_t addr,
   unsigned count, uint32_t incr,
@@ -1306,7 +1261,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t 
*pages_addr, uint64_t addr)
 /**
  * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
  *
- * @params: see amdgpu_pte_update_params definition
+ * @params: see amdgpu_vm_update_params definition
  * @bo: PD/PT to update
  * @pe: kmap addr of the page entry
  * @addr: dst addr to write into pe
@@ -1316,7 +1271,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t 
*pages_addr, uint64_t addr)
  *
  * Write count number of PT/PD entries directly.
  */
-static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
+static void amdgpu_vm_cpu_set_ptes(struct amdgpu_vm_update_params *params,
   struct amdgpu_bo *bo,
   uint64_t pe, uint64_t addr,
   unsigned count, uint32_t incr,
@@ -1344,7 +1299,7 @@ static void amdgpu_vm_cpu_set_ptes(struct 
amdgpu_pte_update_params *params,
  *
  * Calls the update function for both the given BO as well as its shadow.
  */
-static void amdgpu_vm_update_func(struct amdgpu_pte_update_params *params,
+static void amdgpu_vm_update_func(struct amdgpu_vm_update_params *params,
  struct amdgpu_bo *bo,
  uint64_t pe, uint64_t addr,
  unsigned co

[PATCH 2/8] drm/amdgpu: always set and check dma addresses in the VM code

2019-03-19 Thread Christian König
Clean that up a bit and allow to always have the DMA addresses around.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c8f0e4ca05fb..2d61d74d32a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -84,6 +84,13 @@ struct amdgpu_pte_update_params {
 */
struct amdgpu_vm *vm;
 
+   /**
+* @pages_addr:
+*
+* DMA addresses to use for mapping
+*/
+   dma_addr_t *pages_addr;
+
/**
 * @src: address where to copy page table entries from
 */
@@ -101,12 +108,6 @@ struct amdgpu_pte_update_params {
 struct amdgpu_bo *bo, uint64_t pe,
 uint64_t addr, unsigned count, uint32_t incr,
 uint64_t flags);
-   /**
-* @pages_addr:
-*
-* DMA addresses to use for mapping, used during VM update by CPU
-*/
-   dma_addr_t *pages_addr;
 };
 
 /**
@@ -1573,7 +1574,7 @@ static void amdgpu_vm_fragment(struct 
amdgpu_pte_update_params *params,
max_frag = 31;
 
/* system pages are non continuously */
-   if (params->src) {
+   if (params->pages_addr) {
*frag = 0;
*frag_end = end;
return;
@@ -1753,16 +1754,13 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
+   params.pages_addr = pages_addr;
 
/* sync to everything except eviction fences on unmapping */
if (!(flags & AMDGPU_PTE_VALID))
owner = AMDGPU_FENCE_OWNER_KFD;
 
if (vm->use_cpu_for_update) {
-   /* params.src is used as flag to indicate system Memory */
-   if (pages_addr)
-   params.src = ~0;
-
/* Wait for PT BOs to be idle. PTs share the same resv. object
 * as the root PD BO
 */
@@ -1778,7 +1776,6 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
}
 
params.func = amdgpu_vm_cpu_set_ptes;
-   params.pages_addr = pages_addr;
return amdgpu_vm_update_ptes(&params, start, last + 1,
 addr, flags);
}
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/8] drm/amdgpu: remove some unused VM defines

2019-03-19 Thread Christian König
Not needed any more.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index f5c25c0ae367..8348804c46cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -165,11 +165,6 @@ struct amdgpu_vm_pte_funcs {
uint32_t incr, uint64_t flags);
 };
 
-#define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
-#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
-#define AMDGPU_VM_FAULT_ADDR(fault)  ((u64)(fault) & 0xfffffffff000ULL)
-
-
 struct amdgpu_task_info {
charprocess_name[TASK_COMM_LEN];
chartask_name[TASK_COMM_LEN];
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 4/8] drm/amdgpu: reserve less memory for PDE updates

2019-03-19 Thread Christian König
Allocating 16KB was way too much, just use 2KB as a start for now.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 560658406e8b..e83ad0548801 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1391,7 +1391,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device 
*adev,
 
params.func = amdgpu_vm_cpu_set_ptes;
} else {
-   ndw = 512 * 8;
+   ndw = 512;
r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
if (r)
return r;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"

2019-03-19 Thread Kuehling, Felix
We discussed a few different approaches before settling on this one.

Maybe it needs some more background. XGMI links are quite power hungry. 
Being able to power them down improves performance for power-limited 
workloads that don't need XGMI. In machine learning, pretty much all 
workloads are power limited on our GPUs, so this is not just a 
theoretical thing. The problem is, how do you know whether you need 
XGMI? You need to know whether there are P2P memory mappings involving 
XGMI. So the natural place to put that is in the memory mapping code.

If you do spot a race condition in the code, let's talk about how to fix it.

Regards,
   Felix

On 3/19/2019 8:07 AM, Christian König wrote:
> This reverts commit c9115f8904eef0f880d3b4f8306f553b1bb1c532.
>
> Adding this to the mapping is complete nonsense and the whole
> implementation looks racy. This patch wasn't thoughtfully reviewed
> and should be reverted for now.
>
> Signed-off-by: Christian König 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h|  3 ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  1 -
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 29 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 15 ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 --
>   6 files changed, 1 insertion(+), 52 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index b5720c1610e1..1db192150532 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -931,9 +931,6 @@ struct amdgpu_device {
>   int asic_reset_res;
>   struct work_struct  xgmi_reset_work;
>   
> - /* counter of mapped memory through xgmi */
> - atomic_txgmi_map_counter;
> -
>   boolin_baco_reset;
>   };
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 964a4d3f1f43..206583707124 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2018,9 +2018,6 @@ static void 
> amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
>   r = amdgpu_device_enable_mgpu_fan_boost();
>   if (r)
>   DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
> -
> - /*set to low pstate by default */
> - amdgpu_xgmi_set_pstate(adev, 0);
>   }
>   
>   static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 6f176bbe4cf2..220a6a7b1bc1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -54,7 +54,6 @@ struct amdgpu_bo_va_mapping {
>   uint64_t__subtree_last;
>   uint64_toffset;
>   uint64_tflags;
> - boolis_xgmi;
>   };
>   
>   /* User space allocated BO in a VM */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index c5230a9fb7f6..c8f0e4ca05fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -34,7 +34,6 @@
>   #include "amdgpu_trace.h"
>   #include "amdgpu_amdkfd.h"
>   #include "amdgpu_gmc.h"
> -#include "amdgpu_xgmi.h"
>   
>   /**
>* DOC: GPUVM
> @@ -2022,9 +2021,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>   struct ttm_mem_reg *mem;
>   struct drm_mm_node *nodes;
>   struct dma_fence *exclusive, **last_update;
> - struct amdgpu_device *bo_adev = adev;
> - bool is_xgmi = false;
>   uint64_t flags;
> + struct amdgpu_device *bo_adev = adev;
>   int r;
>   
>   if (clear || !bo) {
> @@ -2046,10 +2044,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>   if (bo) {
>   flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>   bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
> - if (adev != bo_adev &&
> - adev->gmc.xgmi.hive_id &&
> - adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id)
> - is_xgmi = true;
>   } else {
>   flags = 0x0;
>   }
> @@ -2068,19 +2062,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>   }
>   
>   list_for_each_entry(mapping, &bo_va->invalids, list) {
> - if (mapping->is_xgmi != is_xgmi) {
> - if (is_xgmi) {
> - /* Adding an XGMI mapping to the PT */
> - if (atomic_inc_return(&adev->xgmi_map_counter) 
> == 1)
> - amdgpu_xgmi_set_pstate(adev, 1);
> - } else {
> - /* Removing an XGMI mapping from the 

Re: [PATCH 1/9] dma-buf: add new dma_fence_chain container v5

2019-03-19 Thread Christian König

Am 19.03.19 um 13:27 schrieb Lionel Landwerlin:

On 15/03/2019 12:09, Chunming Zhou wrote:

[SNIP]
+/**
+ * struct dma_fence_chain - fence to represent an node of a fence chain
+ * @base: fence base class
+ * @lock: spinlock for fence handling
+ * @prev: previous fence of the chain
+ * @prev_seqno: original previous seqno before garbage collection
+ * @fence: encapsulated fence
+ * @cb: callback structure for signaling
+ * @work: irq work item for signaling
+ */
+struct dma_fence_chain {
+    struct dma_fence base;
+    spinlock_t lock;
+    struct dma_fence *prev;



Not an expert on the rcu thing, but drm_syncobj has a __rcu on its 
fence field.


Should this be? :


struct dma_fence __rcu *prev;


Yeah, the kbuild robot has already complained about this as well.

Needs to be fixed before pushed, but its only a minor change.

Christian.





+    u64 prev_seqno;
+    struct dma_fence *fence;
+    struct dma_fence_cb cb;
+    struct irq_work work;
+};
+
+extern const struct dma_fence_ops dma_fence_chain_ops;
+
+/**
+ * to_dma_fence_chain - cast a fence to a dma_fence_chain
+ * @fence: fence to cast to a dma_fence_array
+ *
+ * Returns NULL if the fence is not a dma_fence_chain,
+ * or the dma_fence_chain otherwise.
+ */
+static inline struct dma_fence_chain *
+to_dma_fence_chain(struct dma_fence *fence)
+{
+    if (!fence || fence->ops != &dma_fence_chain_ops)
+    return NULL;
+
+    return container_of(fence, struct dma_fence_chain, base);
+}
+
+/**
+ * dma_fence_chain_for_each - iterate over all fences in chain
+ * @iter: current fence
+ * @head: starting point
+ *
+ * Iterate over all fences in the chain. We keep a reference to the 
current

+ * fence while inside the loop which must be dropped when breaking out.
+ */
+#define dma_fence_chain_for_each(iter, head)    \
+    for (iter = dma_fence_get(head); iter; \
+ iter = dma_fence_chain_walk(iter))
+
+struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence);
+int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t 
seqno);

+void dma_fence_chain_init(struct dma_fence_chain *chain,
+  struct dma_fence *prev,
+  struct dma_fence *fence,
+  uint64_t seqno);
+
+#endif /* __LINUX_DMA_FENCE_CHAIN_H */





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/9] dma-buf: add new dma_fence_chain container v5

2019-03-19 Thread Lionel Landwerlin

On 15/03/2019 12:09, Chunming Zhou wrote:

From: Christian König 

Lockless container implementation similar to a dma_fence_array, but with
only two elements per node and automatic garbage collection.

v2: properly document dma_fence_chain_for_each, add dma_fence_chain_find_seqno,
 drop prev reference during garbage collection if it's not a chain fence.
v3: use head and iterator for dma_fence_chain_for_each
v4: fix reference count in dma_fence_chain_enable_signaling
v5: fix iteration when walking each chain node

Signed-off-by: Christian König 
---
  drivers/dma-buf/Makefile  |   3 +-
  drivers/dma-buf/dma-fence-chain.c | 241 ++
  include/linux/dma-fence-chain.h   |  81 ++
  3 files changed, 324 insertions(+), 1 deletion(-)
  create mode 100644 drivers/dma-buf/dma-fence-chain.c
  create mode 100644 include/linux/dma-fence-chain.h

diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 0913a6ccab5a..1f006e083eb9 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,4 +1,5 @@
-obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o seqno-fence.o
+obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
+reservation.o seqno-fence.o
  obj-$(CONFIG_SYNC_FILE)   += sync_file.o
  obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o
  obj-$(CONFIG_UDMABUF) += udmabuf.o
diff --git a/drivers/dma-buf/dma-fence-chain.c 
b/drivers/dma-buf/dma-fence-chain.c
new file mode 100644
index ..0c5e3c902fa0
--- /dev/null
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -0,0 +1,241 @@
+/*
+ * fence-chain: chain fences together in a timeline
+ *
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ * Authors:
+ * Christian König 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include 
+
+static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
+
+/**
+ * dma_fence_chain_get_prev - use RCU to get a reference to the previous fence
+ * @chain: chain node to get the previous node from
+ *
+ * Use dma_fence_get_rcu_safe to get a reference to the previous fence of the
+ * chain node.
+ */
+static struct dma_fence *dma_fence_chain_get_prev(struct dma_fence_chain 
*chain)
+{
+   struct dma_fence *prev;
+
+   rcu_read_lock();
+   prev = dma_fence_get_rcu_safe(&chain->prev);
+   rcu_read_unlock();
+   return prev;
+}
+
+/**
+ * dma_fence_chain_walk - chain walking function
+ * @fence: current chain node
+ *
+ * Walk the chain to the next node. Returns the next fence or NULL if we are at
+ * the end of the chain. Garbage collects chain nodes which are already
+ * signaled.
+ */
+struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
+{
+   struct dma_fence_chain *chain, *prev_chain;
+   struct dma_fence *prev, *replacement, *tmp;
+
+   chain = to_dma_fence_chain(fence);
+   if (!chain) {
+   dma_fence_put(fence);
+   return NULL;
+   }
+
+   while ((prev = dma_fence_chain_get_prev(chain))) {
+
+   prev_chain = to_dma_fence_chain(prev);
+   if (prev_chain) {
+   if (!dma_fence_is_signaled(prev_chain->fence))
+   break;
+
+   replacement = dma_fence_chain_get_prev(prev_chain);
+   } else {
+   if (!dma_fence_is_signaled(prev))
+   break;
+
+   replacement = NULL;
+   }
+
+   tmp = cmpxchg(&chain->prev, prev, replacement);
+   if (tmp == prev)
+   dma_fence_put(tmp);
+   else
+   dma_fence_put(replacement);
+   dma_fence_put(prev);
+   }
+
+   dma_fence_put(fence);
+   return prev;
+}
+EXPORT_SYMBOL(dma_fence_chain_walk);
+
+/**
+ * dma_fence_chain_find_seqno - find fence chain node by seqno
+ * @pfence: pointer to the chain node where to start
+ * @seqno: the sequence number to search for
+ *
+ * Advance the fence pointer to the chain node which will signal this sequence
+ * number. If no sequence number is provided then this is a no-op.
+ *
+ * Returns EINVAL if the fence is not a chain node or the sequence number has
+ * not yet advanced far enough.
+ */
+int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
+{
+   struct dma_fence_chain *chain;
+
+   if (!seqno)
+   return 0;
+
+   chain = to_dma_fence_chain(*pfence);
+   if (!chain || chain->base.seqno < seqno)
+   re

[PATCH] drm/amdgpu: revert "XGMI pstate switch initial support"

2019-03-19 Thread Christian König
This reverts commit c9115f8904eef0f880d3b4f8306f553b1bb1c532.

Adding this to the mapping is complete nonsense and the whole
implementation looks racy. This patch wasn't thoughtfully reviewed
and should be reverted for now.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  3 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 29 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 15 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 --
 6 files changed, 1 insertion(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b5720c1610e1..1db192150532 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -931,9 +931,6 @@ struct amdgpu_device {
int asic_reset_res;
struct work_struct  xgmi_reset_work;
 
-   /* counter of mapped memory through xgmi */
-   atomic_txgmi_map_counter;
-
boolin_baco_reset;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 964a4d3f1f43..206583707124 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2018,9 +2018,6 @@ static void 
amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
r = amdgpu_device_enable_mgpu_fan_boost();
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
-
-   /*set to low pstate by default */
-   amdgpu_xgmi_set_pstate(adev, 0);
 }
 
 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 6f176bbe4cf2..220a6a7b1bc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -54,7 +54,6 @@ struct amdgpu_bo_va_mapping {
uint64_t__subtree_last;
uint64_toffset;
uint64_tflags;
-   boolis_xgmi;
 };
 
 /* User space allocated BO in a VM */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c5230a9fb7f6..c8f0e4ca05fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -34,7 +34,6 @@
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_gmc.h"
-#include "amdgpu_xgmi.h"
 
 /**
  * DOC: GPUVM
@@ -2022,9 +2021,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct ttm_mem_reg *mem;
struct drm_mm_node *nodes;
struct dma_fence *exclusive, **last_update;
-   struct amdgpu_device *bo_adev = adev;
-   bool is_xgmi = false;
uint64_t flags;
+   struct amdgpu_device *bo_adev = adev;
int r;
 
if (clear || !bo) {
@@ -2046,10 +2044,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
if (bo) {
flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
-   if (adev != bo_adev &&
-   adev->gmc.xgmi.hive_id &&
-   adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id)
-   is_xgmi = true;
} else {
flags = 0x0;
}
@@ -2068,19 +2062,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
}
 
list_for_each_entry(mapping, &bo_va->invalids, list) {
-   if (mapping->is_xgmi != is_xgmi) {
-   if (is_xgmi) {
-   /* Adding an XGMI mapping to the PT */
-   if (atomic_inc_return(&adev->xgmi_map_counter) 
== 1)
-   amdgpu_xgmi_set_pstate(adev, 1);
-   } else {
-   /* Removing an XGMI mapping from the PT */
-   if (atomic_dec_return(&adev->xgmi_map_counter) 
== 0)
-   amdgpu_xgmi_set_pstate(adev, 0);
-   }
-   mapping->is_xgmi = is_xgmi;
-   }
-
r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
   mapping, flags, bo_adev, nodes,
   last_update);
@@ -2298,13 +2279,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
mapping->start, mapping->last,
init_pte_value, 0, &f);
-
-   if (mapping->is_xgmi) {
-   /* Re

Re: [PATCH 8/9] drm/syncobj: add timeline signal ioctl for syncobj v3

2019-03-19 Thread Lionel Landwerlin

On 15/03/2019 12:09, Chunming Zhou wrote:

v2: individually allocate chain array, since chain node is free independently.
v3: all existing points must be already signaled before cpu perform signal 
operation,
 so add check condition for that.

Signed-off-by: Chunming Zhou 
---
  drivers/gpu/drm/drm_internal.h |   2 +
  drivers/gpu/drm/drm_ioctl.c|   2 +
  drivers/gpu/drm/drm_syncobj.c  | 103 +
  include/uapi/drm/drm.h |   1 +
  4 files changed, 108 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index dd11ae5f1eef..d9a483a5fce0 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void 
*data,
struct drm_file *file_private);
  int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_private);
+int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_private);
  int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_private);
  
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c

index 92b3b7b2fd81..d337f161909c 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, 
drm_syncobj_timeline_signal_ioctl,
+ DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
  DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, 
DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 306c7b7e2770..eaeb038f97d7 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1183,6 +1183,109 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void 
*data,
return ret;
  }
  
+int

+drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_private)
+{
+   struct drm_syncobj_timeline_array *args = data;
+   struct drm_syncobj **syncobjs;
+   struct dma_fence_chain **chains;
+   uint64_t *points;
+   uint32_t i, j, timeline_count = 0;
+   int ret;
+
+   if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+   return -EOPNOTSUPP;
+
+   if (args->pad != 0)
+   return -EINVAL;
+
+   if (args->count_handles == 0)
+   return -EINVAL;
+
+   ret = drm_syncobj_array_find(file_private,
+u64_to_user_ptr(args->handles),
+args->count_handles,
+&syncobjs);
+   if (ret < 0)
+   return ret;
+
+   for (i = 0; i < args->count_handles; i++) {
+   struct dma_fence_chain *chain;
+struct dma_fence *fence;
+
+fence = drm_syncobj_fence_get(syncobjs[i]);
+chain = to_dma_fence_chain(fence);
+if (chain) {
+struct dma_fence *iter;
+
+dma_fence_chain_for_each(iter, fence) {
+if (!iter)
+break;
+   if (!dma_fence_is_signaled(iter)) {
+   dma_fence_put(iter);
+   DRM_ERROR("Client must guarantee all 
existing timeline points signaled before performing host signal operation!");
+   ret = -EPERM;
+   goto out;



Sorry if I'm failing to remember whether we discussed this before.


Signaling a point from the host should be fine even if the previous 
points in the timeline are not signaled.


After all this can happen on the device side as well (out of order 
signaling).



I thought the thing we didn't want is out of order submission.

Just checking the last chain node seqno against the host signal point 
should be enough.



What about simply returning -EPERM? We can then warn the application from 
userspace.




+   }
+}
+}
+}
+
+   points = kmalloc_array(args->count_handles, sizeof(*points),
+  GFP_KERNEL);
+   if (!points) {
+   ret = -ENOMEM;
+   goto out;
+   }
+   if (!u64_to_user_ptr(args->points)) {
+   memset(points, 

Re: Potential NULL pointer dereference in radeon_ttm_tt_populate

2019-03-19 Thread Christian König

Hi Shaobo,

that question came up a couple of times now. And the answer is: No, 
there can't be a NULL pointer dereference.


The function radeon_ttm_tt_to_gtt returns NULL only when it is an AGP 
ttm structure, and that case is checked right before the offending code.


Unfortunately I don't see how an automated code checker should ever be 
able to figure that out by itself.


Regards,
Christian.

Am 18.03.19 um 21:58 schrieb Shaobo He:

Hello everyone,

My name is Shaobo He and I am a graduate student at University of 
Utah. I am using a static analysis tool to search for null pointer 
dereferences and came across a potentially invalid memory access in 
the file drivers/gpu/drm/radeon/radeon_ttm.c: in function 
`radeon_ttm_tt_populate`, function `radeon_ttm_tt_to_gtt` can return a 
NULL pointer which is dereferenced by the call to 
`drm_prime_sg_to_page_addr_arrays`.


Please let me know if it makes sense. I am looking forward to your reply.

Best,
Shaobo
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: Input lag bug in programs like Blender

2019-03-19 Thread Michel Dänzer
On 2019-03-18 7:56 p.m., Bill Messenger wrote:
> On my system, programs like Blender have very noticeable input lag that
> makes it hard to use. It happens no matter what Linux distro or
> compositor I try. At first the only fix that seemed to work was
> installing the proprietary amdgpu-pro drivers on Ubuntu. Running "env
> LIBGL_DRI3_DISABLE=1 blender" seemed to help, but it still wasn't
> perfect.
> 
> However, I recently found out how to truly fix it on any distro with
> the open source drivers. If I create a file called .drirc in my home
> folder with the contents:
> 
> 
>   
> 
>   
> 
>   
> 
> 
> then restart my system, it fixes the issue. I'm not sure what this
> does, but it seems like there's some sort of bug here.

Yeah, sounds like blender doesn't work well with sync-to-vblank enabled.
The above disables that, which probably works around a blender issue.


-- 
Earthling Michel Dänzer   |  https://www.amd.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 3/4] drm/amd/display: In VRR mode, do DRM core vblank handling at end of vblank.

2019-03-19 Thread Paul Menzel

Dear Mario,


On 18.03.19 18:19, Mario Kleiner wrote:

In VRR mode, proper vblank/pageflip timestamps can only be computed
after the display scanout position has left front-porch. Therefore
delay calls to drm_crtc_handle_vblank(), and thereby calls to
drm_update_vblank_count() and pageflip event delivery, to after the
end of front-porch when in VRR mode.

We add a new vupdate irq, which triggers at the end of the vupdate
interval, ie. at the end of vblank, and calls the core vblank handler
function. The new irq handler is not executed in standard non-VRR
mode, so vblank handling for fixed refresh rate mode is identical
to the past implementation.

Signed-off-by: Mario Kleiner 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|   1 +
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  | 129 -
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h  |   9 ++
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c  |  22 
  .../amd/display/dc/irq/dce110/irq_service_dce110.c |   7 +-
  .../amd/display/dc/irq/dce120/irq_service_dce120.c |   7 +-
  .../amd/display/dc/irq/dce80/irq_service_dce80.c   |   6 +-
  .../amd/display/dc/irq/dcn10/irq_service_dcn10.c   |  40 +--
  8 files changed, 205 insertions(+), 16 deletions(-)


[…]

`scripts/checkpatch.pl` shows two errors on your commit.


+   /* Use VUPDATE interrupt */
+   for (i = VISLANDS30_IV_SRCID_D1_V_UPDATE_INT; i <= 
VISLANDS30_IV_SRCID_D6_V_UPDATE_INT; i+=2) {


ERROR: spaces required around that '+=' (ctx:VxV)
#107: FILE: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c:1490:
+	for (i = VISLANDS30_IV_SRCID_D1_V_UPDATE_INT; i <= 
VISLANDS30_IV_SRCID_D6_V_UPDATE_INT; i+=2) {
 	 
   ^


[…]


  static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable)
  {
enum dc_irq_source irq_source;
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
struct amdgpu_device *adev = crtc->dev->dev_private;
+   struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state);
+   int rc = 0;
+
+   if (enable) {
+   /* vblank irq on -> Only need vupdate irq in vrr mode */
+   if (amdgpu_dm_vrr_active(acrtc_state))
+   rc = dm_set_vupdate_irq(crtc, true);
+   }
+   else {
+   /* vblank irq off -> vupdate irq off */
+   rc = dm_set_vupdate_irq(crtc, false);
+   }


ERROR: else should follow close brace '}'
#198: FILE: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c:3346:
+   }
+   else {

Also:

WARNING: Prefer 'unsigned int' to bare use of 'unsigned'
#258: FILE: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c:679:
+  unsigned crtc_id,

[…]


Kind regards,

Paul
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx