[Intel-gfx] [PATCH 01/15] drm/i915: Copy user requested buffers into the error state

2017-03-16 Thread Chris Wilson
Introduce a new execobject.flag (EXEC_OBJECT_CAPTURE) that userspace may
use to indicate that it wants the contents of this buffer preserved in
the error state (/sys/class/drm/cardN/error) following a GPU hang
involving this batch.

Use this at your discretion: the contents of the error state, although
compressed, are allocated with GFP_ATOMIC (i.e. limited) and kept for all
eternity (until the error state is destroyed).

Based on an earlier patch by Ben Widawsky 
Signed-off-by: Chris Wilson 
Cc: Ben Widawsky 
Cc: Matt Turner 
Acked-by: Ben Widawsky 
Reviewed-by: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_drv.c|  1 +
 drivers/gpu/drm/i915/i915_drv.h|  3 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 +
 drivers/gpu/drm/i915/i915_gem_request.c| 16 
 drivers/gpu/drm/i915/i915_gem_request.h| 11 
 drivers/gpu/drm/i915/i915_gpu_error.c  | 40 +-
 include/uapi/drm/i915_drm.h| 15 ++-
 7 files changed, 96 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 9164167cd147..9d8c8b928aab 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_SOFTPIN:
case I915_PARAM_HAS_EXEC_ASYNC:
case I915_PARAM_HAS_EXEC_FENCE:
+   case I915_PARAM_HAS_EXEC_CAPTURE:
/* For the time being all of these are always true;
 * if some supported hardware does not have one of these
 * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6e14c7d089b8..3c9551147e28 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1035,6 +1035,9 @@ struct i915_gpu_state {
u32 *pages[0];
} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
+   struct drm_i915_error_object **user_bo;
+   long user_bo_count;
+
struct drm_i915_error_object *wa_ctx;
 
struct drm_i915_error_request {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index dd7181ed5eca..cc6082a80d2d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1112,6 +1112,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_object *obj = vma->obj;
 
+   if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) {
+   struct i915_gem_capture_list *capture;
+
+   capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+   if (unlikely(!capture))
+   return -ENOMEM;
+
+   capture->next = req->capture_list;
+   capture->vma = vma;
+   req->capture_list = capture;
+   }
+
if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
continue;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 1e1d9f2072cd..73e34cdc67c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -270,6 +270,19 @@ void i915_gem_retire_noop(struct i915_gem_active *active,
/* Space left intentionally blank */
 }
 
+static void request_free_capture_list(struct drm_i915_gem_request *request)
+{
+   struct i915_gem_capture_list *capture;
+
+   capture = request->capture_list;
+   while (capture) {
+   struct i915_gem_capture_list *next = capture->next;
+
+   kfree(capture);
+   capture = next;
+   }
+}
+
 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 {
struct intel_engine_cs *engine = request->engine;
@@ -304,6 +317,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
}
unreserve_seqno(request->engine);
 
+   request_free_capture_list(request);
+
/* Walk through the active list, calling retire on each. This allows
 * objects to track their GPU activity and mark themselves as idle
 * when their *last* active request is completed (updating state
@@ -602,6 +617,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
req->global_seqno = 0;
req->file_priv = NULL;
req->batch = NULL;
+   req->capture_list = NULL;
 
/*
 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 0cef887b0de4..4eb642960393 100644
--- a/drivers/gpu/drm

Re: [Intel-gfx] [PATCH 01/15] drm/i915: Copy user requested buffers into the error state

2017-02-28 Thread Joonas Lahtinen
On Thu, 2017-02-23 at 16:18 +0000, Chris Wilson wrote:
> Introduce a new execobject.flag (EXEC_OBJECT_CAPTURE) that userspace may
> use to indicate that it wants the contents of this buffer preserved in
> the error state (/sys/class/drm/cardN/error) following a GPU hang
> involving this batch.
> 
> Use this at your discretion: the contents of the error state, although
> compressed, are allocated with GFP_ATOMIC (i.e. limited) and kept for all
> eternity (until the error state is destroyed).
> 
> Based on an earlier patch by Ben Widawsky 
> Signed-off-by: Chris Wilson 
> Cc: Ben Widawsky 
> Cc: Matt Turner 

Reviewed-by: Joonas Lahtinen 

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 01/15] drm/i915: Copy user requested buffers into the error state

2017-02-27 Thread Ben Widawsky

On 17-02-23 16:18:16, Chris Wilson wrote:

Introduce a new execobject.flag (EXEC_OBJECT_CAPTURE) that userspace may
use to indicate that it wants the contents of this buffer preserved in
the error state (/sys/class/drm/cardN/error) following a GPU hang
involving this batch.

Use this at your discretion: the contents of the error state, although
compressed, are allocated with GFP_ATOMIC (i.e. limited) and kept for all
eternity (until the error state is destroyed).

Based on an earlier patch by Ben Widawsky 
Signed-off-by: Chris Wilson 
Cc: Ben Widawsky 
Cc: Matt Turner 


Haven't tested it or used it, but I wanted it.
Acked-by: Ben Widawsky 


---
drivers/gpu/drm/i915/i915_drv.c|  1 +
drivers/gpu/drm/i915/i915_drv.h|  3 +++
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 +
drivers/gpu/drm/i915/i915_gem_request.c| 16 
drivers/gpu/drm/i915/i915_gem_request.h| 11 
drivers/gpu/drm/i915/i915_gpu_error.c  | 40 +-
include/uapi/drm/i915_drm.h| 15 ++-
7 files changed, 96 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 409fc32ce2bd..842c62b96a83 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -353,6 +353,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_ASYNC:
case I915_PARAM_HAS_EXEC_FENCE:
case I915_PARAM_HAS_EXEC_FENCE_DMABUF:
+   case I915_PARAM_HAS_EXEC_CAPTURE:
/* For the time being all of these are always true;
 * if some supported hardware does not have one of these
 * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 440a4725b87f..2cc0253d6ef7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1018,6 +1018,9 @@ struct i915_gpu_state {
u32 *pages[0];
} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;

+   struct drm_i915_error_object **user_bo;
+   long user_bo_count;
+
struct drm_i915_error_object *wa_ctx;

struct drm_i915_error_request {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3f2796131410..e8ffe0c9a20e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1113,6 +1113,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_object *obj = vma->obj;

+   if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) {
+   struct i915_gem_capture_list *capture;
+
+   capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+   if (unlikely(!capture))
+   return -ENOMEM;
+
+   capture->next = req->capture_list;
+   capture->vma = vma;
+   req->capture_list = capture;
+   }
+
if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
continue;

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index ad9d4ce07fb6..3a159cac2172 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -286,6 +286,19 @@ void i915_gem_retire_noop(struct i915_gem_active *active,
/* Space left intentionally blank */
}

+static void request_free_capture_list(struct drm_i915_gem_request *request)
+{
+   struct i915_gem_capture_list *capture;
+
+   capture = request->capture_list;
+   while (capture) {
+   struct i915_gem_capture_list *next = capture->next;
+
+   kfree(capture);
+   capture = next;
+   }
+}
+
static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
struct intel_engine_cs *engine = request->engine;
@@ -320,6 +333,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
}
unreserve_seqno(request->engine);

+   request_free_capture_list(request);
+
/* Walk through the active list, calling retire on each. This allows
 * objects to track their GPU activity and mark themselves as idle
 * when their *last* active request is completed (updating state
@@ -615,6 +630,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
req->global_seqno = 0;
req->file_priv = NULL;
req->batch = NULL;
+   req->capture_list = NULL;

/*
 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 0ef

[Intel-gfx] [PATCH 01/15] drm/i915: Copy user requested buffers into the error state

2017-02-23 Thread Chris Wilson
Introduce a new execobject.flag (EXEC_OBJECT_CAPTURE) that userspace may
use to indicate that it wants the contents of this buffer preserved in
the error state (/sys/class/drm/cardN/error) following a GPU hang
involving this batch.

Use this at your discretion: the contents of the error state, although
compressed, are allocated with GFP_ATOMIC (i.e. limited) and kept for all
eternity (until the error state is destroyed).

Based on an earlier patch by Ben Widawsky 
Signed-off-by: Chris Wilson 
Cc: Ben Widawsky 
Cc: Matt Turner 
---
 drivers/gpu/drm/i915/i915_drv.c|  1 +
 drivers/gpu/drm/i915/i915_drv.h|  3 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 +
 drivers/gpu/drm/i915/i915_gem_request.c| 16 
 drivers/gpu/drm/i915/i915_gem_request.h| 11 
 drivers/gpu/drm/i915/i915_gpu_error.c  | 40 +-
 include/uapi/drm/i915_drm.h| 15 ++-
 7 files changed, 96 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 409fc32ce2bd..842c62b96a83 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -353,6 +353,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_ASYNC:
case I915_PARAM_HAS_EXEC_FENCE:
case I915_PARAM_HAS_EXEC_FENCE_DMABUF:
+   case I915_PARAM_HAS_EXEC_CAPTURE:
/* For the time being all of these are always true;
 * if some supported hardware does not have one of these
 * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 440a4725b87f..2cc0253d6ef7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1018,6 +1018,9 @@ struct i915_gpu_state {
u32 *pages[0];
} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
+   struct drm_i915_error_object **user_bo;
+   long user_bo_count;
+
struct drm_i915_error_object *wa_ctx;
 
struct drm_i915_error_request {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3f2796131410..e8ffe0c9a20e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1113,6 +1113,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_object *obj = vma->obj;
 
+   if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) {
+   struct i915_gem_capture_list *capture;
+
+   capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+   if (unlikely(!capture))
+   return -ENOMEM;
+
+   capture->next = req->capture_list;
+   capture->vma = vma;
+   req->capture_list = capture;
+   }
+
if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
continue;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index ad9d4ce07fb6..3a159cac2172 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -286,6 +286,19 @@ void i915_gem_retire_noop(struct i915_gem_active *active,
/* Space left intentionally blank */
 }
 
+static void request_free_capture_list(struct drm_i915_gem_request *request)
+{
+   struct i915_gem_capture_list *capture;
+
+   capture = request->capture_list;
+   while (capture) {
+   struct i915_gem_capture_list *next = capture->next;
+
+   kfree(capture);
+   capture = next;
+   }
+}
+
 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 {
struct intel_engine_cs *engine = request->engine;
@@ -320,6 +333,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
}
unreserve_seqno(request->engine);
 
+   request_free_capture_list(request);
+
/* Walk through the active list, calling retire on each. This allows
 * objects to track their GPU activity and mark themselves as idle
 * when their *last* active request is completed (updating state
@@ -615,6 +630,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
req->global_seqno = 0;
req->file_priv = NULL;
req->batch = NULL;
+   req->capture_list = NULL;
 
/*
 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 0efee879df23..cc24a6c72748 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i9