On 03/14/2013 08:52 AM, Mika Kuoppala wrote:
To count context losses, add struct ctx_reset_state for
both i915_hw_context and drm_i915_file_private.
drm_i915_file_private is used when there is no context.

Signed-off-by: Mika Kuoppala <mika.kuopp...@intel.com>
---
  drivers/gpu/drm/i915/i915_dma.c         |    4 +++-
  drivers/gpu/drm/i915/i915_drv.h         |   19 +++++++++++++++++++
  drivers/gpu/drm/i915/i915_gem_context.c |   11 +++++++++++
  3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index e16099b..7902d97 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1792,7 +1792,7 @@ int i915_driver_open(struct drm_device *dev, struct 
drm_file *file)
        struct drm_i915_file_private *file_priv;

        DRM_DEBUG_DRIVER("\n");
-       file_priv = kmalloc(sizeof(*file_priv), GFP_KERNEL);
+       file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
        if (!file_priv)
                return -ENOMEM;

@@ -1801,6 +1801,8 @@ int i915_driver_open(struct drm_device *dev, struct 
drm_file *file)
        spin_lock_init(&file_priv->mm.lock);
        INIT_LIST_HEAD(&file_priv->mm.request_list);

+       i915_gem_context_init_reset_state(dev, &file_priv->reset_state);
+
        idr_init(&file_priv->context_idr);

        return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a54c507..d004548 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -433,6 +433,19 @@ struct i915_hw_ppgtt {
        void (*cleanup)(struct i915_hw_ppgtt *ppgtt);
  };

+struct ctx_reset_state {
+       /* guilty and reset counts when context initialized */
+       unsigned long guilty_cnt;
+       unsigned long reset_cnt;

I think we can afford to spell out "count." The first time I saw cnt, it looked like a dirty word. :)

I think this structure could use some better description of the overall architecture. It's not completely obvious from the individual pieces... and that makes it really hard to evaluate.

reset_cnt is the number of resets since start-up. What is guilty_cnt? What are innocent and guilty (below)?

All of this makes it difficult for me to tell whether or not the logic in patch 16 is correct... and I don't think it is.

+
+       unsigned innocent;
+       unsigned guilty;
+

        /* Time when this context was last blamed for a GPU reset.
         */
+       unsigned long last_guilty_reset;
+
+       /* banned to submit more work */
+       bool banned;
+};

  /* This must match up with the value previously used for execbuf2.rsvd1. */
  #define DEFAULT_CONTEXT_ID 0
@@ -443,6 +456,7 @@ struct i915_hw_context {
        struct drm_i915_file_private *file_priv;
        struct intel_ring_buffer *ring;
        struct drm_i915_gem_object *obj;
+       struct ctx_reset_state reset_state;
  };

  enum no_fbc_reason {
@@ -805,6 +819,7 @@ struct i915_gpu_error {

        unsigned long last_reset;

+       unsigned long guilty_cnt;
        /**
         * State variable and reset counter controlling the reset flow
         *
@@ -1257,6 +1272,8 @@ struct drm_i915_file_private {
                struct list_head request_list;
        } mm;
        struct idr context_idr;
+
+       struct ctx_reset_state reset_state;
  };

  #define INTEL_INFO(dev)       (((struct drm_i915_private *) 
(dev)->dev_private)->info)
@@ -1677,6 +1694,8 @@ struct i915_hw_context * __must_check
  i915_switch_context(struct intel_ring_buffer *ring,
                    struct drm_file *file, int to_id);
  void i915_gem_context_free(struct kref *ctx_ref);
+void i915_gem_context_init_reset_state(struct drm_device *dev,
+                                      struct ctx_reset_state *rs);
  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
                                  struct drm_file *file);
  int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 8fb4d3c..dbd14b8 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -145,6 +145,15 @@ static void do_destroy(struct i915_hw_context *ctx)
        kfree(ctx);
  }

+void i915_gem_context_init_reset_state(struct drm_device *dev,
+                                      struct ctx_reset_state *rs)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       rs->reset_cnt = atomic_read(&dev_priv->gpu_error.reset_counter);
+       rs->guilty_cnt = dev_priv->gpu_error.guilty_cnt;
+}
+
  static struct i915_hw_context *
  create_hw_context(struct drm_device *dev,
                  struct drm_i915_file_private *file_priv)
@@ -177,6 +186,8 @@ create_hw_context(struct drm_device *dev,

        ctx->file_priv = file_priv;

+       i915_gem_context_init_reset_state(dev, &ctx->reset_state);
+
  again:
        if (idr_pre_get(&file_priv->context_idr, GFP_KERNEL) == 0) {
                ret = -ENOMEM;


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to