Re: [Intel-gfx] [PATCH] drm/i915: read EU powergating status from command streamer

2018-06-21 Thread Chris Wilson
Quoting Lionel Landwerlin (2018-06-21 14:14:52)
> On 21/06/18 12:54, Chris Wilson wrote:
> > Other than questioning your sanity here at doing this rather than just
> > deleting the debugfs, there's nothing inherently broken. I would do it
> > for gen8 as well, no point leaving the odd one out.
> 
> We have an igt tests checking these values and because you haven't 
> brought up deleting the test before, I assumed we still wanted it fixed.
> But now that you brought it up, I'm equally fine deleting this debugfs 
> code and removing the igt test.

I presume as it exists someone found it useful. If not, wave goodbye :)

Checking over the bug again, it has only been ourselves discussing it,
no one else has noticed the bug so one presumes it is only igt that
cares.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: read EU powergating status from command streamer

2018-06-21 Thread Lionel Landwerlin

On 21/06/18 12:54, Chris Wilson wrote:

Quoting Lionel Landwerlin (2018-06-21 12:27:12)

Powergating of the EU array is configured as part of the context
image. This seems to imply we need a context to have run before we can
read the slice/subslice/EU powergating status.

This change captures the values of the powergating status registers
from the command streamer to ensure a valid we get valid values. This
is currently used only on gen9+ but someone could rightfully argue to
go as far as gen8.

Signed-off-by: Lionel Landwerlin 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103484
---
  drivers/gpu/drm/i915/i915_debugfs.c | 177 
  1 file changed, 155 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index c400f42a54ec..0da8ce8acbcb 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4310,14 +4310,120 @@ static void cherryview_sseu_device_status(struct 
drm_i915_private *dev_priv,
  #undef SS_MAX
  }
  
-static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,

-struct sseu_dev_info *sseu)
+static struct drm_i915_gem_object *
+gen9_read_registers(struct drm_i915_private *i915,
+   u32 *register_offsets,
+   u32 n_registers)

unsigned int n_register (or even unsigned long for total pedantry). I
don't see why you want to specify a bitwidth.


Sure.




+{
+   struct drm_i915_gem_object *bo;
+   struct i915_request *rq;
+   struct i915_vma *vma;
+   u32 *cs, bo_size = PAGE_SIZE * DIV_ROUND_UP(n_registers * 4, PAGE_SIZE);
+   int ret, r;
+
+   ret = i915_mutex_lock_interruptible(>drm);
+   if (ret)
+   return ERR_PTR(ret);
+
+   bo = i915_gem_object_create(i915, bo_size);

create_internal() (just be sure you don't read back uninitialised data)

bo_size = PAGE_ALIGN(sizeof(u32) * n_registers);


Thanks.




+   if (IS_ERR(bo)) {
+   ret = PTR_ERR(bo);
+   goto unlock;
+   }
+
+   ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
+   if (ret)
+   goto put_bo;
+
+
+   vma = i915_vma_instance(bo,
+   >kernel_context->ppgtt->vm,
+   NULL);
+   if (IS_ERR(vma)) {
+   ret = PTR_ERR(vma);
+   goto put_bo;
+   }
+
+   ret = i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, PIN_USER);

CONTEXT_ALIGN?


Dammit... copy-paste...




+   if (ret)
+   goto vma_unpin;
+
+
+   rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context);
+   if (IS_ERR(rq)) {
+   ret = PTR_ERR(rq);
+   goto vma_unpin;
+   }
+
+   cs = intel_ring_begin(rq, n_registers * 4);
+   if (IS_ERR(cs)) {
+   i915_request_add(rq);
+   ret = PTR_ERR(cs);
+   goto vma_unpin;
+   }
+
+   for (r = 0; r < n_registers; r++) {
+   u64 offset = vma->node.start + r * 4;
+
+   *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+   *cs++ = register_offsets[r];
+   *cs++ = lower_32_bits(offset);
+   *cs++ = upper_32_bits(offset);
+   }

I think you should i915_vma_move_to_active() for safety and give it
an active reference.


Indeed.




+   intel_ring_advance(rq, cs);
+
+   i915_request_add(rq);
+
+   mutex_unlock(>drm.struct_mutex);
+
+   i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);

For sanity, check for an error. (Hence why using vma_move_to_active
becomes relevant.)


Thanks!




+   return bo;
+
+vma_unpin:
+   i915_vma_unpin(vma);
+put_bo:
+   i915_gem_object_put(bo);
+unlock:
+   mutex_unlock(>drm.struct_mutex);
+   return bo;

Report the ERR_PTR(ret) (otherwise, a neat use-after-free).


Oops my bad.




+}
+
+
+static int gen10_sseu_device_status(struct drm_i915_private *dev_priv,
+   struct sseu_dev_info *sseu)
  {
  #define SS_MAX 6
 const struct intel_device_info *info = INTEL_INFO(dev_priv);
-   u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
+   struct drm_i915_gem_object *bo;
+   struct {
+   u32 s_reg[SS_MAX];
+   u32 eu_reg[2 * SS_MAX];
+   } reg_offsets, *reg_data;
+   u32 eu_mask[2];
 int s, ss;
  
+   for (s = 0; s < info->sseu.max_slices; s++) {

+   reg_offsets.s_reg[s] =
+   i915_mmio_reg_offset(GEN10_SLICE_PGCTL_ACK(s));
+   reg_offsets.eu_reg[2 * s] =
+   i915_mmio_reg_offset(GEN10_SS01_EU_PGCTL_ACK(s));
+   reg_offsets.eu_reg[2 * s] =
+   i915_mmio_reg_offset(GEN10_SS23_EU_PGCTL_ACK(s));

I started by wishing you used i915_mmio_t, but I can appreciate the
logic of having offset/data tied together in the same layout.


+   }
+
+ 

Re: [Intel-gfx] [PATCH] drm/i915: read EU powergating status from command streamer

2018-06-21 Thread Chris Wilson
Quoting Lionel Landwerlin (2018-06-21 12:27:12)
> Powergating of the EU array is configured as part of the context
> image. This seems to imply we need a context to have run before we can
> read the slice/subslice/EU powergating status.
> 
> This change captures the values of the powergating status registers
> from the command streamer to ensure a valid we get valid values. This
> is currently used only on gen9+ but someone could rightfully argue to
> go as far as gen8.
> 
> Signed-off-by: Lionel Landwerlin 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103484
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c | 177 
>  1 file changed, 155 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index c400f42a54ec..0da8ce8acbcb 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4310,14 +4310,120 @@ static void cherryview_sseu_device_status(struct 
> drm_i915_private *dev_priv,
>  #undef SS_MAX
>  }
>  
> -static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
> -struct sseu_dev_info *sseu)
> +static struct drm_i915_gem_object *
> +gen9_read_registers(struct drm_i915_private *i915,
> +   u32 *register_offsets,
> +   u32 n_registers)

unsigned int n_register (or even unsigned long for total pedantry). I
don't see why you want to specify a bitwidth.

> +{
> +   struct drm_i915_gem_object *bo;
> +   struct i915_request *rq;
> +   struct i915_vma *vma;
> +   u32 *cs, bo_size = PAGE_SIZE * DIV_ROUND_UP(n_registers * 4, 
> PAGE_SIZE);
> +   int ret, r;
> +
> +   ret = i915_mutex_lock_interruptible(>drm);
> +   if (ret)
> +   return ERR_PTR(ret);
> +
> +   bo = i915_gem_object_create(i915, bo_size);

create_internal() (just be sure you don't read back uninitialised data)

bo_size = PAGE_ALIGN(sizeof(u32) * n_registers);

> +   if (IS_ERR(bo)) {
> +   ret = PTR_ERR(bo);
> +   goto unlock;
> +   }
> +
> +   ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
> +   if (ret)
> +   goto put_bo;
> +
> +
> +   vma = i915_vma_instance(bo,
> +   >kernel_context->ppgtt->vm,
> +   NULL);
> +   if (IS_ERR(vma)) {
> +   ret = PTR_ERR(vma);
> +   goto put_bo;
> +   }
> +
> +   ret = i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, PIN_USER);

CONTEXT_ALIGN?

> +   if (ret)
> +   goto vma_unpin;
> +
> +
> +   rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context);
> +   if (IS_ERR(rq)) {
> +   ret = PTR_ERR(rq);
> +   goto vma_unpin;
> +   }
> +
> +   cs = intel_ring_begin(rq, n_registers * 4);
> +   if (IS_ERR(cs)) {
> +   i915_request_add(rq);
> +   ret = PTR_ERR(cs);
> +   goto vma_unpin;
> +   }
> +
> +   for (r = 0; r < n_registers; r++) {
> +   u64 offset = vma->node.start + r * 4;
> +
> +   *cs++ = MI_STORE_REGISTER_MEM_GEN8;
> +   *cs++ = register_offsets[r];
> +   *cs++ = lower_32_bits(offset);
> +   *cs++ = upper_32_bits(offset);
> +   }

I think you should i915_vma_move_to_active() for safety and give it
an active reference.

> +   intel_ring_advance(rq, cs);
> +
> +   i915_request_add(rq);
> +
> +   mutex_unlock(>drm.struct_mutex);
> +
> +   i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);

For sanity, check for an error. (Hence why using vma_move_to_active
becomes relevant.)

> +   return bo;
> +
> +vma_unpin:
> +   i915_vma_unpin(vma);
> +put_bo:
> +   i915_gem_object_put(bo);
> +unlock:
> +   mutex_unlock(>drm.struct_mutex);
> +   return bo;

Report the ERR_PTR(ret) (otherwise, a neat use-after-free).

> +}
> +
> +
> +static int gen10_sseu_device_status(struct drm_i915_private *dev_priv,
> +   struct sseu_dev_info *sseu)
>  {
>  #define SS_MAX 6
> const struct intel_device_info *info = INTEL_INFO(dev_priv);
> -   u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
> +   struct drm_i915_gem_object *bo;
> +   struct {
> +   u32 s_reg[SS_MAX];
> +   u32 eu_reg[2 * SS_MAX];
> +   } reg_offsets, *reg_data;
> +   u32 eu_mask[2];
> int s, ss;
>  
> +   for (s = 0; s < info->sseu.max_slices; s++) {
> +   reg_offsets.s_reg[s] =
> +   i915_mmio_reg_offset(GEN10_SLICE_PGCTL_ACK(s));
> +   reg_offsets.eu_reg[2 * s] =
> +   i915_mmio_reg_offset(GEN10_SS01_EU_PGCTL_ACK(s));
> +   reg_offsets.eu_reg[2 * s] =
> +   i915_mmio_reg_offset(GEN10_SS23_EU_PGCTL_ACK(s));

I started by wishing you used i915_mmio_t,