Re: [Intel-gfx] [PATCH] drm/i915: release mutex in i915_gem_init()'s error path

2014-03-21 Thread Ben Widawsky
On Tue, Feb 04, 2014 at 12:11:03PM +0100, Daniel Vetter wrote:
> On Fri, Jan 31, 2014 at 05:14:02PM +0200, Mika Kuoppala wrote:
> > Found with smatch.
> > 
> > Signed-off-by: Mika Kuoppala 
> 
> Both smatch patches merged to dinq, thanks.
> -Daniel

CC stable?


-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Don't save/restore RS when not used

2014-03-20 Thread Ben Widawsky
Cc: Kenneth Graunke 
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 185c926..ae4597c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -588,6 +588,7 @@ mi_set_context(struct intel_ring_buffer *ring,
   struct i915_hw_context *new_context,
   u32 hw_flags)
 {
+   u32 flags = hw_flags | MI_MM_SPACE_GTT;
int ret;
 
/* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
@@ -601,6 +602,10 @@ mi_set_context(struct intel_ring_buffer *ring,
return ret;
}
 
+   /* These flags are for resource streamer on HSW+ */
+   if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
+   flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
+
ret = intel_ring_begin(ring, 6);
if (ret)
return ret;
@@ -613,11 +618,8 @@ mi_set_context(struct intel_ring_buffer *ring,
 
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_SET_CONTEXT);
-   intel_ring_emit(ring, i915_gem_obj_ggtt_offset(new_context->obj) |
-   MI_MM_SPACE_GTT |
-   MI_SAVE_EXT_STATE_EN |
-   MI_RESTORE_EXT_STATE_EN |
-   hw_flags);
+   intel_ring_emit(ring,
+   i915_gem_obj_ggtt_offset(new_context->obj) | flags);
/*
 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 * WaMiSetContext_Hang:snb,ivb,vlv
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915: Print how many objects are shared in per-process stats

2014-03-19 Thread Ben Widawsky
On Wed, Mar 19, 2014 at 01:45:46PM +0000, Chris Wilson wrote:
> Signed-off-by: Chris Wilson 

Any clue how you intend to use this for a commit message (I'm actually
curious)? Also, the subject is wrong, you're counting size, not
quantity. Anyhoo, looks correct.

Reviewed-by: Ben Widawsky 

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index 4e1787ee8f37..9cc1c9360238 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -301,7 +301,9 @@ static int i915_gem_stolen_list_info(struct seq_file *m, 
> void *data)
>  struct file_stats {
>   struct drm_i915_file_private *file_priv;
>   int count;
> - size_t total, global, active, inactive, unbound;
> + size_t total, unbound;
> + size_t global, shared;
> + size_t active, inactive;
>  };
>  
>  static int per_file_stats(int id, void *ptr, void *data)
> @@ -313,6 +315,9 @@ static int per_file_stats(int id, void *ptr, void *data)
>   stats->count++;
>   stats->total += obj->base.size;
>  
> + if (obj->base.name || obj->base.dma_buf)
> + stats->shared += obj->base.size;
> +
>   if (USES_FULL_PPGTT(obj->base.dev)) {
>   list_for_each_entry(vma, &obj->vma_list, vma_link) {
>   struct i915_hw_ppgtt *ppgtt;
> @@ -450,13 +455,14 @@ static int i915_gem_object_info(struct seq_file *m, 
> void* data)
>*/
>   rcu_read_lock();
>   task = pid_task(file->pid, PIDTYPE_PID);
> - seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu 
> inactive, %zu global, %zu unbound)\n",
> + seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu 
> inactive, %zu global, %zu shared, %zu unbound)\n",
>  task ? task->comm : "",
>  stats.count,
>  stats.total,
>  stats.active,
>  stats.inactive,
>  stats.global,
> +stats.shared,
>  stats.unbound);
>       rcu_read_unlock();
>   }
> -- 
> 1.9.1
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] prime_mmap: Add new test for calling mmap() on dma-buf fds

2014-03-19 Thread Ben Widawsky
*/
> + handle = gem_create(fd, BO_SIZE);
> + fill_bo(handle, BO_SIZE);
> + gem_close(fd, handle);
> + prime_handle_to_fd_no_assert(handle, &dma_buf_fd);
> + igt_assert(dma_buf_fd == -1 && errno == ENOENT);
> + errno = 0;
> +
> + /* close fd before mapping */
> + handle = gem_create(fd, BO_SIZE);
> + fill_bo(handle, BO_SIZE);
> + dma_buf_fd = prime_handle_to_fd(fd, handle);
> + igt_assert(errno == 0);
> + close(dma_buf_fd);
> + ptr = mmap(NULL, BO_SIZE, PROT_READ, MAP_SHARED, dma_buf_fd, 0);
> + igt_assert(ptr == MAP_FAILED && errno == EBADF);
> + errno = 0;
> + gem_close(fd, handle);
> +
> + /* Map too big */
> + handle = gem_create(fd, BO_SIZE);
> + fill_bo(handle, BO_SIZE);
> + dma_buf_fd = prime_handle_to_fd(fd, handle);
> + igt_assert(errno == 0);
> + ptr = mmap(NULL, BO_SIZE * 2, PROT_READ, MAP_SHARED, dma_buf_fd, 0);
> + igt_assert(ptr == MAP_FAILED && errno == EINVAL);
> + errno = 0;
> + close(dma_buf_fd);
> + gem_close(fd, handle);
> +}
> +
> +static void
> +test_aperture_limit(void)
> +{
> + int dma_buf_fd1, dma_buf_fd2;
> + char *ptr1, *ptr2;
> + uint32_t handle1, handle2;
> + /* Two buffers the sum of which > mappable aperture */
> + uint64_t size1 = (gem_mappable_aperture_size() * 7) / 8;
> + uint64_t size2 = (gem_mappable_aperture_size() * 3) / 8;
> +
> + handle1 = gem_create(fd, size1);
> + fill_bo(handle1, BO_SIZE);
> +
> + dma_buf_fd1 = prime_handle_to_fd(fd, handle1);
> + igt_assert(errno == 0);
> + ptr1 = mmap(NULL, size1, PROT_READ, MAP_SHARED, dma_buf_fd1, 0);
> + igt_assert(ptr1 != MAP_FAILED);
> + igt_assert(pattern_check(ptr1, BO_SIZE) == 0);
> +
> + handle2 = gem_create(fd, size1);
> + fill_bo(handle2, BO_SIZE);
> + dma_buf_fd2 = prime_handle_to_fd(fd, handle2);
> + igt_assert(errno == 0);
> + ptr2 = mmap(NULL, size2, PROT_READ, MAP_SHARED, dma_buf_fd2, 0);
> + igt_assert(ptr2 != MAP_FAILED);
> + igt_assert(pattern_check(ptr2, BO_SIZE) == 0);
> +
> + igt_assert(memcmp(ptr1, ptr2, BO_SIZE) == 0);
> +
> + munmap(ptr1, size1);
> + munmap(ptr2, size2);
> + close(dma_buf_fd1);
> + close(dma_buf_fd2);
> + gem_close(fd, handle1);
> + gem_close(fd, handle2);
> +}
> +
> +static int
> +check_for_dma_buf_mmap(void)
> +{
> + int dma_buf_fd;
> + char *ptr;
> + uint32_t handle;
> + int ret = 1;
> +
> + handle = gem_create(fd, BO_SIZE);
> + dma_buf_fd = prime_handle_to_fd(fd, handle);
> + ptr = mmap(NULL, BO_SIZE, PROT_READ, MAP_SHARED, dma_buf_fd, 0);
> + if (ptr != MAP_FAILED)
> + ret = 0;
> + munmap(ptr, BO_SIZE);
> + gem_close(fd, handle);
> + close (dma_buf_fd);
> + return ret;
> +}
> +
> +igt_main
> +{
> + struct {
> + const char *name;
> + void (*fn)(void);
> + } tests[] = {
> + { "test_correct", test_correct },
> + { "test_map_unmap", test_map_unmap },
> + { "test_reprime", test_reprime },
> + { "test_forked", test_forked },
> + { "test_refcounting", test_refcounting },
> + { "test_dup", test_dup },
> + { "test_errors", test_errors },
> + { "test_aperture_limit", test_aperture_limit },
> + };
> + int i;
> +
> + igt_fixture {
> + fd = drm_open_any();
> + errno = 0;
> + }
> +
> + igt_skip_on((check_for_dma_buf_mmap() != 0));
> +
> + for (i = 0; i < ARRAY_SIZE(tests); i++) {
> + igt_subtest(tests[i].name)
> + tests[i].fn();
> + }
> +
> + igt_fixture
> + close(fd);
> +}
> -- 

lgtm. Put the commit message in the test description and call it
Reviewed-by: Ben Widawsky 

I'm pretty impressed you used all the igt_*_foo.

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 07/12] drm/i915/bdw: Set initial rps freq to RP0

2014-03-19 Thread Ben Widawsky
Programming it outside of the rp0-rp1 range is considered a programming
error. Since we do not know that the previous value would actually be in
the range, program something we've read from the hardware, and therefore
know will work.

This is potentially an issue for platforms whose ranges are outside the
norms given in the programming guide (i.e. early silicon).

v2: Use RP1 instead of RPn

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/intel_pm.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index fd68f93..8a64ecc 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3285,8 +3285,10 @@ static void gen8_enable_rps(struct drm_device *dev)
rc6_mask);
 
/* 4 Program defaults and thresholds for RPS*/
-   I915_WRITE(GEN6_RPNSWREQ, HSW_FREQUENCY(10)); /* Request 500 MHz */
-   I915_WRITE(GEN6_RC_VIDEO_FREQ, HSW_FREQUENCY(12)); /* Request 600 MHz */
+   I915_WRITE(GEN6_RPNSWREQ,
+  HSW_FREQUENCY(dev_priv->rps.rp1_freq));
+   I915_WRITE(GEN6_RC_VIDEO_FREQ,
+  HSW_FREQUENCY(dev_priv->rps.rp1_freq));
/* NB: Docs say 1s, and 100 - which aren't equivalent */
I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1 / 128); /* 1 second timeout 
*/
 
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 06/12] drm/i915: remove rps local variables

2014-03-19 Thread Ben Widawsky
With the renamed RPS struct members, it's easier to skip the local
variables which no longer clarify anything, and if anything just make
the code harder to read.

The real motivation for this patch is actually the next patch, which
attempts to consolidate some of the functionality.

Cc: Jeff McGee 
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_sysfs.c | 36 ---
 drivers/gpu/drm/i915/intel_pm.c   | 40 ---
 2 files changed, 33 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sysfs.c 
b/drivers/gpu/drm/i915/i915_sysfs.c
index 49554d9..9c57029 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -313,7 +313,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
struct drm_minor *minor = dev_to_drm_minor(kdev);
struct drm_device *dev = minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
-   u32 val, hw_max, hw_min, non_oc_max;
+   u32 val;
ssize_t ret;
 
ret = kstrtou32(buf, 0, &val);
@@ -324,26 +324,19 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 
mutex_lock(&dev_priv->rps.hw_lock);
 
-   if (IS_VALLEYVIEW(dev_priv->dev)) {
+   if (IS_VALLEYVIEW(dev_priv->dev))
val = vlv_freq_opcode(dev_priv, val);
-
-   non_oc_max = hw_max = dev_priv->rps.max_freq;
-   hw_min = dev_priv->rps.min_freq;
-   } else {
+   else
val /= GT_FREQUENCY_MULTIPLIER;
 
-   hw_max = dev_priv->rps.max_freq;
-   non_oc_max = dev_priv->rps.rp0_freq;
-   hw_min = dev_priv->rps.min_freq;
-   }
-
-   if (val < hw_min || val > hw_max ||
+   if (val < dev_priv->rps.min_freq ||
+   val > dev_priv->rps.max_freq ||
val < dev_priv->rps.min_freq_softlimit) {
mutex_unlock(&dev_priv->rps.hw_lock);
return -EINVAL;
}
 
-   if (val > non_oc_max)
+   if (val > dev_priv->rps.rp0_freq)
DRM_DEBUG("User requested overclocking to %d\n",
  val * GT_FREQUENCY_MULTIPLIER);
 
@@ -392,7 +385,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
struct drm_minor *minor = dev_to_drm_minor(kdev);
struct drm_device *dev = minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
-   u32 val, hw_max, hw_min;
+   u32 val;
ssize_t ret;
 
ret = kstrtou32(buf, 0, &val);
@@ -403,19 +396,14 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 
mutex_lock(&dev_priv->rps.hw_lock);
 
-   if (IS_VALLEYVIEW(dev)) {
+   if (IS_VALLEYVIEW(dev))
val = vlv_freq_opcode(dev_priv, val);
-
-   hw_max = dev_priv->rps.max_freq;
-   hw_min = dev_priv->rps.min_freq;
-   } else {
+   else
val /= GT_FREQUENCY_MULTIPLIER;
 
-   hw_max = dev_priv->rps.max_freq;
-   hw_min = dev_priv->rps.min_freq;
-   }
-
-   if (val < hw_min || val > hw_max || val > 
dev_priv->rps.max_freq_softlimit) {
+   if (val < dev_priv->rps.min_freq ||
+   val > dev_priv->rps.max_freq ||
+   val > dev_priv->rps.max_freq_softlimit) {
mutex_unlock(&dev_priv->rps.hw_lock);
return -EINVAL;
}
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 3db7c40..fd68f93 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3324,7 +3324,7 @@ static void gen6_enable_rps(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_ring_buffer *ring;
-   u32 rp_state_cap, hw_max, hw_min;
+   u32 rp_state_cap;
u32 gt_perf_status;
u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
u32 gtfifodbg;
@@ -3353,21 +3353,22 @@ static void gen6_enable_rps(struct drm_device *dev)
gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
 
/* All of these values are in units of 50MHz */
-   dev_priv->rps.cur_freq = 0;
-   /* hw_max = RP0 until we check for overclocking */
-   dev_priv->rps.max_freq = hw_max = rp_state_cap & 0xff;
+   dev_priv->rps.cur_freq  = 0;
/* static values from HW: RP0 < RPe < RP1 < RPn (min_freq) */
-   dev_priv->rps.rp1_freq = (rp_state_cap >>  8) & 0xff;
-   dev_priv->rps.rp0_freq = (rp_state_cap >>  0) & 0xff;
-   dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
-   dev_priv->rps.min_freq = hw_min = (rp_state_cap >> 16) & 0xff;
+   dev_priv->rps.rp1_freq  = (rp_state_cap >>  8) &

[Intel-gfx] [PATCH 04/12] drm/i915: Rename and comment all the RPS *stuff*

2014-03-19 Thread Ben Widawsky
The names of the struct members for RPS are stupid. Every time I need to
do anything in this code I have to spend a significant amount of time to
remember what it all means. By renaming the variables (and adding the
comments) I hope to clear up the situation. Indeed doing this makes some
upcoming patches more readable.

I've avoided ILK because it's possible that the naming used for Ironlake
matches what is in the docs. I believe the ILK power docs were never
published, and I am too lazy to dig them up.

v2: leave rp0, and rp1 in the names. It is useful to have these limits
available at times. min_freq and max_freq (which may be equal to rp0, or
rp1 depending on the platform) represent the actual HW min and max.

Cc: Chris Wilson 
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  26 
 drivers/gpu/drm/i915/i915_drv.h |  26 +---
 drivers/gpu/drm/i915/i915_irq.c |  25 
 drivers/gpu/drm/i915/i915_sysfs.c   |  32 +-
 drivers/gpu/drm/i915/intel_pm.c | 118 ++--
 5 files changed, 120 insertions(+), 107 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 6037913..d1e0a36 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1026,7 +1026,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void 
*unused)
   max_freq * GT_FREQUENCY_MULTIPLIER);
 
seq_printf(m, "Max overclocked frequency: %dMHz\n",
-  dev_priv->rps.hw_max * GT_FREQUENCY_MULTIPLIER);
+  dev_priv->rps.max_freq * GT_FREQUENCY_MULTIPLIER);
} else if (IS_VALLEYVIEW(dev)) {
u32 freq_sts, val;
 
@@ -1498,8 +1498,8 @@ static int i915_ring_freq_table(struct seq_file *m, void 
*unused)
 
seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring 
freq (MHz)\n");
 
-   for (gpu_freq = dev_priv->rps.min_delay;
-gpu_freq <= dev_priv->rps.max_delay;
+   for (gpu_freq = dev_priv->rps.min_freq_softlimit;
+gpu_freq <= dev_priv->rps.max_freq_softlimit;
 gpu_freq++) {
ia_freq = gpu_freq;
sandybridge_pcode_read(dev_priv,
@@ -3449,9 +3449,9 @@ i915_max_freq_get(void *data, u64 *val)
return ret;
 
if (IS_VALLEYVIEW(dev))
-   *val = vlv_gpu_freq(dev_priv, dev_priv->rps.max_delay);
+   *val = vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit);
else
-   *val = dev_priv->rps.max_delay * GT_FREQUENCY_MULTIPLIER;
+   *val = dev_priv->rps.max_freq_softlimit * 
GT_FREQUENCY_MULTIPLIER;
mutex_unlock(&dev_priv->rps.hw_lock);
 
return 0;
@@ -3488,16 +3488,16 @@ i915_max_freq_set(void *data, u64 val)
do_div(val, GT_FREQUENCY_MULTIPLIER);
 
rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
-   hw_max = dev_priv->rps.hw_max;
+   hw_max = dev_priv->rps.max_freq;
hw_min = (rp_state_cap >> 16) & 0xff;
}
 
-   if (val < hw_min || val > hw_max || val < dev_priv->rps.min_delay) {
+   if (val < hw_min || val > hw_max || val < 
dev_priv->rps.min_freq_softlimit) {
mutex_unlock(&dev_priv->rps.hw_lock);
return -EINVAL;
}
 
-   dev_priv->rps.max_delay = val;
+   dev_priv->rps.max_freq_softlimit = val;
 
if (IS_VALLEYVIEW(dev))
valleyview_set_rps(dev, val);
@@ -3530,9 +3530,9 @@ i915_min_freq_get(void *data, u64 *val)
return ret;
 
if (IS_VALLEYVIEW(dev))
-   *val = vlv_gpu_freq(dev_priv, dev_priv->rps.min_delay);
+   *val = vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit);
else
-   *val = dev_priv->rps.min_delay * GT_FREQUENCY_MULTIPLIER;
+   *val = dev_priv->rps.min_freq_softlimit * 
GT_FREQUENCY_MULTIPLIER;
mutex_unlock(&dev_priv->rps.hw_lock);
 
return 0;
@@ -3569,16 +3569,16 @@ i915_min_freq_set(void *data, u64 val)
do_div(val, GT_FREQUENCY_MULTIPLIER);
 
rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
-   hw_max = dev_priv->rps.hw_max;
+   hw_max = dev_priv->rps.max_freq;
hw_min = (rp_state_cap >> 16) & 0xff;
}
 
-   if (val < hw_min || val > hw_max || val > dev_priv->rps.max_delay) {
+   if (val < hw_min || val > hw_max || val > 
dev_priv->rps.max_freq_softlimit) {
mutex_unlock(&dev_priv->rps.hw_lock);
return -EINVAL;
}
 
-   dev_priv->rps.min_delay = val;
+   dev_priv->rps.min_freq_softlimit = val;
 
if (IS_

[Intel-gfx] [PATCH 03/12] drm/i915: Store the HW min frequency as min_freq

2014-03-19 Thread Ben Widawsky
This leaves a temporarily awkward min_delay (the soft limit) with the
new min_freq (the hardware limit). It's fixed in the next patch.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_drv.h | 1 +
 drivers/gpu/drm/i915/intel_pm.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9cd870f..241f5e1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -987,6 +987,7 @@ struct intel_gen6_power_mgmt {
u8 rp1_delay;
u8 rp0_delay;
u8 hw_max;
+   u8 min_freq;
 
bool rp_up_masked;
bool rp_down_masked;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index dd3a121..dd631d1 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3354,7 +3354,7 @@ static void gen6_enable_rps(struct drm_device *dev)
 
/* In units of 50MHz */
dev_priv->rps.hw_max = hw_max = rp_state_cap & 0xff;
-   hw_min = (rp_state_cap >> 16) & 0xff;
+   dev_priv->rps.min_freq = hw_min = (rp_state_cap >> 16) & 0xff;
dev_priv->rps.rp1_delay = (rp_state_cap >>  8) & 0xff;
dev_priv->rps.rp0_delay = (rp_state_cap >>  0) & 0xff;
dev_priv->rps.rpe_delay = dev_priv->rps.rp1_delay;
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 09/12] drm/i915/bdw: RPS frequency bits are the same as HSW

2014-03-19 Thread Ben Widawsky
Reviewed-by: Rodrigo Vivi 
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/intel_pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index ab9e992..9486396 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3028,7 +3028,7 @@ void gen6_set_rps(struct drm_device *dev, u8 val)
 
gen6_set_rps_thresholds(dev_priv, val);
 
-   if (IS_HASWELL(dev))
+   if (IS_HASWELL(dev) || IS_BROADWELL(dev))
I915_WRITE(GEN6_RPNSWREQ,
   HSW_FREQUENCY(val));
else
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 01/12] drm/i915: Reorganize the overclock code

2014-03-19 Thread Ben Widawsky
The existing code (which I changed last) was very convoluted. I believe
it was attempting to skip the overclock portion if the previous pcode
write failed. When I last touched the code, I was preserving this
behavior. There is some benefit to doing it that way in that if the
first pcode access fails, the later one is likely invalid.

Having a bit more confidence in my understanding of how things work, I
now feel it's better to have clear, readable code than to try to skip
over this one operation in an unusual case.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/intel_pm.c | 20 +---
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 39f3238..dd3a121 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3326,7 +3326,7 @@ static void gen6_enable_rps(struct drm_device *dev)
struct intel_ring_buffer *ring;
u32 rp_state_cap, hw_max, hw_min;
u32 gt_perf_status;
-   u32 rc6vids, pcu_mbox, rc6_mask = 0;
+   u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
u32 gtfifodbg;
int rc6_mode;
int i, ret;
@@ -3414,17 +3414,15 @@ static void gen6_enable_rps(struct drm_device *dev)
I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
 
ret = sandybridge_pcode_write(dev_priv, 
GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
-   if (!ret) {
-   pcu_mbox = 0;
-   ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, 
&pcu_mbox);
-   if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
-   DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, 
Overclock max: %dMHz\n",
-(dev_priv->rps.max_delay & 0xff) * 50,
-(pcu_mbox & 0xff) * 50);
-   dev_priv->rps.hw_max = pcu_mbox & 0xff;
-   }
-   } else {
+   if (ret)
DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
+
+   ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
+   if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
+   DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock 
max: %dMHz\n",
+(dev_priv->rps.max_delay & 0xff) * 50,
+(pcu_mbox & 0xff) * 50);
+   dev_priv->rps.hw_max = pcu_mbox & 0xff;
}
 
dev_priv->rps.power = HIGH_POWER; /* force a reset */
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 12/12] drm/i915/bdw: Enable RC6

2014-03-19 Thread Ben Widawsky
It is tested and looking fairly stable now, so turn it on. It wasn't
intentionally turned off originally :P

Reviewed-by: Rodrigo Vivi 
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/intel_pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 38e4d60..8e98042 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4518,7 +4518,7 @@ void intel_enable_gt_powersave(struct drm_device *dev)
ironlake_enable_drps(dev);
ironlake_enable_rc6(dev);
intel_init_emon(dev);
-   } else if (IS_GEN6(dev) || IS_GEN7(dev)) {
+   } else if (IS_GEN6(dev) || IS_GEN7(dev) || IS_BROADWELL(dev)) {
if (IS_VALLEYVIEW(dev))
valleyview_setup_pctx(dev);
/*
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 00/12] [v3] BDW RPS + RC6 + rps fixlets

2014-03-19 Thread Ben Widawsky

Ben Widawsky (12):
  drm/i915: Reorganize the overclock code
  drm/i915: Fix coding style for RPS
  drm/i915: Store the HW min frequency as min_freq
  drm/i915: Rename and comment all the RPS *stuff*
  drm/i915: Remove extraneous MMIO for RPS
  drm/i915: remove rps local variables
  drm/i915/bdw: Set initial rps freq to RP0
  drm/i915/bdw: Extract rp_state_caps logic
  drm/i915/bdw: RPS frequency bits are the same as HSW
  drm/i915/bdw: Implement a basic PM interrupt handler
  drm/i915/bdw: Ensure a context is loaded before RC6
  drm/i915/bdw: Enable RC6

 drivers/gpu/drm/i915/i915_debugfs.c  |  26 ++---
 drivers/gpu/drm/i915/i915_drv.c  |   4 +-
 drivers/gpu/drm/i915/i915_drv.h  |  25 ++--
 drivers/gpu/drm/i915/i915_gem.c  |  10 ++
 drivers/gpu/drm/i915/i915_irq.c  | 109 +++---
 drivers/gpu/drm/i915/i915_reg.h  |   1 +
 drivers/gpu/drm/i915/i915_sysfs.c|  81 ++---
 drivers/gpu/drm/i915/intel_display.c |   5 +
 drivers/gpu/drm/i915/intel_drv.h |   2 +
 drivers/gpu/drm/i915/intel_pm.c  | 213 +--
 10 files changed, 306 insertions(+), 170 deletions(-)

-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 02/12] drm/i915: Fix coding style for RPS

2014-03-19 Thread Ben Widawsky
Introduced:
commit b8a5ff8d7c676a04e0da5ec16bb068dd39459042
Author: Jeff McGee 
Date:   Tue Feb 4 11:37:01 2014 -0600

drm/i915: Update rps interrupt limits

Cc: Jeff McGee 
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_sysfs.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sysfs.c 
b/drivers/gpu/drm/i915/i915_sysfs.c
index 0c741f4..e9ffefb 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -356,12 +356,12 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
valleyview_set_rps(dev, val);
else
gen6_set_rps(dev, val);
-   }
-   else if (!IS_VALLEYVIEW(dev))
-   /* We still need gen6_set_rps to process the new max_delay
-  and update the interrupt limits even though frequency
-  request is unchanged. */
+   } else if (!IS_VALLEYVIEW(dev)) {
+   /* We still need gen6_set_rps to process the new max_delay and
+* update the interrupt limits even though frequency request is
+* unchanged. */
gen6_set_rps(dev, dev_priv->rps.cur_delay);
+   }
 
mutex_unlock(&dev_priv->rps.hw_lock);
 
@@ -430,12 +430,12 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
valleyview_set_rps(dev, val);
else
gen6_set_rps(dev, val);
-   }
-   else if (!IS_VALLEYVIEW(dev))
-   /* We still need gen6_set_rps to process the new min_delay
-  and update the interrupt limits even though frequency
-  request is unchanged. */
+   } else if (!IS_VALLEYVIEW(dev)) {
+   /* We still need gen6_set_rps to process the new min_delay and
+* update the interrupt limits even though frequency request is
+* unchanged. */
gen6_set_rps(dev, dev_priv->rps.cur_delay);
+   }
 
mutex_unlock(&dev_priv->rps.hw_lock);
 
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 11/12] drm/i915/bdw: Ensure a context is loaded before RC6

2014-03-19 Thread Ben Widawsky
RC6 works a lot like HW contexts in that when the GPU enters RC6 it
saves away the state to a context, and loads it upon wake.

It's to be somewhat expected that BIOS will not set up valid GPU state.
As a result, if loading bad state can cause the GPU to get angry, it
would make sense then that we need to load state first. There are two
ways in which we can do this:

1. Create 3d state in the driver, load it up, then enable RC6.
1b. Reuse a known good state, [and if needed,] just bind objects where
needed. Then enable RC6.
2. Hold off enabling RC6 until userspace has had a chance to complete
batches.

There have been discussions in the past about #1 as it has been
recommended for fixes elsewhere. I'm not opposed to it, I'd just like to
do the easy thing now to enable the platform.

This patch is a hack that implements option #2. It will defer enabling
rc6 until the first batch from userspace has been retired. It suffers
from two flaws. The first is, if the driver is loaded, but a batch is not
submitted/completed, we'll never enter rc6. The other is, it expects
userspace to submit a batch with 3d state first. Both of these things
are not actual flaws for most users because most users will boot to a
graphical composited desktop. Both mesa, and X will always emit the
necessary 3d state.

Once a context is loaded and we enable rc6, the default context should
inherit the proper state because we always inhibit the restore for the
default context. This assumes certain things about the workaround/issue
itself to which I am not privy (primarily that the indirect state
objects don't actually need to exist).

With that, there are currently 4 options for BDW:
1. Don't use RC6.
2. Use RC6 and expect a hang on the first batch submitted for every
context.
3. Use RC6 and use this patch.
4. Wait for another workaround implementation.

NOTE: This patch could be used against other platforms as well.

v2: Re-add accidentally dropped hunk (Ben)

v3: Now more compilable (Ben)

v4: Use the existing enable flag for rc6. This will also make the
suspend/resume case work properly, which is broken in v3.
Disable rc6 on reset, and defer re-enabling until the first batch.

The fact that RC6 residency continues to increment, and that this patch
prevents a hang on BDW silicon has been:
Tested-by: Kenneth Graunke  (v1)

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_drv.c  |  4 +++-
 drivers/gpu/drm/i915/i915_gem.c  | 10 ++
 drivers/gpu/drm/i915/intel_display.c |  5 +
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index fa5d0ed..4dc18ea 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -672,6 +672,8 @@ int i915_reset(struct drm_device *dev)
mutex_lock(&dev->struct_mutex);
 
i915_gem_reset(dev);
+   if (IS_BROADWELL(dev))
+   intel_disable_gt_powersave(dev);
 
simulated = dev_priv->gpu_error.stop_rings != 0;
 
@@ -726,7 +728,7 @@ int i915_reset(struct drm_device *dev)
 * reset and the re-install of drm irq. Skip for ironlake per
 * previous concerns that it doesn't respond well to some forms
 * of re-init after reset. */
-   if (INTEL_INFO(dev)->gen > 5) {
+   if (INTEL_INFO(dev)->gen > 5 && !IS_BROADWELL(dev)) {
mutex_lock(&dev->struct_mutex);
intel_enable_gt_powersave(dev);
mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ee32759..4de8800 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2436,6 +2436,7 @@ void i915_gem_reset(struct drm_device *dev)
 static void
 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 {
+   struct drm_i915_private *dev_priv = ring->dev->dev_private;
uint32_t seqno;
 
if (list_empty(&ring->request_list))
@@ -2459,6 +2460,15 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer 
*ring)
if (!i915_seqno_passed(seqno, obj->last_read_seqno))
break;
 
+   /* Wa: can't find the w/a name.
+* This doesn't actually implement the w/a, but it a workaround
+* for the workaround. It defers using rc6 until we know valid
+* state exists.
+*/
+   if (IS_BROADWELL(ring->dev) && intel_enable_rc6(ring->dev) &&
+   !dev_priv->rps.enabled && ring->id == RCS)
+   intel_enable_gt_powersave(ring->dev);
+
i915_gem_object_move_to_inactive(obj);
}
 
diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
i

[Intel-gfx] [PATCH 05/12] drm/i915: Remove extraneous MMIO for RPS

2014-03-19 Thread Ben Widawsky
The values created at initialization must always exist to use the
interface. Reading them again is confusing, and pointless.

More cleanups are coming in the next patch. Since I am not 100% certain
(though I am extremely close to that), particularly on BYT, that there is
no need to leave the MMIO here, I wanted to make this a separate patch so
that it is bisectable, just in case.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_sysfs.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sysfs.c 
b/drivers/gpu/drm/i915/i915_sysfs.c
index e3fa8cd..49554d9 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -313,7 +313,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
struct drm_minor *minor = dev_to_drm_minor(kdev);
struct drm_device *dev = minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
-   u32 val, rp_state_cap, hw_max, hw_min, non_oc_max;
+   u32 val, hw_max, hw_min, non_oc_max;
ssize_t ret;
 
ret = kstrtou32(buf, 0, &val);
@@ -327,16 +327,14 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
if (IS_VALLEYVIEW(dev_priv->dev)) {
val = vlv_freq_opcode(dev_priv, val);
 
-   hw_max = valleyview_rps_max_freq(dev_priv);
-   hw_min = valleyview_rps_min_freq(dev_priv);
-   non_oc_max = hw_max;
+   non_oc_max = hw_max = dev_priv->rps.max_freq;
+   hw_min = dev_priv->rps.min_freq;
} else {
val /= GT_FREQUENCY_MULTIPLIER;
 
-   rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
hw_max = dev_priv->rps.max_freq;
-   non_oc_max = (rp_state_cap & 0xff);
-   hw_min = ((rp_state_cap & 0xff) >> 16);
+   non_oc_max = dev_priv->rps.rp0_freq;
+   hw_min = dev_priv->rps.min_freq;
}
 
if (val < hw_min || val > hw_max ||
@@ -394,7 +392,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
struct drm_minor *minor = dev_to_drm_minor(kdev);
struct drm_device *dev = minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
-   u32 val, rp_state_cap, hw_max, hw_min;
+   u32 val, hw_max, hw_min;
ssize_t ret;
 
ret = kstrtou32(buf, 0, &val);
@@ -408,14 +406,13 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
if (IS_VALLEYVIEW(dev)) {
val = vlv_freq_opcode(dev_priv, val);
 
-   hw_max = valleyview_rps_max_freq(dev_priv);
-   hw_min = valleyview_rps_min_freq(dev_priv);
+   hw_max = dev_priv->rps.max_freq;
+   hw_min = dev_priv->rps.min_freq;
} else {
val /= GT_FREQUENCY_MULTIPLIER;
 
-   rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
hw_max = dev_priv->rps.max_freq;
-   hw_min = ((rp_state_cap & 0xff) >> 16);
+   hw_min = dev_priv->rps.min_freq;
}
 
if (val < hw_min || val > hw_max || val > 
dev_priv->rps.max_freq_softlimit) {
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 10/12] drm/i915/bdw: Implement a basic PM interrupt handler

2014-03-19 Thread Ben Widawsky
Almost all of it is reusable from the existing code. The primary
difference is we need to do even less in the interrupt handler, since
interrupts are not shared in the same way.

The patch is mostly a copy-paste of the existing snb+ code, with updates
to the relevant parts requiring changes to the interrupt handling. As
such it /should/ be relatively trivial. It's highly likely that I missed
some places where I need a gen8 version of the PM interrupts, but it has
become invisible to me by now.

This patch could probably be split into adding the new functions,
followed by actually handling the interrupts. Since the code is
currently disabled (and broken) I think the patch stands better by
itself.

v2: Move the comment about not touching the ringbuffer interrupt to the
snb_* function where it belongs (Rodrigo)

v3: Rebased on Paulo's runtime PM changes

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_irq.c  | 84 +---
 drivers/gpu/drm/i915/i915_reg.h  |  1 +
 drivers/gpu/drm/i915/intel_drv.h |  2 +
 drivers/gpu/drm/i915/intel_pm.c  | 39 ++-
 4 files changed, 119 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4b4aeb3..2f9ec6e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -175,6 +175,7 @@ static void snb_update_pm_irq(struct drm_i915_private 
*dev_priv,
return;
}
 
+   /* Make sure not to corrupt PMIMR state used by ringbuffer code */
new_val = dev_priv->pm_irq_mask;
new_val &= ~interrupt_mask;
new_val |= (~enabled_irq_mask & interrupt_mask);
@@ -214,6 +215,53 @@ static bool ivb_can_enable_err_int(struct drm_device *dev)
return true;
 }
 
+/**
+  * bdw_update_pm_irq - update GT interrupt 2
+  * @dev_priv: driver private
+  * @interrupt_mask: mask of interrupt bits to update
+  * @enabled_irq_mask: mask of interrupt bits to enable
+  *
+  * Copied from the snb function, updated with relevant register offsets
+  */
+static void bdw_update_pm_irq(struct drm_i915_private *dev_priv,
+ uint32_t interrupt_mask,
+ uint32_t enabled_irq_mask)
+{
+   uint32_t new_val;
+
+   assert_spin_locked(&dev_priv->irq_lock);
+
+   if (dev_priv->pm.irqs_disabled) {
+   WARN(1, "IRQs disabled\n");
+   dev_priv->pm.regsave.gen6_pmimr &= ~interrupt_mask;
+   dev_priv->pm.regsave.gen6_pmimr |= (~enabled_irq_mask &
+interrupt_mask);
+   return;
+   }
+
+   new_val = dev_priv->pm_irq_mask;
+   new_val &= ~interrupt_mask;
+   new_val |= (~enabled_irq_mask & interrupt_mask);
+
+   if (new_val != dev_priv->pm_irq_mask) {
+   dev_priv->pm_irq_mask = new_val;
+   I915_WRITE(GEN8_GT_IMR(2), I915_READ(GEN8_GT_IMR(2)) |
+  dev_priv->pm_irq_mask);
+   POSTING_READ(GEN8_GT_IMR(2));
+   }
+}
+
+void bdw_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask)
+{
+   bdw_update_pm_irq(dev_priv, mask, mask);
+}
+
+void bdw_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask)
+{
+   bdw_update_pm_irq(dev_priv, mask, 0);
+}
+
+
 static bool cpt_can_enable_serr_int(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1131,13 +1179,16 @@ static void gen6_pm_rps_work(struct work_struct *work)
spin_lock_irq(&dev_priv->irq_lock);
pm_iir = dev_priv->rps.pm_iir;
dev_priv->rps.pm_iir = 0;
-   /* Make sure not to corrupt PMIMR state used by ringbuffer code */
-   snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
+   if (IS_BROADWELL(dev_priv->dev))
+   bdw_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
+   else {
+   snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
+   /* Make sure we didn't queue anything we're not going to
+* process. */
+   WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
+   }
spin_unlock_irq(&dev_priv->irq_lock);
 
-   /* Make sure we didn't queue anything we're not going to process. */
-   WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
-
if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
return;
 
@@ -1330,6 +1381,19 @@ static void snb_gt_irq_handler(struct drm_device *dev,
ivybridge_parity_error_irq_handler(dev, gt_iir);
 }
 
+static void gen8_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+   if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
+   return;
+
+   spin_lock(&dev_priv->irq_lock);
+   dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RPS_EVENTS;
+   bdw_

[Intel-gfx] [PATCH 08/12] drm/i915/bdw: Extract rp_state_caps logic

2014-03-19 Thread Ben Widawsky
We have a need for duplicated parsing of the RP_STATE_CAPS register (and
the setting of the associated fields). To reuse some code, we can
extract the function into a simple helper.

This patch also addresses the fact that we missed doing this for gen8,
something we should have done anyway.

This could be two patches, one to extract, and one to add gen8, but it's
trivial enough that I think one is fine. I will accept a request to
split it. Please notice the fix addressed by v2 below.

Valleyview is left untouched because it is different.

v2: Logically rebased on top of
commit dd0a1aa19bd3d7203e58157b84cea78bbac605ac
Author: Jeff McGee 
Date:   Tue Feb 4 11:32:31 2014 -0600

drm/i915: Restore rps/rc6 on reset

Note with the above change the fix for gen8 is also handled (which was
not the case in Jeff's original patch).

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/intel_pm.c | 40 +++-
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 8a64ecc..ab9e992 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3248,6 +3248,27 @@ static void gen6_enable_rps_interrupts(struct drm_device 
*dev)
I915_WRITE(GEN6_PMINTRMSK, ~enabled_intrs);
 }
 
+static void parse_rp_state_cap(struct drm_i915_private *dev_priv, u32 
rp_state_cap)
+{
+   /* All of these values are in units of 50MHz */
+   dev_priv->rps.cur_freq  = 0;
+   /* static values from HW: RP0 < RPe < RP1 < RPn (min_freq) */
+   dev_priv->rps.rp1_freq  = (rp_state_cap >>  8) & 0xff;
+   dev_priv->rps.rp0_freq  = (rp_state_cap >>  0) & 0xff;
+   dev_priv->rps.min_freq  = (rp_state_cap >> 16) & 0xff;
+   /* XXX: only BYT has a special efficient freq */
+   dev_priv->rps.efficient_freq= dev_priv->rps.rp1_freq;
+   /* hw_max = RP0 until we check for overclocking */
+   dev_priv->rps.max_freq  = dev_priv->rps.rp0_freq;
+
+   /* Preserve min/max settings in case of re-init */
+   if (dev_priv->rps.max_freq_softlimit == 0)
+   dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
+
+   if (dev_priv->rps.min_freq_softlimit == 0)
+   dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
+}
+
 static void gen8_enable_rps(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3266,6 +3287,7 @@ static void gen8_enable_rps(struct drm_device *dev)
I915_WRITE(GEN6_RC_CONTROL, 0);
 
rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
+   parse_rp_state_cap(dev_priv, rp_state_cap);
 
/* 2b: Program RC6 thresholds.*/
I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
@@ -3354,23 +3376,7 @@ static void gen6_enable_rps(struct drm_device *dev)
rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
 
-   /* All of these values are in units of 50MHz */
-   dev_priv->rps.cur_freq  = 0;
-   /* static values from HW: RP0 < RPe < RP1 < RPn (min_freq) */
-   dev_priv->rps.rp1_freq  = (rp_state_cap >>  8) & 0xff;
-   dev_priv->rps.rp0_freq  = (rp_state_cap >>  0) & 0xff;
-   dev_priv->rps.min_freq  = (rp_state_cap >> 16) & 0xff;
-   /* XXX: only BYT has a special efficient freq */
-   dev_priv->rps.efficient_freq= dev_priv->rps.rp1_freq;
-   /* hw_max = RP0 until we check for overclocking */
-   dev_priv->rps.max_freq  = dev_priv->rps.rp0_freq;
-
-   /* Preserve min/max settings in case of re-init */
-   if (dev_priv->rps.max_freq_softlimit == 0)
-   dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
-
-   if (dev_priv->rps.min_freq_softlimit == 0)
-   dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
+   parse_rp_state_cap(dev_priv, rp_state_cap);
 
/* disable the counters and set deterministic thresholds */
I915_WRITE(GEN6_RC_CONTROL, 0);
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 11/11] [v4] drm/i915/bdw: Ensure a context is loaded before RC6

2014-03-19 Thread Ben Widawsky
On Tue, Mar 04, 2014 at 03:30:14PM +0100, Daniel Vetter wrote:
> On Wed, Feb 19, 2014 at 10:27:20PM -0800, Ben Widawsky wrote:
> > RC6 works a lot like HW contexts in that when the GPU enters RC6 it
> > saves away the state to a context, and loads it upon wake.
> > 
> > It's to be somewhat expected that BIOS will not set up valid GPU state.
> > As a result, if loading bad state can cause the GPU to get angry, it
> > would make sense then that we need to load state first. There are two
> > ways in which we can do this:
> > 
> > 1. Create 3d state in the driver, load it up, then enable RC6.
> > 1b. Reuse a known good state, [and if needed,] just bind objects where
> > needed. Then enable RC6.
> > 2. Hold off enabling RC6 until userspace has had a chance to complete
> > batches.
> > 
> > There have been discussions in the past with #1 as it has been
> > recommended for fixes elsewhere. I'm not opposed to it, I'd just like to
> > do the easy thing now to enable the platform.
> > 
> > This patch is a hack that implements option #2. It will defer enabling
> > rc6 until the first batch from userspace has been retired. It suffers
> > two flaws. The first is, if the driver is loaded, but a batch is not
> > submitted/completed, we'll never enter rc6. The other is, it expects
> > userspace to submit a batch with 3d state first. Both of these things
> > are not actual flaws for most users because most users will boot to a
> > graphical composited desktop. Both mesa, and X will always emit the
> > necessary 3d state.
> > 
> > Once a context is loaded and we enable rc6, the default context should
> > inherit the proper state because we always inhibit the restore for the
> > default context. This assumes certain things about the workaround/issue
> > itself to which I am not privy (primarily that the indirect state
> > objects don't actually need to exist).
> > 
> > With that, there are currently 4 options for BDW:
> > 1. Don't use RC6.
> > 2. Use RC6 and expect a hang on the first batch submitted for every
> > context.
> > 3. Use RC6 and use this patch.
> > 4. Wait for another workaround implementation.
> > 
> > NOTE: This patch could be used against other platforms as well.
> > 
> > v2: Re-add accidentally dropped hunk (Ben)
> > 
> > v3: Now more compilable (Ben)
> > 
> > v4: Use the existing enable flag for rc6. This will also make the
> > suspend/resume case work properly, which is broken in v3.
> > Disable rc6 on reset, and defer re-enabling until the first batch.
> > 
> > The fact that RC6 residency continues to increment, and that this patch
> > prevents a hang on BDW silicon has been:
> > Tested-by: Kenneth Graunke  (v1)
> > 
> > Cc: David E. Box 
> > Cc: Kristen Carlson Accardi 
> > Signed-off-by: Ben Widawsky 
> > 
> > squash! drm/i915/bdw: Ensure a context is loaded before RC6
> > ---
> >  drivers/gpu/drm/i915/i915_drv.c  |  4 +++-
> >  drivers/gpu/drm/i915/i915_gem.c  | 10 ++
> >  drivers/gpu/drm/i915/intel_display.c |  5 +
> >  3 files changed, 18 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c 
> > b/drivers/gpu/drm/i915/i915_drv.c
> > index 2d05d7c..7fdfc0e 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -679,6 +679,8 @@ int i915_reset(struct drm_device *dev)
> > mutex_lock(&dev->struct_mutex);
> >  
> > i915_gem_reset(dev);
> > +   if (IS_BROADWELL(dev))
> > +   intel_disable_gt_powersave(dev);
> >  
> > simulated = dev_priv->gpu_error.stop_rings != 0;
> >  
> > @@ -733,7 +735,7 @@ int i915_reset(struct drm_device *dev)
> >  * reset and the re-install of drm irq. Skip for ironlake per
> >  * previous concerns that it doesn't respond well to some forms
> >  * of re-init after reset. */
> > -   if (INTEL_INFO(dev)->gen > 5) {
> > +   if (INTEL_INFO(dev)->gen > 5 && !IS_BROADWELL(dev)) {
> > mutex_lock(&dev->struct_mutex);
> > intel_enable_gt_powersave(dev);
> > mutex_unlock(&dev->struct_mutex);
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> > b/drivers/gpu/drm/i915/i915_gem.c
> > index 3618bb0..25a97a6 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -2420,6 +2420,7 @@ void i915_gem_reset(

Re: [Intel-gfx] [PATCH] drm/i915: bdw expands ACTHD to 64bit

2014-03-19 Thread Ben Widawsky
On Wed, Mar 19, 2014 at 09:54:48PM +, Chris Wilson wrote:
> As Broadwell has an increased virtual address size, it requires more
> than 32 bits to store offsets into its address space. This includes the
> debug registers to track the current HEAD of the individual rings, which
> may be anywhere within the per-process address spaces. In order to find
> the full location, we need to read the high bits from a second register.
> We then also need to expand our storage to keep track of the larger
> address.
> 
> Signed-off-by: Chris Wilson 
> Cc: Ben Widawsky 
> Cc: Timo Aaltonen 
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  2 +-
>  drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
>  drivers/gpu/drm/i915/i915_irq.c |  8 +---
>  drivers/gpu/drm/i915/i915_reg.h |  1 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +++--
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  6 +++---
>  6 files changed, 26 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ed67b4abf9e3..ee913b63a945 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -354,12 +354,12 @@ struct drm_i915_error_state {
>   u32 ipeir;
>   u32 ipehr;
>   u32 instdone;
> - u32 acthd;
>   u32 bbstate;
>   u32 instpm;
>   u32 instps;
>   u32 seqno;
>   u64 bbaddr;
> + u64 acthd;
>   u32 fault_reg;
>   u32 faddr;
>   u32 rc_psmi; /* sleep state */
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
> b/drivers/gpu/drm/i915/i915_gpu_error.c
> index b153a16ead0a..9519aa240614 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -248,7 +248,7 @@ static void i915_ring_error_state(struct 
> drm_i915_error_state_buf *m,
>   err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
>   err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
>   err_printf(m, "  HWS: 0x%08x\n", ring->hws);
> - err_printf(m, "  ACTHD: 0x%08x\n", ring->acthd);
> + err_printf(m, "  ACTHD: 0x%08llx\n", ring->acthd);

%016x?

if (gen8)
%016x
?

>   err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
>   err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
>   err_printf(m, "  INSTDONE: 0x%08x\n", ring->instdone);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 1dd9d3628919..b79792317f39 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2507,7 +2507,8 @@ static struct intel_ring_buffer *
>  semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
>  {
>   struct drm_i915_private *dev_priv = ring->dev->dev_private;
> - u32 cmd, ipehr, acthd, acthd_min;
> + u64 acthd, acthd_min;
> + u32 cmd, ipehr;
>  
>   ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
>   if ((ipehr & ~(0x3 << 16)) !=
> @@ -2563,7 +2564,7 @@ static void semaphore_clear_deadlocks(struct 
> drm_i915_private *dev_priv)
>  }
>  
>  static enum intel_ring_hangcheck_action
> -ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
> +ring_stuck(struct intel_ring_buffer *ring, u64 acthd)
>  {
>   struct drm_device *dev = ring->dev;
>   struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -2631,7 +2632,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
>   return;
>  
>   for_each_ring(ring, dev_priv, i) {
> - u32 seqno, acthd;
> + u64 acthd;
> + u32 seqno;
>   bool busy = true;
>  
>   semaphore_clear_deadlocks(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f010ff7e7e2a..3c464d307a2b 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -708,6 +708,7 @@ enum punit_power_well {
>  #define BLT_HWS_PGA_GEN7 (0x04280)
>  #define VEBOX_HWS_PGA_GEN7   (0x04380)
>  #define RING_ACTHD(base) ((base)+0x74)
> +#define RING_ACTHD_UDW(base) ((base)+0x5c)
>  #define RING_NOPID(base) ((base)+0x94)
>  #define RING_IMR(base)   ((base)+0xa8)
>  #define RING_TIMESTAMP(base) ((base)+0x358)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
> b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7a01911c16f8..a6ceb2c6f36d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
&

Re: [Intel-gfx] [PATCH 08/20] drm/i915: check if IIR is still zero at postinstall on Gen5+

2014-03-19 Thread Ben Widawsky
On Wed, Mar 19, 2014 at 09:28:32AM +0100, Daniel Vetter wrote:
> On Tue, Mar 18, 2014 at 11:20:09AM -0700, Ben Widawsky wrote:
> > On Fri, Mar 07, 2014 at 08:10:24PM -0300, Paulo Zanoni wrote:
> > > From: Paulo Zanoni 
> > > 
> > > Instead of trying to clear it again. It should already be masked and
> > > disabled and zeroed at preinstall/uninstall.
> > > 
> > > Signed-off-by: Paulo Zanoni 
> > > ---
> > >  drivers/gpu/drm/i915/i915_irq.c | 32 +++-
> > >  1 file changed, 15 insertions(+), 17 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_irq.c 
> > > b/drivers/gpu/drm/i915/i915_irq.c
> > > index 6d4daf2..4d0a8b1 100644
> > > --- a/drivers/gpu/drm/i915/i915_irq.c
> > > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > > @@ -103,12 +103,24 @@ static const u32 hpd_status_i915[] = { /* i915 and 
> > > valleyview are the same */
> > >   I915_WRITE(type##IIR, 0x); \
> > >  } while (0)
> > >  
> > > +/*
> > > + * We should clear IMR at preinstall/uninstall, and just check at 
> > > postinstall.
> > > + */
> > > +#define GEN5_ASSERT_IIR_IS_ZERO(reg) do { \
> > > + u32 val = I915_READ(reg); \
> > > + if (val) \
> > > + DRM_ERROR("Interrupt register 0x%x is not zero: 0x%08x\n", \
> > > +   (reg), val); \
> > > +} while (0)
> > > +
> > >  #define GEN8_IRQ_INIT_NDX(type, which, imr_val, ier_val) do { \
> > > + GEN5_ASSERT_IIR_IS_ZERO(GEN8_##type##_IIR(which)); \
> > >   I915_WRITE(GEN8_##type##_IMR(which), (imr_val)); \
> > >   I915_WRITE(GEN8_##type##_IER(which), (ier_val)); \
> > >  } while (0)
> > >  
> > >  #define GEN5_IRQ_INIT(type, imr_val, ier_val) do { \
> > > + GEN5_ASSERT_IIR_IS_ZERO(type##IIR); \
> > >   I915_WRITE(type##IMR, (imr_val)); \
> > >   I915_WRITE(type##IER, (ier_val)); \
> > >  } while (0)
> > 
> > Okay, this is replacing a POSTED_WRITE, with a (slower) POSTING_READ
> > which gives an error that we can do nothing about other than clear it
> > anyway.
> > 
> > I'd be in favor of dropping this patch.
> 
> The point of the assert is to make sure that the new IIR clearing logic
> with blocking everything+clearing in the preinstall hook actually does
> what it's supposed to do.
> 
> Since the point of this exercise is to reuse this code for runtime
> suspend/resume where races are much easier to hit I think this is a good
> self-check of the code.
> -Daniel
> 

Okay, I am feeling somewhat pressured to stick a reviewed-by on this
since Daniel likes it.

Change the macro to WARN instead of DRM_ERROR, and, clear the IIR if
it's non-zero. With that change, it's:
Reviewed-by-with-reservations: Ben Widawsky 

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 00/20] ILK+ interrupt improvements, v2

2014-03-19 Thread Ben Widawsky
On Wed, Mar 19, 2014 at 09:36:04AM +0100, Daniel Vetter wrote:
> On Tue, Mar 18, 2014 at 01:53:53PM -0700, Ben Widawsky wrote:
> > On Fri, Mar 07, 2014 at 08:10:16PM -0300, Paulo Zanoni wrote:
> > > From: Paulo Zanoni 
> > > 
> > > Hi
> > > 
> > > This is basically a rebase of "[PATCH 00/19] ILK+ interrupt 
> > > improvements", which
> > > was sent to the mailing list on January 22. There are no real differences,
> > > except for the last patch, which is new.
> > > 
> > > Original cover letter:
> > > http://lists.freedesktop.org/archives/intel-gfx/2014-January/038679.html
> > > 
> > > The idea behind this series is that at some point our runtime PM code 
> > > will just
> > > call our irq_preinstall, irq_postinstall and irq_uninstall functions 
> > > instead of
> > > using dev_priv->pc8.regsave, so I decided to audit, cleanup and add a few 
> > > WARNs
> > > to our code before we do that change. We gotta be in shape if we want to 
> > > be
> > > exposed to runtime!
> > > 
> > > Thanks,
> > > Paulo
> > > 
> > > Paulo Zanoni (20):
> > >   drm/i915: add GEN5_IRQ_INIT macro
> > >   drm/i915: also use GEN5_IRQ_INIT with south display interrupts
> > >   drm/i915: use GEN8_IRQ_INIT on GEN5
> > >   drm/i915: add GEN5_IRQ_FINI
> > >   drm/i915: don't forget to uninstall the PM IRQs
> > >   drm/i915: properly clear IIR at irq_uninstall on Gen5+
> > >   drm/i915: add GEN5_IRQ_INIT
> > >   drm/i915: check if IIR is still zero at postinstall on Gen5+
> > >   drm/i915: fix SERR_INT init/reset code
> > >   drm/i915: fix GEN7_ERR_INT init/reset code
> > >   drm/i915: fix open coded gen5_gt_irq_preinstall
> > >   drm/i915: extract ibx_irq_uninstall
> > >   drm/i915: call ibx_irq_uninstall from gen8_irq_uninstall
> > >   drm/i915: enable SDEIER later
> > >   drm/i915: remove ibx_irq_uninstall
> > >   drm/i915: add missing intel_hpd_irq_uninstall
> > >   drm/i915: add ironlake_irq_reset
> > >   drm/i915: add gen8_irq_reset
> > >   drm/i915: only enable HWSTAM interrupts on postinstall on ILK+
> > >   drm/i915: add POSTING_READs to the IRQ init/reset macros
> > > 
> > >  drivers/gpu/drm/i915/i915_irq.c | 270 
> > > ++--
> > >  1 file changed, 121 insertions(+), 149 deletions(-)
> > > 
> > 
> > Okay, here is the summary of my review. At first I was complaining to
> > myself about how many patches you used to do a simple thing. But, I must
> > admit it made reviewing the thing a lot easier, and when I look back at
> > how much stuff you combined, I'm really glad you did it this way. I'm
> > sure I've missed something silly though, since every patch looks so
> > similar :P
> > 
> > 1-5: Reviewed-by: Ben Widawsky  (with possible comment
> > improvement on #3)
> > 
> > 7: I don't like. Can we drop? I guess doing this would make a decent
> > amount of churn, so if you don't want to drop it, that's fine, and it's
> > functionally correct:
> >  Reviewed-by: Ben Widawsky 
> > 
> > 8: I'd really like to drop this one.
> 
> Comment on this and I think with a pimped commit message this is good to
> go imo. I really think the added self-checks are required to start using
> this code for runtime pm.
> 

So you don't need my reviewed-by then. I don't like it...

> > 9-10: Reviewed-by: Ben Widawsky 
> > 
> > 12-13: I wouldn't mind cpt_irq_* rename, but either way
> >Reviewed-by: Ben Widawsky 
> > 
> > 14: With the requested change in the mail:
> > Reviewed-by: Ben Widawsky 
> > 
> > 16: Reviewed-by: Ben Widawsky 
> > 
> > 20: Should be squashed, but
> > Reviewed-by: Ben Widawsky 
> > 
> > 6, 11, 15, 17, 18, 19: You introduce the term _reset as a verb which
> > seems to always mean "disable." I think disable makes the code so much
> > clearer, and would really love if you can apply this simple rename. With
> > the rename, they're:
> > Reviewed-by: Ben Widawsky 
> 
> Paulo's using "reset" functions/macros both in the preinstall hooks and in
> the uninstall/disable code. We already use reset for stuff run before
> init/enable code to get the hw in a state we expect it to, so I think
> Paulo's naming choice is accurate and a plain "disable" more misleading.
> 

I cannot disagree more. Every time I read "reset" it confuses me. But it
seems like I am the minority.

> I think you raise some good points in your review, and besides the 3 cases
> I commented on I lack the detailed knowledge to avoid looking like a fool
> ;-) So I think I'll wait for Paulo's comments before pulling this all in.
> 
> Thanks,
> Daniel

Once Paulo responds, I'll make it a top priority to re-review whatever
is needed. Sorry for the original delay.

> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] drm/i915: Per-process stats work better when evaluated per-process

2014-03-19 Thread Ben Widawsky
On Wed, Mar 19, 2014 at 01:45:45PM +, Chris Wilson wrote:
> The idea of printing objects used by each process is to judge how each
> process is using them. This means that we need to evaluate whether the
> object is bound for that particular process, rather than just whether it
> is bound into the global GTT.
> 
> v2: Restore the non-full-ppgtt path for simplicity as we may not even
> create vma with older hardware.
> 
> v3: Tweak handling of global entries and default context entries.
> 
> Signed-off-by: Chris Wilson 
> Cc: Ben Widawsky 
Reviewed-by: Ben Widawsky 

[snip]

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 03/11] drm/i915: Rename and comment all the RPS *stuff*

2014-03-18 Thread Ben Widawsky
On Tue, Mar 18, 2014 at 06:27:03PM -0700, Ben Widawsky wrote:
> On Sat, Feb 22, 2014 at 01:37:16PM +, Chris Wilson wrote:
> > On Mon, Feb 17, 2014 at 07:01:44PM -0800, Ben Widawsky wrote:
> > > The names of the struct members for RPS are stupid. Every time I need to
> > > do anything in this code I have to spend a significant amount of time to
> > > remember what it all means. By renaming the variables (and adding the
> > > comments) I hope to clear up the situation. Indeed doing this makes some
> > > upcoming patches more readable.
> > > 
> > > I've avoided ILK because it's possible that the naming used for Ironlake
> > > matches what is in the docs. I believe the ILK power docs were never
> > > published, and I am too lazy to dig them up.
> > > 
> > > While there may be mistakes, this patch was mostly done via sed. The
> > > renaming of "hw_max" required a bit of interactivity.
> > 
> > It lost the distinction between RPe and RPn. I am in favour of keeping
> > RP0, RP1, RPe, RPn for the hardware/spec values and adding the set of
> > soft values used for actual interaction.
> > -Chris
> > 
> 
> Okay, as stated before, you are correct - I need to bring back RPe/RPn
> distinction. I think using the mix of values (basically s/_delay/_freq)
> doesn't fully realize what I was hoping to achieve. I don't think there
> is ever a case, except when debugging where it's easier to refer to the
> RP mnemonic. How strongly do you feel about this one? I'd really prefer
> to just fix RPe/RPn.
> 
> Does anyone else have an opinion on:
> "max_freq_hardlimit" vs. "rp0"
> 
> Does anyone else want to review this one?
> 

Okay, I started on this, and I somewhat agree. How about:

u8 cur_freq;/* Current frequency (cached, may not == HW) */
u8 min_freq_softlimit;  /* Minimum frequency permitted by the driver */
u8 max_freq_softlimit;  /* Max frequency permitted by the driver */
u8 max_freq;/* Maximum frequency, RP0 if not overclocking */
u8 min_freq;/* AKA RPn. Minimum frequency */
u8 efficient_freq;  /* AKA RPe. Pre-determined balanced frequency */
u8 rp1_freq;/* "less than" RP0 power/frequency */
u8 rp0_freq;/* Non-overclocked max frequency. */

Conveniently, this matches sysfs, minus the efficiency one, and I don't think
there's a point in explicitly storing RPn, since it's always == min_freq.

> > -- 
> > Chris Wilson, Intel Open Source Technology Centre
> > ___
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Ben Widawsky, Intel Open Source Technology Center
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] linux-next: build failure after merge of the drm-intel tree

2014-03-18 Thread Ben Widawsky
On Tue, Mar 18, 2014 at 09:18:42PM -0400, Steven Rostedt wrote:
> On Wed, 19 Mar 2014 11:53:50 +1100
> Stephen Rothwell  wrote:
> 
> 
> > Caused by commit a25ca17c1eac ("drm/i915: Do not dereference pointers
> > from ring buffer in evict event").
> > 
> > I have used the drm-intel tree from next-20140318 for today.
> > 
> 
> Bah! I'm still suffering from jet lag (just came back from Linux-Tage
> in Chemnitz).
> 
> The next time I compile test a patch for a module, I'll make sure I have
> that module's config option set :-(  The woe of using localmodconfig. I
> should have picked the box with the i915. :-/
> 
> Below is the fix. I'll repost a v2 of the original patch.
> 
> Sorry about that.
> 

I was about to send out the same fix when I saw this.

Reviewed-by: Ben Widawsky 

> -- Steve
> 
> diff --git a/drivers/gpu/drm/i915/i915_trace.h 
> b/drivers/gpu/drm/i915/i915_trace.h
> index f3e8a90..783ae08 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -243,7 +243,7 @@ TRACE_EVENT(i915_gem_evict_vm,
>   ),
>  
>   TP_fast_assign(
> -__entry->dev = dev->primary->index;
> +__entry->dev = vm->dev->primary->index;
>  __entry->vm = vm;
> ),
>  
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 03/11] drm/i915: Rename and comment all the RPS *stuff*

2014-03-18 Thread Ben Widawsky
On Sat, Feb 22, 2014 at 01:37:16PM +0000, Chris Wilson wrote:
> On Mon, Feb 17, 2014 at 07:01:44PM -0800, Ben Widawsky wrote:
> > The names of the struct members for RPS are stupid. Every time I need to
> > do anything in this code I have to spend a significant amount of time to
> > remember what it all means. By renaming the variables (and adding the
> > comments) I hope to clear up the situation. Indeed doing this make some
> > upcoming patches more readable.
> > 
> > I've avoided ILK because it's possible that the naming used for Ironlake
> > matches what is in the docs. I believe the ILK power docs were never
> > published, and I am too lazy to dig them up.
> > 
> > While there may be mistakes, this patch was mostly done via sed. The
> > renaming of "hw_max" required a bit of interactivity.
> 
> It lost the distinction between RPe and RPn. I am in favour of keeping
> RP0, RP1, RPe, RPn for the hardware/spec values and adding the set of
> soft values used for actual interaction.
> -Chris
> 

Okay, as stated before, you are correct - I need to bring back the RPe/RPn
distinction. I think using the mix of values (basically s/_delay/_freq)
doesn't fully realize what I was hoping to achieve. I don't think there
is ever a case, except when debugging, where it's easier to refer to the
RP mnemonic. How strongly do you feel about this one? I'd really prefer
to just fix RPe/RPn.

Does anyone else have an opinion on:
"max_freq_hardlimit" vs. "rp0"

Does anyone else want to review this one?

> -- 
> Chris Wilson, Intel Open Source Technology Centre
> _______
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 12/26] drm/i915: Page table helpers, and define renames

2014-03-18 Thread Ben Widawsky
On Tue, Mar 18, 2014 at 11:29:58AM -0700, Jesse Barnes wrote:
> On Tue, 18 Mar 2014 09:05:58 +0000
> Chris Wilson  wrote:
> 
> > On Mon, Mar 17, 2014 at 10:48:44PM -0700, Ben Widawsky wrote:
> > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > > @@ -1,8 +1,11 @@
> > >  #ifndef _I915_GEM_GTT_H
> > >  #define _I915_GEM_GTT_H
> > >  
> > > -#define GEN6_PPGTT_PD_ENTRIES 512
> > > -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> > > +/* GEN Agnostic defines */
> > > +#define I915_PDES_PER_PD 512
> > > +#define I915_PTE_MASK (PAGE_SHIFT-1)
> > 
> > That looks decidedly fishy.
> > 
> > PAGE_SHIFT is 12 -> PTE_MASK = 0xb
> > 

Thanks for catching this. I'll presume the define isn't even used.

> > > +#define I915_PDE_MASK (I915_PDES_PER_PD-1)
> > > +
> > >  typedef uint32_t gen6_gtt_pte_t;
> > >  typedef uint64_t gen8_gtt_pte_t;
> > >  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> > > @@ -23,6 +26,98 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> > >  #define GEN6_PTE_ADDR_ENCODE(addr)   GEN6_GTT_ADDR_ENCODE(addr)
> > >  #define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr)
> > >  
> > > +
> > > +/* GEN6 PPGTT resembles a 2 level page table:
> > > + * 31:22 | 21:12 |  11:0
> > > + *  PDE  |  PTE  | offset
> > > + */
> > > +#define GEN6_PDE_SHIFT   22
> > > +#define GEN6_PTES_PER_PT (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> > > +
> > > +static inline uint32_t i915_pte_index(uint64_t address, uint32_t 
> > > pde_shift)
> > > +{
> > > + const uint32_t mask = (1 << (pde_shift - PAGE_SHIFT)) - 1;
> > > + return (address >> PAGE_SHIFT) & mask;
> > > +}
> > > +
> > > +/* Helper to counts the number of PTEs within the given length. This 
> > > count does
> > > + * not cross a page table boundary, so the max value would be
> > > + * GEN6_PTES_PER_PT for GEN6, and GEN8_PTES_PER_PT for GEN8.
> > > + */
> > > +static inline size_t i915_pte_count(uint64_t addr, size_t length,
> > > + uint32_t pde_shift)
> > > +{
> > > + const uint64_t pd_mask = ~((1 << pde_shift) - 1);
> > > + uint64_t end;
> > > +
> > > + if (WARN_ON(!length))
> > > + return 0;
> > > +
> > > + if (WARN_ON(addr % PAGE_SIZE))
> > > + addr = round_down(addr, PAGE_SIZE);
> > > +
> > > + if (WARN_ON(length % PAGE_SIZE))
> > > + length = round_up(length, PAGE_SIZE);
> > 
> > Oh oh. I think these fixups are very suspect, so just
> > BUG_ON(length == 0);
> > BUG_ON(offset_in_page(addr|length));
> > 

I thought someone might have an issue with the BUG_ON. But I prefer it
as well.

> > > +
> > > + end = addr + length;
> > > +
> > > + if ((addr & pd_mask) != (end & pd_mask))
> > > + return (1 << (pde_shift - PAGE_SHIFT)) -
> > 
> > #define NUM_PTE(pde_shift) (1 << (pde_shift - PAGE_SHIFT))
> > here and for computing the pd_mask.
> > 
> > > + i915_pte_index(addr, pde_shift);
> > > +
> > > + return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
> > > +}
> > 
> > Otherwise the helpers look a useful improvement in readability.
> > -Chris
> > 
> 
> Can we use GTT_PAGE_SIZE here too?  I'm worried the kernel PAGE_SIZE
> will change at some point and blow us up.  At least in places where
> we're doing our own thing rather than using the x86 bits...

That's fine with me. We have quite a few other places in our code which
depend on PAGE_SIZE being 4k though.

It's likely I'll be maintaining this branch myself for a while, but I'll
modify these both locally.

> 
> -- 
> Jesse Barnes, Intel Open Source Technology Center
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Fix up the forcewake timer initialization

2014-03-18 Thread Ben Widawsky
On Tue, Mar 18, 2014 at 04:31:03PM +0100, Daniel Vetter wrote:
> This is a regression introduced in
> 
> commit 0294ae7b44bba7ab0d4cef9a8736287f38bdb4fd
> Author: Chris Wilson 
> Date:   Thu Mar 13 12:00:29 2014 +0000
> 
> drm/i915: Consolidate forcewake resetting to a single function
> 
> The reordered setup sequence ended up calling del_timer_sync before
> the timer was set up correctly, resulting in endless hilarity when
> loading the driver.
> 
> Compared to Ben's patch (which moved around the setup_timer call to
> sanitize_early) this moves the sanitize_early call around in the
> driver load call. This way we avoid calling setup_timer again in the
> resume code (where we also call sanitize_early).
> 
> Cc: Chris Wilson 
> Cc: Mika Kuoppala 
> Cc: Ben Widawsky 
> Tested-by: Rodrigo Vivi 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76242
> Signed-off-by: Daniel Vetter 
> ---
>  drivers/gpu/drm/i915/i915_dma.c | 2 --
>  drivers/gpu/drm/i915/intel_uncore.c | 2 ++
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index e4d2b9f15ae2..9faee49f210d 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1608,8 +1608,6 @@ int i915_driver_load(struct drm_device *dev, unsigned 
> long flags)
>   goto put_bridge;
>   }
>  
> - intel_uncore_early_sanitize(dev);
> -
>   /* This must be called before any calls to HAS_PCH_* */
>   intel_detect_pch(dev);
>  
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
> b/drivers/gpu/drm/i915/intel_uncore.c
> index e2e328d86aff..c3832d9270a6 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -736,6 +736,8 @@ void intel_uncore_init(struct drm_device *dev)
>   setup_timer(&dev_priv->uncore.force_wake_timer,
>   gen6_force_wake_timer, (unsigned long)dev_priv);
>  
> + intel_uncore_early_sanitize(dev);
> +
>   if (IS_VALLEYVIEW(dev)) {
>   dev_priv->uncore.funcs.force_wake_get = __vlv_force_wake_get;
>   dev_priv->uncore.funcs.force_wake_put = __vlv_force_wake_put;

If you only want to setup_timer once, the setup_timer call should be in
intel_uncore_init() which is the only one called only at load time. And
of course, this is where the bug is. Otherwise, thaw calls
uncore_early_sanitize, which will setup_timer again (which I thought was
your complaint with my original patch).

How about this, (only minimally tested):

diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index e2e328d..7ef5aa3 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -387,8 +387,6 @@ void intel_uncore_early_sanitize(struct drm_device *dev)
if (IS_GEN6(dev) || IS_GEN7(dev))
__raw_i915_write32(dev_priv, GTFIFODBG,
   __raw_i915_read32(dev_priv, GTFIFODBG));
-
-   intel_uncore_forcewake_reset(dev, false);
 }
 
 void intel_uncore_sanitize(struct drm_device *dev)
@@ -413,6 +411,8 @@ void intel_uncore_sanitize(struct drm_device *dev)
mutex_unlock(&dev_priv->rps.hw_lock);
 
}
+
+   intel_uncore_forcewake_reset(dev, false);
 }
 
 /*
@@ -846,7 +846,6 @@ void intel_uncore_fini(struct drm_device *dev)
 {
/* Paranoia: make sure we have disabled everything before we exit. */
intel_uncore_sanitize(dev);
-   intel_uncore_forcewake_reset(dev, false);
 }
 


-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 3/4] drm/i915: Per-process stats work better when evaluated per-process

2014-03-18 Thread Ben Widawsky
On Thu, Mar 13, 2014 at 11:57:00AM -0300, Rodrigo Vivi wrote:
> From: Chris Wilson 
> 
> The idea of printing objects used by each process is to judge how each
> process is using them. This means that we need to evaluate whether the
> object is bound for that particular process, rather than just whether it
> is bound into the global GTT.
> 
> Signed-off-by: Chris Wilson 
> Cc: Ben Widawsky 
> Signed-off-by: Rodrigo Vivi 
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c | 34 
> ++---
>  drivers/gpu/drm/i915/i915_drv.h |  2 ++
>  drivers/gpu/drm/i915/i915_gem_context.c |  1 +
>  3 files changed, 30 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index a90d31c..ed3965f 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -299,28 +299,46 @@ static int i915_gem_stolen_list_info(struct seq_file 
> *m, void *data)
>  } while (0)
>  
>  struct file_stats {
> + struct drm_i915_file_private *file_priv;
>   int count;
> - size_t total, active, inactive, unbound;
> + size_t total, global, active, inactive, unbound;
>  };
>  
>  static int per_file_stats(int id, void *ptr, void *data)
>  {
>   struct drm_i915_gem_object *obj = ptr;
>   struct file_stats *stats = data;
> + struct i915_vma *vma;
>  
>   stats->count++;
>   stats->total += obj->base.size;
>  
> - if (i915_gem_obj_ggtt_bound(obj)) {
> - if (!list_empty(&obj->ring_list))
> + list_for_each_entry(vma, &obj->vma_list, vma_link) {
> + struct i915_hw_ppgtt *ppgtt;
> +
> + if (!drm_mm_node_allocated(&vma->node))
> + continue;
> +
> + ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base);
> + if (ppgtt->ctx == NULL) {
> + stats->global += obj->base.size;
> + continue;
> + }

I'm not really clear on how this is supposed to work for global. Can you
make me happy and change it to:

if (i915_is_ggtt(vma->vm))

> +
> + if (ppgtt->ctx->file_priv != stats->file_priv)
> + continue;
> +
> + if (obj->ring) /* XXX per-vma statistic */
>   stats->active += obj->base.size;

Doesn't active get counted too many times if multiple VMAs exist for the
same active object (not a new problem to this patch)?

>   else
>   stats->inactive += obj->base.size;
> - } else {
> - if (!list_empty(&obj->global_list))
> - stats->unbound += obj->base.size;
> +
> + return 0;
>   }
>  
> + if (!list_empty(&obj->global_list))
> + stats->unbound += obj->base.size;
> +
>   return 0;
>  }
>  
> @@ -411,6 +429,7 @@ static int i915_gem_object_info(struct seq_file *m, void* 
> data)
>   struct task_struct *task;
>  
>   memset(&stats, 0, sizeof(stats));
> + stats.file_priv = file->driver_priv;
>   idr_for_each(&file->object_idr, per_file_stats, &stats);
>   /*
>* Although we have a valid reference on file->pid, that does
> @@ -420,12 +439,13 @@ static int i915_gem_object_info(struct seq_file *m, 
> void* data)
>*/
>   rcu_read_lock();
>   task = pid_task(file->pid, PIDTYPE_PID);
> - seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu 
> inactive, %zu unbound)\n",
> + seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu 
> inactive, %zu global, %zu unbound)\n",
>  task ? task->comm : "<unknown>",
>  stats.count,
>  stats.total,
>  stats.active,
>  stats.inactive,
> +stats.global,
>  stats.unbound);
>   rcu_read_unlock();
>   }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2a319ba..b76c6de 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -721,6 +721,8 @@ struct i915_hw_ppgtt {
>   dma_addr_t *gen8_pt_dma_addr[4];
>   };
>  
> + struct i915_hw_context *ctx;
> +
>   int (*enable)(struct i915_hw_ppgtt *ppgtt);
>   int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
>         

[Intel-gfx] [PATCH] drm/i915/bdw: Restore PPAT on thaw

2014-03-18 Thread Ben Widawsky
Apparently it is wiped out from under us, and we get some really fun
caching artifacts upon resume (it seems to be WB for all types by
default).

Reported-by: James Ausmus 
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index bd016e2..1b45a04 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -30,6 +30,8 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv);
+
 bool intel_enable_ppgtt(struct drm_device *dev, bool full)
 {
if (i915.enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
@@ -1371,8 +1373,10 @@ void i915_gem_restore_gtt_mappings(struct drm_device 
*dev)
}
 
 
-   if (INTEL_INFO(dev)->gen >= 8)
+   if (INTEL_INFO(dev)->gen >= 8) {
+   gen8_setup_private_ppat(dev_priv);
return;
+   }
 
list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
/* TODO: Perhaps it shouldn't be gen6 specific */
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 00/20] ILK+ interrupt improvements, v2

2014-03-18 Thread Ben Widawsky
On Fri, Mar 07, 2014 at 08:10:16PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni 
> 
> Hi
> 
> This is basically a rebase of "[PATCH 00/19] ILK+ interrupt improvements", 
> which
> was sent to the mailing list on January 22. There are no real differences,
> except for the last patch, which is new.
> 
> Original cover letter:
> http://lists.freedesktop.org/archives/intel-gfx/2014-January/038679.html
> 
> The idea behind this series is that at some point our runtime PM code will 
> just
> call our irq_preinstall, irq_postinstall and irq_uninstall functions instead 
> of
> using dev_priv->pc8.regsave, so I decided to audit, cleanup and add a few 
> WARNs
> to our code before we do that change. We gotta be in shape if we want to be
> exposed to runtime!
> 
> Thanks,
> Paulo
> 
> Paulo Zanoni (20):
>   drm/i915: add GEN5_IRQ_INIT macro
>   drm/i915: also use GEN5_IRQ_INIT with south display interrupts
>   drm/i915: use GEN8_IRQ_INIT on GEN5
>   drm/i915: add GEN5_IRQ_FINI
>   drm/i915: don't forget to uninstall the PM IRQs
>   drm/i915: properly clear IIR at irq_uninstall on Gen5+
>   drm/i915: add GEN5_IRQ_INIT
>   drm/i915: check if IIR is still zero at postinstall on Gen5+
>   drm/i915: fix SERR_INT init/reset code
>   drm/i915: fix GEN7_ERR_INT init/reset code
>   drm/i915: fix open coded gen5_gt_irq_preinstall
>   drm/i915: extract ibx_irq_uninstall
>   drm/i915: call ibx_irq_uninstall from gen8_irq_uninstall
>   drm/i915: enable SDEIER later
>   drm/i915: remove ibx_irq_uninstall
>   drm/i915: add missing intel_hpd_irq_uninstall
>   drm/i915: add ironlake_irq_reset
>   drm/i915: add gen8_irq_reset
>   drm/i915: only enable HWSTAM interrupts on postinstall on ILK+
>   drm/i915: add POSTING_READs to the IRQ init/reset macros
> 
>  drivers/gpu/drm/i915/i915_irq.c | 270 
> ++--
>  1 file changed, 121 insertions(+), 149 deletions(-)
> 

Okay, here is the summary of my review. At first I was complaining to
myself about how many patches you used to do a simple thing. But, I must
admit it made reviewing the thing a lot easier, and when I look back at
how much stuff you combined, I'm really glad you did it this way. I'm
sure I've missed something silly though, since every patch looks so
similar :P

1-5: Reviewed-by: Ben Widawsky  (with possible comment
improvement on #3)

7: I don't like. Can we drop? I guess doing this would make a decent
amount of churn, so if you don't want to drop it, that's fine, and it's
functionally correct:
 Reviewed-by: Ben Widawsky 

8: I'd really like to drop this one.

9-10: Reviewed-by: Ben Widawsky 

12-13: I wouldn't mind cpt_irq_* rename, but either way
   Reviewed-by: Ben Widawsky 

14: With the requested change in the mail:
Reviewed-by: Ben Widawsky 

16: Reviewed-by: Ben Widawsky 

20: Should be squashed, but
Reviewed-by: Ben Widawsky 

6, 11, 15, 17, 18, 19: You introduce the term _reset as a verb which
seems to always mean "disable." I think disable makes the code so much
clearer, and would really love if you can apply this simple rename. With
the rename, they're:
Reviewed-by: Ben Widawsky 

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 20/20] drm/i915: add POSTING_READs to the IRQ init/reset macros

2014-03-18 Thread Ben Widawsky
On Fri, Mar 07, 2014 at 08:10:36PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni 
> 
> I previously chose to keep the POSTING_READ calls as something to be
> done by the macro callers, but the conclusion after discussing this on
> the mailing list is that leaving the POSTING_READ calls to the macros
> makes the code safer, and the additional useless register reads
> shouldn't be noticeable. So move the POSTING_READ calls to the
> macros.

Can you just squash this into the earlier patch? Either way, 
Reviewed-by: Ben Widawsky 

> 
> Signed-off-by: Paulo Zanoni 
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 16 +---
>  1 file changed, 5 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 79a8196..dee3a3b 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -80,11 +80,7 @@ static const u32 hpd_status_i915[] = { /* i915 and 
> valleyview are the same */
>   [HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS
>  };
>  
> -/*
> - * IIR can theoretically queue up two events. Be paranoid.
> - * Also, make sure callers of these macros have something equivalent to a
> - * POSTING_READ on the IIR register.
> - * */
> +/* IIR can theoretically queue up two events. Be paranoid. */
>  #define GEN8_IRQ_RESET_NDX(type, which) do { \
>   I915_WRITE(GEN8_##type##_IMR(which), 0xffffffff); \
>   POSTING_READ(GEN8_##type##_IMR(which)); \
> @@ -92,6 +88,7 @@ static const u32 hpd_status_i915[] = { /* i915 and 
> valleyview are the same */
>   I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff); \
>   POSTING_READ(GEN8_##type##_IIR(which)); \
>   I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff); \
> + POSTING_READ(GEN8_##type##_IIR(which)); \
>  } while (0)
>  
>  #define GEN5_IRQ_RESET(type) do { \
> @@ -101,6 +98,7 @@ static const u32 hpd_status_i915[] = { /* i915 and 
> valleyview are the same */
>   I915_WRITE(type##IIR, 0xffffffff); \
>   POSTING_READ(type##IIR); \
>   I915_WRITE(type##IIR, 0xffffffff); \
> + POSTING_READ(type##IIR); \
>  } while (0)
>  
>  /*
> @@ -117,12 +115,14 @@ static const u32 hpd_status_i915[] = { /* i915 and 
> valleyview are the same */
>   GEN5_ASSERT_IIR_IS_ZERO(GEN8_##type##_IIR(which)); \
>   I915_WRITE(GEN8_##type##_IMR(which), (imr_val)); \
>   I915_WRITE(GEN8_##type##_IER(which), (ier_val)); \
> + POSTING_READ(GEN8_##type##_IER(which)); \
>  } while (0)
>  
>  #define GEN5_IRQ_INIT(type, imr_val, ier_val) do { \
>   GEN5_ASSERT_IIR_IS_ZERO(type##IIR); \
>   I915_WRITE(type##IMR, (imr_val)); \
>   I915_WRITE(type##IER, (ier_val)); \
> + POSTING_READ(type##IER); \
>  } while (0)
>  
>  /* For display hotplug interrupt */
> @@ -2843,7 +2843,6 @@ static void gen5_gt_irq_reset(struct drm_device *dev)
>   GEN5_IRQ_RESET(GT);
>   if (INTEL_INFO(dev)->gen >= 6)
>   GEN5_IRQ_RESET(GEN6_PM);
> - POSTING_READ(GTIIR);
>  }
>  
>  /* drm_dma.h hooks
> @@ -2917,7 +2916,6 @@ static void gen8_irq_reset(struct drm_device *dev)
>   GEN5_IRQ_RESET(GEN8_DE_PORT_);
>   GEN5_IRQ_RESET(GEN8_DE_MISC_);
>   GEN5_IRQ_RESET(GEN8_PCU_);
> - POSTING_READ(GEN8_PCU_IIR);
>  
>   ibx_irq_reset(dev);
>  }
> @@ -3016,7 +3014,6 @@ static void gen5_gt_irq_postinstall(struct drm_device 
> *dev)
>   dev_priv->pm_irq_mask = 0xffffffff;
>   GEN5_IRQ_INIT(GEN6_PM, dev_priv->pm_irq_mask, pm_irqs);
>   }
> - POSTING_READ(GTIER);
>  }
>  
>  static int ironlake_irq_postinstall(struct drm_device *dev)
> @@ -3213,7 +3210,6 @@ static void gen8_gt_irq_postinstall(struct 
> drm_i915_private *dev_priv)
>  
>   for (i = 0; i < ARRAY_SIZE(gt_interrupts); i++)
>   GEN8_IRQ_INIT_NDX(GT, i, ~gt_interrupts[i], gt_interrupts[i]);
> - POSTING_READ(GEN8_GT_IER(0));
>  }
>  
>  static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
> @@ -3232,10 +3228,8 @@ static void gen8_de_irq_postinstall(struct 
> drm_i915_private *dev_priv)
>   for_each_pipe(pipe)
>   GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, dev_priv->de_irq_mask[pipe],
> de_pipe_enables);
> - POSTING_READ(GEN8_DE_PIPE_ISR(0));
>  
>   GEN5_IRQ_INIT(GEN8_DE_PORT_, ~GEN8_AUX_CHANNEL_A, GEN8_AUX_CHANNEL_A);
> - POSTING_READ(GEN8_DE_PORT_IER);
>  }
>  
>  static int gen8_irq_postinstall(struct drm_device *dev)
> -- 
> 1.8.5.3
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 16/20] drm/i915: add missing intel_hpd_irq_uninstall

2014-03-18 Thread Ben Widawsky
On Fri, Mar 07, 2014 at 08:10:32PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni 
> 
> Missing from gen8_irq_uninstall.
> 
> Signed-off-by: Paulo Zanoni 

Reviewed-by: Ben Widawsky 

> ---
>  drivers/gpu/drm/i915/i915_irq.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 3584a16d..1e5cc5b 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -3251,6 +3251,8 @@ static void gen8_irq_uninstall(struct drm_device *dev)
>   if (!dev_priv)
>   return;
>  
> + intel_hpd_irq_uninstall(dev_priv);
> +
>   I915_WRITE(GEN8_MASTER_IRQ, 0);
>  
>   GEN8_IRQ_RESET_NDX(GT, 0);
> -- 
> 1.8.5.3
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 18/20] drm/i915: add gen8_irq_reset

2014-03-18 Thread Ben Widawsky
On Fri, Mar 07, 2014 at 08:10:34PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni 
> 
> So we can merge all the common code from postinstall and uninstall.
> 
> Signed-off-by: Paulo Zanoni 
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 26 +++---
>  1 file changed, 7 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 4917a8c..d6723ab 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2899,7 +2899,7 @@ static void valleyview_irq_preinstall(struct drm_device 
> *dev)
>   POSTING_READ(VLV_IER);
>  }
>  
> -static void gen8_irq_preinstall(struct drm_device *dev)
> +static void gen8_irq_reset(struct drm_device *dev)
>  {
>   struct drm_i915_private *dev_priv = dev->dev_private;
>   int pipe;
> @@ -2924,6 +2924,11 @@ static void gen8_irq_preinstall(struct drm_device *dev)
>   ibx_irq_reset(dev);
>  }
>  
> +static void gen8_irq_preinstall(struct drm_device *dev)
> +{
> + gen8_irq_reset(dev);
> +}
> +
>  static void ibx_hpd_irq_setup(struct drm_device *dev)
>  {
>   drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
> @@ -3253,30 +3258,13 @@ static int gen8_irq_postinstall(struct drm_device 
> *dev)
>  static void gen8_irq_uninstall(struct drm_device *dev)
>  {
>   struct drm_i915_private *dev_priv = dev->dev_private;
> - int pipe;
>  
>   if (!dev_priv)
>   return;
>  
>   intel_hpd_irq_uninstall(dev_priv);
>  
> - I915_WRITE(GEN8_MASTER_IRQ, 0);
> -
> - GEN8_IRQ_RESET_NDX(GT, 0);
> - GEN8_IRQ_RESET_NDX(GT, 1);
> - GEN8_IRQ_RESET_NDX(GT, 2);
> - GEN8_IRQ_RESET_NDX(GT, 3);
> -
> - for_each_pipe(pipe)
> - GEN8_IRQ_RESET_NDX(DE_PIPE, pipe);
> -
> - GEN5_IRQ_RESET(GEN8_DE_PORT_);
> - GEN5_IRQ_RESET(GEN8_DE_MISC_);
> - GEN5_IRQ_RESET(GEN8_PCU_);
> -
> - POSTING_READ(GEN8_PCU_IIR);
> -
> - ibx_irq_reset(dev);
> + gen8_irq_reset(dev);

BTW: This looks like a bad hunk. I've merged up to this point, and I do
not have ibx_irq_reset().


>  }
>  
>  static void valleyview_irq_uninstall(struct drm_device *dev)
> -- 
> 1.8.5.3
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 14/20] drm/i915: enable SDEIER later

2014-03-18 Thread Ben Widawsky
On Fri, Mar 07, 2014 at 08:10:30PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni 
> 
> On the preinstall stage we should just disable all the interrupts, but
> we currently enable all the south display interrupts due to the way we
> touch SDEIER at the IRQ handlers (note: they are still masked and our
> IRQ handler is disabled).

I think this statement is false. The interrupt is enabled right after
preinstall(). For the nomodeset case, this actually seems to make some
difference. It still looks fine to me though.

> Instead of doing that, let's make the
> preinstall stage just disable all the south interrupts, and do the
> proper interrupt dance/ordering at the postinstall stage, including an
> assert to check if everything is behaving as expected.
> 
> Signed-off-by: Paulo Zanoni 
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 27 +--
>  1 file changed, 21 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 95f535b..4479e29 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2814,13 +2814,24 @@ static void ibx_irq_preinstall(struct drm_device *dev)
>  
>   if (HAS_PCH_CPT(dev) || HAS_PCH_LPT(dev))
>   I915_WRITE(SERR_INT, 0xffffffff);
> +}
>  
> - /*
> -  * SDEIER is also touched by the interrupt handler to work around missed
> -  * PCH interrupts. Hence we can't update it after the interrupt handler
> -  * is enabled - instead we unconditionally enable all PCH interrupt
> -  * sources here, but then only unmask them as needed with SDEIMR.
> -  */
> +/*
> + * SDEIER is also touched by the interrupt handler to work around missed PCH
> + * interrupts. Hence we can't update it after the interrupt handler is 
> enabled -
> + * instead we unconditionally enable all PCH interrupt sources here, but then
> + * only unmask them as needed with SDEIMR.
> + *
> + * This function needs to be called before interrupts are enabled.
> + */
> +static void ibx_irq_pre_postinstall(struct drm_device *dev)

sde_irq_postinstall()?

> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> +
> + if (HAS_PCH_NOP(dev))
> + return;
> +
> + WARN_ON(I915_READ(SDEIER) != 0);
>   I915_WRITE(SDEIER, 0xffffffff);
>   POSTING_READ(SDEIER);
>  }
> @@ -3026,6 +3037,8 @@ static int ironlake_irq_postinstall(struct drm_device 
> *dev)
>  
>   dev_priv->irq_mask = ~display_mask;
>  
> + ibx_irq_pre_postinstall(dev);
> +
>   GEN5_IRQ_INIT(DE, dev_priv->irq_mask, display_mask | extra_mask);
>  
>   gen5_gt_irq_postinstall(dev);
> @@ -3217,6 +3230,8 @@ static int gen8_irq_postinstall(struct drm_device *dev)
>  {
>   struct drm_i915_private *dev_priv = dev->dev_private;
>  
> + ibx_irq_pre_postinstall(dev);
> +
>   gen8_gt_irq_postinstall(dev_priv);
>       gen8_de_irq_postinstall(dev_priv);
>  
> -- 
> 1.8.5.3
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 10/20] drm/i915: fix GEN7_ERR_INT init/reset code

2014-03-18 Thread Ben Widawsky
On Fri, Mar 07, 2014 at 08:10:26PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni 
> 
> Same as SERR_INT and the other IIR registers: reset on
> preinstall/uninstall and WARN for non-zero values at postinstall. This
> one also doesn't need double-clear.
> 
> Signed-off-by: Paulo Zanoni 

This one, just like patch 9, is:
Reviewed-by: Ben Widawsky 

As with that one, I'd prefer to get rid of the IIR assertion.

> ---
>  drivers/gpu/drm/i915/i915_irq.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index d295624..02eb493 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2845,6 +2845,9 @@ static void ironlake_irq_preinstall(struct drm_device 
> *dev)
>  
>   GEN5_IRQ_RESET(DE);
>  
> + if (IS_GEN7(dev))
> + I915_WRITE(GEN7_ERR_INT, 0xffffffff);
> +
>   gen5_gt_irq_preinstall(dev);
>  
>   ibx_irq_preinstall(dev);
> @@ -3011,7 +3014,7 @@ static int ironlake_irq_postinstall(struct drm_device 
> *dev)
>   extra_mask = (DE_PIPEC_VBLANK_IVB | DE_PIPEB_VBLANK_IVB |
> DE_PIPEA_VBLANK_IVB);
>  
> - I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
> + GEN5_ASSERT_IIR_IS_ZERO(GEN7_ERR_INT);
>   } else {
>   display_mask = (DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT |
>   DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE |
> @@ -3295,7 +3298,7 @@ static void ironlake_irq_uninstall(struct drm_device 
> *dev)
>  
>   GEN5_IRQ_RESET(DE);
>   if (IS_GEN7(dev))
> - I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
> + I915_WRITE(GEN7_ERR_INT, 0xffffffff);
>  
>   GEN5_IRQ_RESET(GT);
>   if (INTEL_INFO(dev)->gen >= 6)
> -- 
> 1.8.5.3
> 
> _______
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 09/20] drm/i915: fix SERR_INT init/reset code

2014-03-18 Thread Ben Widawsky
On Fri, Mar 07, 2014 at 08:10:25PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni 
> 
> The SERR_INT register is very similar to the other IIR registers, so
> let's zero it at preinstall/uninstall and WARN for a non-zero value at
> postinstall, just like we do with the other IIR registers. For this
> one, there's no need to double-clear since it can't store more than
> one interrupt.
> 
> Signed-off-by: Paulo Zanoni 

Without the assert that I don't like, this is
Reviewed-by: Ben Widawsky 
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 4d0a8b1..d295624 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2811,6 +2811,10 @@ static void ibx_irq_preinstall(struct drm_device *dev)
>   return;
>  
>   GEN5_IRQ_RESET(SDE);
> +
> + if (HAS_PCH_CPT(dev) || HAS_PCH_LPT(dev))
> + I915_WRITE(SERR_INT, 0x);
> +
>   /*
>* SDEIER is also touched by the interrupt handler to work around missed
>* PCH interrupts. Hence we can't update it after the interrupt handler
> @@ -2949,7 +2953,7 @@ static void ibx_irq_postinstall(struct drm_device *dev)
>   } else {
>   mask = SDE_GMBUS_CPT | SDE_AUX_MASK_CPT | SDE_ERROR_CPT;
>  
> - I915_WRITE(SERR_INT, I915_READ(SERR_INT));
> + GEN5_ASSERT_IIR_IS_ZERO(SERR_INT);
>   }
>  
>   GEN5_ASSERT_IIR_IS_ZERO(SDEIIR);
> @@ -3303,7 +3307,7 @@ static void ironlake_irq_uninstall(struct drm_device 
> *dev)
>  
>   GEN5_IRQ_RESET(SDE);
>   if (HAS_PCH_CPT(dev) || HAS_PCH_LPT(dev))
> - I915_WRITE(SERR_INT, I915_READ(SERR_INT));
> + I915_WRITE(SERR_INT, 0x);
>  }
>  
>  static void i8xx_irq_preinstall(struct drm_device * dev)
> -- 
> 1.8.5.3
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 08/20] drm/i915: check if IIR is still zero at postinstall on Gen5+

2014-03-18 Thread Ben Widawsky
On Fri, Mar 07, 2014 at 08:10:24PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni 
> 
> Instead of trying to clear it again. It should already be masked and
> disabled and zeroed at preinstall/uninstall.
> 
> Signed-off-by: Paulo Zanoni 
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 32 +++-
>  1 file changed, 15 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 6d4daf2..4d0a8b1 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -103,12 +103,24 @@ static const u32 hpd_status_i915[] = { /* i915 and 
> valleyview are the same */
>   I915_WRITE(type##IIR, 0x); \
>  } while (0)
>  
> +/*
> + * We should clear IMR at preinstall/uninstall, and just check at 
> postinstall.
> + */
> +#define GEN5_ASSERT_IIR_IS_ZERO(reg) do { \
> + u32 val = I915_READ(reg); \
> + if (val) \
> + DRM_ERROR("Interrupt register 0x%x is not zero: 0x%08x\n", \
> +   (reg), val); \
> +} while (0)
> +
>  #define GEN8_IRQ_INIT_NDX(type, which, imr_val, ier_val) do { \
> + GEN5_ASSERT_IIR_IS_ZERO(GEN8_##type##_IIR(which)); \
>   I915_WRITE(GEN8_##type##_IMR(which), (imr_val)); \
>   I915_WRITE(GEN8_##type##_IER(which), (ier_val)); \
>  } while (0)
>  
>  #define GEN5_IRQ_INIT(type, imr_val, ier_val) do { \
> + GEN5_ASSERT_IIR_IS_ZERO(type##IIR); \
>   I915_WRITE(type##IMR, (imr_val)); \
>   I915_WRITE(type##IER, (ier_val)); \
>  } while (0)

Okay, this is replacing a POSTED_WRITE, with a (slower) POSTING_READ
which gives an error that we can do nothing about other than clear it
anyway.

I'd be in favor of dropping this patch.

> @@ -2940,7 +2952,7 @@ static void ibx_irq_postinstall(struct drm_device *dev)
>   I915_WRITE(SERR_INT, I915_READ(SERR_INT));
>   }
>  
> - I915_WRITE(SDEIIR, I915_READ(SDEIIR));
> + GEN5_ASSERT_IIR_IS_ZERO(SDEIIR);
>   I915_WRITE(SDEIMR, ~mask);
>  }
>  
> @@ -2966,7 +2978,6 @@ static void gen5_gt_irq_postinstall(struct drm_device 
> *dev)
>   gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT;
>   }
>  
> - I915_WRITE(GTIIR, I915_READ(GTIIR));
>   GEN5_IRQ_INIT(GT, dev_priv->gt_irq_mask, gt_irqs);
>  
>   if (INTEL_INFO(dev)->gen >= 6) {
> @@ -2976,7 +2987,6 @@ static void gen5_gt_irq_postinstall(struct drm_device 
> *dev)
>   pm_irqs |= PM_VEBOX_USER_INTERRUPT;
>  
>   dev_priv->pm_irq_mask = 0x;
> - I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
>   GEN5_IRQ_INIT(GEN6_PM, dev_priv->pm_irq_mask, pm_irqs);
>   }
>   POSTING_READ(GTIER);
> @@ -3010,8 +3020,6 @@ static int ironlake_irq_postinstall(struct drm_device 
> *dev)
>  
>   dev_priv->irq_mask = ~display_mask;
>  
> - /* should always can generate irq */
> - I915_WRITE(DEIIR, I915_READ(DEIIR));
>   GEN5_IRQ_INIT(DE, dev_priv->irq_mask, display_mask | extra_mask);
>  
>   gen5_gt_irq_postinstall(dev);
> @@ -3172,13 +3180,8 @@ static void gen8_gt_irq_postinstall(struct 
> drm_i915_private *dev_priv)
>   GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT
>   };
>  
> - for (i = 0; i < ARRAY_SIZE(gt_interrupts); i++) {
> - u32 tmp = I915_READ(GEN8_GT_IIR(i));
> - if (tmp)
> - DRM_ERROR("Interrupt (%d) should have been masked in 
> pre-install 0x%08x\n",
> -   i, tmp);
> + for (i = 0; i < ARRAY_SIZE(gt_interrupts); i++)
>   GEN8_IRQ_INIT_NDX(GT, i, ~gt_interrupts[i], gt_interrupts[i]);
> - }
>   POSTING_READ(GEN8_GT_IER(0));
>  }
>  
> @@ -3195,14 +3198,9 @@ static void gen8_de_irq_postinstall(struct 
> drm_i915_private *dev_priv)
>   dev_priv->de_irq_mask[PIPE_B] = ~de_pipe_masked;
>   dev_priv->de_irq_mask[PIPE_C] = ~de_pipe_masked;
>  
> - for_each_pipe(pipe) {
> - u32 tmp = I915_READ(GEN8_DE_PIPE_IIR(pipe));
> - if (tmp)
> - DRM_ERROR("Interrupt (%d) should have been masked in 
> pre-install 0x%08x\n",
> -   pipe, tmp);
> + for_each_pipe(pipe)
>   GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, dev_priv->de_irq_mask[pipe],
> de_pipe_enables);
> - }
>   POSTING_READ(GEN8_DE_PIPE_ISR(0));
>  
>   GEN5_IRQ_INIT(GEN8_DE_PORT_, ~GEN8_AUX_CHANNEL_A, GEN8_AUX_CHANNEL_A);
> -- 
> 1.8.5.3
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 07/20] drm/i915: add GEN5_IRQ_INIT

2014-03-18 Thread Ben Widawsky
On Fri, Mar 07, 2014 at 08:10:23PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni 
> 
> And the equivalent GEN8_IRQ_INIT_NDX macro. These macros are for the
> postinstall functions. The next patch will improve this macro.
> 
> Notice that I could have included POSTING_READ calls to the macro, but
> that would mean the code would do a few more POSTING_READs than
> necessary. OTOH it would be more fail-proof. I can change that if
> needed.
> 
> Signed-off-by: Paulo Zanoni 
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 33 ++---
>  1 file changed, 18 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 73f1125..6d4daf2 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -103,6 +103,16 @@ static const u32 hpd_status_i915[] = { /* i915 and 
> valleyview are the same */
>   I915_WRITE(type##IIR, 0x); \
>  } while (0)
>  
> +#define GEN8_IRQ_INIT_NDX(type, which, imr_val, ier_val) do { \
> + I915_WRITE(GEN8_##type##_IMR(which), (imr_val)); \
> + I915_WRITE(GEN8_##type##_IER(which), (ier_val)); \
> +} while (0)
> +
> +#define GEN5_IRQ_INIT(type, imr_val, ier_val) do { \
> + I915_WRITE(type##IMR, (imr_val)); \
> + I915_WRITE(type##IER, (ier_val)); \
> +} while (0)
> +

I don't like these macros. IMO they make the code less readable, and
only save a couple LOC. They don't prevent any programmer errors either,
since all the logic is still contained in the values you pass in.

I'll read on ahead to see if they're required in your grand scheme.

>  /* For display hotplug interrupt */
>  static void
>  ironlake_enable_display_irq(drm_i915_private_t *dev_priv, u32 mask)
> @@ -2957,9 +2967,7 @@ static void gen5_gt_irq_postinstall(struct drm_device 
> *dev)
>   }
>  
>   I915_WRITE(GTIIR, I915_READ(GTIIR));
> - I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
> - I915_WRITE(GTIER, gt_irqs);
> - POSTING_READ(GTIER);
> + GEN5_IRQ_INIT(GT, dev_priv->gt_irq_mask, gt_irqs);
>  
>   if (INTEL_INFO(dev)->gen >= 6) {
>   pm_irqs |= GEN6_PM_RPS_EVENTS;
> @@ -2969,10 +2977,9 @@ static void gen5_gt_irq_postinstall(struct drm_device 
> *dev)
>  
>   dev_priv->pm_irq_mask = 0x;
>   I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
> - I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
> - I915_WRITE(GEN6_PMIER, pm_irqs);
> - POSTING_READ(GEN6_PMIER);
> + GEN5_IRQ_INIT(GEN6_PM, dev_priv->pm_irq_mask, pm_irqs);
>   }
> + POSTING_READ(GTIER);
>  }
>  
>  static int ironlake_irq_postinstall(struct drm_device *dev)
> @@ -3005,9 +3012,7 @@ static int ironlake_irq_postinstall(struct drm_device 
> *dev)
>  
>   /* should always can generate irq */
>   I915_WRITE(DEIIR, I915_READ(DEIIR));
> - I915_WRITE(DEIMR, dev_priv->irq_mask);
> - I915_WRITE(DEIER, display_mask | extra_mask);
> - POSTING_READ(DEIER);
> + GEN5_IRQ_INIT(DE, dev_priv->irq_mask, display_mask | extra_mask);
>  
>   gen5_gt_irq_postinstall(dev);
>  
> @@ -3172,8 +3177,7 @@ static void gen8_gt_irq_postinstall(struct 
> drm_i915_private *dev_priv)
>   if (tmp)
>   DRM_ERROR("Interrupt (%d) should have been masked in 
> pre-install 0x%08x\n",
> i, tmp);
> - I915_WRITE(GEN8_GT_IMR(i), ~gt_interrupts[i]);
> - I915_WRITE(GEN8_GT_IER(i), gt_interrupts[i]);
> + GEN8_IRQ_INIT_NDX(GT, i, ~gt_interrupts[i], gt_interrupts[i]);
>   }
>   POSTING_READ(GEN8_GT_IER(0));
>  }
> @@ -3196,13 +3200,12 @@ static void gen8_de_irq_postinstall(struct 
> drm_i915_private *dev_priv)
>   if (tmp)
>   DRM_ERROR("Interrupt (%d) should have been masked in 
> pre-install 0x%08x\n",
> pipe, tmp);
> - I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]);
> - I915_WRITE(GEN8_DE_PIPE_IER(pipe), de_pipe_enables);
> + GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, dev_priv->de_irq_mask[pipe],
> +   de_pipe_enables);
>   }
>   POSTING_READ(GEN8_DE_PIPE_ISR(0));
>  
> - I915_WRITE(GEN8_DE_PORT_IMR, ~GEN8_AUX_CHANNEL_A);
> - I915_WRITE(GEN8_DE_PORT_IER, GEN8_AUX_CHANNEL_A);
> + GEN5_IRQ_INIT(GEN8_DE_PORT_, ~GEN8_AUX_CHANNEL_A, GEN8_AUX_CHANNEL_A);
>   POSTING_READ(GEN8_DE_PORT_IER);
>  }
>  
> -- 
> 1.8.5.3
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 05/20] drm/i915: don't forget to uninstall the PM IRQs

2014-03-18 Thread Ben Widawsky
On Fri, Mar 07, 2014 at 08:10:21PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni 
> 
> It's the only thihg missing, apparently.

s/thihg/thing

There is a potential fixup in patch 3, but with or without,
everything up through here is:
Reviewed-by: Ben Widawsky 

> 
> Signed-off-by: Paulo Zanoni 
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index a9f173c..f681462 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -3314,6 +3314,8 @@ static void ironlake_irq_uninstall(struct drm_device 
> *dev)
>   I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
>  
>   GEN5_IRQ_FINI(GT);
> + if (INTEL_INFO(dev)->gen >= 6)
> + GEN5_IRQ_FINI(GEN6_PM);
>  
>   if (HAS_PCH_NOP(dev))
>   return;
> -- 
> 1.8.5.3
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 06/20] drm/i915: properly clear IIR at irq_uninstall on Gen5+

2014-03-18 Thread Ben Widawsky
fff); \
> - I915_WRITE(GEN8_##type##_IER, 0); \
> - I915_WRITE(GEN8_##type##_IIR, 0x); \
> - } while (0)
> + GEN8_IRQ_RESET_NDX(GT, 0);
> + GEN8_IRQ_RESET_NDX(GT, 1);
> + GEN8_IRQ_RESET_NDX(GT, 2);
> + GEN8_IRQ_RESET_NDX(GT, 3);
>  
> - GEN8_IRQ_FINI_NDX(GT, 0);
> - GEN8_IRQ_FINI_NDX(GT, 1);
> - GEN8_IRQ_FINI_NDX(GT, 2);
> - GEN8_IRQ_FINI_NDX(GT, 3);
> -
> - for_each_pipe(pipe) {
> - GEN8_IRQ_FINI_NDX(DE_PIPE, pipe);
> - }
> + for_each_pipe(pipe)
> + GEN8_IRQ_RESET_NDX(DE_PIPE, pipe);
>  
> - GEN8_IRQ_FINI(DE_PORT);
> - GEN8_IRQ_FINI(DE_MISC);
> - GEN8_IRQ_FINI(PCU);
> -#undef GEN8_IRQ_FINI
> -#undef GEN8_IRQ_FINI_NDX
> + GEN5_IRQ_RESET(GEN8_DE_PORT_);
> + GEN5_IRQ_RESET(GEN8_DE_MISC_);
> + GEN5_IRQ_RESET(GEN8_PCU_);
>  
>   POSTING_READ(GEN8_PCU_IIR);
>  }
> @@ -3309,18 +3288,19 @@ static void ironlake_irq_uninstall(struct drm_device 
> *dev)
>  
>   I915_WRITE(HWSTAM, 0x);
>  
> - GEN5_IRQ_FINI(DE);
> + GEN5_IRQ_RESET(DE);
>   if (IS_GEN7(dev))
>   I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
>  
> - GEN5_IRQ_FINI(GT);
> + GEN5_IRQ_RESET(GT);
>   if (INTEL_INFO(dev)->gen >= 6)
> - GEN5_IRQ_FINI(GEN6_PM);
> + GEN5_IRQ_RESET(GEN6_PM);
> + POSTING_READ(GTIIR);
>  
>   if (HAS_PCH_NOP(dev))
>   return;
>  
> - GEN5_IRQ_FINI(SDE);
> + GEN5_IRQ_RESET(SDE);
>   if (HAS_PCH_CPT(dev) || HAS_PCH_LPT(dev))
>   I915_WRITE(SERR_INT, I915_READ(SERR_INT));
>  }
> -- 
> 1.8.5.3
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 03/20] drm/i915: use GEN8_IRQ_INIT on GEN5

2014-03-18 Thread Ben Widawsky
On Fri, Mar 07, 2014 at 08:10:19PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni 
> 
> And rename it to GEN5_IRQ_INIT.
> 
> We have discussed doing equivalent changes on July 2013, and I even
> sent a patch series for this: "[PATCH 00/15] Unify interrupt register
> init/reset". Now that the BDW code was merged, I have one more
> argument in favor of these changes.
> 
> Here's what really changes with the Gen 5 IRQ init code:
>   - We now clear the IIR registers at preinstall (they are also
> cleared at postinstall, but we will change that later).
>   - We have an additional POSTING_READ at the IMR register.
> 
> Signed-off-by: Paulo Zanoni 
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 49 
> +++--
>  1 file changed, 23 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 852844d..7be7da1 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -80,12 +80,30 @@ static const u32 hpd_status_i915[] = { /* i915 and 
> valleyview are the same */
>   [HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS
>  };
>  
> +/*
> + * IIR can theoretically queue up two events. Be paranoid.
> + * Also, make sure callers of these macros have something equivalent to a
> + * POSTING_READ on the IIR register.
> + * */

I don't understand what you mean in this comment. If you're always going
to send a posting read after the second IIR write, why not just put
it in the macro?

The reason it wasn't in my original macro is because we've done the
posting read on IER, and IMR - so we're not going to get new interrupts.
When the second IIR write lands is irrelevant. The POSTING_READ in
between is to prevent the [probably impossible] case of the writes
getting collapsed into one write.

> +#define GEN8_IRQ_INIT_NDX(type, which) do { \
> + I915_WRITE(GEN8_##type##_IMR(which), 0x); \
> + POSTING_READ(GEN8_##type##_IMR(which)); \
> + I915_WRITE(GEN8_##type##_IER(which), 0); \
> + I915_WRITE(GEN8_##type##_IIR(which), 0x); \
> + POSTING_READ(GEN8_##type##_IIR(which)); \
> + I915_WRITE(GEN8_##type##_IIR(which), 0x); \
> +} while (0)
> +
>  #define GEN5_IRQ_INIT(type) do { \
>   I915_WRITE(type##IMR, 0x); \
> + POSTING_READ(type##IMR); \
>   I915_WRITE(type##IER, 0); \
> - POSTING_READ(type##IER); \
> + I915_WRITE(type##IIR, 0x); \
> + POSTING_READ(type##IIR); \
> + I915_WRITE(type##IIR, 0x); \
>  } while (0)
>  
> +
>  /* For display hotplug interrupt */
>  static void
>  ironlake_enable_display_irq(drm_i915_private_t *dev_priv, u32 mask)
> @@ -2789,6 +2807,7 @@ static void gen5_gt_irq_preinstall(struct drm_device 
> *dev)
>   GEN5_IRQ_INIT(GT);
>   if (INTEL_INFO(dev)->gen >= 6)
>   GEN5_IRQ_INIT(GEN6_PM);
> + POSTING_READ(GTIIR);
>  }
>  
>  /* drm_dma.h hooks
> @@ -2843,25 +2862,6 @@ static void gen8_irq_preinstall(struct drm_device *dev)
>   I915_WRITE(GEN8_MASTER_IRQ, 0);
>   POSTING_READ(GEN8_MASTER_IRQ);
>  
> - /* IIR can theoretically queue up two events. Be paranoid */
> -#define GEN8_IRQ_INIT_NDX(type, which) do { \
> - I915_WRITE(GEN8_##type##_IMR(which), 0x); \
> - POSTING_READ(GEN8_##type##_IMR(which)); \
> - I915_WRITE(GEN8_##type##_IER(which), 0); \
> - I915_WRITE(GEN8_##type##_IIR(which), 0x); \
> - POSTING_READ(GEN8_##type##_IIR(which)); \
> - I915_WRITE(GEN8_##type##_IIR(which), 0x); \
> - } while (0)
> -
> -#define GEN8_IRQ_INIT(type) do { \
> - I915_WRITE(GEN8_##type##_IMR, 0x); \
> - POSTING_READ(GEN8_##type##_IMR); \
> - I915_WRITE(GEN8_##type##_IER, 0); \
> - I915_WRITE(GEN8_##type##_IIR, 0x); \
> - POSTING_READ(GEN8_##type##_IIR); \
> - I915_WRITE(GEN8_##type##_IIR, 0x); \
> - } while (0)
> -
>   GEN8_IRQ_INIT_NDX(GT, 0);
>   GEN8_IRQ_INIT_NDX(GT, 1);
>   GEN8_IRQ_INIT_NDX(GT, 2);
> @@ -2871,12 +2871,9 @@ static void gen8_irq_preinstall(struct drm_device *dev)
>   GEN8_IRQ_INIT_NDX(DE_PIPE, pipe);
>   }
>  
> - GEN8_IRQ_INIT(DE_PORT);
> - GEN8_IRQ_INIT(DE_MISC);
> - GEN8_IRQ_INIT(PCU);
> -#undef GEN8_IRQ_INIT
> -#undef GEN8_IRQ_INIT_NDX
> -
> + GEN5_IRQ_INIT(GEN8_DE_PORT_);
> + GEN5_IRQ_INIT(GEN8_DE_MISC_);
> + GEN5_IRQ_INIT(GEN8_PCU_);
>   POSTING_READ(GEN8_PCU_IIR);
>  
>   ibx_irq_preinstall(dev);
> -- 
> 1.8.5.3
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 23/26] drm/i915: Force pd restore when PDEs change, gen6-7

2014-03-17 Thread Ben Widawsky
The docs say you cannot change the PDEs of a currently running context. We never
map new PDEs of a running context, and expect them to be present - so I
think this is okay. (We can unmap, but this should also be okay since we
only unmap unreferenced objects that the GPU shouldn't be trying to
va->pa xlate.) The MI_SET_CONTEXT command does have a flag to signal
that even if the context is the same, force a reload. It's unclear
exactly what this does, but I have a hunch it's the right thing to do.

The logic assumes that we always emit a context switch after mapping new
PDEs, and before we submit a batch. This is the case today, and has been
the case since the inception of hardware contexts. A note in the comment
lets the user know.

NOTE: I have no evidence to suggest this is actually needed other than a
few tidbits which lead me to believe there are some corner cases that
will require it. I'm mostly depending on the reload of DCLV to
invalidate the old TLBs. We can try to remove this patch and see what
happens.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_context.c| 15 ---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  5 +
 drivers/gpu/drm/i915/i915_gem_gtt.c| 17 -
 drivers/gpu/drm/i915/i915_gem_gtt.h|  2 ++
 4 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index a899e11..6ad5380 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -636,9 +636,18 @@ mi_set_context(struct intel_ring_buffer *ring,
 
 static inline bool should_skip_switch(struct intel_ring_buffer *ring,
  struct i915_hw_context *from,
- struct i915_hw_context *to)
+ struct i915_hw_context *to,
+ u32 *flags)
 {
-   if (from == to && from->last_ring == ring && !to->remap_slice)
+   if (test_and_clear_bit(ring->id, &to->vm->pd_reload_mask)) {
+   *flags |= MI_FORCE_RESTORE;
+   return false;
+   }
+
+   if (to->remap_slice)
+   return false;
+
+   if (from == to && from->last_ring == ring)
return true;
 
return false;
@@ -658,7 +667,7 @@ static int do_switch(struct intel_ring_buffer *ring,
BUG_ON(!i915_gem_obj_is_pinned(from->obj));
}
 
-   if (should_skip_switch(ring, from, to))
+   if (should_skip_switch(ring, from, to, &hw_flags))
return 0;
 
/* Trying to pin first makes error handling easier. */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 856fa9d..bb901e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1162,6 +1162,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
if (ret)
goto err;
 
+   /* XXX: Reserve has possibly change PDEs which means we must do a
+* context switch before we can coherently read some of the reserved
+* VMAs. */
+
/* The objects are in their final locations, apply the relocations. */
if (need_relocs)
ret = i915_gem_execbuffer_relocate(eb);
@@ -1263,6 +1267,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
}
} else {
+   WARN_ON(vm->pd_reload_mask & (1<<ring->id));
ret = ring->dispatch_execbuffer(ring,
exec_start, exec_len,
flags);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d3c77d1..6d904c9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1228,6 +1228,16 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct 
i915_hw_ppgtt *ppgtt)
return 0;
 }
 
+/* PDE TLBs are a pain invalidate pre GEN8. It requires a context reload. If we
+ * are switching between contexts with the same LRCA, we also must do a force
+ * restore.
+ */
+#define ppgtt_invalidate_tlbs(vm) do {\
+   if (INTEL_INFO(vm->dev)->gen < 8) { \
+   vm->pd_reload_mask = INTEL_INFO(vm->dev)->ring_mask; \
+   } \
+} while(0)
+
 static int
 ppgtt_bind_vma(struct i915_vma *vma,
   enum i915_cache_level cache_level,
@@ -1242,10 +1252,13 @@ ppgtt_bind_vma(struct i915_vma *vma,
 vma->node.size);
if (ret)
return ret;
+
+  

[Intel-gfx] [PATCH 24/26] drm/i915: Finish gen6/7 dynamic page table allocation

2014-03-17 Thread Ben Widawsky
This patch continues on the idea from the previous patch. From here on,
in the steady state, PDEs are all pointing to the scratch page table (as
recommended in the spec). When an object is allocated in the VA range,
the code will determine if we need to allocate a page for the page
table. Similarly, when the object is destroyed, we will remove and free
the page table, pointing the PDE back to the scratch page.

Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.

The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain; and we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV.  Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.

We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_drv.h |   2 +-
 drivers/gpu/drm/i915/i915_gem_context.c |   2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c | 123 +---
 drivers/gpu/drm/i915/i915_trace.h   | 108 
 4 files changed, 224 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b19442c..eeef032 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2373,7 +2373,7 @@ static inline void i915_gem_chipset_flush(struct 
drm_device *dev)
if (INTEL_INFO(dev)->gen < 6)
intel_gtt_chipset_flush();
 }
-int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
+int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt, 
bool aliasing);
 bool intel_enable_ppgtt(struct drm_device *dev, bool full);
 
 /* i915_gem_stolen.c */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 6ad5380..185c926 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -209,7 +209,7 @@ create_vm_for_ctx(struct drm_device *dev, struct 
i915_hw_context *ctx)
if (!ppgtt)
return ERR_PTR(-ENOMEM);
 
-   ret = i915_gem_init_ppgtt(dev, ppgtt);
+   ret = i915_gem_init_ppgtt(dev, ppgtt, ctx->file_priv == NULL);
if (ret) {
kfree(ppgtt);
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6d904c9..846a5b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1016,13 +1016,54 @@ static void gen6_ppgtt_insert_entries(struct 
i915_address_space *vm,
kunmap_atomic(pt_vaddr);
 }
 
+static DECLARE_BITMAP(new_page_tables, I915_PDES_PER_PD);
 static int gen6_alloc_va_range(struct i915_address_space *vm,
   uint64_t start, uint64_t length)
 {
+   struct drm_device *dev = vm->dev;
+   struct drm_i915_private *dev_priv = dev->dev_private;
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
struct i915_pagetab *pt;
+   const uint32_t start_save = start, length_save = length;
uint32_t pde, temp;
+   int ret;
+
+   BUG_ON(upper_32_bits(start));
+
+   bitmap_zero(new_page_tables, I915_PDES_PER_PD);
+
+   trace_i915_va_alloc(vm, start, length);
+
+   /* The allocation is done in two stages so that we can bail out with
+* minimal amount of pain. The first stage finds new page tables that
+* need allocation. The second stage marks use ptes within the page
+* tables.
+*/
+   gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
+   if (pt != ppgtt->scratch_pt) {
+   WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES_PER_PT));
+   continue;
+   }
+
+   /* We've already allocated a page table */
+   WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES_PER_PT));
+
+   pt = alloc_pt_single(dev);
+   if (IS_ERR(pt)) {
+   ret = PTR_ERR(pt);
+   goto unwind_out;
+   

[Intel-gfx] [PATCH 16/26] drm/i915: Generalize GEN6 mapping

2014-03-17 Thread Ben Widawsky
Having a more general way of doing mappings will allow the ability to
easily map and unmap a specific page table. Specifically in this case, we
pass down the page directory + entry, and the page table to map. This
works similarly to the x86 code.

The same work will need to happen for GEN8. At that point I will try to
combine functionality.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 61 +++--
 drivers/gpu/drm/i915/i915_gem_gtt.h |  2 ++
 2 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5c08cf9..35acccb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -663,18 +663,13 @@ bail:
 
 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 {
-   struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
struct i915_address_space *vm = &ppgtt->base;
-   gen6_gtt_pte_t __iomem *pd_addr;
gen6_gtt_pte_t scratch_pte;
uint32_t pd_entry;
int pte, pde;
 
scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
 
-   pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
-   ppgtt->pd.pd_offset / sizeof(gen6_gtt_pte_t);
-
seq_printf(m, "  VM %p (pd_offset %x-%x):\n", vm,
   ppgtt->pd.pd_offset,
   ppgtt->pd.pd_offset + ppgtt->num_pd_entries);
@@ -682,7 +677,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, 
struct seq_file *m)
u32 expected;
gen6_gtt_pte_t *pt_vaddr;
dma_addr_t pt_addr = ppgtt->pd.page_tables[pde]->daddr;
-   pd_entry = readl(pd_addr + pde);
+   pd_entry = readl(ppgtt->pd_addr + pde);
expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
 
if (pd_entry != expected)
@@ -718,39 +713,43 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, 
struct seq_file *m)
}
 }
 
-static void gen6_map_single(struct i915_hw_ppgtt *ppgtt,
-   const unsigned pde_index,
-   dma_addr_t daddr)
+/* Map pde (index) from the page directory @pd to the page table @pt */
+static void gen6_map_single(struct i915_pagedir *pd,
+   const int pde, struct i915_pagetab *pt)
 {
-   struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
-   uint32_t pd_entry;
-   gen6_gtt_pte_t __iomem *pd_addr = (gen6_gtt_pte_t 
__iomem*)dev_priv->gtt.gsm;
-   pd_addr += ppgtt->pd.pd_offset / sizeof(gen6_gtt_pte_t);
+   struct i915_hw_ppgtt *ppgtt =
+   container_of(pd, struct i915_hw_ppgtt, pd);
+   u32 pd_entry;
 
-   pd_entry = GEN6_PDE_ADDR_ENCODE(daddr);
+   pd_entry = GEN6_PDE_ADDR_ENCODE(pt->daddr);
pd_entry |= GEN6_PDE_VALID;
 
-   writel(pd_entry, pd_addr + pde_index);
+   writel(pd_entry, ppgtt->pd_addr + pde);
+
+   /* XXX: Caller needs to make sure the write completes if necessary */
 }
 
 /* Map all the page tables found in the ppgtt structure to incrementing page
  * directories. */
-static void gen6_map_page_tables(struct i915_hw_ppgtt *ppgtt)
+static void gen6_map_page_range(struct drm_i915_private *dev_priv,
+   struct i915_pagedir *pd, unsigned pde, size_t n)
 {
-   struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
-   int i;
+   if (WARN_ON(pde + n > I915_PDES_PER_PD))
+   n = I915_PDES_PER_PD - pde;
 
-   WARN_ON(ppgtt->pd.pd_offset & 0x3f);
-   for (i = 0; i < ppgtt->num_pd_entries; i++)
-   gen6_map_single(ppgtt, i, ppgtt->pd.page_tables[i]->daddr);
+   n += pde;
+
+   for (; pde < n; pde++)
+   gen6_map_single(pd, pde, pd->page_tables[pde]);
 
+   /* Make sure write is complete before other code can use this page
+* table. Also require for WC mapped PTEs */
readl(dev_priv->gtt.gsm);
 }
 
 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 {
BUG_ON(ppgtt->pd.pd_offset & 0x3f);
-
return (ppgtt->pd.pd_offset / 64) << 16;
 }
 
@@ -1184,7 +1183,10 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->pd.pd_offset =
ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
 
-   gen6_map_page_tables(ppgtt);
+   ppgtt->pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
+   ppgtt->pd.pd_offset / sizeof(gen6_gtt_pte_t);
+
+   gen6_map_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->num_pd_entries);
 
DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
 ppgtt->node.size >> 20,
@@ -1355,13 +1357,14 @@ void i

[Intel-gfx] [PATCH 22/26] drm/i915: Extract context switch skip logic

2014-03-17 Thread Ben Widawsky
We have some fanciness coming up. This patch just breaks out the logic.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index f918f2c..a899e11 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -634,6 +634,16 @@ mi_set_context(struct intel_ring_buffer *ring,
return ret;
 }
 
+static inline bool should_skip_switch(struct intel_ring_buffer *ring,
+ struct i915_hw_context *from,
+ struct i915_hw_context *to)
+{
+   if (from == to && from->last_ring == ring && !to->remap_slice)
+   return true;
+
+   return false;
+}
+
 static int do_switch(struct intel_ring_buffer *ring,
 struct i915_hw_context *to)
 {
@@ -648,7 +658,7 @@ static int do_switch(struct intel_ring_buffer *ring,
BUG_ON(!i915_gem_obj_is_pinned(from->obj));
}
 
-   if (from == to && from->last_ring == ring && !to->remap_slice)
+   if (should_skip_switch(ring, from, to))
return 0;
 
/* Trying to pin first makes error handling easier. */
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 26/26] FOR REFERENCE ONLY

2014-03-17 Thread Ben Widawsky
Start using size/length through the GEN8 code. The same approach was
taken for gen7. The difference with gen8 to this point is we need to
take care to do the page directory allocations, as well as the page
tables.

This patch is meant to show how things will look (more or less) if I
keep up in the same direction.
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 104 +++-
 drivers/gpu/drm/i915/i915_gem_gtt.h |  37 +
 2 files changed, 115 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 846a5b5..1348d48 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -488,29 +488,50 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
kunmap_atomic(pt_vaddr);
 }
 
-static void gen8_free_page_tables(struct i915_pagedir *pd, struct drm_device 
*dev)
+static void gen8_free_page_tables(struct i915_pagedir *pd,
+ uint64_t start, uint64_t length,
+ struct drm_device *dev)
 {
int i;
 
if (!pd->page)
return;
 
-   for (i = 0; i < I915_PDES_PER_PD; i++) {
+   for (i = gen8_pte_index(start);
+length && i < GEN8_PTES_PER_PT; i++, length -= PAGE_SIZE) {
+   if (!pd->page_tables[i])
+   continue;
+
free_pt_single(pd->page_tables[i], dev);
pd->page_tables[i] = NULL;
}
 }
 
-static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
+static void gen8_teardown_va_range(struct i915_hw_ppgtt *ppgtt,
+  uint64_t start, uint64_t length)
 {
-   int i;
+   struct drm_device *dev = ppgtt->base.dev;
+   struct i915_pagedir *pd;
+   struct i915_pagetab *pt;
+   uint64_t temp, temp2;
+   uint32_t pdpe, pde;
+
+   gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
+   uint64_t pd_start = start;
+   uint64_t pd_len = gen8_bound_pt(start, length);
+   gen8_for_each_pde(pt, pd, pd_start, pd_len, temp2, pde) {
+   gen8_free_page_tables(pd, pd_start, pd_len, dev);
+   }
 
-   for (i = 0; i < ppgtt->num_pd_pages; i++) {
-   gen8_free_page_tables(ppgtt->pdp.pagedir[i], ppgtt->base.dev);
-   free_pd_single(ppgtt->pdp.pagedir[i], ppgtt->base.dev);
+   free_pd_single(pd, dev);
}
 }
 
+static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
+{
+   gen8_teardown_va_range(ppgtt, ppgtt->base.start, ppgtt->base.total);
+}
+
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 {
struct i915_hw_ppgtt *ppgtt =
@@ -537,41 +558,75 @@ static int gen8_ppgtt_allocate_page_tables(struct 
i915_hw_ppgtt *ppgtt)
 
 unwind_out:
while (i--)
-   gen8_free_page_tables(ppgtt->pdp.pagedir[i], ppgtt->base.dev);
+   gen8_free_page_tables(ppgtt->pdp.pagedir[i],
+ i * I915_PDES_PER_PD * GEN8_PTES_PER_PT,
+ (i + 1)* I915_PDES_PER_PD * 
GEN8_PTES_PER_PT,
+ ppgtt->base.dev);
 
return -ENOMEM;
 }
 
 static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
-   const int max_pdp)
+   uint64_t start, uint64_t length)
 {
-   int i;
+   struct i915_pagedir *unused;
+   uint64_t temp;
+   uint32_t pdpe;
 
-   for (i = 0; i < max_pdp; i++) {
-   ppgtt->pdp.pagedir[i] = alloc_pd_single(ppgtt->base.dev);
-   if (IS_ERR(ppgtt->pdp.pagedir[i]))
-   goto unwind_out;
+   gen8_for_each_pdpe(unused, &ppgtt->pdp, start, length, temp, pdpe) {
+   struct i915_pagedir *pd;
+
+   BUG_ON(unused);
+   pd = alloc_pd_single(ppgtt->base.dev);
+   if (!pd)
+   goto pd_fail;
+
+   ppgtt->pdp.pagedir[pdpe] = pd;
+   ppgtt->num_pd_pages++;
}
 
-   ppgtt->num_pd_pages = max_pdp;
BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
 
return 0;
 
-unwind_out:
-   while (i--)
-   free_pd_single(ppgtt->pdp.pagedir[i],
-  ppgtt->base.dev);
+pd_fail:
+   while (pdpe--)
+   free_pd_single(ppgtt->pdp.pagedir[pdpe], ppgtt->base.dev);
 
return -ENOMEM;
 }
 
+static void gen8_alloc_va_range(struct i915_hw_ppgtt *ppgtt,
+   uint64_t start, uint64_t length)
+{
+   struct i915_pagedir *pd;
+   struct i915_pagetab *pt;
+   uint64_t temp, temp2;
+   uint32_t pdpe, pde;
+
+   gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
+   uint64_t pd_start = start;
+   uint64_

[Intel-gfx] [PATCH 14/26] drm/i915: Complete page table structures

2014-03-17 Thread Ben Widawsky
Move the remaining members over to the new page table structures.

This can be squashed with the previous commit if desired. The reasoning
is the same as that patch. I simply felt it is easier to review if split.

Signed-off-by: Ben Widawsky 

Conflicts:
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem_gtt.c
---
 drivers/gpu/drm/i915/i915_debugfs.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c   | 85 +--
 drivers/gpu/drm/i915/i915_gem_gtt.h   | 15 +++
 drivers/gpu/drm/i915/i915_gpu_error.c |  1 -
 4 files changed, 38 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index b226788..5f3666a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1788,7 +1788,7 @@ static void gen8_ppgtt_info(struct seq_file *m, struct 
drm_device *dev, int verb
 static void print_ppgtt(struct seq_file *m, struct i915_hw_ppgtt *ppgtt, const 
char *name)
 {
seq_printf(m, "%s:\n", name);
-   seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd_offset);
+   seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd.pd_offset);
 }
 
 static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev, bool 
verbose)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5b283f2..d91a545 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -223,7 +223,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
int used_pd = ppgtt->num_pd_entries / I915_PDES_PER_PD;
 
for (i = used_pd - 1; i >= 0; i--) {
-   dma_addr_t addr = ppgtt->pd_dma_addr[i];
+   dma_addr_t addr = ppgtt->pdp.pagedir[i].daddr;
ret = gen8_write_pdp(ring, i, addr, synchronous);
if (ret)
return ret;
@@ -341,7 +341,6 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
for (i = 0; i < ppgtt->num_pd_pages; i++) {
gen8_free_page_tables(&ppgtt->pdp.pagedir[i]);
gen8_free_page_directories(&ppgtt->pdp.pagedir[i]);
-   kfree(ppgtt->gen8_pt_dma_addr[i]);
}
 }
 
@@ -353,14 +352,14 @@ static void gen8_ppgtt_dma_unmap_pages(struct 
i915_hw_ppgtt *ppgtt)
for (i = 0; i < ppgtt->num_pd_pages; i++) {
/* TODO: In the future we'll support sparse mappings, so this
 * will have to change. */
-   if (!ppgtt->pd_dma_addr[i])
+   if (!ppgtt->pdp.pagedir[i].daddr)
continue;
 
-   pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
+   pci_unmap_page(hwdev, ppgtt->pdp.pagedir[i].daddr, PAGE_SIZE,
   PCI_DMA_BIDIRECTIONAL);
 
for (j = 0; j < I915_PDES_PER_PD; j++) {
-   dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
+   dma_addr_t addr = 
ppgtt->pdp.pagedir[i].page_tables[j].daddr;
if (addr)
pci_unmap_page(hwdev, addr, PAGE_SIZE,
   PCI_DMA_BIDIRECTIONAL);
@@ -380,31 +379,18 @@ static void gen8_ppgtt_cleanup(struct i915_address_space 
*vm)
gen8_ppgtt_free(ppgtt);
 }
 
-static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
-{
-   int i;
-
-   for (i = 0; i < ppgtt->num_pd_pages; i++) {
-   ppgtt->gen8_pt_dma_addr[i] = kcalloc(I915_PDES_PER_PD,
-sizeof(dma_addr_t),
-GFP_KERNEL);
-   if (!ppgtt->gen8_pt_dma_addr[i])
-   return -ENOMEM;
-   }
-
-   return 0;
-}
-
 static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
 {
int i, j;
 
for (i = 0; i < ppgtt->num_pd_pages; i++) {
+   struct i915_pagedir *pd = &ppgtt->pdp.pagedir[i];
for (j = 0; j < I915_PDES_PER_PD; j++) {
-   struct i915_pagetab *pt = 
&ppgtt->pdp.pagedir[i].page_tables[j];
+   struct i915_pagetab *pt = &pd->page_tables[j];
pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!pt->page)
goto unwind_out;
+
}
}
 
@@ -464,9 +450,7 @@ static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
 
ppgtt->num_pd_entries = max_pdp * I915_PDES_PER_PD;
 
-   ret = gen8_ppgtt_allocate_dma(ppgtt);
-   if (!ret)
-   return ret;
+   return 0;
 
/* TODO: Check this for all cases */
 err_out:
@@ -488,7 +472,7 @@ static int gen8_ppgtt_setup_page_directories(struc

[Intel-gfx] [PATCH 18/26] drm/i915: Always dma map page table allocations

2014-03-17 Thread Ben Widawsky
There is never a case where we don't want to do it. Since we've broken
up the allocations into nice clean helper functions, it's both easy and
obvious to do the dma mapping at the same time.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 78 -
 1 file changed, 17 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 92e03dd..9630109 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -187,20 +187,6 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
pci_unmap_page((dev)->pdev, (pt)->daddr, 4096, PCI_DMA_BIDIRECTIONAL); \
 } while (0);
 
-
-static void dma_unmap_pt_range(struct i915_pagedir *pd,
-  unsigned pde, size_t n,
-  struct drm_device *dev)
-{
-   if (WARN_ON(pde + n > I915_PDES_PER_PD))
-   n = I915_PDES_PER_PD - pde;
-
-   n += pde;
-
-   for (; pde < n; pde++)
-   dma_unmap_pt_single(pd->page_tables[pde], dev);
-}
-
 /**
  * dma_map_pt_single() - Create a dma mapping for a page table
  * @pt:Page table to get a DMA map for
@@ -230,33 +216,12 @@ static int dma_map_pt_single(struct i915_pagetab *pt, 
struct drm_device *dev)
return 0;
 }
 
-static int dma_map_pt_range(struct i915_pagedir *pd,
-   unsigned pde, size_t n,
-   struct drm_device *dev)
-{
-   const int first = pde;
-
-   if (WARN_ON(pde + n > I915_PDES_PER_PD))
-   n = I915_PDES_PER_PD - pde;
-
-   n += pde;
-
-   for (; pde < n; pde++) {
-   int ret;
-   ret = dma_map_pt_single(pd->page_tables[pde], dev);
-   if (ret) {
-   dma_unmap_pt_range(pd, first, pde, dev);
-   return ret;
-   }
-   }
-
-   return 0;
-}
-
-static void free_pt_single(struct i915_pagetab *pt)
+static void free_pt_single(struct i915_pagetab *pt, struct drm_device *dev)
 {
if (WARN_ON(!pt->page))
return;
+
+   dma_unmap_pt_single(pt, dev);
__free_page(pt->page);
kfree(pt);
 }
@@ -264,6 +229,7 @@ static void free_pt_single(struct i915_pagetab *pt)
 static struct i915_pagetab *alloc_pt_single(struct drm_device *dev)
 {
struct i915_pagetab *pt;
+   int ret;
 
pt = kzalloc(sizeof(*pt), GFP_KERNEL);
if (!pt)
@@ -275,6 +241,13 @@ static struct i915_pagetab *alloc_pt_single(struct 
drm_device *dev)
return ERR_PTR(-ENOMEM);
}
 
+   ret = dma_map_pt_single(pt, dev);
+   if (ret) {
+   __free_page(pt->page);
+   kfree(pt);
+   return ERR_PTR(ret);
+   }
+
return pt;
 }
 
@@ -318,7 +291,7 @@ static int alloc_pt_range(struct i915_pagedir *pd, uint16_t 
pde, size_t count,
 
 err_out:
while (i--)
-   free_pt_single(pd->page_tables[i]);
+   free_pt_single(pd->page_tables[i], dev);
return ret;
 }
 
@@ -486,7 +459,7 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
kunmap_atomic(pt_vaddr);
 }
 
-static void gen8_free_page_tables(struct i915_pagedir *pd)
+static void gen8_free_page_tables(struct i915_pagedir *pd, struct drm_device 
*dev)
 {
int i;
 
@@ -494,7 +467,7 @@ static void gen8_free_page_tables(struct i915_pagedir *pd)
return;
 
for (i = 0; i < I915_PDES_PER_PD; i++) {
-   free_pt_single(pd->page_tables[i]);
+   free_pt_single(pd->page_tables[i], dev);
pd->page_tables[i] = NULL;
}
 }
@@ -504,7 +477,7 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
int i;
 
for (i = 0; i < ppgtt->num_pd_pages; i++) {
-   gen8_free_page_tables(ppgtt->pdp.pagedir[i]);
+   gen8_free_page_tables(ppgtt->pdp.pagedir[i], ppgtt->base.dev);
free_pd_single(ppgtt->pdp.pagedir[i]);
}
 }
@@ -561,7 +534,7 @@ static int gen8_ppgtt_allocate_page_tables(struct 
i915_hw_ppgtt *ppgtt)
 
 unwind_out:
while (i--)
-   gen8_free_page_tables(ppgtt->pdp.pagedir[i]);
+   gen8_free_page_tables(ppgtt->pdp.pagedir[i], ppgtt->base.dev);
 
return -ENOMEM;
 }
@@ -659,18 +632,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 
uint64_t size)
 * 2. Create DMA mappings for the page directories and page tables.
 */
for (i = 0; i < max_pdp; i++) {
-   struct i915_pagedir *pd;
ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
if (ret)
goto bail;
-
-   pd = ppgtt->pdp.pagedir[i];
-
-   for (j = 0; j < I915_PD

[Intel-gfx] [PATCH 21/26] drm/i915: Track GEN6 page table usage

2014-03-17 Thread Ben Widawsky
Instead of implementing the full tracking + dynamic allocation, this
patch does a bit less than half of the work, by tracking and warning on
unexpected conditions. The tracking itself follows which PTEs within a
page table are currently being used for objects. The next patch will
modify this to actually allocate the page tables only when necessary.

With the current patch there isn't much in the way of making a gen
agnostic range allocation function. However, in the next patch we'll add
more specificity which makes having separate functions a bit easier to
manage.

Notice that aliasing PPGTT is not managed here. The patch which actually
begins dynamic allocation/teardown explains the reasoning for this.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 170 +---
 drivers/gpu/drm/i915/i915_gem_gtt.h | 117 +++--
 2 files changed, 212 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ad2f2c5..d3c77d1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -53,9 +53,9 @@ bool intel_enable_ppgtt(struct drm_device *dev, bool full)
return HAS_ALIASING_PPGTT(dev);
 }
 
-static void ppgtt_bind_vma(struct i915_vma *vma,
-  enum i915_cache_level cache_level,
-  u32 flags);
+static int ppgtt_bind_vma(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags);
 static void ppgtt_unbind_vma(struct i915_vma *vma);
 static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt);
 
@@ -204,39 +204,71 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
 (px)->page, 0, 4096, \
 PCI_DMA_BIDIRECTIONAL))
 
-static void free_pt_single(struct i915_pagetab *pt, struct drm_device *dev)
+static void __free_pt_single(struct i915_pagetab *pt, struct drm_device *dev,
+int scratch)
 {
+   if (WARN(scratch ^ pt->scratch,
+"Tried to free scratch = %d. Is scratch = %d\n",
+scratch, pt->scratch))
+   return;
+
if (WARN_ON(!pt->page))
return;
 
+   if (!scratch) {
+   const size_t count = INTEL_INFO(dev)->gen >= 8 ?
+   GEN8_PTES_PER_PT : GEN6_PTES_PER_PT;
+   WARN(!bitmap_empty(pt->used_ptes, count),
+"Free page table with %d used pages\n",
+bitmap_weight(pt->used_ptes, count));
+   }
+
i915_dma_unmap_single(pt, dev);
__free_page(pt->page);
+   kfree(pt->used_ptes);
kfree(pt);
 }
 
+#define free_pt_single(pt, dev) \
+   __free_pt_single(pt, dev, false)
+#define free_pt_scratch(pt, dev) \
+   __free_pt_single(pt, dev, true)
+
 static struct i915_pagetab *alloc_pt_single(struct drm_device *dev)
 {
struct i915_pagetab *pt;
-   int ret;
+   const size_t count = INTEL_INFO(dev)->gen >= 8 ?
+   GEN8_PTES_PER_PT : GEN6_PTES_PER_PT;
+   int ret = -ENOMEM;
 
pt = kzalloc(sizeof(*pt), GFP_KERNEL);
if (!pt)
return ERR_PTR(-ENOMEM);
 
+   pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
+   GFP_KERNEL);
+
+   if (!pt->used_ptes)
+   goto fail_bitmap;
+
pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-   if (!pt->page) {
-   kfree(pt);
-   return ERR_PTR(-ENOMEM);
-   }
+   if (!pt->page)
+   goto fail_page;
 
ret = i915_dma_map_px_single(pt, dev);
-   if (ret) {
-   __free_page(pt->page);
-   kfree(pt);
-   return ERR_PTR(ret);
-   }
+   if (ret)
+   goto fail_dma;
 
return pt;
+
+fail_dma:
+   __free_page(pt->page);
+fail_page:
+   kfree(pt->used_ptes);
+fail_bitmap:
+   kfree(pt);
+
+   return ERR_PTR(ret);
 }
 
 /**
@@ -689,15 +721,13 @@ static void gen6_map_single(struct i915_pagedir *pd,
 /* Map all the page tables found in the ppgtt structure to incrementing page
  * directories. */
 static void gen6_map_page_range(struct drm_i915_private *dev_priv,
-   struct i915_pagedir *pd, unsigned pde, size_t n)
+   struct i915_pagedir *pd, uint32_t start, 
uint32_t length)
 {
-   if (WARN_ON(pde + n > I915_PDES_PER_PD))
-   n = I915_PDES_PER_PD - pde;
-
-   n += pde;
+   struct i915_pagetab *pt;
+   uint32_t pde, temp;
 
-   for (; pde < n; pde++)
-   gen6_map_single(pd, pde, pd->page_tables[pde]);
+   gen6_for_each_pde(pt, pd, s

[Intel-gfx] [PATCH 17/26] drm/i915: Clean up pagetable DMA map & unmap

2014-03-17 Thread Ben Widawsky
Map and unmap are common operations across all generations for
pagetables. With a simple helper, we can get a nice net code reduction
as well as simplified complexity.

There is some room for optimization here, for instance with the multiple
page mapping, that can be done in one pci_map operation. In that case
however, the max value we'll ever see there is 512, and so I believe the
simpler code makes this a worthwhile trade-off. Also, the range mapping
functions are placeholders to help transition the code. Eventually,
mapping will only occur during a page allocation which will always be a
discrete operation.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 147 +---
 1 file changed, 85 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 35acccb..92e03dd 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -183,6 +183,76 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
return pte;
 }
 
+#define dma_unmap_pt_single(pt, dev) do { \
+   pci_unmap_page((dev)->pdev, (pt)->daddr, 4096, PCI_DMA_BIDIRECTIONAL); \
+} while (0);
+
+
+static void dma_unmap_pt_range(struct i915_pagedir *pd,
+  unsigned pde, size_t n,
+  struct drm_device *dev)
+{
+   if (WARN_ON(pde + n > I915_PDES_PER_PD))
+   n = I915_PDES_PER_PD - pde;
+
+   n += pde;
+
+   for (; pde < n; pde++)
+   dma_unmap_pt_single(pd->page_tables[pde], dev);
+}
+
+/**
+ * dma_map_pt_single() - Create a dma mapping for a page table
+ * @pt:Page table to get a DMA map for
+ * @dev:   drm device
+ *
+ * Page table allocations are unified across all gens. They always require a
+ * single 4k allocation, as well as a DMA mapping.
+ *
+ * Return: 0 if success.
+ */
+static int dma_map_pt_single(struct i915_pagetab *pt, struct drm_device *dev)
+{
+   struct page *page;
+   dma_addr_t pt_addr;
+   int ret;
+
+   page = pt->page;
+   pt_addr = pci_map_page(dev->pdev, page, 0, 4096,
+  PCI_DMA_BIDIRECTIONAL);
+
+   ret = pci_dma_mapping_error(dev->pdev, pt_addr);
+   if (ret)
+   return ret;
+
+   pt->daddr = pt_addr;
+
+   return 0;
+}
+
+static int dma_map_pt_range(struct i915_pagedir *pd,
+   unsigned pde, size_t n,
+   struct drm_device *dev)
+{
+   const int first = pde;
+
+   if (WARN_ON(pde + n > I915_PDES_PER_PD))
+   n = I915_PDES_PER_PD - pde;
+
+   n += pde;
+
+   for (; pde < n; pde++) {
+   int ret;
+   ret = dma_map_pt_single(pd->page_tables[pde], dev);
+   if (ret) {
+   dma_unmap_pt_range(pd, first, pde, dev);
+   return ret;
+   }
+   }
+
+   return 0;
+}
+
 static void free_pt_single(struct i915_pagetab *pt)
 {
if (WARN_ON(!pt->page))
@@ -191,7 +261,7 @@ static void free_pt_single(struct i915_pagetab *pt)
kfree(pt);
 }
 
-static struct i915_pagetab *alloc_pt_single(void)
+static struct i915_pagetab *alloc_pt_single(struct drm_device *dev)
 {
struct i915_pagetab *pt;
 
@@ -214,6 +284,7 @@ static struct i915_pagetab *alloc_pt_single(void)
  * available to point to the allocated page tables.
  * @pde:   First page directory entry for which we are allocating.
  * @count: Number of pages to allocate.
+ * @devDRM device used for DMA mapping.
  *
  * Allocates multiple page table pages and sets the appropriate entries in the
  * page table structure within the page directory. Function cleans up after
@@ -221,7 +292,8 @@ static struct i915_pagetab *alloc_pt_single(void)
  *
  * Return: 0 if allocation succeeded.
  */
-static int alloc_pt_range(struct i915_pagedir *pd, uint16_t pde, size_t count)
+static int alloc_pt_range(struct i915_pagedir *pd, uint16_t pde, size_t count,
+ struct drm_device *dev)
 {
int i, ret;
 
@@ -231,7 +303,7 @@ static int alloc_pt_range(struct i915_pagedir *pd, uint16_t 
pde, size_t count)
BUG_ON(pde + count > I915_PDES_PER_PD);
 
for (i = pde; i < pde + count; i++) {
-   struct i915_pagetab *pt = alloc_pt_single();
+   struct i915_pagetab *pt = alloc_pt_single(dev);
if (IS_ERR(pt)) {
ret = PTR_ERR(pt);
goto err_out;
@@ -480,7 +552,7 @@ static int gen8_ppgtt_allocate_page_tables(struct 
i915_hw_ppgtt *ppgtt)
 
for (i = 0; i < ppgtt->num_pd_pages; i++) {
ret = alloc_pt_range(ppgtt->pdp.pagedir[i],
-0, I915_PDES_PER_PD);
+0, 

[Intel-gfx] [PATCH 25/26] drm/i915: Print used ppgtt pages for gen6 in debugfs

2014-03-17 Thread Ben Widawsky
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 5f3666a..04d40fa 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1785,10 +1785,26 @@ static void gen8_ppgtt_info(struct seq_file *m, struct 
drm_device *dev, int verb
}
 }
 
+static size_t gen6_ppgtt_count_pt_pages(struct i915_hw_ppgtt *ppgtt)
+{
+   struct i915_pagedir *pd = &ppgtt->pd;
+   struct i915_pagetab **pt = &pd->page_tables[0];
+   size_t cnt = 0;
+   int i;
+
+   for (i = 0; i < ppgtt->num_pd_entries; i++) {
+   if (pt[i] != ppgtt->scratch_pt)
+   cnt++;
+   }
+
+   return cnt;
+}
+
 static void print_ppgtt(struct seq_file *m, struct i915_hw_ppgtt *ppgtt, const 
char *name)
 {
seq_printf(m, "%s:\n", name);
seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd.pd_offset);
+   seq_printf(m, "\tpd pages: %zu\n", gen6_ppgtt_count_pt_pages(ppgtt));
 }
 
 static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev, bool 
verbose)
@@ -1809,6 +1825,8 @@ static void gen6_ppgtt_info(struct seq_file *m, struct 
drm_device *dev, bool ver
seq_printf(m, "PP_DIR_BASE_READ: 0x%08x\n", 
I915_READ(RING_PP_DIR_BASE_READ(ring)));
seq_printf(m, "PP_DIR_DCLV: 0x%08x\n", 
I915_READ(RING_PP_DIR_DCLV(ring)));
}
+   seq_printf(m, "ECOCHK: 0x%08x\n\n", I915_READ(GAM_ECOCHK));
+
if (dev_priv->mm.aliasing_ppgtt) {
struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
 
@@ -1829,7 +1847,6 @@ static void gen6_ppgtt_info(struct seq_file *m, struct 
drm_device *dev, bool ver
if (verbose)
idr_for_each(&file_priv->context_idr, per_file_ctx, m);
}
-   seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
 }
 
 static int i915_ppgtt_info(struct seq_file *m, void *data)
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 19/26] drm/i915: Consolidate dma mappings

2014-03-17 Thread Ben Widawsky
With a little bit of macro magic, and the fact that every page
table/dir/etc. we wish to map will have a page, and daddr member, we can
greatly simplify and reduce code.

The patch introduces an i915_dma_map/unmap which has the same semantics
as pci_map_page, but is 1 line, and doesn't require newlines, or local
variables to make it fit cleanly.

Notice that even the page allocation shares this same attribute. For
now, I am leaving that code untouched because the macro version would be
a bit on the big side - but it's a nice cleanup as well (IMO)

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 56 -
 1 file changed, 18 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9630109..abef33dd 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -183,45 +183,33 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
return pte;
 }
 
-#define dma_unmap_pt_single(pt, dev) do { \
-   pci_unmap_page((dev)->pdev, (pt)->daddr, 4096, PCI_DMA_BIDIRECTIONAL); \
+#define i915_dma_unmap_single(px, dev) do { \
+   pci_unmap_page((dev)->pdev, (px)->daddr, 4096, PCI_DMA_BIDIRECTIONAL); \
 } while (0);
 
 /**
- * dma_map_pt_single() - Create a dma mapping for a page table
- * @pt:Page table to get a DMA map for
+ * i915_dma_map_px_single() - Create a dma mapping for a page table/dir/etc.
+ * @px:Page table/dir/etc to get a DMA map for
  * @dev:   drm device
  *
  * Page table allocations are unified across all gens. They always require a
- * single 4k allocation, as well as a DMA mapping.
+ * single 4k allocation, as well as a DMA mapping. If we keep the structs
+ * symmetric here, the simple macro covers us for every page table type.
  *
  * Return: 0 if success.
  */
-static int dma_map_pt_single(struct i915_pagetab *pt, struct drm_device *dev)
-{
-   struct page *page;
-   dma_addr_t pt_addr;
-   int ret;
-
-   page = pt->page;
-   pt_addr = pci_map_page(dev->pdev, page, 0, 4096,
-  PCI_DMA_BIDIRECTIONAL);
-
-   ret = pci_dma_mapping_error(dev->pdev, pt_addr);
-   if (ret)
-   return ret;
-
-   pt->daddr = pt_addr;
-
-   return 0;
-}
+#define i915_dma_map_px_single(px, dev) \
+   pci_dma_mapping_error((dev)->pdev, \
+ (px)->daddr = pci_map_page((dev)->pdev, \
+(px)->page, 0, 4096, \
+PCI_DMA_BIDIRECTIONAL))
 
 static void free_pt_single(struct i915_pagetab *pt, struct drm_device *dev)
 {
if (WARN_ON(!pt->page))
return;
 
-   dma_unmap_pt_single(pt, dev);
+   i915_dma_unmap_single(pt, dev);
__free_page(pt->page);
kfree(pt);
 }
@@ -241,7 +229,7 @@ static struct i915_pagetab *alloc_pt_single(struct 
drm_device *dev)
return ERR_PTR(-ENOMEM);
}
 
-   ret = dma_map_pt_single(pt, dev);
+   ret = i915_dma_map_px_single(pt, dev);
if (ret) {
__free_page(pt->page);
kfree(pt);
@@ -484,7 +472,7 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
 
 static void gen8_ppgtt_dma_unmap_pages(struct i915_hw_ppgtt *ppgtt)
 {
-   struct pci_dev *hwdev = ppgtt->base.dev->pdev;
+   struct drm_device *dev = ppgtt->base.dev;
int i, j;
 
for (i = 0; i < ppgtt->num_pd_pages; i++) {
@@ -493,16 +481,14 @@ static void gen8_ppgtt_dma_unmap_pages(struct 
i915_hw_ppgtt *ppgtt)
if (!ppgtt->pdp.pagedir[i]->daddr)
continue;
 
-   pci_unmap_page(hwdev, ppgtt->pdp.pagedir[i]->daddr, PAGE_SIZE,
-  PCI_DMA_BIDIRECTIONAL);
+   i915_dma_unmap_single(ppgtt->pdp.pagedir[i], dev);
 
for (j = 0; j < I915_PDES_PER_PD; j++) {
struct i915_pagedir *pd = ppgtt->pdp.pagedir[i];
struct i915_pagetab *pt =  pd->page_tables[j];
dma_addr_t addr = pt->daddr;
if (addr)
-   pci_unmap_page(hwdev, addr, PAGE_SIZE,
-  PCI_DMA_BIDIRECTIONAL);
+   i915_dma_unmap_single(pt, dev);
}
}
 }
@@ -588,19 +574,13 @@ err_out:
 static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
 const int pdpe)
 {
-   dma_addr_t pd_addr;
int ret;
 
-   pd_addr = pci_map_page(ppgtt->base.dev->pdev,
-  ppgtt->pdp.pagedir[pdpe]->page, 0,
-  PAGE_SIZ

[Intel-gfx] [PATCH 05/26] drm/i915: Setup less PPGTT on failed pagedir

2014-03-17 Thread Ben Widawsky
The current code will both potentially print a WARN, and set up part of
the PPGTT structure. Neither of these harms the current code; this change
is simply for clarity, and to perhaps prevent later bugs or weird
debug messages.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 08a1e1c..09556d1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1085,11 +1085,14 @@ alloc:
goto alloc;
}
 
+   if (ret)
+   return ret;
+
if (ppgtt->node.start < dev_priv->gtt.mappable_end)
DRM_DEBUG("Forced to use aperture for PDEs\n");
 
ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
-   return ret;
+   return 0;
 }
 
 static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 09/26] drm/i915: Split out gtt specific header file

2014-03-17 Thread Ben Widawsky
TODO: Do header files need a copyright?

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_drv.h | 162 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c |  57 -
 drivers/gpu/drm/i915/i915_gem_gtt.h | 225 
 3 files changed, 227 insertions(+), 217 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_gtt.h

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 084e82f..b19442c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -44,6 +44,8 @@
 #include 
 #include 
 
+#include "i915_gem_gtt.h"
+
 /* General customization:
  */
 
@@ -572,166 +574,6 @@ enum i915_cache_level {
I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
 };
 
-typedef uint32_t gen6_gtt_pte_t;
-
-/**
- * A VMA represents a GEM BO that is bound into an address space. Therefore, a
- * VMA's presence cannot be guaranteed before binding, or after unbinding the
- * object into/from the address space.
- *
- * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
- * will always be <= an objects lifetime. So object refcounting should cover 
us.
- */
-struct i915_vma {
-   struct drm_mm_node node;
-   struct drm_i915_gem_object *obj;
-   struct i915_address_space *vm;
-
-   /** This object's place on the active/inactive lists */
-   struct list_head mm_list;
-
-   struct list_head vma_link; /* Link in the object's VMA list */
-
-   /** This vma's place in the batchbuffer or on the eviction list */
-   struct list_head exec_list;
-
-   /**
-* Used for performing relocations during execbuffer insertion.
-*/
-   struct hlist_node exec_node;
-   unsigned long exec_handle;
-   struct drm_i915_gem_exec_object2 *exec_entry;
-
-   /**
-* How many users have pinned this object in GTT space. The following
-* users can each hold at most one reference: pwrite/pread, pin_ioctl
-* (via user_pin_count), execbuffer (objects are not allowed multiple
-* times for the same batchbuffer), and the framebuffer code. When
-* switching/pageflipping, the framebuffer code has at most two buffers
-* pinned per crtc.
-*
-* In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
-* bits with absolutely no headroom. So use 4 bits. */
-   unsigned int pin_count:4;
-#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
-
-   /** Unmap an object from an address space. This usually consists of
-* setting the valid PTE entries to a reserved scratch page. */
-   void (*unbind_vma)(struct i915_vma *vma);
-   /* Map an object into an address space with the given cache flags. */
-#define GLOBAL_BIND (1<<0)
-   void (*bind_vma)(struct i915_vma *vma,
-enum i915_cache_level cache_level,
-u32 flags);
-};
-
-struct i915_address_space {
-   struct drm_mm mm;
-   struct drm_device *dev;
-   struct list_head global_link;
-   unsigned long start;/* Start offset always 0 for dri2 */
-   size_t total;   /* size addr space maps (ex. 2GB for ggtt) */
-
-   struct {
-   dma_addr_t addr;
-   struct page *page;
-   } scratch;
-
-   /**
-* List of objects currently involved in rendering.
-*
-* Includes buffers having the contents of their GPU caches
-* flushed, not necessarily primitives.  last_rendering_seqno
-* represents when the rendering involved will be completed.
-*
-* A reference is held on the buffer while on this list.
-*/
-   struct list_head active_list;
-
-   /**
-* LRU list of objects which are not in the ringbuffer and
-* are ready to unbind, but are still in the GTT.
-*
-* last_rendering_seqno is 0 while an object is in this list.
-*
-* A reference is not held on the buffer while on this list,
-* as merely being GTT-bound shouldn't prevent its being
-* freed, and we'll pull it off the list in the free path.
-*/
-   struct list_head inactive_list;
-
-   /* FIXME: Need a more generic return type */
-   gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
-enum i915_cache_level level,
-bool valid); /* Create a valid PTE */
-   void (*clear_range)(struct i915_address_space *vm,
-   uint64_t start,
-   uint64_t length,
-   bool use_scratch);
-   void (*insert_entries)(struct i915_address_space *vm,
-  struct sg_table *st,
-  uint64_t start,
-  enum i915_cache_level cache_level);

[Intel-gfx] [PATCH 10/26] drm/i915: Make gen6_write_pdes gen6_map_page_tables

2014-03-17 Thread Ben Widawsky
Split out single mappings which will help with upcoming work. Also while
here, rename the function because it is a better description - but this
function is going away soon.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 39 ++---
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a239196..d89054d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -655,26 +655,33 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, 
struct seq_file *m)
}
 }
 
-static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
+static void gen6_map_single(struct i915_hw_ppgtt *ppgtt,
+   const unsigned pde_index,
+   dma_addr_t daddr)
 {
struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
-   gen6_gtt_pte_t __iomem *pd_addr;
uint32_t pd_entry;
+   gen6_gtt_pte_t __iomem *pd_addr =
+   (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm + ppgtt->pd_offset / 
sizeof(gen6_gtt_pte_t);
+
+   pd_entry = GEN6_PDE_ADDR_ENCODE(daddr);
+   pd_entry |= GEN6_PDE_VALID;
+
+   writel(pd_entry, pd_addr + pde_index);
+}
+
+/* Map all the page tables found in the ppgtt structure to incrementing page
+ * directories. */
+static void gen6_map_page_tables(struct i915_hw_ppgtt *ppgtt)
+{
+   struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
int i;
 
WARN_ON(ppgtt->pd_offset & 0x3f);
-   pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
-   ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
-   for (i = 0; i < ppgtt->num_pd_entries; i++) {
-   dma_addr_t pt_addr;
-
-   pt_addr = ppgtt->pt_dma_addr[i];
-   pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
-   pd_entry |= GEN6_PDE_VALID;
+   for (i = 0; i < ppgtt->num_pd_entries; i++)
+   gen6_map_single(ppgtt, i, ppgtt->pt_dma_addr[i]);
 
-   writel(pd_entry, pd_addr + i);
-   }
-   readl(pd_addr);
+   readl(dev_priv->gtt.gsm);
 }
 
 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
@@ -1145,7 +1152,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->pd_offset =
ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
 
-   gen6_write_pdes(ppgtt);
+   gen6_map_page_tables(ppgtt);
 
ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
 
@@ -1319,11 +1326,11 @@ void i915_gem_restore_gtt_mappings(struct drm_device 
*dev)
/* TODO: Perhaps it shouldn't be gen6 specific */
if (i915_is_ggtt(vm)) {
if (dev_priv->mm.aliasing_ppgtt)
-   gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
+   
gen6_map_page_tables(dev_priv->mm.aliasing_ppgtt);
continue;
}
 
-   gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
+   gen6_map_page_tables(container_of(vm, struct i915_hw_ppgtt, 
base));
}
 
i915_gem_chipset_flush(dev);
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 04/26] drm/i915: rename map/unmap to dma_map/unmap

2014-03-17 Thread Ben Widawsky
Upcoming patches will use the terms map and unmap in references to the
page table entries. Having this distinction will really help with code
clarity at that point.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b26b186..08a1e1c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -394,7 +394,7 @@ static void gen8_ppgtt_free(const struct i915_hw_ppgtt 
*ppgtt)
__free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << 
PAGE_SHIFT));
 }
 
-static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
+static void gen8_ppgtt_dma_unmap_pages(struct i915_hw_ppgtt *ppgtt)
 {
struct pci_dev *hwdev = ppgtt->base.dev->pdev;
int i, j;
@@ -425,7 +425,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space 
*vm)
list_del(&vm->global_link);
drm_mm_takedown(&vm->mm);
 
-   gen8_ppgtt_unmap_pages(ppgtt);
+   gen8_ppgtt_dma_unmap_pages(ppgtt);
gen8_ppgtt_free(ppgtt);
 }
 
@@ -651,7 +651,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 
uint64_t size)
return 0;
 
 bail:
-   gen8_ppgtt_unmap_pages(ppgtt);
+   gen8_ppgtt_dma_unmap_pages(ppgtt);
gen8_ppgtt_free(ppgtt);
return ret;
 }
@@ -1019,7 +1019,7 @@ static void gen6_ppgtt_insert_entries(struct 
i915_address_space *vm,
kunmap_atomic(pt_vaddr);
 }
 
-static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
+static void gen6_ppgtt_dma_unmap_pages(struct i915_hw_ppgtt *ppgtt)
 {
int i;
 
@@ -1050,7 +1050,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space 
*vm)
drm_mm_takedown(&ppgtt->base.mm);
drm_mm_remove_node(&ppgtt->node);
 
-   gen6_ppgtt_unmap_pages(ppgtt);
+   gen6_ppgtt_dma_unmap_pages(ppgtt);
gen6_ppgtt_free(ppgtt);
 }
 
@@ -1150,7 +1150,7 @@ static int gen6_ppgtt_setup_page_tables(struct 
i915_hw_ppgtt *ppgtt)
   PCI_DMA_BIDIRECTIONAL);
 
if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
-   gen6_ppgtt_unmap_pages(ppgtt);
+   gen6_ppgtt_dma_unmap_pages(ppgtt);
return -EIO;
}
 
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 03/26] drm/i915: s/pd/pdpe, s/pt/pde

2014-03-17 Thread Ben Widawsky
The actual correct way to think about this with the new style of page
table data structures is as the actual entry that is being indexed into
the array. "pd", and "pt" aren't representative of what the operation is
doing.

The clarity here will improve the readability of future patches.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index bd016e2..b26b186 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -537,40 +537,40 @@ static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
 }
 
 static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
-const int pd)
+const int pdpe)
 {
dma_addr_t pd_addr;
int ret;
 
pd_addr = pci_map_page(ppgtt->base.dev->pdev,
-  &ppgtt->pd_pages[pd], 0,
+  &ppgtt->pd_pages[pdpe], 0,
   PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 
ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
if (ret)
return ret;
 
-   ppgtt->pd_dma_addr[pd] = pd_addr;
+   ppgtt->pd_dma_addr[pdpe] = pd_addr;
 
return 0;
 }
 
 static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
-   const int pd,
-   const int pt)
+   const int pdpe,
+   const int pde)
 {
dma_addr_t pt_addr;
struct page *p;
int ret;
 
-   p = ppgtt->gen8_pt_pages[pd][pt];
+   p = ppgtt->gen8_pt_pages[pdpe][pde];
pt_addr = pci_map_page(ppgtt->base.dev->pdev,
   p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
if (ret)
return ret;
 
-   ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;
+   ppgtt->gen8_pt_dma_addr[pdpe][pde] = pt_addr;
 
return 0;
 }
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 20/26] drm/i915: Always dma map page directory allocations

2014-03-17 Thread Ben Widawsky
Similar to the patch a few back in the series, we can always map and
unmap page directories when we do their allocation and teardown. Page
directory pages only exist on gen8+, so this should only effect behavior
on those platforms.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 79 +
 1 file changed, 19 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index abef33dd..ad2f2c5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -283,21 +283,23 @@ err_out:
return ret;
 }
 
-static void __free_pd_single(struct i915_pagedir *pd)
+static void __free_pd_single(struct i915_pagedir *pd, struct drm_device *dev)
 {
+   i915_dma_unmap_single(pd, dev);
__free_page(pd->page);
kfree(pd);
 }
 
-#define free_pd_single(pd) do { \
+#define free_pd_single(pd, dev) do { \
if ((pd)->page) { \
-   __free_pd_single(pd); \
+   __free_pd_single(pd, dev); \
} \
 } while (0)
 
-static struct i915_pagedir *alloc_pd_single(void)
+static struct i915_pagedir *alloc_pd_single(struct drm_device *dev)
 {
struct i915_pagedir *pd;
+   int ret;
 
pd = kzalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
@@ -309,6 +311,13 @@ static struct i915_pagedir *alloc_pd_single(void)
return ERR_PTR(-ENOMEM);
}
 
+   ret = i915_dma_map_px_single(pd, dev);
+   if (ret) {
+   __free_page(pd->page);
+   kfree(pd);
+   return ERR_PTR(ret);
+   }
+
return pd;
 }
 
@@ -466,30 +475,7 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
 
for (i = 0; i < ppgtt->num_pd_pages; i++) {
gen8_free_page_tables(ppgtt->pdp.pagedir[i], ppgtt->base.dev);
-   free_pd_single(ppgtt->pdp.pagedir[i]);
-   }
-}
-
-static void gen8_ppgtt_dma_unmap_pages(struct i915_hw_ppgtt *ppgtt)
-{
-   struct drm_device *dev = ppgtt->base.dev;
-   int i, j;
-
-   for (i = 0; i < ppgtt->num_pd_pages; i++) {
-   /* TODO: In the future we'll support sparse mappings, so this
-* will have to change. */
-   if (!ppgtt->pdp.pagedir[i]->daddr)
-   continue;
-
-   i915_dma_unmap_single(ppgtt->pdp.pagedir[i], dev);
-
-   for (j = 0; j < I915_PDES_PER_PD; j++) {
-   struct i915_pagedir *pd = ppgtt->pdp.pagedir[i];
-   struct i915_pagetab *pt =  pd->page_tables[j];
-   dma_addr_t addr = pt->daddr;
-   if (addr)
-   i915_dma_unmap_single(pt, dev);
-   }
+   free_pd_single(ppgtt->pdp.pagedir[i], ppgtt->base.dev);
}
 }
 
@@ -501,7 +487,6 @@ static void gen8_ppgtt_cleanup(struct i915_address_space 
*vm)
list_del(&vm->global_link);
drm_mm_takedown(&vm->mm);
 
-   gen8_ppgtt_dma_unmap_pages(ppgtt);
gen8_ppgtt_free(ppgtt);
 }
 
@@ -531,7 +516,7 @@ static int gen8_ppgtt_allocate_page_directories(struct 
i915_hw_ppgtt *ppgtt,
int i;
 
for (i = 0; i < max_pdp; i++) {
-   ppgtt->pdp.pagedir[i] = alloc_pd_single();
+   ppgtt->pdp.pagedir[i] = alloc_pd_single(ppgtt->base.dev);
if (IS_ERR(ppgtt->pdp.pagedir[i]))
goto unwind_out;
}
@@ -543,7 +528,8 @@ static int gen8_ppgtt_allocate_page_directories(struct 
i915_hw_ppgtt *ppgtt,
 
 unwind_out:
while (i--)
-   free_pd_single(ppgtt->pdp.pagedir[i]);
+   free_pd_single(ppgtt->pdp.pagedir[i],
+  ppgtt->base.dev);
 
return -ENOMEM;
 }
@@ -571,19 +557,6 @@ err_out:
return ret;
 }
 
-static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
-const int pdpe)
-{
-   int ret;
-
-   ret = i915_dma_map_px_single(ppgtt->pdp.pagedir[pdpe],
-ppgtt->base.dev);
-   if (ret)
-   return ret;
-
-   return 0;
-}
-
 /**
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -609,16 +582,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 
uint64_t size)
return ret;
 
/*
-* 2. Create DMA mappings for the page directories and page tables.
-*/
-   for (i = 0; i < max_pdp; i++) {
-   ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
-   if (ret)
-   goto bail;
-   }
-
-   /*
-* 3. Map all the page directory entires to point to the pag

[Intel-gfx] [PATCH 00/26] [RFCish] GEN7 dynamic page tables

2014-03-17 Thread Ben Widawsky
These patches live here, based on my temporary Broadwell branch:
http://cgit.freedesktop.org/~bwidawsk/drm-intel/log/?h=dynamic_pt_alloc

First, and most importantly, this work should have no impact on current
drm-intel code because PPGTT is currently shut off there. To actually
test this patch series, one must re-enable PPGTT. On a single run of IGT
on IVB, it seems this doesn't introduce any regressions, but y'know, it's
PPGTT, so there's some instability, and it's hard to claim for certain
this doesn't break anything on top. Also, as stated below, the gen8 work
is only partially done.

Before I go too much further with this, I wanted to get eyes on it. I am
really open to any feedback. Before you do request a change though,
please realize that I've gone through several iterations of the
functions/interfaces. So please, spare me some pain and try to think
through what your request is before rattling it off. Daniel has
expressed to me already that he is unwilling to merge certain things
until PPGTT problems are fixed, and that can be enabled by default.
That's okay. In my opinion, many of the patches don't really have any
major behavioral changes, and only make the code so much more readable
and easy to deal with, that I believe merging it would only improve
PPGTT debugging in the future. There are several cleanups in the series
which could also go in relatively harmlessly.

Okay, so what does this do?
The patch series /dynamicizes/ page table allocation and teardown for
GEN7. It also starts to introduce GEN8, but the tricky stuff is still
not done. Up until now, all our page tables are pre-allocated when the
address space is created. That's actually okay for current GENs since we
don't use many address spaces, and the page tables occupy only 2MB each.
However, on GEN8 we can use a deeper page table, and to preallocate such
an address space would be very costly. This work was done for GEN7 first
because this is the most well tested with full PPGTT, and stable
platforms are readily available.

In this patch series, I've demonstrated how we will manage tracking used
page tables (bitmaps), and broken things out into much more discrete
functions. I'm hoping I'll get feedback on the way I've implemented
things (primarily if it seems fundamentally flawed in any way). The real
goal was to prove out the dynamic allocation so we can begin to enable
GEN8 in the same way. I'll emphasize now that I put in a lot of effort
to limit risk with each patch, and this does result in some excess churn.

My next step is to bring GEN8 up to par with GEN7. Once GEN8 is working
and clean, we can find where GEN7 and GEN8 overlap, and then recombine
where I haven't done so already. It's possible this plan will not work
out, and the above 2 steps will end up as one. After that, I plan to
merge the VA range allocation, and teardown into the insert/clear
entries (currently it's two steps). I think both of those steps should
be distinct.

On x86 code overlap:
I spent more time than I would have liked trying to conjoin our
pagetable management with x86 code. In the end I decided not to depend
on any of the x86 definitions (other than PAGE_SIZE) because I found the
maze of conditional compiles and defines a bit too cumbersome.  I also
didn't feel the abstract pagetable topology used in x86 code was
worthwhile given that with about 6 #defines, we achieve the same thing.
We just don't support nearly as many configurations, and our page table
format differs in too many places. One thing I had really considered,
and toyed around with was not having data structures to track the page
tables we've allocated and simply use the one that's in memory (which is
what x86 does). I was not able to make this work because of IOMMU. The
address we write into our page tables is an IOMMU address.  This means
we need to know, or be able to easily derive both the physical address
(or pfn, or struct page), and the DMA address. I failed to accomplish
this. I think using the bitmaps should be a faster way than having to kmap
the pagetables to determine their status anyway. And, one thing to keep
in mind is currently we don't have any GPU faulting capability. This
will greatly limit the ability to map things sparsely, which also will
greatly limit the effective virtual address space we can use.

Ben Widawsky (26):
  drm/i915: Split out verbose PPGTT dumping
  drm/i915: Extract switch to default context
  drm/i915: s/pd/pdpe, s/pt/pde
  drm/i915: rename map/unmap to dma_map/unmap
  drm/i915: Setup less PPGTT on failed pagedir
  drm/i915: Wrap VMA binding
  drm/i915: clean up PPGTT init error path
  drm/i915: Un-hardcode number of page directories
  drm/i915: Split out gtt specific header file
  drm/i915: Make gen6_write_pdes gen6_map_page_tables
  drm/i915: Range clearing is PPGTT agnostic
  drm/i915: Page table helpers, and define renames
  drm/i91

[Intel-gfx] [PATCH 07/26] drm/i915: clean up PPGTT init error path

2014-03-17 Thread Ben Widawsky
The old code (I'm having trouble finding the commit) had a reason for
doing things when there was an error, and would continue on, thus the
!ret. For the newer code however, this looks completely silly.

Follow the normal idiom of if (ret) return ret.

Also, put the pde wiring in the gen specific init, now that GEN8 exists.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 22 +-
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1620211..5f73284 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1202,6 +1202,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->pd_offset =
ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
 
+   gen6_write_pdes(ppgtt);
+
ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
 
DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
@@ -1226,20 +1228,14 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct 
i915_hw_ppgtt *ppgtt)
else
BUG();
 
-   if (!ret) {
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   kref_init(&ppgtt->ref);
-   drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
-   ppgtt->base.total);
-   i915_init_vm(dev_priv, &ppgtt->base);
-   if (INTEL_INFO(dev)->gen < 8) {
-   gen6_write_pdes(ppgtt);
-   DRM_DEBUG("Adding PPGTT at offset %x\n",
- ppgtt->pd_offset << 10);
-   }
-   }
+   if (ret)
+   return ret;
 
-   return ret;
+   kref_init(&ppgtt->ref);
+   drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total);
+   i915_init_vm(dev_priv, &ppgtt->base);
+
+   return 0;
 }
 
 static void
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 15/26] drm/i915: Create page table allocators

2014-03-17 Thread Ben Widawsky
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if we do things less coarsely by
breaking up all of our actions into individual tasks.  This makes the
code easier to write, read, and verify.

Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms.

The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain non-trivial complexity as a
result.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 226 +++-
 drivers/gpu/drm/i915/i915_gem_gtt.h |   4 +-
 2 files changed, 147 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d91a545..5c08cf9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -183,6 +183,102 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
return pte;
 }
 
+static void free_pt_single(struct i915_pagetab *pt)
+{
+   if (WARN_ON(!pt->page))
+   return;
+   __free_page(pt->page);
+   kfree(pt);
+}
+
+static struct i915_pagetab *alloc_pt_single(void)
+{
+   struct i915_pagetab *pt;
+
+   pt = kzalloc(sizeof(*pt), GFP_KERNEL);
+   if (!pt)
+   return ERR_PTR(-ENOMEM);
+
+   pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+   if (!pt->page) {
+   kfree(pt);
+   return ERR_PTR(-ENOMEM);
+   }
+
+   return pt;
+}
+
+/**
+ * alloc_pt_range() - Allocate a multiple page tables
+ * @pd:The page directory which will have at least @count 
entries
+ * available to point to the allocated page tables.
+ * @pde:   First page directory entry for which we are allocating.
+ * @count: Number of pages to allocate.
+ *
+ * Allocates multiple page table pages and sets the appropriate entries in the
+ * page table structure within the page directory. Function cleans up after
+ * itself on any failures.
+ *
+ * Return: 0 if allocation succeeded.
+ */
+static int alloc_pt_range(struct i915_pagedir *pd, uint16_t pde, size_t count)
+{
+   int i, ret;
+
+   /* 512 is the max page tables per pagedir on any platform.
+* TODO: make WARN after patch series is done
+*/
+   BUG_ON(pde + count > I915_PDES_PER_PD);
+
+   for (i = pde; i < pde + count; i++) {
+   struct i915_pagetab *pt = alloc_pt_single();
+   if (IS_ERR(pt)) {
+   ret = PTR_ERR(pt);
+   goto err_out;
+   }
+   WARN(pd->page_tables[i],
+"Leaking page directory entry %d (%pa)\n",
+i, pd->page_tables[i]);
+   pd->page_tables[i] = pt;
+   }
+
+   return 0;
+
+err_out:
+   while (i--)
+   free_pt_single(pd->page_tables[i]);
+   return ret;
+}
+
+static void __free_pd_single(struct i915_pagedir *pd)
+{
+   __free_page(pd->page);
+   kfree(pd);
+}
+
+#define free_pd_single(pd) do { \
+   if ((pd)->page) { \
+   __free_pd_single(pd); \
+   } \
+} while (0)
+
+static struct i915_pagedir *alloc_pd_single(void)
+{
+   struct i915_pagedir *pd;
+
+   pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+   if (!pd)
+   return ERR_PTR(-ENOMEM);
+
+   pd->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+   if (!pd->page) {
+   kfree(pd);
+   return ERR_PTR(-ENOMEM);
+   }
+
+   return pd;
+}
+
 /* Broadwell Page Directory Pointer Descriptors */
 static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
   uint64_t val, bool synchronous)
@@ -223,7 +319,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
int used_pd = ppgtt->num_pd_entries / I915_PDES_PER_PD;
 
for (i = used_pd - 1; i >= 0; i--) {
-   dma_addr_t addr = ppgtt->pdp.pagedir[i].daddr;
+   dma_addr_t addr = ppgtt->pdp.pagedir[i]->daddr;
ret = gen8_write_pdp(ring, i, addr, synchronous);
if (ret)
return ret;
@@ -250,8 +346,9 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
  I915_CACHE_LLC, use_scratch);
 
while (num_entries) {
-   struct i915_pagedir *pd = &ppgtt->pdp.pagedir[pdpe];
-   struct page *page_table = pd->page_tables[pde].page;
+   struct i915_pagedir *pd = ppgtt->pdp.pagedir[pdpe];
+   struct i915_pagetab *pt = pd->page_tables[pde];
+

[Intel-gfx] [PATCH 12/26] drm/i915: Page table helpers, and define renames

2014-03-17 Thread Ben Widawsky
These page table helpers make the code much cleaner. There is some
room to use the arch/x86 header files. The reason I've opted not to is
in several cases, the definitions are dictated by the CONFIG_ options
which do not always indicate the restrictions in the GPU. While here,
clean up the defines to have more concise names, and consolidate between
gen6 and gen8 where appropriate.

I've made a lot of tiny errors in these helpers. Often I'd correct an
error only to introduce another one. While IGT was capable of catching
them, the tests often took a while to catch, and were hard/slow to
debug in the kernel. As a result, to test this, I compiled
i915_gem_gtt.h in userspace, and ran tests from userspace. What follows
isn't by any means complete, but it was able to catch a lot of bugs. Gen8
is also untested, but since the current code is almost identical, I feel
pretty comfortable with that.

void test_pte(uint32_t base) {
uint32_t ret;
assert_pte_index((base + 0), 0);
assert_pte_index((base + 1), 0);
assert_pte_index((base + 0x1000), 1);
assert_pte_index((base + (1<<22)), 0);
assert_pte_index((base + ((1<<22) - 1)), 1023);
assert_pte_index((base + (1<<21)), 512);

assert_pte_count(base + 0, 0, 0);
assert_pte_count(base + 0, 1, 1);
assert_pte_count(base + 0, 0x1000, 1);
assert_pte_count(base + 0, 0x1001, 2);
assert_pte_count(base + 0, 1<<21, 512);

assert_pte_count(base + 0, 1<<22, 1024);
assert_pte_count(base + 0, (1<<22) - 1, 1024);
assert_pte_count(base + (1<<21), 1<<22, 512);
assert_pte_count(base + (1<<21), (1<<22)+1, 512);
assert_pte_count(base + (1<<21), 10<<22, 512);
}

void test_pde(uint32_t base) {
assert(gen6_pde_index(base + 0) == 0);
assert(gen6_pde_index(base + 1) == 0);
assert(gen6_pde_index(base + (1<<21)) == 0);
assert(gen6_pde_index(base + (1<<22)) == 1);
assert(gen6_pde_index(base + ((256<<22)))== 256);
assert(gen6_pde_index(base + ((512<<22))) == 0);
assert(gen6_pde_index(base + ((513<<22))) == 1); /* This is
actually not possible on gen6 */

assert(gen6_pde_count(base + 0, 0) == 0);
assert(gen6_pde_count(base + 0, 1) == 1);
assert(gen6_pde_count(base + 0, 1<<21) == 1);
assert(gen6_pde_count(base + 0, 1<<22) == 1);
assert(gen6_pde_count(base + 0, (1<<22) + 0x1000) == 2);
assert(gen6_pde_count(base + 0x1000, 1<<22) == 2);
assert(gen6_pde_count(base + 0, 511<<22) == 511);
assert(gen6_pde_count(base + 0, 512<<22) == 512);
assert(gen6_pde_count(base + 0x1000, 512<<22) == 512);
assert(gen6_pde_count(base + (1<<22), 512<<22) == 511);
}

int main()
{
test_pde(0);
while (1)
test_pte(rand() & ~((1<<22) - 1));

return 0;
}

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c |  90 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h | 125 ++--
 2 files changed, 162 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 77556ac..7afa5f4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -220,7 +220,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
int i, ret;
 
/* bit of a hack to find the actual last used pd */
-   int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
+   int used_pd = ppgtt->num_pd_entries / I915_PDES_PER_PD;
 
for (i = used_pd - 1; i >= 0; i--) {
dma_addr_t addr = ppgtt->pd_dma_addr[i];
@@ -240,9 +240,9 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
gen8_gtt_pte_t *pt_vaddr, scratch_pte;
-   unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
-   unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
-   unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+   unsigned pdpe = gen8_pdpe_index(start);
+   unsigned pde = gen8_pde_index(start);
+   unsigned pte = gen8_pte_index(start);
unsigned num_entries = length >> PAGE_SHIFT;
unsigned last_pte, i;
 
@@ -253,8 +253,8 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];
 
last_pte = pte + num_entries;
-   if (last_pte > GEN8_PTES_PER_PAGE)
-   last_pte = GEN8_PTES_PER_PAGE;
+   if (last_pte > GEN8_PTES_PER_PT)
+  

[Intel-gfx] [PATCH 13/26] drm/i915: construct page table abstractions

2014-03-17 Thread Ben Widawsky
Thus far we've opted to make complex code requiring difficult review. In
the future, the code is only going to become more complex, and as such
we'll take the hit now and start to encapsulate things.

To help transition the code nicely there is some wasted space in gen6/7.
This will be ameliorated shortly.

NOTE: The pun in the subject was intentional.

Signed-off-by: Ben Widawsky 

Conflicts:
drivers/gpu/drm/i915/i915_drv.h
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 175 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.h |  24 +++--
 2 files changed, 104 insertions(+), 95 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7afa5f4..5b283f2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -250,7 +250,8 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
  I915_CACHE_LLC, use_scratch);
 
while (num_entries) {
-   struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];
+   struct i915_pagedir *pd = &ppgtt->pdp.pagedir[pdpe];
+   struct page *page_table = pd->page_tables[pde].page;
 
last_pte = pte + num_entries;
if (last_pte > GEN8_PTES_PER_PT)
@@ -292,8 +293,11 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
break;
 
-   if (pt_vaddr == NULL)
-   pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]);
+   if (pt_vaddr == NULL) {
+   struct i915_pagedir *pd = &ppgtt->pdp.pagedir[pdpe];
+   struct page *page_table = pd->page_tables[pde].page;
+   pt_vaddr = kmap_atomic(page_table);
+   }
 
pt_vaddr[pte] =
gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
@@ -312,29 +316,33 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
kunmap_atomic(pt_vaddr);
 }
 
-static void gen8_free_page_tables(struct page **pt_pages)
+static void gen8_free_page_tables(struct i915_pagedir *pd)
 {
int i;
 
-   if (pt_pages == NULL)
+   if (pd->page_tables == NULL)
return;
 
for (i = 0; i < I915_PDES_PER_PD; i++)
-   if (pt_pages[i])
-   __free_pages(pt_pages[i], 0);
+   if (pd->page_tables[i].page)
+   __free_page(pd->page_tables[i].page);
+}
+
+static void gen8_free_page_directories(struct i915_pagedir *pd)
+{
+   kfree(pd->page_tables);
+   __free_page(pd->page);
 }
 
-static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
+static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
 {
int i;
 
for (i = 0; i < ppgtt->num_pd_pages; i++) {
-   gen8_free_page_tables(ppgtt->gen8_pt_pages[i]);
-   kfree(ppgtt->gen8_pt_pages[i]);
+   gen8_free_page_tables(&ppgtt->pdp.pagedir[i]);
+   gen8_free_page_directories(&ppgtt->pdp.pagedir[i]);
kfree(ppgtt->gen8_pt_dma_addr[i]);
}
-
-   __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << 
PAGE_SHIFT));
 }
 
 static void gen8_ppgtt_dma_unmap_pages(struct i915_hw_ppgtt *ppgtt)
@@ -372,87 +380,73 @@ static void gen8_ppgtt_cleanup(struct i915_address_space 
*vm)
gen8_ppgtt_free(ppgtt);
 }
 
-static struct page **__gen8_alloc_page_tables(void)
+static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
 {
-   struct page **pt_pages;
int i;
 
-   pt_pages = kcalloc(I915_PDES_PER_PD, sizeof(struct page *), GFP_KERNEL);
-   if (!pt_pages)
-   return ERR_PTR(-ENOMEM);
-
-   for (i = 0; i < I915_PDES_PER_PD; i++) {
-   pt_pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
-   if (!pt_pages[i])
-   goto bail;
+   for (i = 0; i < ppgtt->num_pd_pages; i++) {
+   ppgtt->gen8_pt_dma_addr[i] = kcalloc(I915_PDES_PER_PD,
+sizeof(dma_addr_t),
+GFP_KERNEL);
+   if (!ppgtt->gen8_pt_dma_addr[i])
+   return -ENOMEM;
}
 
-   return pt_pages;
-
-bail:
-   gen8_free_page_tables(pt_pages);
-   kfree(pt_pages);
-   return ERR_PTR(-ENOMEM);
+   return 0;
 }
 
-static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt,
-  const int max_pdp)
+static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
 {
-   struct page **pt_pages[GEN8_LEGACY_PDPS];
-   int i, ret;
+   int i, j;
 
-   for (i 

[Intel-gfx] [PATCH 06/26] drm/i915: Wrap VMA binding

2014-03-17 Thread Ben Widawsky
This will be useful for some upcoming patches which do more platform
specific work. Having it in one central place just makes things a bit
cleaner and easier.

There is a small functional change here. There are more calls to the
tracepoints.

NOTE: I didn't actually end up using this patch for the intended purpose, but I
thought it was a nice patch to keep around.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_drv.h|  3 +++
 drivers/gpu/drm/i915/i915_gem.c|  8 
 drivers/gpu/drm/i915/i915_gem_context.c|  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  5 +++--
 drivers/gpu/drm/i915/i915_gem_gtt.c| 16 ++--
 5 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c59b707..b3e31fd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2408,6 +2408,9 @@ bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
struct i915_address_space *vm);
 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
struct i915_address_space *vm);
+void i915_gem_bind_vma(struct i915_vma *vma, enum i915_cache_level,
+  unsigned flags);
+void i915_gem_unbind_vma(struct i915_vma *vma);
 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 struct i915_address_space *vm);
 struct i915_vma *
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ed09dda..0a3f4ac 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2765,7 +2765,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 
trace_i915_vma_unbind(vma);
 
-   vma->unbind_vma(vma);
+   i915_gem_unbind_vma(vma);
 
i915_gem_gtt_finish_object(obj);
 
@@ -3514,8 +3514,8 @@ int i915_gem_object_set_cache_level(struct 
drm_i915_gem_object *obj,
 
list_for_each_entry(vma, &obj->vma_list, vma_link)
if (drm_mm_node_allocated(&vma->node))
-   vma->bind_vma(vma, cache_level,
- obj->has_global_gtt_mapping ? 
GLOBAL_BIND : 0);
+   i915_gem_bind_vma(vma, cache_level,
+ obj->has_global_gtt_mapping ? 
GLOBAL_BIND : 0);
}
 
list_for_each_entry(vma, &obj->vma_list, vma_link)
@@ -3878,7 +3878,7 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
}
 
if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping)
-   vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
+   i915_gem_bind_vma(vma, obj->cache_level, GLOBAL_BIND);
 
vma->pin_count++;
if (flags & PIN_MAPPABLE)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 7dfdc02..f918f2c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -693,7 +693,7 @@ static int do_switch(struct intel_ring_buffer *ring,
if (!to->obj->has_global_gtt_mapping) {
struct i915_vma *vma = i915_gem_obj_to_vma(to->obj,
   &dev_priv->gtt.base);
-   vma->bind_vma(vma, to->obj->cache_level, GLOBAL_BIND);
+   i915_gem_bind_vma(vma, to->obj->cache_level, GLOBAL_BIND);
}
 
if (!to->is_initialized || i915_gem_context_is_default(to))
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3851a1b..856fa9d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -369,7 +369,8 @@ i915_gem_execbuffer_relocate_entry(struct 
drm_i915_gem_object *obj,
struct i915_vma *vma =
list_first_entry(&target_i915_obj->vma_list,
 typeof(*vma), vma_link);
-   vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND);
+   i915_gem_bind_vma(vma, target_i915_obj->cache_level,
+ GLOBAL_BIND);
}
 
/* Validate that the target is in a valid r/w GPU domain */
@@ -1209,7 +1210,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 * allocate space first */
struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
BUG_ON(!vma);
-   vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
+   i915_gem_bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
}
 
if (flags & I915_DISPATCH_SECURE)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem

[Intel-gfx] [PATCH 02/26] drm/i915: Extract switch to default context

2014-03-17 Thread Ben Widawsky
This patch existed for another reason which no longer exists. I liked
it, so I kept it in the series. It can be skipped if undesirable.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_drv.h | 2 ++
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 35f9a37..c59b707 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2476,6 +2476,8 @@ int i915_gem_context_enable(struct drm_i915_private 
*dev_priv);
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
 int i915_switch_context(struct intel_ring_buffer *ring,
struct drm_file *file, struct i915_hw_context *to);
+#define i915_switch_to_default(ring) \
+   i915_switch_context(ring, NULL, ring->default_context)
 struct i915_hw_context *
 i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
 void i915_gem_context_free(struct kref *ctx_ref);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b2565d2..ed09dda 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2799,7 +2799,7 @@ int i915_gpu_idle(struct drm_device *dev)
 
/* Flush everything onto the inactive list. */
for_each_ring(ring, dev_priv, i) {
-   ret = i915_switch_context(ring, NULL, ring->default_context);
+   ret = i915_switch_to_default(ring);
if (ret)
return ret;
 
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 08/26] drm/i915: Un-hardcode number of page directories

2014-03-17 Thread Ben Widawsky
trivial.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_drv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b3e31fd..084e82f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -722,7 +722,7 @@ struct i915_hw_ppgtt {
};
union {
dma_addr_t *pt_dma_addr;
-   dma_addr_t *gen8_pt_dma_addr[4];
+   dma_addr_t *gen8_pt_dma_addr[GEN8_LEGACY_PDPS];
};
 
int (*enable)(struct i915_hw_ppgtt *ppgtt);
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 01/26] drm/i915: Split out verbose PPGTT dumping

2014-03-17 Thread Ben Widawsky
There often is not enough memory to dump the full contents of the PPGTT.
As a temporary bandage, to continue getting valuable basic PPGTT info,
wrap the dangerous, memory hungry part inside of a new verbose version
of the debugfs file.

Also while here we can split out the ppgtt print function so it's more
reusable.

I'd really like to get ppgtt info into our error state, but I found it too
difficult to make work in the limited time I have. Maybe Mika can find a way.

Cc: Mika Kuoppala 
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 28 ++--
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 1031c43..b226788 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1760,7 +1760,7 @@ static int per_file_ctx(int id, void *ptr, void *data)
return 0;
 }
 
-static void gen8_ppgtt_info(struct seq_file *m, struct drm_device *dev)
+static void gen8_ppgtt_info(struct seq_file *m, struct drm_device *dev, int 
verbose)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_ring_buffer *ring;
@@ -1785,7 +1785,13 @@ static void gen8_ppgtt_info(struct seq_file *m, struct 
drm_device *dev)
}
 }
 
-static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
+static void print_ppgtt(struct seq_file *m, struct i915_hw_ppgtt *ppgtt, const 
char *name)
+{
+   seq_printf(m, "%s:\n", name);
+   seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd_offset);
+}
+
+static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev, bool 
verbose)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_ring_buffer *ring;
@@ -1806,10 +1812,9 @@ static void gen6_ppgtt_info(struct seq_file *m, struct 
drm_device *dev)
if (dev_priv->mm.aliasing_ppgtt) {
struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
 
-   seq_puts(m, "aliasing PPGTT:\n");
-   seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd_offset);
-
-   ppgtt->debug_dump(ppgtt, m);
+   print_ppgtt(m, ppgtt, "Aliasing PPGTT");
+   if (verbose)
+   ppgtt->debug_dump(ppgtt, m);
} else
return;
 
@@ -1820,8 +1825,9 @@ static void gen6_ppgtt_info(struct seq_file *m, struct 
drm_device *dev)
pvt_ppgtt = ctx_to_ppgtt(file_priv->private_default_ctx);
seq_printf(m, "proc: %s\n",
   get_pid_task(file->pid, PIDTYPE_PID)->comm);
-   seq_puts(m, "  default context:\n");
-   idr_for_each(&file_priv->context_idr, per_file_ctx, m);
+   print_ppgtt(m, pvt_ppgtt, "Default context");
+   if (verbose)
+   idr_for_each(&file_priv->context_idr, per_file_ctx, m);
}
seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
 }
@@ -1831,6 +1837,7 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
+   bool verbose = node->info_ent->data ? true : false;
 
int ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
@@ -1838,9 +1845,9 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
intel_runtime_pm_get(dev_priv);
 
if (INTEL_INFO(dev)->gen >= 8)
-   gen8_ppgtt_info(m, dev);
+   gen8_ppgtt_info(m, dev, verbose);
else if (INTEL_INFO(dev)->gen >= 6)
-   gen6_ppgtt_info(m, dev);
+   gen6_ppgtt_info(m, dev, verbose);
 
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
@@ -3826,6 +3833,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0},
{"i915_swizzle_info", i915_swizzle_info, 0},
{"i915_ppgtt_info", i915_ppgtt_info, 0},
+   {"i915_ppgtt_verbose_info", i915_ppgtt_info, 0, (void *)1},
{"i915_dpio", i915_dpio_info, 0},
{"i915_llc", i915_llc, 0},
{"i915_edp_psr_status", i915_edp_psr_status, 0},
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 11/26] drm/i915: Range clearing is PPGTT agnostic

2014-03-17 Thread Ben Widawsky
Therefore we can do it from our general init function. Eventually, I
hope to have a lot more commonality like this. It won't arrive yet, but
this was a nice easy one.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d89054d..77556ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -584,8 +584,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 
uint64_t size)
ppgtt->base.start = 0;
ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * 
PAGE_SIZE;
 
-   ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
-
DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d 
wasted)\n",
 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
@@ -1154,8 +1152,6 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
gen6_map_page_tables(ppgtt);
 
-   ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
-
DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
 ppgtt->node.size >> 20,
 ppgtt->node.start / PAGE_SIZE);
@@ -1183,6 +1179,7 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct 
i915_hw_ppgtt *ppgtt)
 
kref_init(&ppgtt->ref);
drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total);
+   ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
i915_init_vm(dev_priv, &ppgtt->base);
 
return 0;
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Don't del_timer_sync uninitialized timer

2014-03-15 Thread Ben Widawsky
On Sat, Mar 15, 2014 at 03:20:23PM +, Chris Wilson wrote:
> On Sat, Mar 15, 2014 at 12:47:22PM +0100, Daniel Vetter wrote:
> > On Fri, Mar 14, 2014 at 05:21:36PM -0700, Ben Widawsky wrote:
> > > Broken by:
> > > commit 0294ae7b44bba7ab0d4cef9a8736287f38bdb4fd
> > > Author: Chris Wilson 
> > > Date:   Thu Mar 13 12:00:29 2014 +
> > > 
> > > drm/i915: Consolidate forcewake resetting to a single function
> > > 
> > > Cc: Chris Wilson 
> > > Cc: Mika Kuoppala 
> > > Signed-off-by: Ben Widawsky 
> > > ---
> > >  drivers/gpu/drm/i915/intel_uncore.c | 6 +++---
> > >  1 file changed, 3 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
> > > b/drivers/gpu/drm/i915/intel_uncore.c
> > > index e6bb421..7e55ceb 100644
> > > --- a/drivers/gpu/drm/i915/intel_uncore.c
> > > +++ b/drivers/gpu/drm/i915/intel_uncore.c
> > > @@ -362,6 +362,9 @@ void intel_uncore_early_sanitize(struct drm_device 
> > > *dev)
> > >  {
> > >   struct drm_i915_private *dev_priv = dev->dev_private;
> > >  
> > > + setup_timer(&dev_priv->uncore.force_wake_timer,
> > > + gen6_force_wake_timer, (unsigned long)dev_priv);
> > 
> > We call early_sanitize also from our resume code, so this will now
> > re-setup the timer again. We generally don't do that since if we ever leak
> > the timer to here in an enabled state it causes havoc.
> 
> Gah, really? intel_uncore_early_init()! There must be a clean way to
> break this up.
> -Chris

At least in the code base I was looking at, we currently do this also,
so I didn't think this was any worse.

With lockdep turned on, the module will not even load, so please either
revert the original, or merge this.

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Don't del_timer_sync uninitialized timer

2014-03-15 Thread Ben Widawsky
Broken by:
commit 0294ae7b44bba7ab0d4cef9a8736287f38bdb4fd
Author: Chris Wilson 
Date:   Thu Mar 13 12:00:29 2014 +

drm/i915: Consolidate forcewake resetting to a single function

Cc: Chris Wilson 
Cc: Mika Kuoppala 
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/intel_uncore.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index e6bb421..7e55ceb 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -362,6 +362,9 @@ void intel_uncore_early_sanitize(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
 
+   setup_timer(&dev_priv->uncore.force_wake_timer,
+   gen6_force_wake_timer, (unsigned long)dev_priv);
+
if (HAS_FPGA_DBG_UNCLAIMED(dev))
__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
 
@@ -724,9 +727,6 @@ void intel_uncore_init(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
 
-   setup_timer(&dev_priv->uncore.force_wake_timer,
-   gen6_force_wake_timer, (unsigned long)dev_priv);
-
if (IS_VALLEYVIEW(dev)) {
dev_priv->uncore.funcs.force_wake_get = __vlv_force_wake_get;
dev_priv->uncore.funcs.force_wake_put = __vlv_force_wake_put;
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] intel_audio_dump: fix CTS/M value index

2014-03-15 Thread Ben Widawsky
On Fri, Mar 14, 2014 at 09:44:03AM +0800, Xiang, Haihao wrote:
> On Thu, 2014-03-13 at 16:38 -0400, mengdong@intel.com wrote: 
> > From: Mengdong Lin 
> > 
> > This patch fixes the reversed CTS/M value index when dumping the
> > 'audio M/CTS programming enable' register.
> > 
> > Signed-off-by: Mengdong Lin 
> > 
> > diff --git a/tools/intel_audio_dump.c b/tools/intel_audio_dump.c
> > index 46eebdb..3ed2918 100644
> > --- a/tools/intel_audio_dump.c
> > +++ b/tools/intel_audio_dump.c
> > @@ -97,6 +97,11 @@ static int get_num_pipes(void)
> > return num_pipes;
> >  }
> >  
> > +static const char * const cts_m_value_index[] = {
> > +   [0] = "CTS",
> > +   [1] = "M",
> > +};
> > +
> >  static const char * const pixel_clock[] = {
> > [0] = "25.2 / 1.001 MHz",
> > [1] = "25.2 MHz",
> > @@ -1408,7 +1413,8 @@ static void dump_aud_m_cts_enable(int index)
> >  
> > printf("%s  CTS_programming\t\t\t%#lx\n",prefix, BITS(dword, 
> > 19, 0));
> > printf("%s  Enable_CTS_or_M_programming\t%lu\n", prefix, BIT(dword, 
> > 20));
> > -   printf("%s  CTS_M value Index\t\t\t%s\n",prefix, BIT(dword, 21) 
> > ? "CTS" : "M");
> > +   printf("%s  CTS_M value Index\t\t\t[0x%lx] %s\n",prefix, BIT(dword, 21),
> > +   OPNAME(cts_m_value_index, BIT(dword, 21)));
> >  }
> >  
> >  static void dump_aud_power_state(void)
> 
> It is OK for me.
> Reviewed-by: Haihao Xiang 

Pushed. Thanks.

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915: Add FIXME for bdw semaphore detection in hangcheck

2014-03-15 Thread Ben Widawsky
On Sat, Mar 15, 2014 at 12:08:56AM +0100, Daniel Vetter wrote:
> Currently not an issue since we don't emit semaphores, but better
> not forget about those.
> 
> As a little prep work extract the ipehr decoding for cleaner control
> flow. And apply a bit of polish.
> 
> Cc: Ben Widawsky 
> Signed-off-by: Daniel Vetter 

Reviewed-by: Ben Widawsky 

I've made a note to add the relevant BDW patch already.

one comment below.
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 20 ++--
>  drivers/gpu/drm/i915/i915_reg.h |  3 ++-
>  2 files changed, 20 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 473372a6c97d..0f3a6d791502 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2525,6 +2525,23 @@ ring_idle(struct intel_ring_buffer *ring, u32 seqno)
>   i915_seqno_passed(seqno, ring_last_seqno(ring)));
>  }
>  
> +static bool
> +ipehr_is_semaphore_wait(struct drm_device *dev, u32 ipehr)
> +{
> + if (INTEL_INFO(dev)->gen >= 8) {
> + /*
> +  * FIXME: gen8 semaphore support - currently we don't emit
> +  * semaphores on bdw anyway, but this needs to be addressed when
> +  * we merge that code.
> +  */
> + return false;
> + } else {

If you wanted to be paranoid:
WARN_ON((ipehr & MI_SEMAPHORE_SYNC_MASK) == MI_SEMAPHORE_SYNC_INVALID)

> + ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
> + return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
> +  MI_SEMAPHORE_REGISTER);
> + }
> +}
> +
>  static struct intel_ring_buffer *
>  semaphore_waits_for(struct intel_ring_buffer *ring, u32 *seqno)
>  {
> @@ -2533,8 +2550,7 @@ semaphore_waits_for(struct intel_ring_buffer *ring, u32 
> *seqno)
>   int i;
>  
>   ipehr = I915_READ(RING_IPEHR(ring->mmio_base));
> - if ((ipehr & ~(0x3 << 16)) !=
> - (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER))
> + if (!ipehr_is_semaphore_wait(ring->dev, ipehr))
>   return NULL;
>  
>   /*
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 146609ab42bb..23267859156f 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -244,7 +244,8 @@
>  #define   MI_SEMAPHORE_SYNC_BVE  (0<<16) /* VECS wait for BCS  
> (VEBSYNC) */
>  #define   MI_SEMAPHORE_SYNC_VVE  (1<<16) /* VECS wait for VCS  
> (VEVSYNC) */
>  #define   MI_SEMAPHORE_SYNC_RVE  (2<<16) /* VECS wait for RCS  
> (VERSYNC) */
> -#define   MI_SEMAPHORE_SYNC_INVALID  (3<<16)
> +#define   MI_SEMAPHORE_SYNC_INVALID (3<<16)
> +#define   MI_SEMAPHORE_SYNC_MASK(3<<16)
>  #define MI_SET_CONTEXT   MI_INSTR(0x18, 0)
>  #define   MI_MM_SPACE_GTT(1<<8)
>  #define   MI_MM_SPACE_PHYSICAL   (0<<8)
> -- 
> 1.8.1.4
> 

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Actually capture PP_DIR_BASE on error

2014-03-15 Thread Ben Widawsky
I have been seeing this for a long time, but ignored it because it's
typically not terribly important. Recently, I really needed this info,
and it was garbage. Proof that I should have fixed it sooner. Originally
wrong from:

commit 6c7a01ec3743a5a6ce9e53a69d7a6c2d8c715eb1
Author: Ben Widawsky 
Date:   Thu Jan 30 00:19:40 2014 -0800

drm/i915: Capture PPGTT info on error capture

Cc: Chris Wilson 
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 7a9bba1..d7ac688 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -899,10 +899,12 @@ static void i915_record_ring_state(struct drm_device *dev,
}
break;
case 7:
-   ering->vm_info.pp_dir_base = RING_PP_DIR_BASE(ring);
+   ering->vm_info.pp_dir_base =
+   I915_READ(RING_PP_DIR_BASE(ring));
break;
case 6:
-   ering->vm_info.pp_dir_base = 
RING_PP_DIR_BASE_READ(ring);
+   ering->vm_info.pp_dir_base =
+   I915_READ(RING_PP_DIR_BASE_READ(ring));
break;
}
}
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/bdw: Fix GEN8 GTT size calculation

2014-03-14 Thread Ben Widawsky
On Fri, Mar 14, 2014 at 07:40:30PM +0100, Daniel Vetter wrote:
> On Fri, Mar 14, 2014 at 09:58:06AM -0700, Ben Widawsky wrote:
> > The preliminary HW support check is no longer needed, and the
> > calculation is simplified while here.
> > 
> > Reported-by: David Woodhouse 
> > Signed-off-by: Ben Widawsky 
> > ---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 9 +
> >  1 file changed, 1 insertion(+), 8 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
> > b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 40a2b36..694112a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -1250,14 +1250,7 @@ static inline unsigned int 
> > gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
> >  {
> > bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
> > bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
> > -   if (bdw_gmch_ctl)
> > -   bdw_gmch_ctl = 1 << bdw_gmch_ctl;
> > -   if (bdw_gmch_ctl > 4) {
> > -   WARN_ON(!i915_preliminary_hw_support);
> > -   return 4<<20;
> > -   }
> > -
> > -   return bdw_gmch_ctl << 20;
> > +   return 1 << (bdw_gmch_ctl + 20);
> 
> I don't have this in my tree, and it seems to never have existed in
> upstream ...
> 
> /me is confused
> 
> Cheers, Daniel
> 

This was based off of linus master, since I assumed that is what David
was using. Honestly, I hadn't looked at our Intel trees. Maybe we're
good here already.


-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/bdw: Fix GEN8 GTT size calculation

2014-03-14 Thread Ben Widawsky
The preliminary HW support check is no longer needed, and the
calculation is simplified while here.

Reported-by: David Woodhouse 
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 40a2b36..694112a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1250,14 +1250,7 @@ static inline unsigned int gen8_get_total_gtt_size(u16 
bdw_gmch_ctl)
 {
bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
-   if (bdw_gmch_ctl)
-   bdw_gmch_ctl = 1 << bdw_gmch_ctl;
-   if (bdw_gmch_ctl > 4) {
-   WARN_ON(!i915_preliminary_hw_support);
-   return 4<<20;
-   }
-
-   return bdw_gmch_ctl << 20;
+   return 1 << (bdw_gmch_ctl + 20);
 }
 
 static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/bdw: The TLB invalidation mechanism has been removed from INSTPM

2014-03-13 Thread Ben Widawsky
On Thu, Mar 13, 2014 at 12:51 AM, Chris Wilson  wrote:
> Upon resume, the hardware continues writing the breadcrumbs into the old
> hws page (due to the stale TLB) and we try to read the seqno from the
> new page, so as shown by the error-states it appears that the breadcrumb
> writes are not happening. Since the hardware is writing to a random address,
> we are now corrupting random memory.
>
> Which is what I thought I said in the changelog.

Yes, you did say that. However, we should be idling on freeze, so the
explanation I was missing is how or why the HW is continuing to use the old
status page even though we've had to do a TLB flush when we emit the next batch.
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/bdw: The TLB invalidation mechanism has been removed from INSTPM

2014-03-12 Thread Ben Widawsky
On Thu, Mar 13, 2014 at 01:40:28AM +, Damien Lespiau wrote:
> While wandering in the spec, I noticed that BDW removes those 2 bits
> from INSTPM. I couldn't find any direct way to invalidate the TLB (ie
> without the ring working already). Maybe someone will be more lucky.
> 
> At least, we now know there may be a problem.
> 
> Signed-off-by: Damien Lespiau 
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
> b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index c50388a..4eb3e06 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -981,8 +981,14 @@ void intel_ring_setup_status_page(struct 
> intel_ring_buffer *ring)
>   I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
>   POSTING_READ(mmio);
>  
> - /* Flush the TLB for this page */
> - if (INTEL_INFO(dev)->gen >= 6) {
> + /*
> +  * Flush the TLB for this page
> +  *
> +  * FIXME: These two bits have disappeared on gen8, so a question
> +  * arises: do we still need this and if so how should we go about
> +  * invalidating the TLB?
> +  */
> + if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
>   u32 reg = RING_INSTPM(ring->mmio_base);
>  
>   /* ring should be idle before issuing a sync flush*/

I'm missing something on the original patch,
884020bf3d2a3787a1cc6df902e98e0eec60330b. How were we emitting
breadcrumbs without flushing the TLB? All batchbuffers should be
bookended by a TLB invalidate already, so I'm not sure the logic holds.
Chris, could you explain that one a bit further?

The only reason I bring this up is I'd like to rip this out completely
and have Thiago retest, or at least change the comment/commit message to
reflect whatever light Chris sheds on the matter.

Anyway, the bits are definitely gone, and I also can't find a non-ring
based replacement.
Reviewed-by: Ben Widawsky 

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] drm/i915/bdw: Use scratch page table for GEN8 PPGTT

2014-03-11 Thread Ben Widawsky
Way more coming in terms of sharing code. If you feel like looking
into the future:
http://cgit.freedesktop.org/~bwidawsk/drm-intel/log/?h=dynamic_pt_alloc

I've hoped, and continue to hope to kill insert/clear_entries
entirely. Still debugging some gen7 crap though for now.

On Tue, Mar 11, 2014 at 9:46 AM, Chris Wilson  wrote:
> On Tue, Mar 11, 2014 at 09:39:30AM -0700, Ben Widawsky wrote:
>> On Tue, Mar 11, 2014 at 5:24 AM, Chris Wilson  
>> wrote:
>> > On Sat, Mar 08, 2014 at 11:59:42AM -0800, Ben Widawsky wrote:
>> >> On Sat, Mar 08, 2014 at 11:58:16AM -0800, Ben Widawsky wrote:
>> >> > I'm not clear if the hardware is still subject to the same prefetching
>> >> > issues that made us use a scratch page in the first place. In either
>> >> > case, we're using garbage with the current code (we will end up using
>> >> > offset 0).
>> >> >
>> >> > This may be the cause of our current gem_cpu_reloc regression with
>> >> > PPGTT. I cannot test it at the moment.
>> >> >
>> >>
>> >> Wait NVM... that wasn't gen8. I can't associate this one with a bug.
>> >
>> > Yeah, this doesn't appear to achieve anything. ppgtt->base.scratch is
>> > only used by ppgtt->base.clear_range() and there is no caller between
>> > i915_gem_init_ppgtt() and ppgtt->base.scratch initialisation in
>> > gen6_ppgtt_init().
>>
>>
>> Still the right thing to do for gen8 though, right?
>
> Likewise vm->scratch.addr is only used by gen8_ppgtt_clear_range()...
> Except that it is never initialized to point to the scratch page in
> gen8_ppgtt_init(). So yes, wrt gen8 it is the right thing to do. There
> is more common code you could refactor if you so desired though...
>
> My bad for not realising this was to fix the gen8 bug, I was looking for
> something broken in the gen6 init sequence. So,
> Reviewed-by: Chris Wilson 
> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] drm/i915/bdw: Use scratch page table for GEN8 PPGTT

2014-03-11 Thread Ben Widawsky
On Tue, Mar 11, 2014 at 5:24 AM, Chris Wilson  wrote:
> On Sat, Mar 08, 2014 at 11:59:42AM -0800, Ben Widawsky wrote:
>> On Sat, Mar 08, 2014 at 11:58:16AM -0800, Ben Widawsky wrote:
>> > I'm not clear if the hardware is still subject to the same prefetching
>> > issues that made us use a scratch page in the first place. In either
>> > case, we're using garbage with the current code (we will end up using
>> > offset 0).
>> >
>> > This may be the cause of our current gem_cpu_reloc regression with
>> > PPGTT. I cannot test it at the moment.
>> >
>>
>> Wait NVM... that wasn't gen8. I can't associate this one with a bug.
>
> Yeah, this doesn't appear to achieve anything. ppgtt->base.scratch is
> only used by ppgtt->base.clear_range() and there is no caller between
> i915_gem_init_ppgtt() and ppgtt->base.scratch initialisation in
> gen6_ppgtt_init().


Still the right thing to do for gen8 though, right?
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Don't scream if there's no context for reset stats

2014-03-10 Thread Ben Widawsky
On Mon, Mar 10, 2014 at 09:30:22PM +0100, Daniel Vetter wrote:
> On Mon, Mar 10, 2014 at 7:30 PM, Ben Widawsky  wrote:
> > On Mon, Mar 10, 2014 at 09:44:22AM +0100, Daniel Vetter wrote:
> >> It can happen ...
> >>
> >> Fix up the check to match pre-gen6 reality where we don't have hw
> >> contexts and hence also don't need to set the reset status on them.
> >>
> >> This blows up when running any gpu reset testcase since for pre-gen6
> >> request->ctx is NULL. With this my ilk here is happy again.
> >>
> >> This regression has been introduced in
> >>
> >> commit 44e2c0705a19e09d7b0f30a591f92e473e5ef89e
> >> Author: Mika Kuoppala 
> >> Date:   Thu Jan 30 16:01:15 2014 +0200
> >>
> >> drm/i915: Use i915_hw_context to set reset stats
> >>
> >> Cc: Mika Kuoppala 
> >> Cc: Ben Widawsky 
> >> Signed-off-by: Daniel Vetter 
> >
> > Did you try playing around with setting last_context to
> > private_default_context? That is more in line with the original outlined
> > approach of "every platform has a context, be they fake, or real."
> 
> Nope. Currently the tests for that are a bit busted, so I think we
> should do that when we fix things up generally. Mika seems to be
> working on this. For now this keeps my machines happy. There's also
> the problem that our QA seems to have missed this ...
> -Daniel

As long as you've thought about it, lgtm, although I've not investigated
if other areas of the code need it. Without last_context always != NULL,
I wouldn't be surprised if you hit something similar elsewhere.

Reviewed-by: Ben Widawsky 

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Page table helpers

2014-03-10 Thread Ben Widawsky
On Mon, Mar 10, 2014 at 11:05:42PM +0200, Imre Deak wrote:
> On Tue, 2014-02-25 at 19:52 -0800, Ben Widawsky wrote:
> > These page table helpers make the code much cleaner. There is some
> > room to use the arch/x86 header files. The reason I've opted not to is
> > in several cases, the definitions are dictated by the CONFIG_ options
> > which do not always indicate the restrictions in the GPU.
> > 
> > Signed-off-by: Ben Widawsky 
> > ---
> > 
> > I have this patch queued up for the next round of /stuff/ I am working on. 
> > If
> > you want to pull it into this series, it's fine by me. As I deal with the 
> > code
> > more, it does become more obvious what looks good, and what does not.
> > 
> > ---
> > 
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 115 
> > +---
> >  1 file changed, 81 insertions(+), 34 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
> > b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index aa3ef7f..43d9129 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -30,8 +30,6 @@
> >  #include "i915_trace.h"
> >  #include "intel_drv.h"
> >  
> > -#define GEN6_PPGTT_PD_ENTRIES 512
> > -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> >  typedef uint64_t gen8_gtt_pte_t;
> >  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >  
> > @@ -51,6 +49,27 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >  #define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
> >  #define HSW_PTE_ADDR_ENCODE(addr)  HSW_GTT_ADDR_ENCODE(addr)
> >  
> > +/* GEN6 PPGTT resembles a 2 level page table:
> > + * 31:22 | 21:12 |  11:0
> > + *  PDE  |  PTE  | offset
> > + */
> > +#define GEN6_PDE_SHIFT 22
> > +#define GEN6_PPGTT_PD_ENTRIES  512
> > +#define GEN6_PDE_MASK  (GEN6_PPGTT_PD_ENTRIES-1)
> > +#define GEN6_PTE_SHIFT 12
> > +#define GEN6_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> > +#define GEN6_PTE_MASK  (GEN6_PPGTT_PT_ENTRIES-1)
> > +
> > +static inline uint32_t gen6_pte_index(uint32_t address)
> > +{
> > +   return (address >> GEN6_PTE_SHIFT) & GEN6_PTE_MASK;
> > +}
> > +
> > +static inline uint32_t gen6_pde_index(uint32_t address)
> > +{
> > +   return (address >> GEN6_PDE_SHIFT) & GEN6_PDE_MASK;
> > +}
> > +
> >  /* Cacheability Control is a 4-bit value. The low three bits are stored in 
> > *
> >   * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the 
> > PTE.
> >   */
> > @@ -63,6 +82,11 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >  #define HSW_WT_ELLC_LLC_AGE0   HSW_CACHEABILITY_CONTROL(0x6)
> >  #define HSW_WT_ELLC_LLC_AGE3   HSW_CACHEABILITY_CONTROL(0x7)
> >  
> > +#define PPAT_UNCACHED_INDEX(_PAGE_PWT | _PAGE_PCD)
> > +#define PPAT_CACHED_PDE_INDEX  0 /* WB LLC */
> > +#define PPAT_CACHED_INDEX  _PAGE_PAT /* WB LLCeLLC */
> > +#define PPAT_DISPLAY_ELLC_INDEX_PAGE_PCD /* WT eLLC */
> > +
> >  #define GEN8_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen8_gtt_pte_t))
> >  #define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
> >  
> > @@ -71,6 +95,10 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >   * PDPE  |  PDE  |  PTE  | offset
> >   * The difference as compared to normal x86 3 level page table is the 
> > PDPEs are
> >   * programmed via register.
> > + *
> > + * The x86 pagetable code is flexible in its ability to handle varying page
> > + * table depths via abstracted PGDIR/PUD/PMD/PTE. I've opted to not do 
> > this and
> > + * instead replicate the interesting functionality.
> >   */
> >  #define GEN8_PDPE_SHIFT30
> >  #define GEN8_PDPE_MASK 0x3
> > @@ -79,10 +107,31 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >  #define GEN8_PTE_SHIFT 12
> >  #define GEN8_PTE_MASK  0x1ff
> >  
> > -#define PPAT_UNCACHED_INDEX(_PAGE_PWT | _PAGE_PCD)
> > -#define PPAT_CACHED_PDE_INDEX  0 /* WB LLC */
> > -#define PPAT_CACHED_INDEX  _PAGE_PAT /* WB LLCeLLC */
> > -#define PPAT_DISPLAY_ELLC_INDEX_PAGE_PCD /* WT eLLC */
> > +static inline uint32_t gen8_pte_index(uint64_t address)
> > +{
> > +   return (address >> GEN8

Re: [Intel-gfx] [PATCH] drm/i915: Don't scream if there's no context for reset stats

2014-03-10 Thread Ben Widawsky
On Mon, Mar 10, 2014 at 09:44:22AM +0100, Daniel Vetter wrote:
> It can happen ...
> 
> Fix up the check to match pre-gen6 reality where we don't have hw
> contexts and hence also don't need to set the reset status on them.
> 
> This blows up when running any gpu reset testcase since for pre-gen6
> request->ctx is NULL. With this my ilk here is happy again.
> 
> This regression has been introduced in
> 
> commit 44e2c0705a19e09d7b0f30a591f92e473e5ef89e
> Author: Mika Kuoppala 
> Date:   Thu Jan 30 16:01:15 2014 +0200
> 
> drm/i915: Use i915_hw_context to set reset stats
> 
> Cc: Mika Kuoppala 
> Cc: Ben Widawsky 
> Signed-off-by: Daniel Vetter 

Did you try playing around with setting last_context to
private_default_context? That is more in line with the original outlined
approach of "every platform has a context, be they fake, or real."

> ---
>  drivers/gpu/drm/i915/i915_gem.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 92b0b4164b1d..25cc3f4f242e 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2291,8 +2291,10 @@ static void i915_set_reset_status(struct 
> drm_i915_private *dev_priv,
>  {
>   struct i915_ctx_hang_stats *hs;
>  
> - if (WARN_ON(!ctx))
> + if (!ctx) {
> + WARN_ON(HAS_HW_CONTEXTS(dev_priv->dev));
>   return;
> + }
>  
>   hs = &ctx->hang_stats;
>  
> -- 
> 1.8.1.4
> 

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Fail gpu reset if the forcewake fifo hasn't drained

2014-03-08 Thread Ben Widawsky
On Sat, Mar 08, 2014 at 08:58:24PM +0100, Daniel Vetter wrote:
> On Sat, Mar 8, 2014 at 7:50 PM, Ben Widawsky  wrote:
> > I've seen this too. Though I think the WARN does coincide with what the
> > docs state - it doesn't seem to match reality. So I totally agree this
> > is the right course.
> >
> > However, for my curiosity, Chris, can you elaborate on why you think it
> > doesn't make sense?
> 
> Our current fifo code would be broken - we stall for the fifo entries
> to refill if the value drops below NUM_FIFO_ENTRIES_RESERVED. Hence if
> the register value is zero right after reset, something is terribly
> broken.
> -Daniel

Oh that's right. fifo_entries should be MAX, not 0. Wonder if that one
would WARN. Anyway, I'm not actually sure if MAX is always known, so
probably a stupid idea anyway.

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] drm/i915/bdw: Use scratch page table for GEN8 PPGTT

2014-03-08 Thread Ben Widawsky
On Sat, Mar 08, 2014 at 11:58:16AM -0800, Ben Widawsky wrote:
> I'm not clear if the hardware is still subject to the same prefetching
> issues that made us use a scratch page in the first place. In either
> case, we're using garbage with the current code (we will end up using
> offset 0).
> 
> This may be the cause of our current gem_cpu_reloc regression with
> PPGTT. I cannot test it at the moment.
> 

Wait NVM... that wasn't gen8. I can't associate this one with a bug.

> Signed-off-by: Ben Widawsky 
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
> b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 5427d6d..0f39090 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1169,7 +1169,6 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>   ppgtt->base.clear_range = gen6_ppgtt_clear_range;
>   ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
>   ppgtt->base.cleanup = gen6_ppgtt_cleanup;
> - ppgtt->base.scratch = dev_priv->gtt.base.scratch;
>   ppgtt->base.start = 0;
>   ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * 
> PAGE_SIZE;
>   ppgtt->debug_dump = gen6_dump_ppgtt;
> @@ -1192,6 +1191,7 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct 
> i915_hw_ppgtt *ppgtt)
>   int ret = 0;
>  
>   ppgtt->base.dev = dev;
> + ppgtt->base.scratch = dev_priv->gtt.base.scratch;
>  
>   if (INTEL_INFO(dev)->gen < 8)
>   ret = gen6_ppgtt_init(ppgtt);
> -- 
> 1.9.0
> 

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915/bdw: Use scratch page table for GEN8 PPGTT

2014-03-08 Thread Ben Widawsky
I'm not clear if the hardware is still subject to the same prefetching
issues that made us use a scratch page in the first place. In either
case, we're using garbage with the current code (we will end up using
offset 0).

This may be the cause of our current gem_cpu_reloc regression with
PPGTT. I cannot test it at the moment.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5427d6d..0f39090 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1169,7 +1169,6 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->base.clear_range = gen6_ppgtt_clear_range;
ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
ppgtt->base.cleanup = gen6_ppgtt_cleanup;
-   ppgtt->base.scratch = dev_priv->gtt.base.scratch;
ppgtt->base.start = 0;
ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * 
PAGE_SIZE;
ppgtt->debug_dump = gen6_dump_ppgtt;
@@ -1192,6 +1191,7 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct 
i915_hw_ppgtt *ppgtt)
int ret = 0;
 
ppgtt->base.dev = dev;
+   ppgtt->base.scratch = dev_priv->gtt.base.scratch;
 
if (INTEL_INFO(dev)->gen < 8)
ret = gen6_ppgtt_init(ppgtt);
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915: Correct PPGTT total size

2014-03-08 Thread Ben Widawsky
Our code allows having a PPGTT that is smaller than the maximum size for
GEN6-GEN7. Though I don't think this actually ever occurs, the code may
as well work properly and more importantly look correct by using the
variable size instead of the HW max.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0f39090..68f55c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1170,7 +1170,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
ppgtt->base.cleanup = gen6_ppgtt_cleanup;
ppgtt->base.start = 0;
-   ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * 
PAGE_SIZE;
+   ppgtt->base.total =  ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * 
PAGE_SIZE;
ppgtt->debug_dump = gen6_dump_ppgtt;
 
ppgtt->pd_offset =
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Fail gpu reset if the forcewake fifo hasn't drained

2014-03-08 Thread Ben Widawsky
On Fri, Mar 07, 2014 at 10:35:56PM +0100, Daniel Vetter wrote:
> On Fri, Mar 07, 2014 at 09:09:03PM +0100, Daniel Vetter wrote:
> > Since the gpu reset + full ppgtt merge we have a hard hang on snb when
> > running the gem_reset_stat tests. Recently Mika also added some more strict
> > forcewake fifo warnings for gen6/7 in
> > 
> > commit 20277c60ed08ab4f7237854cc6c2046649f9200f
> > Author: Mika Kuoppala 
> > Date:   Wed Mar 5 18:08:19 2014 +0200
> > 
> > drm/i915: Always set fifo count to zero in gen6_reset
> > 
> > and they _do_ fire just right before the final failing reset which
> > then results in the machine's ultimate demise.
> > 
> > So use this indicator to fail the gpu reset with an -EIO code,
> > preventing further command submission, further hangs and so the deadly
> > final gpu reset attempt. It seems to work and my snb survives now.
> > 
> > The gpu is still dead though unfortunately.
> > 
> > Cc: Mika Kuoppala 
> > References: https://bugs.freedesktop.org/show_bug.cgi?id=74100
> > Signed-off-by: Daniel Vetter 
> > ---
> >  drivers/gpu/drm/i915/intel_uncore.c | 8 +---
> >  1 file changed, 5 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
> > b/drivers/gpu/drm/i915/intel_uncore.c
> > index c666af8232ef..9e22b11d0b0c 100644
> > --- a/drivers/gpu/drm/i915/intel_uncore.c
> > +++ b/drivers/gpu/drm/i915/intel_uncore.c
> > @@ -989,9 +989,11 @@ static int gen6_do_reset(struct drm_device *dev)
> > if (fw_engine)
> > dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_engine);
> >  
> > -   if (IS_GEN6(dev) || IS_GEN7(dev))
> > -   WARN_ON((__raw_i915_read32(dev_priv, GTFIFOCTL) &
> > -GT_FIFO_FREE_ENTRIES_MASK) != 0);
> > +   if (IS_GEN6(dev) || IS_GEN7(dev)) {
> > +   if (WARN_ON((__raw_i915_read32(dev_priv, GTFIFOCTL) &
> > +GT_FIFO_FREE_ENTRIES_MASK) != 0))
> > +   ret = -EIO;
> 
> Chris pointed out that this WARN doesn't make much sense, and testing
> confirmed that this completely breaks gpu reset on my machines here.
> 
> I've backed out Mika's original patch, this seems to be the wrong path.
> -Daniel
> 
> > +   }
> >  
> > dev_priv->uncore.fifo_count = 0;
> >  

I've seen this too. Though I think the WARN does coincide with what the
docs state - it doesn't seem to match reality. So I totally agree this
is the right course.

However, for my curiosity, Chris, can you elaborate on why you think it
doesn't make sense?


-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Disable full ppgtt by default

2014-03-06 Thread Ben Widawsky
On Thu, Mar 06, 2014 at 09:30:01PM +0100, Daniel Vetter wrote:
> On Thu, Mar 06, 2014 at 10:17:12AM -0800, Ben Widawsky wrote:
> > On Thu, Mar 06, 2014 at 12:14:21PM +0100, Daniel Vetter wrote:
> > > There are too many outstanding issues:
> > > 
> > > - Fence handling in the current code is broken. There's a patch series
> > >   from me, but it's blocked on an extended review (which includes
> > >   writing the testcases).
> > > 
> > > - IOMMU mapping handling is broken, we need to properly refcount it -
> > >   currently it gets destroyed when the first vma is unbound, so way
> > >   too early.
> > > 
> > > - There's a pending reset issue on snb. Since Mika's reset work and
> > >   full ppgtt have been pulled in in separate branches and ended up
> > >   intermittently breaking one another it's unclear who's the exact
> > >   culprit here.
> > > 
> > > - We still have persistent evidence of crazy recursion bugs through
> > >   vma_unbind and ppgtt_release, e.g.
> > > 
> > >   https://bugs.freedesktop.org/show_bug.cgi?id=73383
> > > 
> > >   This issue (and a few others meanwhile resolved) have blocked our
> > >   performance measuring/tuning group since 3 months.
> > > 
> > > - Secure batch dispatching is broken. This is blocking Brad Volkin's
> > >   command checker work since 3 months.
> > > 
> > > All these issues are confirmed to only happen when full ppgtt is
> > > enabled, falling back to aliasing ppgtt resolves them. But even
> > > aliasing ppgtt itself still has a regression:
> > > 
> > > - We currently unconditionally bind objects into the aliasing ppgtt,
> > >   which means all privileged objects like ringbuffers are visible to
> > >   unprivileged access again. On top of that this also breaks the
> > >   command checker for aliasing ppgtt, since it can't hide the
> > >   validated batch any more.
> > > 
> > > Furthermore topic/full-ppgtt has never been reviewed:
> > > 
> > > - Lifetime rules around vma unbinding/release are unclear, resulting
> > >   into this awesome hack called ppgtt_release. Which seems to take the
> > >   blame for most of the recursion fallout.
> > > 
> > > - Context/ring init works different on gpu reset than anywhere else.
> > >   Such differences have in the past always led to really hard to
> > >   track down bugs.
> > > 
> > > - Aliasing ppgtt is treated in a bunch of places as a real address
> > >   space, but it isn't - the real address space is always the global
> > >   gtt in that case. This results in a bit of a mess between contexts and
> > >   ppgtt object, further complicating the context/ppgtt/vma lifetime
> > >   rules.
> > > 
> > > - We don't have any docs describing the overall concepts introduced
> > >   with full ppgtt. A short, concise overview describing vmas and some
> > >   of the strange bits around them (like the unbound vmas used by
> > >   execbuf, or the new binding rules) really is needed.
> > > 
> > > Note that a lot of the post topic/full-ppgtt merge fallout has already
> > > been addressed, this entire list here of 10 issues really only contains
> > > the still outstanding issues.
> > > 
> > > Finally the 3.15 merge window is approaching and I think we need to
> > > use the remaining time to ensure that our fallback option of using
> > > aliasing ppgtt is in solid shape. Hence I think it's time to throw the
> > > switch. While at it demote the helper from static inline status
> > > because really.
> > > 
> > > Cc: Ben Widawsky 
> > > Cc: Dave Airlie 
> > > Signed-off-by: Daniel Vetter 
> > 
> > [snip]
> > 
> > I want a concise list in the commit message so it's obvious as we fix
> > things if we've achieved the goal or not. If you want to have nice prose
> > describing the reason and/or your feelings, that's fine, but please put
> > it after the concise list.
> > 
> > I'll start what I want, and please fill in as needed. I believe this is
> > all 10 you mentioned.
> > * Fence handling broken: BUG #
> 
> We have patches from me, and Paulo is signed up to do the review+igt
> testcase on our review board.
> 
> > * IOMMU Broken: BUG #
> 
> No bug report thus far. I can create one if people want, but that's more
> work than firing up my damn ivb, e

Re: [Intel-gfx] [PATCH] drm/i915: Disable full ppgtt by default

2014-03-06 Thread Ben Widawsky
On Thu, Mar 06, 2014 at 12:14:21PM +0100, Daniel Vetter wrote:
> There are too many outstanding issues:
> 
> - Fence handling in the current code is broken. There's a patch series
>   from me, but it's blocked on an extended review (which includes
>   writing the testcases).
> 
> - IOMMU mapping handling is broken, we need to properly refcount it -
>   currently it gets destroyed when the first vma is unbound, so way
>   too early.
> 
> - There's a pending reset issue on snb. Since Mika's reset work and
>   full ppgtt have been pulled in in separate branches and ended up
>   intermittently breaking one another it's unclear who's the exact
>   culprit here.
> 
> - We still have persistent evidence of crazy recursion bugs through
>   vma_unbind and ppgtt_release, e.g.
> 
>   https://bugs.freedesktop.org/show_bug.cgi?id=73383
> 
>   This issue (and a few others meanwhile resolved) have blocked our
>   performance measuring/tuning group since 3 months.
> 
> - Secure batch dispatching is broken. This is blocking Brad Volkin's
>   command checker work since 3 months.
> 
> All these issues are confirmed to only happen when full ppgtt is
> enabled, falling back to aliasing ppgtt resolves them. But even
> aliasing ppgtt itself still has a regression:
> 
> - We currently unconditionally bind objects into the aliasing ppgtt,
>   which means all privileged objects like ringbuffers are visible to
>   unprivileged access again. On top of that this also breaks the
>   command checker for aliasing ppgtt, since it can't hide the
>   validated batch any more.
> 
> Furthermore topic/full-ppgtt has never been reviewed:
> 
> - Lifetime rules around vma unbinding/release are unclear, resulting
>   into this awesome hack called ppgtt_release. Which seems to take the
>   blame for most of the recursion fallout.
> 
> - Context/ring init works different on gpu reset than anywhere else.
>   Such differences have in the past always led to really hard to
>   track down bugs.
> 
> - Aliasing ppgtt is treated in a bunch of places as a real address
>   space, but it isn't - the real address space is always the global
>   gtt in that case. This results in a bit of a mess between contexts and
>   ppgtt object, further complicating the context/ppgtt/vma lifetime
>   rules.
> 
> - We don't have any docs describing the overall concepts introduced
>   with full ppgtt. A short, concise overview describing vmas and some
>   of the strange bits around them (like the unbound vmas used by
>   execbuf, or the new binding rules) really is needed.
> 
> Note that a lot of the post topic/full-ppgtt merge fallout has already
> been addressed, this entire list here of 10 issues really only contains
> the still outstanding issues.
> 
> Finally the 3.15 merge window is approaching and I think we need to
> use the remaining time to ensure that our fallback option of using
> aliasing ppgtt is in solid shape. Hence I think it's time to throw the
> switch. While at it demote the helper from static inline status
> because really.
> 
> Cc: Ben Widawsky 
> Cc: Dave Airlie 
> Signed-off-by: Daniel Vetter 

[snip]

I want a concise list in the commit message so it's obvious as we fix
things if we've achieved the goal or not. If you want to have nice prose
describing the reason and/or your feelings, that's fine, but please put
it after the concise list.

I'll start what I want, and please fill in as needed. I believe this is
all 10 you mentioned.
* Fence handling broken: BUG #
* IOMMU Broken: BUG #
* "Reset issue": Bug #
* Secure dispatch: Failing testcase: 
* Bug: https://bugs.freedesktop.org/show_bug.cgi?id=73383
* Documentation

Then there is fuzzy stuff that you "want" which need more clarification
on exactly what will satisfy you.
* Lifetime rules: No clear requirement from you.
* Context/ring init differences: What do you want?
* Aliasing PPGTT real address treatment: What do you want?

In my opinion, the last 3 are things you've imposed because of your
style as maintainer, whereas the first 7 are real issues that any sane
person would require before turning on.

Anyway, if you make the concise list like I want, at the top of the
commit, and you fill in the missing details, this is:
Acked-by: Ben Widawsky 

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/bdw: MU_FLUSH_DW a qword instead of dword

2014-03-05 Thread Ben Widawsky
On Wed, Mar 05, 2014 at 10:30:21PM +, Chris Wilson wrote:
> On Wed, Mar 05, 2014 at 11:05:15AM -0800, Ben Widawsky wrote:
> > On Wed, Mar 05, 2014 at 07:33:11PM +0100, Daniel Vetter wrote:
> > > On Wed, Mar 05, 2014 at 09:24:34AM +, Chris Wilson wrote:
> > > > On Tue, Mar 04, 2014 at 09:38:56AM -0800, Ben Widawsky wrote:
> > > > > The actual post sync op is "Write Immediate Data QWord." It is 
> > > > > therefore
> > > > > arguable that we should have always done a qword write.
> > > > 
> > > > Not really since the spec explicitly says that we can choose either a
> > > > dword or qword write. Note that qword writes also currently require a
> > > > 64 byte alignment.
> > > 
> > > Yeah, that's also my reading of the spec - the length field selects
> > > whether the hw does a qword or dword write, and the qword needs to be
> > > specially aligned.
> > > -Daniel
> > 
> > I think both of you only read this sentence, where I said it was
> > "arguable." The rest of the commit message was what actually mattered.
> 
> I'm just arguing that the changelog is misleading. What we are doing is
> papering over an elephant, and more importantly I think it overlooked
> the extra restrictions imposed upon qwords (though it looks like we
> fortuitously are ok). The changelog also implies that all our other
> code is similarly flawed.

It wasn't completely fortuitous, I did check. I was lucky you think my
check was satisfactory though. I agree it makes future code somewhat
risky so maybe some improvement is needed to safeguard. I also have/had
a patch to lengthen MI_STORE_DATA_INDEX. The decoder however does not
complain about that one, and neither did the Windows team. So I didn't
want to change it for the sake of change.

I think the reasons for FLUSH_DW are valid, but as it seems unrelated to
the actual root cause of the bug, I'll leave this one to the fates.

> 
> The actual patch of splitting the code up into separate gen8 routines I
> thought was a nice improvement in readability.
> -Chris
> 
> -- 
> Chris Wilson, Intel Open Source Technology Centre
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] [v2] drm/i915/bdw: Add FBC support

2014-03-05 Thread Ben Widawsky
On Wed, Mar 05, 2014 at 12:00:18PM +0100, Daniel Vetter wrote:
> On Fri, Feb 21, 2014 at 04:06:47PM -0300, Paulo Zanoni wrote:
> > 2014-02-20 21:01 GMT-03:00 Ben Widawsky :
> > > This got lost when we shuffled around our internal branch and
> > > GEN7_FEATURES macro. There were no HW changes to support FBC, so we just
> > > need to set the flag.
> > >
> > > v2: Don't allow FBC for any pipe but A on platforms with DDI. (Paulo)
> > >
> > > Cc: Daisy Sun 
> > > Signed-off-by: Ben Widawsky 
> > 
> > Reviewed-by: Paulo Zanoni 
> 
> Have you guys run the fbc igt testcase to make sure it actually works? In
> case it's broken I'll back it out again ...
> 
> Queued for -next, thanks for the patch.
> -Daniel

Daisy, I was led to believe you actually tested this. Is that correct?

Daniel, BTW, we have an unimplemented FBC bugfix (basically disable) on
HSW that carries over to BDW. It was not implemented last time I checked
anyway. I tried to implement it, but I don't know the code well enough.
Feel free to sign me up for review if someone else does it.

Also, 4k displays have some issue with FBC. So we'll need to address
that.

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/bdw: Fix 4g GGTT insert entries regression

2014-03-05 Thread Ben Widawsky
On Wed, Mar 05, 2014 at 07:24:37AM -0800, Ben Widawsky wrote:
> On Wed, Mar 05, 2014 at 11:13:12AM +0200, Jani Nikula wrote:
> > On Wed, 05 Mar 2014, Ben Widawsky  wrote:
> > > The PDE needs to wrap after writing all the PTEs. Quite a small/silly
> > > bug to find in the massive change. It was introduced:
> > > commit 307dc4f99f6d3a74a78b0e776838f35b2004f14d
> > > Author: Ben Widawsky 
> > > Date:   Thu Feb 20 11:51:21 2014 -0800
> > >
> > > drm/i915/bdw: Reorganize PT allocations
> > 
> > As noted in the bug, the commit merged to dinq is correct, and the
> > commit referenced above apparently only lives in some temporary branch.
> > 
> > BR,
> > Jani.
> > 
> 
> My eyes are playing tricks on me. Sorry for the noise.
> 

It's not my eyes. It's something else. WTF is going on?

bwidawsk@bolo_yeung ~/intel-gfx/drm-intel (drm-intel-nightly)$ tsocks git fetch 
upstream 
bwidawsk@bolo_yeung ~/intel-gfx/drm-intel (drm-intel-nightly)$ git reset --hard 
upstream/drm-intel-nightly 
HEAD is now at 97d10a0 drm-intel-nightly: 2014y-03m-05d-20h-31m-21s integration 
manifest
bwidawsk@bolo_yeung ~/intel-gfx/drm-intel (drm-intel-nightly)$ sed -n '338p' 
drivers/gpu/drm/i915/i915_gem_gtt.c
if (pde + 1 == GEN8_PDES_PER_PAGE) {

> > 
> > >
> > > I can't actually test this patch at the moment because my Broadwell is
> > > unresponsive. This should be squashed if possible.
> > >
> > > Cc: Imre Deak 
> > > Reported-by: Timo Aaltonen 
> > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75763
> > > Signed-off-by: Ben Widawsky 
> > > ---
> > >  drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
> > > b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > index 5462037..2bde703 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > @@ -335,7 +335,7 @@ static void gen8_ppgtt_insert_entries(struct 
> > > i915_address_space *vm,
> > >   if (++pte == GEN8_PTES_PER_PAGE) {
> > >   kunmap_atomic(pt_vaddr);
> > >   pt_vaddr = NULL;
> > > - if (pde + 1 == GEN8_PDES_PER_PAGE) {
> > > + if (++pde == GEN8_PDES_PER_PAGE) {
> > >       pdpe++;
> > >       pde = 0;
> > >   }
> > > -- 
> > > 1.9.0
> > >
> > > ___
> > > Intel-gfx mailing list
> > > Intel-gfx@lists.freedesktop.org
> > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > 
> > -- 
> > Jani Nikula, Intel Open Source Technology Center
> 
> -- 
> Ben Widawsky, Intel Open Source Technology Center

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/bdw: MU_FLUSH_DW a qword instead of dword

2014-03-05 Thread Ben Widawsky
On Wed, Mar 05, 2014 at 07:33:11PM +0100, Daniel Vetter wrote:
> On Wed, Mar 05, 2014 at 09:24:34AM +, Chris Wilson wrote:
> > On Tue, Mar 04, 2014 at 09:38:56AM -0800, Ben Widawsky wrote:
> > > The actual post sync op is "Write Immediate Data QWord." It is therefore
> > > arguable that we should have always done a qword write.
> > 
> > Not really since the spec explicitly says that we can choose either a
> > dword or qword write. Note that qword writes also currently require a
> > 64 byte alignment.
> 
> Yeah, that's also my reading of the spec - the length field selects
> whether the hw does a qword or dword write, and the qword needs to be
> specially aligned.
> -Daniel

I think both of you only read this sentence, where I said it was
"arguable." The rest of the commit message was what actually mattered.

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915: Always set fifo count to zero in gen6_reset

2014-03-05 Thread Ben Widawsky
On Wed, Mar 05, 2014 at 06:08:19PM +0200, Mika Kuoppala wrote:
> There should not be a case where fifo count is other
> than zero after a successful reset. Always set
> count to zero, but be paranoid enough to warn.
> 
> v2: rebased
> 
> Suggested-by: Ben Widawsky 
> Signed-off-by: Mika Kuoppala 
> ---
>  drivers/gpu/drm/i915/intel_uncore.c |7 ---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
> b/drivers/gpu/drm/i915/intel_uncore.c
> index 00320fd..79eaba8 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -988,9 +988,10 @@ static int gen6_do_reset(struct drm_device *dev)
>   dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_engine);
>  
>   if (IS_GEN6(dev) || IS_GEN7(dev))
> - dev_priv->uncore.fifo_count =
> - __raw_i915_read32(dev_priv, GTFIFOCTL) &
> - GT_FIFO_FREE_ENTRIES_MASK;
> + WARN_ON((__raw_i915_read32(dev_priv, GTFIFOCTL) &
> +  GT_FIFO_FREE_ENTRIES_MASK) != 0);
> +
> + dev_priv->uncore.fifo_count = 0;
>  
>   spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
>   return ret;

Can you please add the following to the commit message:
"The GT FIFO is bypassed when not in RC6 from both IA and SA. As we've
just reset the GPU and not yet enabled RC6, there is no way the FIFO can
be anything but 0. If it is non-zero, it's a HW bug, and we can try to
carry on by faking it. It should be noted that RC6 is highly unlikely to
work properly if this WARN fires, however the system should continue on
just fine."

With that:
Reviewed-by: Ben Widawsky 

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] drm/i915: No need to put forcewake after a reset

2014-03-05 Thread Ben Widawsky
On Wed, Mar 05, 2014 at 06:08:18PM +0200, Mika Kuoppala wrote:
> As we now have intel_uncore_forcewake_reset() no need
> to do explicit put after reset.
> 
> v2: rebase
> 
> Signed-off-by: Mika Kuoppala 
> ---
>  drivers/gpu/drm/i915/intel_uncore.c |   19 ++++++++-----------
>  1 file changed, 8 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
> b/drivers/gpu/drm/i915/intel_uncore.c
> index 6ca24ac..00320fd 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -952,6 +952,7 @@ static int gen6_do_reset(struct drm_device *dev)
>   struct drm_i915_private *dev_priv = dev->dev_private;
>   int ret;
>   unsigned long irqflags;
> + u32 fw_engine = 0;
>  
>   /* Hold uncore.lock across reset to prevent any register access
>* with forcewake not set correctly
> @@ -971,25 +972,21 @@ static int gen6_do_reset(struct drm_device *dev)
>  
>   intel_uncore_forcewake_reset(dev);
>  
> - /* If reset with a user forcewake, try to restore, otherwise turn it 
> off */
> + /* If reset with a user forcewake, try to restore */

Technically, with the introduction of the deferred forcewake put, it's
not just user forcewake that this restores.

Wouldn't it be nice if we actually used something other than
FORCEWAKE_KERNEL for debugfs?

>   if (IS_VALLEYVIEW(dev)) {
>   if (dev_priv->uncore.fw_rendercount)
> - dev_priv->uncore.funcs.force_wake_get(dev_priv, 
> FORCEWAKE_RENDER);
> - else
> - dev_priv->uncore.funcs.force_wake_put(dev_priv, 
> FORCEWAKE_RENDER);
> + fw_engine |= FORCEWAKE_RENDER;
>  
>   if (dev_priv->uncore.fw_mediacount)
> - dev_priv->uncore.funcs.force_wake_get(dev_priv, 
> FORCEWAKE_MEDIA);
> - else
> - dev_priv->uncore.funcs.force_wake_put(dev_priv, 
> FORCEWAKE_MEDIA);
> + fw_engine |= FORCEWAKE_MEDIA;
>   } else {
>   if (dev_priv->uncore.forcewake_count)
> - dev_priv->uncore.funcs.force_wake_get(dev_priv, 
> FORCEWAKE_ALL);
> - else
> - dev_priv->uncore.funcs.force_wake_put(dev_priv, 
> FORCEWAKE_ALL);
> + fw_engine = FORCEWAKE_ALL;
>   }

P

>  
> - /* Restore fifo count */
> + if (fw_engine)
> + dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_engine);
> +
>   if (IS_GEN6(dev) || IS_GEN7(dev))
>   dev_priv->uncore.fifo_count =
>   __raw_i915_read32(dev_priv, GTFIFOCTL) &

I for one would not be opposed to a vlv_do_reset() and a
gen8_do_reset(). I implemented such a thing at some point, but threw it
away because I didn't actually have a vlv diff at the time.

I had to re-review a lot of the uncore.lock stuff, but lgtm:
Reviewed-by: Ben Widawsky 

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] [v2] drm/i915: Paranoia - get zeroed page table pages

2014-03-05 Thread Ben Widawsky
On Wed, Mar 05, 2014 at 06:32:06PM +0200, Imre Deak wrote:
> On Thu, 2014-02-27 at 19:47 -0800, Ben Widawsky wrote:
> > We normally clear the page tables as one of the first things during
> > initialization. They are however wired up (and potentially valid) before
> > we clear them.
> 
> I might be missing something, but afaics the page directories/tables are
> not in use until after ppgtt->enable()/mm_switch() is called on them,
> which is after the clear_range() call.
> 
> I'd understand if it's about leaving uninitialized stuff _after_
> clear_range() is called. But I think because of the 1G size alignment
> for ppgtt that's not possible either.
> 
> --Imre

The only case I was able to fathom was if we accidentally connect a PDE
before we populate the page table. I felt it was a rather harmless patch
though.

I do agree with the IRC conversation that it shouldn't happen. It was in
line with the same reasoning as why we never BUG_ON.

> 
> > To prevent the GPU from doing anything we might later regret, simply get
> > zeroed pages, which always mean invalid on all GENs.
> > 
> > NOTE: that a similar paranoia could be applied to GGTT via making sure
> > all entries are invalid ASAP. I think the extra work required to fix
> > such a BIOS bug is unwarranted until proven necessary.
> > 
> > v2: Remove useless GFP_ZERO in the kcallocs
> > 
> > Signed-off-by: Ben Widawsky 
> > ---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 7 ++++---
> >  1 file changed, 4 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
> > b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 0c27d8a..5e3957e 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -359,7 +359,7 @@ static struct page **__gen8_alloc_page_tables(void)
> > return ERR_PTR(-ENOMEM);
> >  
> > for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
> > -   pt_pages[i] = alloc_page(GFP_KERNEL);
> > +   pt_pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
> > if (!pt_pages[i])
> > goto bail;
> > }
> > @@ -421,7 +421,8 @@ static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt 
> > *ppgtt)
> >  static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt 
> > *ppgtt,
> > const int max_pdp)
> >  {
> > -   ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << 
> > PAGE_SHIFT));
> > +   ppgtt->pd_pages = alloc_pages(GFP_KERNEL | __GFP_ZERO,
> > + get_order(max_pdp << PAGE_SHIFT));
> > if (!ppgtt->pd_pages)
> > return -ENOMEM;
> >  
> > @@ -1021,7 +1022,7 @@ static int gen6_ppgtt_allocate_page_tables(struct 
> > i915_hw_ppgtt *ppgtt)
> >     return -ENOMEM;
> >  
> > for (i = 0; i < ppgtt->num_pd_entries; i++) {
> > -   ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
> > +   ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
> > if (!ppgtt->pt_pages[i]) {
> > gen6_ppgtt_free(ppgtt);
> > return -ENOMEM;
> 



-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/bdw: Fix 4g GGTT insert entries regression

2014-03-05 Thread Ben Widawsky
On Wed, Mar 05, 2014 at 11:13:12AM +0200, Jani Nikula wrote:
> On Wed, 05 Mar 2014, Ben Widawsky  wrote:
> > The PDE needs to wrap after writing all the PTEs. Quite a small/silly
> > bug to find in the massive change. It was introduced:
> > commit 307dc4f99f6d3a74a78b0e776838f35b2004f14d
> > Author: Ben Widawsky 
> > Date:   Thu Feb 20 11:51:21 2014 -0800
> >
> > drm/i915/bdw: Reorganize PT allocations
> 
> As noted in the bug, the commit merged to dinq is correct, and the
> commit referenced above apparently only lives in some temporary branch.
> 
> BR,
> Jani.
> 

My eyes are playing tricks on me. Sorry for the noise.

> 
> >
> > I can't actually test this patch at the moment because my Broadwell is
> > unresponsive. This should be squashed if possible.
> >
> > Cc: Imre Deak 
> > Reported-by: Timo Aaltonen 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75763
> > Signed-off-by: Ben Widawsky 
> > ---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
> > b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 5462037..2bde703 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -335,7 +335,7 @@ static void gen8_ppgtt_insert_entries(struct 
> > i915_address_space *vm,
> > if (++pte == GEN8_PTES_PER_PAGE) {
> > kunmap_atomic(pt_vaddr);
> > pt_vaddr = NULL;
> > -   if (pde + 1 == GEN8_PDES_PER_PAGE) {
> > +   if (++pde == GEN8_PDES_PER_PAGE) {
> > pdpe++;
> > pde = 0;
> > }
> > -- 
> > 1.9.0
> >
> > ___
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Jani Nikula, Intel Open Source Technology Center

-- 
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Fix PSR programming

2014-03-04 Thread Ben Widawsky
| has a higher precedence than ?. Therefore, the calculation doesn't do
at all what you would expect. Thanks to Ken for convincing me that this
was indeed the issue. Send me back to C programmer school, please.

I'm sort of surprised PSR was continuing to work for people. It should
be broken IMO (and it was broken for me, but I had assumed it never
worked).

Regression from:
commit ed8546ac1f99b850879f07b1e9b06b42fb0a36d9
Author: Ben Widawsky 
Date:   Mon Nov 4 22:45:05 2013 -0800

drm/i915/bdw: Support eDP PSR

I am pretty certain PSR is disabled by default, so no CC stable.

Cc: Rodrigo Vivi 
Cc: Kenneth Graunke 
Cc: Art Runyan 
Reported-by: "Kumar, Kiran S" 
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/intel_dp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index c512d78..2c0ceb4 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1723,7 +1723,7 @@ static void intel_edp_psr_enable_source(struct intel_dp 
*intel_dp)
val |= EDP_PSR_LINK_DISABLE;
 
I915_WRITE(EDP_PSR_CTL(dev), val |
-  IS_BROADWELL(dev) ? 0 : link_entry_time |
+  (IS_BROADWELL(dev) ? 0 : link_entry_time) |
   max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT |
   idle_frames << EDP_PSR_IDLE_FRAME_SHIFT |
   EDP_PSR_ENABLE);
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 50/62] [v5] drm/i915/bdw: Support eDP PSR

2014-03-04 Thread Ben Widawsky
This is a bug. Someone needs to send me back to C-programmer school.

Bits 26:25 are reserved in the spec. Furthermore, there shouldn't be a
functional difference since link_entry_time =
EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES   (0<<25). So you found the bug, but I
think the solution is actually:

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index c512d78..2c0ceb4 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1723,7 +1723,7 @@ static void intel_edp_psr_enable_source(struct intel_dp 
*intel_dp)
val |= EDP_PSR_LINK_DISABLE;
 
I915_WRITE(EDP_PSR_CTL(dev), val |
-  IS_BROADWELL(dev) ? 0 : link_entry_time |
+  (IS_BROADWELL(dev) ? 0 : link_entry_time) |
   max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT |
   idle_frames << EDP_PSR_IDLE_FRAME_SHIFT |
   EDP_PSR_ENABLE);




On Tue, Mar 04, 2014 at 09:31:28AM +0000, Kumar, Kiran S wrote:
> Hi Ben,
> 
> Can you please let me know the reason for being explicit about not setting the
> min link entry time for BDW? During my PSR testing on BDW, I found the perf
> counter not incrementing and SRD control being set to 0x0 with the following check:
>   IS_BROADWELL(dev) ? 0 : link_entry_time
> 
> When I remove and used only "link_entry_time" without check for BDW, PSR 
> worked fine. (perf counter started incrementing)
> 
> Thanks
> Kiran
> 
> -Original Message-
> From: intel-gfx-boun...@lists.freedesktop.org 
> [mailto:intel-gfx-boun...@lists.freedesktop.org] On Behalf Of Ben Widawsky
> Sent: Tuesday, November 05, 2013 12:15 PM
> To: Intel GFX
> Cc: Nikula, Jani; Ben Widawsky; Widawsky, Benjamin
> Subject: [Intel-gfx] [PATCH 50/62] [v5] drm/i915/bdw: Support eDP PSR
> 
> Broadwell PSR support is a superset of Haswell. With this simple register 
> base calculation, everything that worked on HSW for eDP PSR should work on 
> BDW.
> 
> Note that Broadwell provides additional PSR support. This is not addressed at 
> this time.
> 
> v2: Make the HAS_PSR include BDW
> 
> v3: Use the correct offset (I had incorrectly used one from my faulty
> brain) (Art!)
> 
> v4: It helps if you git add
> 
> v5: Be explicit about not setting min link entry time for BDW. This should be 
> no functional change over v4 (Jani)
> 
> Reviewed-by: Art Runyan 
> Reviewed-by: Jani Nikula 
> Signed-off-by: Ben Widawsky 
> ---
>  drivers/gpu/drm/i915/i915_drv.h | 2 +-
>  drivers/gpu/drm/i915/i915_reg.h | 4 ++--
>  drivers/gpu/drm/i915/intel_dp.c | 3 ++-
>  3 files changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
> b/drivers/gpu/drm/i915/i915_drv.h index f222eb4..dc79a0f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1808,7 +1808,7 @@ struct drm_i915_file_private {
>  #define HAS_DDI(dev) (INTEL_INFO(dev)->has_ddi)
>  #define HAS_POWER_WELL(dev)  (IS_HASWELL(dev) || IS_GEN8(dev))
>  #define HAS_FPGA_DBG_UNCLAIMED(dev)  (INTEL_INFO(dev)->has_fpga_dbg)
> -#define HAS_PSR(dev) (IS_HASWELL(dev))
> +#define HAS_PSR(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev))
>  
>  #define INTEL_PCH_DEVICE_ID_MASK 0xff00
>  #define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00
> diff --git a/drivers/gpu/drm/i915/i915_reg.h 
> b/drivers/gpu/drm/i915/i915_reg.h index ba1fe7e..3761c80 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1959,8 +1959,8 @@
>  #define BCLRPAT(pipe) _PIPE(pipe, _BCLRPAT_A, _BCLRPAT_B)  #define 
> VSYNCSHIFT(trans) _TRANSCODER(trans, _VSYNCSHIFT_A, _VSYNCSHIFT_B)
>  
> -/* HSW eDP PSR registers */
> -#define EDP_PSR_BASE(dev)0x64800
> +/* HSW+ eDP PSR registers */
> +#define EDP_PSR_BASE(dev)   (IS_HASWELL(dev) ? 0x64800 : 
> 0x6f800)
>  #define EDP_PSR_CTL(dev) (EDP_PSR_BASE(dev) + 0)
>  #define   EDP_PSR_ENABLE (1<<31)
>  #define   EDP_PSR_LINK_DISABLE   (0<<27)
> diff --git a/drivers/gpu/drm/i915/intel_dp.c 
> b/drivers/gpu/drm/i915/intel_dp.c index 7725f81..6e4246f 100644
> --- a/drivers/gpu/drm/i915/intel_dp.c
> +++ b/drivers/gpu/drm/i915/intel_dp.c
> @@ -1603,6 +1603,7 @@ static void intel_edp_psr_enable_source(struct intel_dp 
> *intel_dp)
>   uint32_t max_sleep_time = 0x1f;
>   uint32_t idle_frames = 1;
>   uint32_t val = 0x0;
> + const uint32_t link_entry_time = EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES;
>  
>   if (intel_dp->psr_dpcd[1] & DP_PSR_NO_TRAIN_ON_EXIT) {
>   val |

[Intel-gfx] [PATCH] drm/i915/bdw: Fix 4g GGTT insert entries regression

2014-03-04 Thread Ben Widawsky
The PDE needs to wrap after writing all the PTEs. Quite a small/silly
bug to find in the massive change. It was introduced:
commit 307dc4f99f6d3a74a78b0e776838f35b2004f14d
Author: Ben Widawsky 
Date:   Thu Feb 20 11:51:21 2014 -0800

drm/i915/bdw: Reorganize PT allocations

I can't actually test this patch at the moment because my Broadwell is
unresponsive. This should be squashed if possible.

Cc: Imre Deak 
Reported-by: Timo Aaltonen 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75763
Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5462037..2bde703 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -335,7 +335,7 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
if (++pte == GEN8_PTES_PER_PAGE) {
kunmap_atomic(pt_vaddr);
pt_vaddr = NULL;
-   if (pde + 1 == GEN8_PDES_PER_PAGE) {
+   if (++pde == GEN8_PDES_PER_PAGE) {
pdpe++;
pde = 0;
}
-- 
1.9.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


<    3   4   5   6   7   8   9   10   11   12   >