Re: [Intel-gfx] [PATCH] drm/i915: make SDVO TV-out work for multifunction devices v2
On Tue, Aug 06, 2013 at 08:57:35AM +0200, Daniel Vetter wrote: > This is the functional backport of upstream commit > 09ede5414f0215461c933032630bf9c3a61a8ba3 Original commit message > below. Backport has been tested by the bug reporter, please consider > applying to all stable kernels. > > We need to track this correctly. While at it shovel the boolean > to track whether the sdvo is in tv mode or not into pipe_config. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36997 > Tested-by: Pierre Assal > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=63609 > Tested-by: cancan,feng > Reviewed-by: Jani Nikula > Signed-off-by: Daniel Vetter > > --- > > backport v2: Fix up the git fail from v1 and actually add the right > changes. I suck. Heh, no worries, it was funny :) now applied, thanks. greg k-h ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 7/7] drm/i915: Make intel_set_mode() static
Signed-off-by: Damien Lespiau --- drivers/gpu/drm/i915/intel_display.c | 10 +++--- drivers/gpu/drm/i915/intel_drv.h | 2 -- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 30bd7f1..6755525 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -50,6 +50,10 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc, static void ironlake_crtc_clock_get(struct intel_crtc *crtc, struct intel_crtc_config *pipe_config); +static int intel_set_mode(struct drm_crtc *crtc, struct drm_display_mode *mode, + int x, int y, struct drm_framebuffer *old_fb); + + typedef struct { int min, max; } intel_range_t; @@ -8734,9 +8738,9 @@ out: return ret; } -int intel_set_mode(struct drm_crtc *crtc, -struct drm_display_mode *mode, -int x, int y, struct drm_framebuffer *fb) +static int intel_set_mode(struct drm_crtc *crtc, + struct drm_display_mode *mode, + int x, int y, struct drm_framebuffer *fb) { int ret; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a70a0d0..01455aa 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -576,8 +576,6 @@ struct intel_set_config { bool mode_changed; }; -extern int intel_set_mode(struct drm_crtc *crtc, struct drm_display_mode *mode, - int x, int y, struct drm_framebuffer *old_fb); extern void intel_crtc_restore_mode(struct drm_crtc *crtc); extern void intel_crtc_load_lut(struct drm_crtc *crtc); extern void intel_crtc_update_dpms(struct drm_crtc *crtc); -- 1.8.3.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 5/7] drm/i915: Make intel_encoder_dpms() static
And also fix a small typo in the intel_encoder_dpms() comment. Signed-off-by: Damien Lespiau --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- drivers/gpu/drm/i915/intel_drv.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a0914db..13cf6ba 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3889,10 +3889,10 @@ void intel_encoder_destroy(struct drm_encoder *encoder) kfree(intel_encoder); } -/* Simple dpms helper for encodres with just one connector, no cloning and only +/* Simple dpms helper for encoders with just one connector, no cloning and only * one kind of off state. It clamps all !ON modes to fully OFF and changes the * state of the entire output pipe. */ -void intel_encoder_dpms(struct intel_encoder *encoder, int mode) +static void intel_encoder_dpms(struct intel_encoder *encoder, int mode) { if (mode == DRM_MODE_DPMS_ON) { encoder->connectors_active = true; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index d516c63..09c9196 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -583,7 +583,6 @@ extern void intel_crtc_restore_mode(struct drm_crtc *crtc); extern void intel_crtc_load_lut(struct drm_crtc *crtc); extern void intel_crtc_update_dpms(struct drm_crtc *crtc); extern void intel_encoder_destroy(struct drm_encoder *encoder); -extern void intel_encoder_dpms(struct intel_encoder *encoder, int mode); extern void intel_connector_dpms(struct drm_connector *, int mode); extern bool intel_connector_get_hw_state(struct intel_connector *connector); extern void intel_modeset_check_state(struct drm_device *dev); -- 1.8.3.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 6/7] drm/i915: Remove intel_modeset_disable()
Caught by the dead code police! Signed-off-by: Damien Lespiau --- drivers/gpu/drm/i915/intel_display.c | 10 -- drivers/gpu/drm/i915/intel_drv.h | 1 - 2 files changed, 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 13cf6ba..30bd7f1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3871,16 +3871,6 @@ static void intel_crtc_disable(struct drm_crtc *crtc) } } -void intel_modeset_disable(struct drm_device *dev) -{ - struct drm_crtc *crtc; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - if (crtc->enabled) - intel_crtc_disable(crtc); - } -} - void intel_encoder_destroy(struct drm_encoder *encoder) { struct intel_encoder *intel_encoder = to_intel_encoder(encoder); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 09c9196..a70a0d0 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -578,7 +578,6 @@ struct intel_set_config { extern int intel_set_mode(struct drm_crtc *crtc, struct drm_display_mode *mode, int x, int y, struct drm_framebuffer *old_fb); -extern void intel_modeset_disable(struct drm_device *dev); extern void intel_crtc_restore_mode(struct drm_crtc *crtc); extern void intel_crtc_load_lut(struct drm_crtc *crtc); extern void intel_crtc_update_dpms(struct drm_crtc *crtc); -- 1.8.3.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 4/7] drm/i915: Make i915_hangcheck_elapsed() static
Signed-off-by: Damien Lespiau --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_irq.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0be923e..6141253 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1665,7 +1665,6 @@ extern void intel_console_resume(struct work_struct *work); /* i915_irq.c */ void i915_queue_hangcheck(struct drm_device *dev); -void i915_hangcheck_elapsed(unsigned long data); void i915_handle_error(struct drm_device *dev, bool wedged); extern void intel_irq_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6a1c207..8a77faf 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1869,7 +1869,7 @@ ring_stuck(struct intel_ring_buffer *ring, u32 acthd) * we kick the ring. If we see no progress on three subsequent calls * we assume chip is wedged and try to fix it by resetting the chip. */ -void i915_hangcheck_elapsed(unsigned long data) +static void i915_hangcheck_elapsed(unsigned long data) { struct drm_device *dev = (struct drm_device *)data; drm_i915_private_t *dev_priv = dev->dev_private; -- 1.8.3.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/7] drm/i915: Remove i915_gem_object_check_coherency()
This code was dead since: commit 432e58edc9de1d9c3d6a7b444b3c455b8f209a7d Author: Chris Wilson Date: Thu Nov 25 19:32:06 2010 + drm/i915: Avoid allocation for execbuffer object list so just put it to rest for good. Signed-off-by: Damien Lespiau --- drivers/gpu/drm/i915/i915_drv.h | 3 -- drivers/gpu/drm/i915/i915_gem_debug.c | 69 --- 2 files changed, 72 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index db54923..0be923e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -201,7 +201,6 @@ struct intel_ddi_plls { #define DRIVER_MINOR 6 #define DRIVER_PATCHLEVEL 0 -#define WATCH_COHERENCY0 #define WATCH_LISTS0 #define WATCH_GTT 0 @@ -2035,8 +2034,6 @@ int i915_verify_lists(struct drm_device *dev); #else #define i915_verify_lists(dev) 0 #endif -void i915_gem_object_check_coherency(struct drm_i915_gem_object *obj, -int handle); /* i915_debugfs.c */ int i915_debugfs_init(struct drm_minor *minor); diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c index bf945a3..bcdbafc 100644 --- a/drivers/gpu/drm/i915/i915_gem_debug.c +++ b/drivers/gpu/drm/i915/i915_gem_debug.c @@ -116,72 +116,3 @@ i915_verify_lists(struct drm_device *dev) return warned = err; } #endif /* WATCH_INACTIVE */ - -#if WATCH_COHERENCY -void -i915_gem_object_check_coherency(struct drm_i915_gem_object *obj, int handle) -{ - struct drm_device *dev = obj->base.dev; - int page; - uint32_t *gtt_mapping; - uint32_t *backing_map = NULL; - int bad_count = 0; - - DRM_INFO("%s: checking coherency of object %p@0x%08x (%d, %zdkb):\n", -__func__, obj, obj->gtt_offset, handle, -obj->size / 1024); - - gtt_mapping = ioremap(dev_priv->mm.gtt_base_addr + obj->gtt_offset, - obj->base.size); - if (gtt_mapping == NULL) { - DRM_ERROR("failed to map GTT space\n"); - return; - } - - for (page = 0; page < obj->size / PAGE_SIZE; page++) { - int i; - - backing_map = kmap_atomic(obj->pages[page]); - - if (backing_map == NULL) { - DRM_ERROR("failed to map backing page\n"); - goto out; - } - - for (i = 0; i < PAGE_SIZE / 4; i++) { - uint32_t cpuval = backing_map[i]; - uint32_t gttval = readl(gtt_mapping + - page * 1024 + i); - - if (cpuval != gttval) { - DRM_INFO("incoherent CPU vs GPU at 0x%08x: " -"0x%08x vs 0x%08x\n", -(int)(obj->gtt_offset + - page * PAGE_SIZE + i * 4), -cpuval, gttval); - if (bad_count++ >= 8) { - DRM_INFO("...\n"); - goto out; - } - } - } - kunmap_atomic(backing_map); - backing_map = NULL; - } - - out: - if (backing_map != NULL) - kunmap_atomic(backing_map); - iounmap(gtt_mapping); - - /* give syslog time to catch up */ - msleep(1); - - /* Directly flush the object, since we just loaded values with the CPU -* from the backing pages and we don't want to disturb the cache -* management that we're trying to observe. -*/ - - i915_gem_clflush_object(obj); -} -#endif -- 1.8.3.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 3/7] drm/i915: Fix #endif comment
Did you say OCD? Signed-off-by: Damien Lespiau --- drivers/gpu/drm/i915/i915_gem_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c index bcdbafc..775d506 100644 --- a/drivers/gpu/drm/i915/i915_gem_debug.c +++ b/drivers/gpu/drm/i915/i915_gem_debug.c @@ -115,4 +115,4 @@ i915_verify_lists(struct drm_device *dev) return warned = err; } -#endif /* WATCH_INACTIVE */ +#endif /* WATCH_LIST */ -- 1.8.3.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] Dead code cleanups
Procrastinating... at least a small consolation prize: 5 files changed, 11 insertions(+), 104 deletions(-) -- Damien ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/7] drm/i915: Remove stale prototypes
Signed-off-by: Damien Lespiau --- drivers/gpu/drm/i915/i915_drv.h | 8 drivers/gpu/drm/i915/intel_drv.h | 2 -- 2 files changed, 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7863c8a..db54923 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1677,7 +1677,6 @@ extern void intel_pm_init(struct drm_device *dev); extern void intel_uncore_sanitize(struct drm_device *dev); extern void intel_uncore_early_sanitize(struct drm_device *dev); extern void intel_uncore_init(struct drm_device *dev); -extern void intel_uncore_reset(struct drm_device *dev); extern void intel_uncore_clear_errors(struct drm_device *dev); extern void intel_uncore_check_errors(struct drm_device *dev); @@ -1843,9 +1842,6 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error) void i915_gem_reset(struct drm_device *dev); void i915_gem_clflush_object(struct drm_i915_gem_object *obj); -int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj, - uint32_t read_domains, - uint32_t write_domain); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); @@ -2034,8 +2030,6 @@ void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj); void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj); /* i915_gem_debug.c */ -void i915_gem_dump_object(struct drm_i915_gem_object *obj, int len, - const char *where, uint32_t mark); #if WATCH_LISTS int i915_verify_lists(struct drm_device *dev); #else @@ -2043,8 +2037,6 @@ int i915_verify_lists(struct drm_device *dev); #endif void i915_gem_object_check_coherency(struct drm_i915_gem_object *obj, int handle); -void i915_gem_dump_object(struct drm_i915_gem_object *obj, int len, - const char *where, uint32_t mark); /* i915_debugfs.c */ int i915_debugfs_init(struct drm_minor *minor); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index caf8b8d..d516c63 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -649,12 +649,10 @@ extern bool intel_get_load_detect_pipe(struct drm_connector *connector, extern void intel_release_load_detect_pipe(struct drm_connector *connector, struct intel_load_detect_pipe *old); -extern void intelfb_restore(void); extern void intel_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green, u16 blue, int regno); extern void intel_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, int regno); -extern void intel_enable_clock_gating(struct drm_device *dev); extern int intel_pin_and_fence_fb_obj(struct drm_device *dev, struct drm_i915_gem_object *obj, -- 1.8.3.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915: Prevent loading of uninitialized context garbage
The extended state bits are stored in the LCA register and affect all updates to the LCA register - i.e. the state on the old context is saved when SAVE_EX_STATE_EN is currently set in the old context address before the update, and the new context is restored when RESTORE_EX_STATE_EN is set in the new context address. This is irrespective of the RESTORE_INHIBIT flag in the MI_SET_CONTEXT. Hence, upon initial loading the contents of the extended state is read from uninitialised data. To workaround this, on first load we do a dummy load without the mandatory RESTORE_EX_STATE_EN bit so that the real load causes us to initialise the extended state of the context before it is then loaded by the LCA update. v2: Split out the introduction of the variable length MI_SET_CONTEXT command sequence. References: https://bugs.freedesktop.org/show_bug.cgi?id=64073 Signed-off-by: Chris Wilson Cc: Ben Widawsky --- drivers/gpu/drm/i915/i915_gem_context.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8a7b61e..a57d49a 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -367,6 +367,8 @@ mi_set_context(struct intel_ring_buffer *ring, case 5: len += 2; break; } + if (!new_context->is_initialized) + len += 2; ret = intel_ring_begin(ring, len); if (ret) @@ -382,6 +384,22 @@ mi_set_context(struct intel_ring_buffer *ring, break; } + if (!new_context->is_initialized) { + /* The GPU tries to restore the extended state irrespective +* of RestoreInhibit (since it is part of the LCA switch +* itself rather than the MI_SET_CONTEXT command). +* Since the initial contents may be garbage we do a dummy +* load first then set the mandatory flag for any future +* ring context switches. +*/ + intel_ring_emit(ring, MI_SET_CONTEXT); + intel_ring_emit(ring, + i915_gem_obj_ggtt_offset(new_context->obj) | + MI_MM_SPACE_GTT | + MI_SAVE_EXT_STATE_EN | + hw_flags); + } + intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_SET_CONTEXT); intel_ring_emit(ring, i915_gem_obj_ggtt_offset(new_context->obj) | -- 1.8.4.rc1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915: Eliminate unrequired dwords for MI_SET_CONTEXT
A later patch adds yet another workaround for MI_SET_CONTEXT, at which point we start to end up with more NOOPs than actual command dwords along the non-workaround paths. It is time that we made the MI_SET_CONTEXT a variable length block and only emit the dwords we truly need. Signed-off-by: Chris Wilson Reviewed-by: Ben Widawsky --- drivers/gpu/drm/i915/i915_gem_context.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 879bfa2..8a7b61e 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -348,7 +348,7 @@ mi_set_context(struct intel_ring_buffer *ring, struct i915_hw_context *new_context, u32 hw_flags) { - int ret; + int ret, len; /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value @@ -361,7 +361,14 @@ mi_set_context(struct intel_ring_buffer *ring, return ret; } - ret = intel_ring_begin(ring, 6); + len = 4; + switch (INTEL_INFO(ring->dev)->gen) { + case 7: + case 5: len += 2; + break; + } + + ret = intel_ring_begin(ring, len); if (ret) return ret; @@ -373,9 +380,6 @@ mi_set_context(struct intel_ring_buffer *ring, case 5: intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); break; - default: - intel_ring_emit(ring, MI_NOOP); - break; } intel_ring_emit(ring, MI_NOOP); @@ -395,9 +399,6 @@ mi_set_context(struct intel_ring_buffer *ring, case 5: intel_ring_emit(ring, MI_SUSPEND_FLUSH); break; - default: - intel_ring_emit(ring, MI_NOOP); - break; } intel_ring_advance(ring); -- 1.8.4.rc1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 21/29] drm/i915: mm_list is per VMA
On Thu, Aug 08, 2013 at 08:46:46AM +0200, Daniel Vetter wrote: > On Thu, Aug 8, 2013 at 6:32 AM, Ben Widawsky wrote: > > You killed a BUG in i915_gem_retire_requests_ring, shouldn't that be a WARN > > or are you in the business of completely killing assertions now :p? > > Yeah, and my little commit message annotation even explained that it's > fully redundant since the move_to_inactive function called on the next > line has the exact same check ;-) Heh, I realized this as I was dozing off last night... "didn't danvet point that out already." Oh well, at least I passed the test. -- Ben Widawsky, Intel Open Source Technology Center ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Prevent loading of uninitialized context garbage
On Thu, Aug 08, 2013 at 02:37:35PM +0100, Chris Wilson wrote: The extended state bits are stored in the LCA register and affect all updates to the LCA register - i.e. the state on the old context is saved when SAVE_EX_STATE_EN is currently set in the old context address before the update, and the new context is restored when RESTORE_EX_STATE_EN is set in the new context address. This is irrespective of the RESTORE_INHIBIT flag in the MI_SET_CONTEXT. Hence, upon initial loading the contents of the extended state is read from uninitialised data. To workaround this, on first load we do a dummy load without the mandatory RESTORE_EX_STATE_EN bit so that the real load causes us to initialise the extended state of the context before it is then loaded by the LCA update. References: https://bugs.freedesktop.org/show_bug.cgi?id=64073 Signed-off-by: Chris Wilson Cc: Ben Widawsky If you split this up in 2, the variable length for mi_set_context, and the workaround - the variable length thing is: Reviewed-by: Ben Widawsky I'd like to dig a bit more at the workaround portion. -- Ben Widawsky, Intel Open Source Technology Center ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/9] drm/i915: Update rules for reading cache lines through the LLC
On Thu, Aug 08, 2013 at 02:41:03PM +0100, Chris Wilson wrote: > The LLC is a fun device. The cache is a distinct functional block within > the SA that arbitrates access from both the CPU and GPU cores. As such > all writes to memory land first in the LLC before further action is > taken. For example, an uncached write from either the CPU or GPU will > then proceed to memory and evict the cacheline from the LLC. This means that > a read from the LLC always returns the correct information even if the PTE > bit in the GPU differs from the PAT bit in the CPU. For the older > snooping architecture on non-LLC, the fundamental principle still holds > except that some coordination is required between the CPU and GPU to > explicitly perform the snooping (which is handled by our request > tracking). > > The upshot of this is that we know that we can issue a read from either > LLC devices or snoopable memory and trust the contents of the cache - > i.e. we can forgo a clflush before a read in these circumstances. > Writing to memory from the CPU is a little more tricky as we have to > consider that the scanout does not read from the CPU cache at all, but > from main memory. So we have to currently treat all requests to write to > uncached memory as having to be flushed to main memory for coherency > with all consumers. > > Signed-off-by: Chris Wilson > Cc: Ville Syrjälä I've read through this series a few times now and haven't found any monsters. So, I only found these two small issues: - is_pin_display() confused me and I suspect it could confuse others as well, so a comment would be nice - the pwrite flush after taking the slowpath in 3/9 For everything else: Reviewed-by: Ville Syrjälä -- Ville Syrjälä Intel OTC ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 3/9] drm/i915: Update rules for writing through the LLC with the cpu
On Thu, Aug 08, 2013 at 06:51:26PM +0300, Ville Syrjälä wrote: > On Thu, Aug 08, 2013 at 04:36:35PM +0100, Chris Wilson wrote: > > On Thu, Aug 08, 2013 at 06:27:12PM +0300, Ville Syrjälä wrote: > > > On Thu, Aug 08, 2013 at 02:41:05PM +0100, Chris Wilson wrote: > > > > if (!needs_clflush_after && > > > > obj->base.write_domain != I915_GEM_DOMAIN_CPU) { > > > > - i915_gem_clflush_object(obj); > > > > + i915_gem_clflush_object(obj, false); > > > > > > Shouldn't that be i915_gem_clflush_object(obj, obj->pin_display) ? > > > > !needs_clflush_after implies that we cache-coherent and not writing to a > > scanout, so obj->pin_display must be false here. > > But we dropped the lock in the slow path, so couldn't it have changed? Que sera, sera. The contents after userspace races with itself are going to be fairly arbitrary. Doing the flush is likely to be better than not... -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 3/9] drm/i915: Update rules for writing through the LLC with the cpu
On Thu, Aug 08, 2013 at 04:36:35PM +0100, Chris Wilson wrote: > On Thu, Aug 08, 2013 at 06:27:12PM +0300, Ville Syrjälä wrote: > > On Thu, Aug 08, 2013 at 02:41:05PM +0100, Chris Wilson wrote: > > > if (!needs_clflush_after && > > > obj->base.write_domain != I915_GEM_DOMAIN_CPU) { > > > - i915_gem_clflush_object(obj); > > > + i915_gem_clflush_object(obj, false); > > > > Shouldn't that be i915_gem_clflush_object(obj, obj->pin_display) ? > > !needs_clflush_after implies that we cache-coherent and not writing to a > scanout, so obj->pin_display must be false here. But we dropped the lock in the slow path, so couldn't it have changed? -- Ville Syrjälä Intel OTC ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 3/9] drm/i915: Update rules for writing through the LLC with the cpu
On Thu, Aug 08, 2013 at 06:27:12PM +0300, Ville Syrjälä wrote: > On Thu, Aug 08, 2013 at 02:41:05PM +0100, Chris Wilson wrote: > > if (!needs_clflush_after && > > obj->base.write_domain != I915_GEM_DOMAIN_CPU) { > > - i915_gem_clflush_object(obj); > > + i915_gem_clflush_object(obj, false); > > Shouldn't that be i915_gem_clflush_object(obj, obj->pin_display) ? !needs_clflush_after implies that we cache-coherent and not writing to a scanout, so obj->pin_display must be false here. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] 3.10.2 : new warning WARNING: at drivers/gpu/drm/i915/intel_display.c:1656 ironlake_crtc_disable+0x7ce/0x800 [i915]()
On 08/06/2013 08:03 PM, Daniel Vetter wrote: > On Tue, Aug 6, 2013 at 7:42 PM, Toralf Förster wrote: >> On 07/26/2013 07:58 PM, Daniel Vetter wrote: >>> On Wed, Jul 24, 2013 at 05:51:41PM +0200, Toralf Förster wrote: Realized this today while booting a ThinkPad T420 with integrated intel graphic : >>> >>> Can you please retest with latest upstream git from Linus' tree? >> >> erm, that patch won't apply onto 3.10.5 : >> >> patch -p1 --dry-run < /home/tfoerste/devel/my/drm.patch >> patching file drivers/gpu/drm/i915/intel_display.c >> Hunk #1 FAILED at 8314. >> Hunk #2 FAILED at 9814. >> Hunk #3 succeeded at 5754 with fuzz 2 (offset -4112 lines). >> Hunk #4 succeeded at 9420 with fuzz 1 (offset -462 lines). >> 2 out of 4 hunks FAILED -- saving rejects to file >> drivers/gpu/drm/i915/intel_display.c.rej > > The code is completely reworked in 3.11 so this won't ever apply > as-is. But in any case stable rules dictate that a bug must be fixed > in the latest upstream code first. Henc can you please test the latest > 3.11-rc kernel? > > Thanks, Daniel > Tried it few times in a row - didn't observed that issue any more (3.11-rc4) -- MfG/Sincerely Toralf Förster pgp finger print: 7B1A 07F4 EC82 0F90 D4C2 8936 872A E508 7DB6 9DA3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 3/9] drm/i915: Update rules for writing through the LLC with the cpu
On Thu, Aug 08, 2013 at 02:41:05PM +0100, Chris Wilson wrote: > As mentioned in the previous commit, reads and writes from both the CPU > and GPU go through the LLC. This gives us coherency between the CPU and > GPU irrespective of the attribute settings either device sets. We can > use to avoid having to clflush even uncached memory. > > Except for the scanout. > > The scanout resides within another functional block that does not use > the LLC but reads directly from main memory. So in order to maintain > coherency with the scanout, writes to uncached memory must be flushed. > In order to optimize writes elsewhere, we start tracking whether an > framebuffer is attached to an object. > > v2: Use pin_display tracking rather than fb_count (to ensure we flush > cursors as well etc) and only force the clflush along explicit writes to > the scanout paths (i.e. pin_to_display_plane and pwrite into scanout). > > Based on a patch by Ville Syrjälä. > > Signed-off-by: Chris Wilson > Cc: Ville Syrjälä > --- > drivers/gpu/drm/i915/i915_drv.h| 2 +- > drivers/gpu/drm/i915/i915_gem.c| 58 > -- > drivers/gpu/drm/i915/i915_gem_exec.c | 2 +- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- > drivers/gpu/drm/i915/i915_gem_gtt.c| 2 +- > 5 files changed, 36 insertions(+), 30 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 3622ec2..1ffae08 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1863,7 +1863,7 @@ static inline bool i915_terminally_wedged(struct > i915_gpu_error *error) > } > > void i915_gem_reset(struct drm_device *dev); > -void i915_gem_clflush_object(struct drm_i915_gem_object *obj); > +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); > int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj, > uint32_t read_domains, > uint32_t write_domain); > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index c5e03ba..78535e9 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -38,7 +38,8 @@ > #include > > static void i915_gem_object_flush_gtt_write_domain(struct > drm_i915_gem_object *obj); > -static void i915_gem_object_flush_cpu_write_domain(struct > drm_i915_gem_object *obj); > +static void i915_gem_object_flush_cpu_write_domain(struct > drm_i915_gem_object *obj, > +bool force); > static __must_check int > i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > @@ -68,6 +69,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev, > return HAS_LLC(dev) || level != I915_CACHE_NONE; > } > > +static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) > +{ > + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) > + return true; > + > + return obj->pin_display; > +} > + > static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object > *obj) > { > if (obj->tiling_mode) > @@ -830,8 +839,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, >* write domain and manually flush cachelines (if required). > This >* optimizes for the case when the gpu will use the data >* right away and we therefore have to clflush anyway. */ > - if (obj->cache_level == I915_CACHE_NONE) > - needs_clflush_after = 1; > + needs_clflush_after = cpu_write_needs_clflush(obj); > if (i915_gem_obj_bound_any(obj)) { > ret = i915_gem_object_set_to_gtt_domain(obj, true); > if (ret) > @@ -921,7 +929,7 @@ out: >*/ > if (!needs_clflush_after && > obj->base.write_domain != I915_GEM_DOMAIN_CPU) { > - i915_gem_clflush_object(obj); > + i915_gem_clflush_object(obj, false); Shouldn't that be i915_gem_clflush_object(obj, obj->pin_display) ? > i915_gem_chipset_flush(dev); > } > } -- Ville Syrjälä Intel OTC ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Don't deref pipe->cpu_transcoder in the hangcheck code
On Thu, Aug 08, 2013 at 03:12:06PM +0200, Daniel Vetter wrote: > From: Chris Wilson > > If we get an error event really early in the driver setup sequence, > which gen3 is especially prone to with various display GTT faults we > Oops. So try to avoid this. > > Additionally with Haswell the transcoders are a separate bank of > registers from the pipes (4 transcoders, 3 pipes). In event of an > error, we want to be sure we have a complete and accurate picture of > the machine state, so record all the transcoders in addition to all > the active pipes. > > This regression has been introduced in > > commit 702e7a56af3780d8b3a717f698209bef44187bb0 > Author: Paulo Zanoni > Date: Tue Oct 23 18:29:59 2012 -0200 > > drm/i915: convert PIPECONF to use transcoder instead of pipe > > Based on the patch "drm/i915: Dump all transcoder registers on error" > from Chris Wilson: > > v2: Rebase so that we don't try to be clever and try to figure out the > cpu transcoder from hw state. That exercise should be done when we > analyze the error state offline. > > The actual bugfix is to not call intel_pipe_to_cpu_transcoder in the > error state capture code in case the pipes aren't fully set up yet. > > v3: Simplifiy the err->num_transcoders computation a bit. While at it > make the error capture stuff save on systems without a display block. > > v4: Fix fail, spotted by Jani. > > v5: Completely new commit message, cc: stable. > > Cc: Paulo Zanoni > Cc: Damien Lespiau > Cc: Jani Nikula > Cc: Chris Wilson > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=60021 > Cc: sta...@vger.kernel.org > Signed-off-by: Daniel Vetter Lgtm. We may have to modify transcoders[] to be more dynamic in future, but for now this works. Reviewed-by: Chris Wilson -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 6/9] drm/i915: Allocate context objects from stolen
Once again, the CPU PAT bits are irrelevant when considering the GPU cacheing, and context objects are never accessed from the CPU or directly by userspace making them another ideal candidate to allocate from stolen memory. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_context.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a57d49a..498f8a0 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -141,6 +141,7 @@ create_hw_context(struct drm_device *dev, struct drm_i915_file_private *file_priv) { struct drm_i915_private *dev_priv = dev->dev_private; + const int size = dev_priv->hw_context_size; struct i915_hw_context *ctx; int ret; @@ -149,7 +150,9 @@ create_hw_context(struct drm_device *dev, return ERR_PTR(-ENOMEM); kref_init(&ctx->ref); - ctx->obj = i915_gem_alloc_object(dev, dev_priv->hw_context_size); + ctx->obj = i915_gem_object_create_stolen(dev, size); + if (ctx->obj == NULL) + ctx->obj = i915_gem_alloc_object(dev, size); if (ctx->obj == NULL) { kfree(ctx); DRM_DEBUG_DRIVER("Context object allocated failed\n"); -- 1.8.4.rc1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 5/9] drm/i915: Allocate LLC ringbuffers from stolen
As stolen objects now behave identically (wrt to default LLC cacheing) as their normal system counterparts, we no longer have to differentiate our usage for ringbuffers. So allocate them from stolen on SNB+ as well. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index dbc1f7c..5d38ced 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1298,9 +1298,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, return ret; } - obj = NULL; - if (!HAS_LLC(dev)) - obj = i915_gem_object_create_stolen(dev, ring->size); + obj = i915_gem_object_create_stolen(dev, ring->size); if (obj == NULL) obj = i915_gem_alloc_object(dev, ring->size); if (obj == NULL) { -- 1.8.4.rc1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 4/9] drm/i915: Allow the GPU to cache stolen memory
As a corollary to reviewing the interaction between LLC and our cache domains, the GPU PTE bits are independent of the CPU PAT bits. As such we can set the cache level on stolen memory based on how we wish the GPU to cache accesses to it. So we are free to set the same default cache levels as for normal bo, i.e. enable LLC cacheing by default where appropriate. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_stolen.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index c0ab8ec..112c5e1 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -254,9 +254,8 @@ _i915_gem_object_create_stolen(struct drm_device *dev, i915_gem_object_pin_pages(obj); obj->stolen = stolen; - obj->base.write_domain = I915_GEM_DOMAIN_GTT; - obj->base.read_domains = I915_GEM_DOMAIN_GTT; - obj->cache_level = I915_CACHE_NONE; + obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; + obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE; return obj; -- 1.8.4.rc1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 9/9] drm/i915: Allow the user to set bo into the DISPLAY cache domain
This is primarily for the benefit of the create2 ioctl so that the caller can avoid the later step of rebinding the bo with new PTE bits. After introducing WT (and possibly GFDT) cacheing for display targets, not everything in the display is earmarked as UC, and more importantly what is is controlled by the kernel. Note that set_cache_level/get_cache_level for DISPLAY is not necessarily idempotent; get_cache_level may return UC for architectures that have no special cache domain for the display engine. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 7 +++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2b897f2..e68a129 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3557,6 +3557,10 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, args->caching = I915_CACHING_CACHED; break; + case I915_CACHE_WT: + args->caching = I915_CACHING_DISPLAY; + break; + default: args->caching = I915_CACHING_NONE; break; @@ -3583,6 +3587,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, case I915_CACHING_CACHED: level = I915_CACHE_LLC; break; + case I915_CACHING_DISPLAY: + level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; + break; default: return -EINVAL; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 39d14b3..40582e5 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -853,6 +853,7 @@ struct drm_i915_gem_busy { #define I915_CACHING_NONE 0 #define I915_CACHING_CACHED1 +#define I915_CACHING_DISPLAY 2 struct drm_i915_gem_caching { /** -- 1.8.4.rc1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/9] drm/i915: Track when an object is pinned for use by the display engine
The display engine has unique coherency rules such that it requires special handling to ensure that all writes to cursors, scanouts and sprites are clflushed. This patch introduces the infrastructure to simply track when an object is being accessed by the display engine. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 2 ++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 25 +++-- drivers/gpu/drm/i915/intel_display.c | 8 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b06e6ce..463c660 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -117,6 +117,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (name: %d)", obj->base.name); if (obj->pin_count) seq_printf(m, " (pinned x %d)", obj->pin_count); + if (obj->pin_display) + seq_printf(m, " (display)"); if (obj->fence_reg != I915_FENCE_REG_NONE) seq_printf(m, " (fence: %d)", obj->fence_reg); list_for_each_entry(vma, &obj->vma_list, vma_link) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 16a46f3..3622ec2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1380,6 +1380,7 @@ struct drm_i915_gem_object { */ unsigned int fault_mappable:1; unsigned int pin_mappable:1; + unsigned int pin_display:1; /* * Is the GPU currently using a fence to access this buffer, @@ -1894,6 +1895,7 @@ int __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, struct intel_ring_buffer *pipelined); +void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj); int i915_gem_attach_phys_object(struct drm_device *dev, struct drm_i915_gem_object *obj, int id, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7cd36c5..c5e03ba 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3583,6 +3583,11 @@ unlock: return ret; } +static bool is_pin_display(struct drm_i915_gem_object *obj) +{ + return obj->pin_count != !!obj->user_pin_count; +} + /* * Prepare buffer for display plane (scanout, cursors, etc). * Can be called from an uninterruptible phase (modesetting) and allows @@ -3602,6 +3607,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, return ret; } + /* Mark the pin_display early so that we account for the +* display coherency whilst setting up the cache domains. +*/ + obj->pin_display = true; + /* The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is * done with uncached PTEs. This is lowest common denominator for all @@ -3613,7 +3623,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, */ ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); if (ret) - return ret; + goto err_unpin_display; /* As the user may map the buffer once pinned in the display plane * (e.g. libkms for the bootup splash), we have to ensure that we @@ -3621,7 +3631,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, */ ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false); if (ret) - return ret; + goto err_unpin_display; i915_gem_object_flush_cpu_write_domain(obj); @@ -3639,6 +3649,17 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, old_write_domain); return 0; + +err_unpin_display: + obj->pin_display = is_pin_display(obj); + return ret; +} + +void +i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj) +{ + i915_gem_object_unpin(obj); + obj->pin_display = is_pin_display(obj); } int diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ae56e87..41b2cc2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1883,7 +1883,7 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev, return 0; err_unpin: - i915_gem_object_unpin(obj); + i915_gem_object_unpin_from_display_plane(obj); err_interruptible: dev_priv->mm.interruptible = true; return ret; @@ -1892,7 +1892,7 @@ err_interruptible: void intel_unpin_fb_obj(struct drm_i915_gem_object *o
[Intel-gfx] [PATCH 7/9] drm/i915: Only do a chipset flush after a clflush
Now that we skip clflushes more often, return a boolean indicating whether the clflush was actually performed, and only if it was do the chipset flush. (Though on most of the architectures where the clflush will be skipped, the chipset flush is a no-op!) Signed-off-by: Chris Wilson Cc: Ville Syrjälä --- drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_gem.c| 20 +++- drivers/gpu/drm/i915/i915_gem_exec.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +++-- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1ffae08..c51a771 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1863,7 +1863,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error) } void i915_gem_reset(struct drm_device *dev); -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); +bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj, uint32_t read_domains, uint32_t write_domain); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 78535e9..2656c69 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -929,8 +929,8 @@ out: */ if (!needs_clflush_after && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj, false); - i915_gem_chipset_flush(dev); + if (i915_gem_clflush_object(obj, false)) + i915_gem_chipset_flush(dev); } } @@ -3306,7 +3306,7 @@ err_unpin: return ret; } -void +bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force) { @@ -3315,14 +3315,14 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, * again at bind time. */ if (obj->pages == NULL) - return; + return false; /* * Stolen memory is always coherent with the GPU as it is explicitly * marked as wc by the system, or the system is cache-coherent. */ if (obj->stolen) - return; + return false; /* If the GPU is snooping the contents of the CPU cache, * we do not need to manually clear the CPU cache lines. However, @@ -,11 +,12 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, * tracking. */ if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) - return; + return false; trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->pages); + + return true; } /** Flushes the GTT write domain for the object if it's dirty. */ @@ -3377,8 +3378,9 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj, force); - i915_gem_chipset_flush(obj->base.dev); + if (i915_gem_clflush_object(obj, force)) + i915_gem_chipset_flush(obj->base.dev); + old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; diff --git a/drivers/gpu/drm/i915/i915_gem_exec.c b/drivers/gpu/drm/i915/i915_gem_exec.c index 6af0067..a2c6dbf 100644 --- a/drivers/gpu/drm/i915/i915_gem_exec.c +++ b/drivers/gpu/drm/i915/i915_gem_exec.c @@ -50,8 +50,8 @@ static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj, return ret; if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj, false); - i915_gem_chipset_flush(obj->base.dev); + if (i915_gem_clflush_object(obj, false)) + i915_gem_chipset_flush(obj->base.dev); obj->base.write_domain &= ~I915_GEM_DOMAIN_CPU; } if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e999578..7dcf78c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -708,6 +708,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, { struct drm_i915_gem_object *obj; uint32_t flush_domains = 0; + bool flush_chipset = false; int ret; list_for_each_entry(obj, objects, exec_list) { @@ -716,12 +717,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, return ret; if (obj->base.write_domain & I915_GEM_DO
[Intel-gfx] [PATCH 8/9] drm/i915: Use Write-Through cacheing for the display plane on Iris
Haswell GT3e has the unique feature of supporting Write-Through cacheing of objects within the eLLC/LLC. The purpose of this is to enable the display plane to remain coherent whilst objects lie resident in the eLLC/LLC - so that we, in theory, get the best of both worlds, perfect display and fast access. However, we still need to be careful as the CPU does not see the WT when accessing the cache. In particular, this means that we need to flush the cache lines after writing to an object through the CPU, and on transitioning from a cached state to WT. v2: Actually do the clflush on transition to WT, nagging by Ville. v3: Flush the CPU cache after writes into WT objects. v4: Rease onto LLC updates and report WT as "uncached" for get_cache_level_ioctl to remain symmetric with set_cache_level_ioctl. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Kenneth Graunke --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 4 +++- drivers/gpu/drm/i915/i915_gem.c | 14 -- drivers/gpu/drm/i915/i915_gem_gtt.c | 11 ++- include/uapi/drm/i915_drm.h | 1 + 5 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 9decc5b..de0b86a 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -976,6 +976,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_LLC: value = HAS_LLC(dev); break; + case I915_PARAM_HAS_WT: + value = HAS_WT(dev); + break; case I915_PARAM_HAS_ALIASING_PPGTT: value = dev_priv->mm.aliasing_ppgtt ? 1 : 0; break; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c51a771..0a903c4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -457,6 +457,7 @@ enum i915_cache_level { caches, eg sampler/render caches, and the large Last-Level-Cache. LLC is coherent with the CPU, but L3 is only visible to the GPU. */ + I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */ }; typedef uint32_t gen6_gtt_pte_t; @@ -1388,7 +1389,7 @@ struct drm_i915_gem_object { unsigned int pending_fenced_gpu_access:1; unsigned int fenced_gpu_access:1; - unsigned int cache_level:2; + unsigned int cache_level:3; unsigned int has_aliasing_ppgtt_mapping:1; unsigned int has_global_gtt_mapping:1; @@ -1545,6 +1546,7 @@ struct drm_i915_file_private { #define HAS_BLT(dev)(INTEL_INFO(dev)->has_blt_ring) #define HAS_VEBOX(dev) (INTEL_INFO(dev)->has_vebox_ring) #define HAS_LLC(dev)(INTEL_INFO(dev)->has_llc) +#define HAS_WT(dev)(IS_HASWELL(dev) && to_i915(dev)->ellc_size) #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 5) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2656c69..2b897f2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3551,7 +3551,16 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, goto unlock; } - args->caching = obj->cache_level != I915_CACHE_NONE; + switch (obj->cache_level) { + case I915_CACHE_LLC: + case I915_CACHE_L3_LLC: + args->caching = I915_CACHING_CACHED; + break; + + default: + args->caching = I915_CACHING_NONE; + break; + } drm_gem_object_unreference(&obj->base); unlock: @@ -3634,7 +3643,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * of uncaching, which would allow us to flush all the LLC-cached data * with that bit in the PTE to main memory with just one PIPE_CONTROL. */ - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); + ret = i915_gem_object_set_cache_level(obj, + HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); if (ret) goto err_unpin_display; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 89d8cc8..1791643 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -55,6 +55,7 @@ #define HSW_WB_LLC_AGE3HSW_CACHEABILITY_CONTROL(0x2) #define HSW_WB_LLC_AGE0HSW_CACHEABILITY_CONTROL(0x3) #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) +#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, enum i915_cache_level level) @@ -138,8 +139,1
[Intel-gfx] [PATCH 1/9] drm/i915: Update rules for reading cache lines through the LLC
The LLC is a fun device. The cache is a distinct functional block within the SA that arbitrates access from both the CPU and GPU cores. As such all writes to memory land first in the LLC before further action is taken. For example, an uncached write from either the CPU or GPU will then proceed to memory and evict the cacheline from the LLC. This means that a read from the LLC always returns the correct information even if the PTE bit in the GPU differs from the PAT bit in the CPU. For the older snooping architecture on non-LLC, the fundamental principle still holds except that some coordination is required between the CPU and GPU to explicitly perform the snooping (which is handled by our request tracking). The upshot of this is that we know that we can issue a read from either LLC devices or snoopable memory and trust the contents of the cache - i.e. we can forgo a clflush before a read in these circumstances. Writing to memory from the CPU is a little more tricky as we have to consider that the scanout does not read from the CPU cache at all, but from main memory. So we have to currently treat all requests to write to uncached memory as having to be flushed to main memory for coherency with all consumers. Signed-off-by: Chris Wilson Cc: Ville Syrjälä --- drivers/gpu/drm/i915/i915_gem.c | 22 ++ 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4c420f9..7cd36c5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -62,6 +62,12 @@ static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); +static bool cpu_cache_is_coherent(struct drm_device *dev, + enum i915_cache_level level) +{ + return HAS_LLC(dev) || level != I915_CACHE_NONE; +} + static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) { if (obj->tiling_mode) @@ -508,8 +514,7 @@ i915_gem_shmem_pread(struct drm_device *dev, * read domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will dirty the data * anyway again before the next pread happens. */ - if (obj->cache_level == I915_CACHE_NONE) - needs_clflush = 1; + needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level); if (i915_gem_obj_bound_any(obj)) { ret = i915_gem_object_set_to_gtt_domain(obj, false); if (ret) @@ -833,11 +838,11 @@ i915_gem_shmem_pwrite(struct drm_device *dev, return ret; } } - /* Same trick applies for invalidate partially written cachelines before -* writing. */ - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU) - && obj->cache_level == I915_CACHE_NONE) - needs_clflush_before = 1; + /* Same trick applies to invalidate partially written cachelines read +* before writing. */ + if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) + needs_clflush_before = + !cpu_cache_is_coherent(dev, obj->cache_level); ret = i915_gem_object_get_pages(obj); if (ret) @@ -3679,7 +3684,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - i915_gem_clflush_object(obj); + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + i915_gem_clflush_object(obj); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } -- 1.8.4.rc1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 3/9] drm/i915: Update rules for writing through the LLC with the cpu
As mentioned in the previous commit, reads and writes from both the CPU and GPU go through the LLC. This gives us coherency between the CPU and GPU irrespective of the attribute settings either device sets. We can use to avoid having to clflush even uncached memory. Except for the scanout. The scanout resides within another functional block that does not use the LLC but reads directly from main memory. So in order to maintain coherency with the scanout, writes to uncached memory must be flushed. In order to optimize writes elsewhere, we start tracking whether an framebuffer is attached to an object. v2: Use pin_display tracking rather than fb_count (to ensure we flush cursors as well etc) and only force the clflush along explicit writes to the scanout paths (i.e. pin_to_display_plane and pwrite into scanout). Based on a patch by Ville Syrjälä. Signed-off-by: Chris Wilson Cc: Ville Syrjälä --- drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_gem.c| 58 -- drivers/gpu/drm/i915/i915_gem_exec.c | 2 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c| 2 +- 5 files changed, 36 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3622ec2..1ffae08 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1863,7 +1863,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error) } void i915_gem_reset(struct drm_device *dev); -void i915_gem_clflush_object(struct drm_i915_gem_object *obj); +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj, uint32_t read_domains, uint32_t write_domain); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c5e03ba..78535e9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -38,7 +38,8 @@ #include static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); -static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); +static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, + bool force); static __must_check int i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -68,6 +69,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev, return HAS_LLC(dev) || level != I915_CACHE_NONE; } +static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + return true; + + return obj->pin_display; +} + static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) { if (obj->tiling_mode) @@ -830,8 +839,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, * write domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ - if (obj->cache_level == I915_CACHE_NONE) - needs_clflush_after = 1; + needs_clflush_after = cpu_write_needs_clflush(obj); if (i915_gem_obj_bound_any(obj)) { ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -921,7 +929,7 @@ out: */ if (!needs_clflush_after && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, false); i915_gem_chipset_flush(dev); } } @@ -999,9 +1007,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, goto out; } - if (obj->cache_level == I915_CACHE_NONE && - obj->tiling_mode == I915_TILING_NONE && - obj->base.write_domain != I915_GEM_DOMAIN_CPU) { + if (obj->tiling_mode == I915_TILING_NONE && + obj->base.write_domain != I915_GEM_DOMAIN_CPU && + cpu_write_needs_clflush(obj)) { ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between @@ -1350,8 +1358,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, } /* Pinned buffers may be scanout, so flush the cache */ - if (obj->pin_count) - i915_gem_object_flush_cpu_write_domain(obj); + if (obj->pin_
Re: [Intel-gfx] [PATCH] drm/i915: Don't deref pipe->cpu_transcoder in the hangcheck code
On Thu, 08 Aug 2013, Daniel Vetter wrote: > From: Chris Wilson > > If we get an error event really early in the driver setup sequence, > which gen3 is especially prone to with various display GTT faults we > Oops. So try to avoid this. > > Additionally with Haswell the transcoders are a separate bank of > registers from the pipes (4 transcoders, 3 pipes). In event of an > error, we want to be sure we have a complete and accurate picture of > the machine state, so record all the transcoders in addition to all > the active pipes. > > This regression has been introduced in > > commit 702e7a56af3780d8b3a717f698209bef44187bb0 > Author: Paulo Zanoni > Date: Tue Oct 23 18:29:59 2012 -0200 > > drm/i915: convert PIPECONF to use transcoder instead of pipe > > Based on the patch "drm/i915: Dump all transcoder registers on error" > from Chris Wilson: > > v2: Rebase so that we don't try to be clever and try to figure out the > cpu transcoder from hw state. That exercise should be done when we > analyze the error state offline. > > The actual bugfix is to not call intel_pipe_to_cpu_transcoder in the > error state capture code in case the pipes aren't fully set up yet. > > v3: Simplifiy the err->num_transcoders computation a bit. While at it > make the error capture stuff save on systems without a display block. > > v4: Fix fail, spotted by Jani. > > v5: Completely new commit message, cc: stable. > > Cc: Paulo Zanoni > Cc: Damien Lespiau > Cc: Jani Nikula s/Cc/Reviewed-by/ Cheers, Jani. > Cc: Chris Wilson > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=60021 > Cc: sta...@vger.kernel.org > Signed-off-by: Daniel Vetter > --- > drivers/gpu/drm/i915/intel_display.c | 86 > > 1 file changed, 57 insertions(+), 29 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_display.c > b/drivers/gpu/drm/i915/intel_display.c > index 4127ad2..0b11405 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -10380,6 +10380,8 @@ struct intel_display_error_state { > > u32 power_well_driver; > > + int num_transcoders; > + > struct intel_cursor_error_state { > u32 control; > u32 position; > @@ -10388,16 +10390,7 @@ struct intel_display_error_state { > } cursor[I915_MAX_PIPES]; > > struct intel_pipe_error_state { > - enum transcoder cpu_transcoder; > - u32 conf; > u32 source; > - > - u32 htotal; > - u32 hblank; > - u32 hsync; > - u32 vtotal; > - u32 vblank; > - u32 vsync; > } pipe[I915_MAX_PIPES]; > > struct intel_plane_error_state { > @@ -10409,6 +10402,19 @@ struct intel_display_error_state { > u32 surface; > u32 tile_offset; > } plane[I915_MAX_PIPES]; > + > + struct intel_transcoder_error_state { > + enum transcoder cpu_transcoder; > + > + u32 conf; > + > + u32 htotal; > + u32 hblank; > + u32 hsync; > + u32 vtotal; > + u32 vblank; > + u32 vsync; > + } transcoder[4]; > }; > > struct intel_display_error_state * > @@ -10416,9 +10422,17 @@ intel_display_capture_error_state(struct drm_device > *dev) > { > drm_i915_private_t *dev_priv = dev->dev_private; > struct intel_display_error_state *error; > - enum transcoder cpu_transcoder; > + int transcoders[] = { > + TRANSCODER_A, > + TRANSCODER_B, > + TRANSCODER_C, > + TRANSCODER_EDP, > + }; > int i; > > + if (INTEL_INFO(dev)->num_pipes == 0) > + return NULL; > + > error = kmalloc(sizeof(*error), GFP_ATOMIC); > if (error == NULL) > return NULL; > @@ -10427,9 +10441,6 @@ intel_display_capture_error_state(struct drm_device > *dev) > error->power_well_driver = I915_READ(HSW_PWR_WELL_DRIVER); > > for_each_pipe(i) { > - cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, i); > - error->pipe[i].cpu_transcoder = cpu_transcoder; > - > if (INTEL_INFO(dev)->gen <= 6 || IS_VALLEYVIEW(dev)) { > error->cursor[i].control = I915_READ(CURCNTR(i)); > error->cursor[i].position = I915_READ(CURPOS(i)); > @@ -10453,14 +10464,25 @@ intel_display_capture_error_state(struct drm_device > *dev) > error->plane[i].tile_offset = I915_READ(DSPTILEOFF(i)); > } > > - error->pipe[i].conf = I915_READ(PIPECONF(cpu_transcoder)); > error->pipe[i].source = I915_READ(PIPESRC(i)); > - error->pipe[i].htotal = I915_READ(HTOTAL(cpu_transcoder)); > - error->pipe[i].hblank = I915_READ(HBLANK(cpu_transcoder)); > - error->pipe[i].hsync = I915_READ(HSYNC(cpu_transcoder)); > -
[Intel-gfx] [PATCH] drm/i915: Prevent loading of uninitialized context garbage
The extended state bits are stored in the LCA register and affect all updates to the LCA register - i.e. the state on the old context is saved when SAVE_EX_STATE_EN is currently set in the old context address before the update, and the new context is restored when RESTORE_EX_STATE_EN is set in the new context address. This is irrespective of the RESTORE_INHIBIT flag in the MI_SET_CONTEXT. Hence, upon initial loading the contents of the extended state is read from uninitialised data. To workaround this, on first load we do a dummy load without the mandatory RESTORE_EX_STATE_EN bit so that the real load causes us to initialise the extended state of the context before it is then loaded by the LCA update. References: https://bugs.freedesktop.org/show_bug.cgi?id=64073 Signed-off-by: Chris Wilson Cc: Ben Widawsky --- drivers/gpu/drm/i915/i915_gem_context.c | 35 + 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 70e9e40..cf35f01 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -351,7 +351,7 @@ mi_set_context(struct intel_ring_buffer *ring, struct i915_hw_context *new_context, u32 hw_flags) { - int ret; + int ret, len; /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value @@ -364,7 +364,16 @@ mi_set_context(struct intel_ring_buffer *ring, return ret; } - ret = intel_ring_begin(ring, 6); + len = 4; + switch (INTEL_INFO(ring->dev)->gen) { + case 7: + case 5: len += 2; + break; + } + if (!new_context->is_initialized) + len += 2; + + ret = intel_ring_begin(ring, len); if (ret) return ret; @@ -376,9 +385,22 @@ mi_set_context(struct intel_ring_buffer *ring, case 5: intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); break; - default: - intel_ring_emit(ring, MI_NOOP); - break; + } + + if (!new_context->is_initialized) { + /* The GPU tries to restore the extended state irrespective +* of RestoreInhibit (since it is part of the LCA switch +* itself rather than the MI_SET_CONTEXT command). +* Since the initial contents may be garbage we do a dummy +* load first then set the mandatory flag for any future +* ring context switches. +*/ + intel_ring_emit(ring, MI_SET_CONTEXT); + intel_ring_emit(ring, + i915_gem_obj_ggtt_offset(new_context->obj) | + MI_MM_SPACE_GTT | + MI_SAVE_EXT_STATE_EN | + hw_flags); } intel_ring_emit(ring, MI_NOOP); @@ -398,9 +420,6 @@ mi_set_context(struct intel_ring_buffer *ring, case 5: intel_ring_emit(ring, MI_SUSPEND_FLUSH); break; - default: - intel_ring_emit(ring, MI_NOOP); - break; } intel_ring_advance(ring); -- 1.8.4.rc1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Don't deref pipe->cpu_transcoder in the hangcheck code
From: Chris Wilson If we get an error event really early in the driver setup sequence, which gen3 is especially prone to with various display GTT faults we Oops. So try to avoid this. Additionally with Haswell the transcoders are a separate bank of registers from the pipes (4 transcoders, 3 pipes). In event of an error, we want to be sure we have a complete and accurate picture of the machine state, so record all the transcoders in addition to all the active pipes. This regression has been introduced in commit 702e7a56af3780d8b3a717f698209bef44187bb0 Author: Paulo Zanoni Date: Tue Oct 23 18:29:59 2012 -0200 drm/i915: convert PIPECONF to use transcoder instead of pipe Based on the patch "drm/i915: Dump all transcoder registers on error" from Chris Wilson: v2: Rebase so that we don't try to be clever and try to figure out the cpu transcoder from hw state. That exercise should be done when we analyze the error state offline. The actual bugfix is to not call intel_pipe_to_cpu_transcoder in the error state capture code in case the pipes aren't fully set up yet. v3: Simplifiy the err->num_transcoders computation a bit. While at it make the error capture stuff save on systems without a display block. v4: Fix fail, spotted by Jani. v5: Completely new commit message, cc: stable. Cc: Paulo Zanoni Cc: Damien Lespiau Cc: Jani Nikula Cc: Chris Wilson Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=60021 Cc: sta...@vger.kernel.org Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 86 1 file changed, 57 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4127ad2..0b11405 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10380,6 +10380,8 @@ struct intel_display_error_state { u32 power_well_driver; + int num_transcoders; + struct intel_cursor_error_state { u32 control; u32 position; @@ -10388,16 +10390,7 @@ struct intel_display_error_state { } cursor[I915_MAX_PIPES]; struct intel_pipe_error_state { - enum transcoder cpu_transcoder; - u32 conf; u32 source; - - u32 htotal; - u32 hblank; - u32 hsync; - u32 vtotal; - u32 vblank; - u32 vsync; } pipe[I915_MAX_PIPES]; struct intel_plane_error_state { @@ -10409,6 +10402,19 @@ struct intel_display_error_state { u32 surface; u32 tile_offset; } plane[I915_MAX_PIPES]; + + struct intel_transcoder_error_state { + enum transcoder cpu_transcoder; + + u32 conf; + + u32 htotal; + u32 hblank; + u32 hsync; + u32 vtotal; + u32 vblank; + u32 vsync; + } transcoder[4]; }; struct intel_display_error_state * @@ -10416,9 +10422,17 @@ intel_display_capture_error_state(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; struct intel_display_error_state *error; - enum transcoder cpu_transcoder; + int transcoders[] = { + TRANSCODER_A, + TRANSCODER_B, + TRANSCODER_C, + TRANSCODER_EDP, + }; int i; + if (INTEL_INFO(dev)->num_pipes == 0) + return NULL; + error = kmalloc(sizeof(*error), GFP_ATOMIC); if (error == NULL) return NULL; @@ -10427,9 +10441,6 @@ intel_display_capture_error_state(struct drm_device *dev) error->power_well_driver = I915_READ(HSW_PWR_WELL_DRIVER); for_each_pipe(i) { - cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, i); - error->pipe[i].cpu_transcoder = cpu_transcoder; - if (INTEL_INFO(dev)->gen <= 6 || IS_VALLEYVIEW(dev)) { error->cursor[i].control = I915_READ(CURCNTR(i)); error->cursor[i].position = I915_READ(CURPOS(i)); @@ -10453,14 +10464,25 @@ intel_display_capture_error_state(struct drm_device *dev) error->plane[i].tile_offset = I915_READ(DSPTILEOFF(i)); } - error->pipe[i].conf = I915_READ(PIPECONF(cpu_transcoder)); error->pipe[i].source = I915_READ(PIPESRC(i)); - error->pipe[i].htotal = I915_READ(HTOTAL(cpu_transcoder)); - error->pipe[i].hblank = I915_READ(HBLANK(cpu_transcoder)); - error->pipe[i].hsync = I915_READ(HSYNC(cpu_transcoder)); - error->pipe[i].vtotal = I915_READ(VTOTAL(cpu_transcoder)); - error->pipe[i].vblank = I915_READ(VBLANK(cpu_transcoder)); - error->pipe[i].vsync = I915_READ(VSYNC(cpu_transcoder)); + } + +
Re: [Intel-gfx] [PATCH] drm/i915: Dump all transcoder registers on error
On Tue, Jun 25, 2013 at 02:43:57PM +0200, Daniel Vetter wrote: > On Tue, Jun 25, 2013 at 08:15:22AM +0100, Chris Wilson wrote: > > With Haswell the transcoders are a separate bank of registers from the > > pipes (4 transcoders, 3 pipes). In event of an error, we want to be sure > > we have a complete and accurate picture of the machine state, so record > > all the transcoders in addition to all the active pipes. > > > > Signed-off-by: Chris Wilson > > I think we should squash this together with the previous patch and move > the cpu_transcoder->pipe readout logic into intel_error_decode, similarly > to how we already augment the various ring register state with useful > context information. > > I just generally prefer our error state capture code to be as dumb as > possible, with the least amount of trust for our hw/sw state that we can > get away with. I've gone ahead and done this and pushed the frobbed patch to dinq. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: List objects allocated from stolen memory in debugfs
On Thu, Aug 08, 2013 at 10:15:31AM +0100, Chris Wilson wrote: > On Wed, Aug 07, 2013 at 11:44:20PM +0200, Daniel Vetter wrote: > > On Wed, Aug 07, 2013 at 06:30:54PM +0100, Chris Wilson wrote: > > > I was curious as to what objects were currently allocated from stolen > > > memory, and so exported it from debugfs. > > > > > > Signed-off-by: Chris Wilson > > > > I liike this, but since I've just merged Ben's exec_list vma conversion it > > doesn't apply any more cleanly. Can you please rebase and resend? > > It applies cleanly to current -nightly. Or at least the local > approximation thereof. I've gotten ahead of myself here, it applies cleanly. But it'll fail once patch 25 is in from Ben. Otoh that one has review outstanding, so I think it's the one to give in. Queued for -next, thanks for the patch. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 13/13] drm/i915: Always call intel_update_sprite_watermarks() when disabling a plane
On Tue, Aug 06, 2013 at 09:06:16PM +0100, Chris Wilson wrote: > On Tue, Aug 06, 2013 at 10:24:12PM +0300, ville.syrj...@linux.intel.com wrote: > > From: Ville Syrjälä > > > > ILK and VLV codepaths didn't update sprite watermarks when disabling a > > sprite. Make them do that. > > > > Signed-off-by: Ville Syrjälä > > I am totally loving the naming confusion between intel_plane and > intel_sprite in intel_display.c. There when we refer to a plane, we may > mean a slice of the CRTC pipeline or what we refer elsewhere as a sprite. > > e.g. intel_disable_plane and intel_plane_disable. > > Having got past that to realise this is intel_sprite.c not > intel_display.c, > Reviewed->by: Chris Wilson All merged to dinq, thanks for patches&review. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: List objects allocated from stolen memory in debugfs
On Wed, Aug 07, 2013 at 11:44:20PM +0200, Daniel Vetter wrote: > On Wed, Aug 07, 2013 at 06:30:54PM +0100, Chris Wilson wrote: > > I was curious as to what objects were currently allocated from stolen > > memory, and so exported it from debugfs. > > > > Signed-off-by: Chris Wilson > > I liike this, but since I've just merged Ben's exec_list vma conversion it > doesn't apply any more cleanly. Can you please rebase and resend? It applies cleanly to current -nightly. Or at least the local approximation thereof. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2] drm/i915: Pull watermark level validity check out
On Wed, Aug 07, 2013 at 01:24:47PM +0300, ville.syrj...@linux.intel.com wrote: > From: Ville Syrjälä > > Refactor the code a bit to split the watermark level validity check into > a separate function. > > Also add hack there that allows us to use it even for LP0 watermarks. > ATM we don't pre-compute/check the LP0 watermarks, so we just have to > clamp them to the maximum and hope things work out. > > v2: Add some debug prints when we exceed max WM0 > Kill pointless ret = false' assignment. > Include the check for the already disabled 'result' which > got shuffled around when the patchs got reorderd > > Signed-off-by: Ville Syrjälä I still think we are missing a log entry of what watermark values we pick, but the most important issue of having to clamp the values is logged. Reviewed-by: Chris Wilson -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915: Do not add an interrupt for a context switch
We use the request to ensure we hold a reference to the context for the duration that it remains in use by the ring. Each request only holds a reference to the current context, hence we emit a request after switching contexts with the final reference to the old context. However, the extra interrupt caused by that request is not useful (no timing critical function will wait for the context object), instead the overhead of servicing the IRQ shows up in some (lightweight) benchmarks. In order to keep the useful property of using the request to manage the context lifetime, we want to add a dummy request that is associated with the interrupt from the subsequent real request following the batch. The extra interrupt was added as a side-effect of using i915_add_request() in commit 112522f6789581824903f6f72082b5b841a7f0f9 Author: Chris Wilson Date: Thu May 2 16:48:07 2013 +0300 drm/i915: put context upon switching v2: Daniel convinced me that the request here was solely for context lifetime tracking and that we have the active ref to keep the object alive whilst the MI_SET_CONTEXT. So the only concern then is which context should get the blame for MI_SET_CONTEXT failing. The old scheme added a request for the old context so that any hang upto and including the switch away would mark the old context as guilty. Now any hang here implicates the new context. However since we have already gone through a complete flush with the last context in its last request, and all that lies in no-man's-land is an invalidate flush and the MI_SET_CONTEXT, we should be safe in not unduly placing blame on the new context. Signed-off-by: Chris Wilson Cc: Ben Widawsky Cc: Paulo Zanoni --- drivers/gpu/drm/i915/i915_gem.c | 1 - drivers/gpu/drm/i915/i915_gem_context.c | 12 +--- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0fa45e0..9aa8302 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2137,7 +2137,6 @@ int __i915_add_request(struct intel_ring_buffer *ring, if (request == NULL) return -ENOMEM; - /* Record the position of the start of the request so that * should we detect the updated seqno part-way through the * GPU processing the request, we never over-estimate the diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index db94aca..bfc5638 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -466,17 +466,7 @@ static int do_switch(struct i915_hw_context *to) from->obj->dirty = 1; BUG_ON(from->obj->ring != ring); - ret = i915_add_request(ring, NULL); - if (ret) { - /* Too late, we've already scheduled a context switch. -* Try to undo the change so that the hw state is -* consistent with out tracking. In case of emergency, -* scream. -*/ - WARN_ON(mi_set_context(ring, from, MI_RESTORE_INHIBIT)); - return ret; - } - + /* obj is kept alive until the next request by its active ref */ i915_gem_object_unpin(from->obj); i915_gem_context_unreference(from); } -- 1.8.4.rc1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915: Rearrange the comments in i915_add_request()
The comments were a little out-of-sequence with the code, forcing the reader to jump around whilst reading. Whilst moving the comments around, add one to explain the context reference. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9aa8302..2b54686 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2154,8 +2154,6 @@ int __i915_add_request(struct intel_ring_buffer *ring, request->ring = ring; request->head = request_start; request->tail = request_ring_position; - request->ctx = ring->last_context; - request->batch_obj = obj; /* Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this @@ -2163,7 +2161,12 @@ int __i915_add_request(struct intel_ring_buffer *ring, * inactive_list and lose its active reference. Hence we do not need * to explicitly hold another reference here. */ + request->batch_obj = obj; + /* Hold a reference to the current context so that we can inspect +* it later in case a hangcheck error event fires. +*/ + request->ctx = ring->last_context; if (request->ctx) i915_gem_context_reference(request->ctx); -- 1.8.4.rc1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Do not add an interrupt for a context switch
On Thu, Aug 08, 2013 at 08:43:30AM +0200, Daniel Vetter wrote: > On Thu, Aug 8, 2013 at 1:25 AM, Chris Wilson wrote: > > On Thu, Aug 08, 2013 at 01:18:18AM +0200, Daniel Vetter wrote: > >> Afaict the request holds a ref on the context, and that a ref on the hw > >> context bo. But the active list itself (thanks to the move_to_active) > >> should also hold a ref to the hw context bo, so I don't think we really > >> need full request here. The old context might disappear, but no one really > >> cares if it disappears a notch too early since the backing storage will > >> survive long enough. > > > > The next request holds a ref to the new context. We care about holding a > > ref to the old context until the hw has finished writing to it. That is > > the purpose of taking a request here, to bump the old_ctx->bo->active > > for the context save. Otherwise the backing storage is liable to disappear > > too early (and the hw write to a random location). > > Hm, I might be especially dense, but doesn't the > i915_gem_object_move_to_active(from->obj, ring) call right above the > add_request take care of exactly that, whether we have the request > added or not? And anyone waiting for that object (e.g. the eviction > code) will notice in check_olr that the request is missing, add it and > then wait for it? Yeah, I win this battle for being the most thick headed. The move-to-active will indeed hold the active reference until the next request which by definition will be after MI_SET_CONTEXT is executed. So this here dummy request purely holds a context reference, which I agree can disappear as soon as the user visible ref is dropped. In which case, we do not even need the context reference in the request per-se, except for error handling. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] Optimization in intersect functions
On Wed, Aug 07, 2013 at 05:38:27PM -0300, Raul Fernandes wrote: > This patch avoids the calculation of next points if unnecessary. > Is this correct?? > If yes, can someone commit?? Looks good, so I pushed. In future, please generate patches using git commit and git send-email (or git format-patch and attach in your usual mailer). This makes it easier to review and apply as you have then have to write a change log entry explaining why the patch is necessary (and it will have the correct authorship information) and the patch will be in a format most convenient to use with git. Thanks for the patch! -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Pin pages whilst mapping the dma-buf
As we attempt to kmalloc after calling get_pages, there is a possibility that the shrinker may reap the pages we just acquired. To prevent this we need to increment the pages_pin_count early, so rearrange the code and error paths to make it so. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 41 ++ 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index f2e185c..0f9f4fe 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -37,27 +37,24 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme ret = i915_mutex_lock_interruptible(obj->base.dev); if (ret) - return ERR_PTR(ret); + goto err; ret = i915_gem_object_get_pages(obj); - if (ret) { - st = ERR_PTR(ret); - goto out; - } + if (ret) + goto err_unlock; + + i915_gem_object_pin_pages(obj); /* Copy sg so that we make an independent mapping */ st = kmalloc(sizeof(struct sg_table), GFP_KERNEL); if (st == NULL) { - st = ERR_PTR(-ENOMEM); - goto out; + ret = -ENOMEM; + goto err_unpin; } ret = sg_alloc_table(st, obj->pages->nents, GFP_KERNEL); - if (ret) { - kfree(st); - st = ERR_PTR(ret); - goto out; - } + if (ret) + goto err_free; src = obj->pages->sgl; dst = st->sgl; @@ -68,17 +65,23 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme } if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { - sg_free_table(st); - kfree(st); - st = ERR_PTR(-ENOMEM); - goto out; + ret =-ENOMEM; + goto err_free_sg; } - i915_gem_object_pin_pages(obj); - -out: mutex_unlock(&obj->base.dev->struct_mutex); return st; + +err_free_sg: + sg_free_table(st); +err_free: + kfree(st); +err_unpin: + i915_gem_object_unpin_pages(obj); +err_unlock: + mutex_unlock(&obj->base.dev->struct_mutex); +err: + return ERR_PTR(ret); } static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, -- 1.8.4.rc1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PULL] drm-intel-fixes
Hi Dave, A few bugfixes for serious stuff and regressions. Highlight is the reinstated hack to keep the i915 backlight on when running on an optimus machine, this prevents black screens especially with some radeon muxed platforms. And the patch to quiet dmesg on Linus' old mac mini ;-) Cheers, Daniel The following changes since commit c095ba7224d8edc71dcef0d655911399a8bd4a3f: Linux 3.11-rc4 (2013-08-04 13:46:46 -0700) are available in the git repository at: git://people.freedesktop.org/~danvet/drm-intel tags/drm-intel-fixes-2013-08-08 for you to fetch changes up to 3f577573cd5482a32f85bd131e52f7cb4b9ac518: drm/i915: do not disable backlight on vgaswitcheroo switch off (2013-08-07 11:57:09 +0200) Aaron Lu (1): drm/i915: avoid brightness overflow when doing scale Daniel Vetter (1): drm/i915: fix gen4 digital port hotplug definitions Jani Nikula (1): drm/i915: do not disable backlight on vgaswitcheroo switch off Paulo Zanoni (1): drm/i915: update last_vblank when disabling the power well Ville Syrjälä (1): drm/i915: Don't call encoder's get_config unless encoder is active drivers/gpu/drm/i915/i915_reg.h | 12 +--- drivers/gpu/drm/i915/intel_display.c | 4 +++- drivers/gpu/drm/i915/intel_panel.c | 18 -- drivers/gpu/drm/i915/intel_pm.c | 18 ++ 4 files changed, 46 insertions(+), 6 deletions(-) -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 5/5] drm/prime: remove cargo-cult locking from map_sg helper
I've checked both implementations (radeon/nouveau) and they both grab the page array from ttm simply by dereferencing it and then wrapping it up with drm_prime_pages_to_sg in the callback and map it with dma_map_sg (in the helper). Only the grabbing of the underlying page array is anything we need to be concerned about, and either those pages are pinned independently, or we're screwed no matter what. And indeed, nouveau/radeon pin the backing storage in their attach/detach functions. Since I've created this patch cma prime support for dma_buf was added. drm_gem_cma_prime_get_sg_table only calls kzalloc and the creates&maps the sg table with dma_get_sgtable. It doesn't touch any gem object state otherwise. So the cma helpers also look safe. The only thing we might claim it does is prevent concurrent mapping of dma_buf attachments. But a) that's not allowed and b) the current code is racy already since it checks whether the sg mapping exists _before_ grabbing the lock. So the dev->struct_mutex locking here does absolutely nothing useful, but only distracts. Remove it. This should also help Maarten's work to eventually pin the backing storage more dynamically by preventing locking inversions around dev->struct_mutex. v2: Add analysis for recently added cma helper prime code. Cc: Laurent Pinchart Cc: Maarten Lankhorst Acked-by: Laurent Pinchart Acked-by: Maarten Lankhorst Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_prime.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index a35f206..f115962 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -167,8 +167,6 @@ static struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach, if (WARN_ON(prime_attach->dir != DMA_NONE)) return ERR_PTR(-EBUSY); - mutex_lock(&obj->dev->struct_mutex); - sgt = obj->dev->driver->gem_prime_get_sg_table(obj); if (!IS_ERR(sgt)) { @@ -182,7 +180,6 @@ static struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach, } } - mutex_unlock(&obj->dev->struct_mutex); return sgt; } -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 4/5] drm/exynos: explicit store base gem object in dma_buf->priv
From: Inki Dae Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park Signed-off-by: Daniel Vetter --- drivers/gpu/drm/exynos/exynos_drm_dmabuf.c | 12 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c index 3cd56e1..fd76449 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c @@ -22,6 +22,11 @@ struct exynos_drm_dmabuf_attachment { bool is_mapped; }; +static struct exynos_drm_gem_obj *dma_buf_to_obj(struct dma_buf *buf) +{ + return to_exynos_gem_obj(buf->priv); +} + static int exynos_gem_attach_dma_buf(struct dma_buf *dmabuf, struct device *dev, struct dma_buf_attachment *attach) @@ -63,7 +68,7 @@ static struct sg_table * enum dma_data_direction dir) { struct exynos_drm_dmabuf_attachment *exynos_attach = attach->priv; - struct exynos_drm_gem_obj *gem_obj = attach->dmabuf->priv; + struct exynos_drm_gem_obj *gem_obj = dma_buf_to_obj(attach->dmabuf); struct drm_device *dev = gem_obj->base.dev; struct exynos_drm_gem_buf *buf; struct scatterlist *rd, *wr; @@ -180,7 +185,7 @@ struct dma_buf *exynos_dmabuf_prime_export(struct drm_device *drm_dev, { struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj); - return dma_buf_export(exynos_gem_obj, &exynos_dmabuf_ops, + return dma_buf_export(obj, &exynos_dmabuf_ops, exynos_gem_obj->base.size, flags); } @@ -198,8 +203,7 @@ struct drm_gem_object *exynos_dmabuf_prime_import(struct drm_device *drm_dev, if (dma_buf->ops == &exynos_dmabuf_ops) { struct drm_gem_object *obj; - exynos_gem_obj = dma_buf->priv; - obj = &exynos_gem_obj->base; + obj = dma_buf->priv; /* is it from our device? */ if (obj->dev == drm_dev) { -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 3/5] drm/i915: explicit store base gem object in dma_buf->priv
Makes it more obviously correct what tricks we play by reusing the drm prime release helper. Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 21 - 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index f7e1682..e918b05 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -27,10 +27,15 @@ #include "i915_drv.h" #include +static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) +{ + return to_intel_bo(buf->priv); +} + static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment, enum dma_data_direction dir) { - struct drm_i915_gem_object *obj = attachment->dmabuf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); struct sg_table *st; struct scatterlist *src, *dst; int ret, i; @@ -85,7 +90,7 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, struct sg_table *sg, enum dma_data_direction dir) { - struct drm_i915_gem_object *obj = attachment->dmabuf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); mutex_lock(&obj->base.dev->struct_mutex); @@ -100,7 +105,7 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) { - struct drm_i915_gem_object *obj = dma_buf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; struct sg_page_iter sg_iter; struct page **pages; @@ -148,7 +153,7 @@ error: static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) { - struct drm_i915_gem_object *obj = dma_buf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; int ret; @@ -191,7 +196,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction) { - struct drm_i915_gem_object *obj = dma_buf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; int ret; bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE); @@ -222,9 +227,7 @@ static const struct dma_buf_ops i915_dmabuf_ops = { struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags) { - struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); - - return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, flags); + return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags); } static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) @@ -261,7 +264,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, /* is this one of own objects? */ if (dma_buf->ops == &i915_dmabuf_ops) { - obj = dma_buf->priv; + obj = dma_buf_to_obj(dma_buf); /* is it from our device? */ if (obj->base.dev == dev) { /* -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/5] drm/i915: unpin backing storage in dmabuf_unmap
This fixes a WARN in i915_gem_free_object when the obj->pages_pin_count isn't 0. v2: Add locking to unmap, noticed by Chris Wilson. Note that even though we call unmap with our own dev->struct_mutex held that won't result in an immediate deadlock since we never go through the dma_buf interfaces for our own, reimported buffers. But it's still easy to blow up and anger lockdep, but that's already the case with our ->map implementation. Fixing this for real will involve per dma-buf ww mutex locking by the callers. And lots of fun. So go with the duct-tape approach for now. Cc: Chris Wilson Reported-by: Maarten Lankhorst Cc: Maarten Lankhorst Tested-by: Armin K. (v1) Acked-by: Maarten Lankhorst Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 8 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 63ee1a9..f7e1682 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -85,9 +85,17 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, struct sg_table *sg, enum dma_data_direction dir) { + struct drm_i915_gem_object *obj = attachment->dmabuf->priv; + + mutex_lock(&obj->base.dev->struct_mutex); + dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir); sg_free_table(sg); kfree(sg); + + i915_gem_object_unpin_pages(obj); + + mutex_unlock(&obj->base.dev->struct_mutex); } static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/5] drm: use common drm_gem_dmabuf_release in i915/exynos drivers
Note that this is slightly tricky since both drivers store their native objects in dma_buf->priv. But both also embed the base drm_gem_object at the first position, so the implicit cast is ok. To use the release helper we need to export it, too. Cc: Inki Dae Cc: Intel Graphics Development Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_prime.c| 3 ++- drivers/gpu/drm/exynos/exynos_drm_dmabuf.c | 23 +-- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 13 + include/drm/drmP.h | 1 + 4 files changed, 5 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 85e450e..a35f206 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -192,7 +192,7 @@ static void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach, /* nothing to be done here */ } -static void drm_gem_dmabuf_release(struct dma_buf *dma_buf) +void drm_gem_dmabuf_release(struct dma_buf *dma_buf) { struct drm_gem_object *obj = dma_buf->priv; @@ -202,6 +202,7 @@ static void drm_gem_dmabuf_release(struct dma_buf *dma_buf) drm_gem_object_unreference_unlocked(obj); } } +EXPORT_SYMBOL(drm_gem_dmabuf_release); static void *drm_gem_dmabuf_vmap(struct dma_buf *dma_buf) { diff --git a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c index a0f997e..3cd56e1 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c @@ -127,27 +127,6 @@ static void exynos_gem_unmap_dma_buf(struct dma_buf_attachment *attach, /* Nothing to do. */ } -static void exynos_dmabuf_release(struct dma_buf *dmabuf) -{ - struct exynos_drm_gem_obj *exynos_gem_obj = dmabuf->priv; - - /* -* exynos_dmabuf_release() call means that file object's -* f_count is 0 and it calls drm_gem_object_handle_unreference() -* to drop the references that these values had been increased -* at drm_prime_handle_to_fd() -*/ - if (exynos_gem_obj->base.export_dma_buf == dmabuf) { - exynos_gem_obj->base.export_dma_buf = NULL; - - /* -* drop this gem object refcount to release allocated buffer -* and resources. -*/ - drm_gem_object_unreference_unlocked(&exynos_gem_obj->base); - } -} - static void *exynos_gem_dmabuf_kmap_atomic(struct dma_buf *dma_buf, unsigned long page_num) { @@ -193,7 +172,7 @@ static struct dma_buf_ops exynos_dmabuf_ops = { .kunmap = exynos_gem_dmabuf_kunmap, .kunmap_atomic = exynos_gem_dmabuf_kunmap_atomic, .mmap = exynos_gem_dmabuf_mmap, - .release= exynos_dmabuf_release, + .release= drm_gem_dmabuf_release, }; struct dma_buf *exynos_dmabuf_prime_export(struct drm_device *drm_dev, diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index f2e185c..63ee1a9 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -90,17 +90,6 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, kfree(sg); } -static void i915_gem_dmabuf_release(struct dma_buf *dma_buf) -{ - struct drm_i915_gem_object *obj = dma_buf->priv; - - if (obj->base.export_dma_buf == dma_buf) { - /* drop the reference on the export fd holds */ - obj->base.export_dma_buf = NULL; - drm_gem_object_unreference_unlocked(&obj->base); - } -} - static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) { struct drm_i915_gem_object *obj = dma_buf->priv; @@ -211,7 +200,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size static const struct dma_buf_ops i915_dmabuf_ops = { .map_dma_buf = i915_gem_map_dma_buf, .unmap_dma_buf = i915_gem_unmap_dma_buf, - .release = i915_gem_dmabuf_release, + .release = drm_gem_dmabuf_release, .kmap = i915_gem_dmabuf_kmap, .kmap_atomic = i915_gem_dmabuf_kmap_atomic, .kunmap = i915_gem_dmabuf_kunmap, diff --git a/include/drm/drmP.h b/include/drm/drmP.h index fba5473..69bf832 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1540,6 +1540,7 @@ extern struct drm_gem_object *drm_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); extern int drm_gem_prime_fd_to_handle(struct drm_device *dev, struct drm_file *file_priv, int prime_fd, uint32_t *handle); +extern void drm_gem_dmabuf_release(struct dma_buf *dma_buf); extern int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -- 1.8.3.2
[Intel-gfx] [PATCH 0/5] First cut of prime patches for drm-next
Hi Dave, Inki supplied a patch to convert exynos, so I think these prep patches are ready to go in. I want a common drm_gem_dmabuf_release function across all drivers since th oops fix around the teardown of the obj->export_dma_buf pointer needs to have changed code in there, and duplicating tricky locking stuff accross all drivers isn't great. Please consider merging into drm-next. Cheers, Daniel Daniel Vetter (4): drm: use common drm_gem_dmabuf_release in i915/exynos drivers drm/i915: unpin backing storage in dmabuf_unmap drm/i915: explicit store base gem object in dma_buf->priv drm/prime: remove cargo-cult locking from map_sg helper Inki Dae (1): drm/exynos: explicit store base gem object in dma_buf->priv drivers/gpu/drm/drm_prime.c| 6 ++--- drivers/gpu/drm/exynos/exynos_drm_dmabuf.c | 35 - drivers/gpu/drm/i915/i915_gem_dmabuf.c | 36 +++--- include/drm/drmP.h | 1 + 4 files changed, 30 insertions(+), 48 deletions(-) -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/4] drm/exynos: explicit store base gem object in dma_buf->priv
Hi Inki, On Thu, Aug 08, 2013 at 01:56:28PM +0900, Inki Dae wrote: > Hi, Daniel. You can repost your patch set including the below my patch. And > my patch numbering is wrong. Sorry about that. Thanks for the patch, I'll submit it toghether with the other ones soon. -Daniel > > [PATCH 1/4] -> [PATCH 4/4] > > > Thanks, > Inki Dae > > > -Original Message- > > From: Inki Dae [mailto:inki@samsung.com] > > Sent: Thursday, August 08, 2013 1:40 PM > > To: dan...@ffwll.ch > > Cc: dri-de...@lists.freedesktop.org; intel-gfx@lists.freedesktop.org; Inki > > Dae; Kyungmin Park > > Subject: [PATCH 1/4] drm/exynos: explicit store base gem object in > > dma_buf->priv > > > > Signed-off-by: Inki Dae > > Signed-off-by: Kyungmin Park > > --- > > drivers/gpu/drm/exynos/exynos_drm_dmabuf.c | 12 > > 1 files changed, 8 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c > > b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c > > index 3cd56e1..fd76449 100644 > > --- a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c > > +++ b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c > > @@ -22,6 +22,11 @@ struct exynos_drm_dmabuf_attachment { > > bool is_mapped; > > }; > > > > +static struct exynos_drm_gem_obj *dma_buf_to_obj(struct dma_buf *buf) > > +{ > > + return to_exynos_gem_obj(buf->priv); > > +} > > + > > static int exynos_gem_attach_dma_buf(struct dma_buf *dmabuf, > > struct device *dev, > > struct dma_buf_attachment *attach) > > @@ -63,7 +68,7 @@ static struct sg_table * > > enum dma_data_direction dir) > > { > > struct exynos_drm_dmabuf_attachment *exynos_attach = attach->priv; > > - struct exynos_drm_gem_obj *gem_obj = attach->dmabuf->priv; > > + struct exynos_drm_gem_obj *gem_obj = dma_buf_to_obj(attach->dmabuf); > > struct drm_device *dev = gem_obj->base.dev; > > struct exynos_drm_gem_buf *buf; > > struct scatterlist *rd, *wr; > > @@ -180,7 +185,7 @@ struct dma_buf *exynos_dmabuf_prime_export(struct > > drm_device *drm_dev, > > { > > struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj); > > > > - return dma_buf_export(exynos_gem_obj, &exynos_dmabuf_ops, > > + return dma_buf_export(obj, &exynos_dmabuf_ops, > > exynos_gem_obj->base.size, flags); > > } > > > > @@ -198,8 +203,7 @@ struct drm_gem_object > > *exynos_dmabuf_prime_import(struct drm_device *drm_dev, > > if (dma_buf->ops == &exynos_dmabuf_ops) { > > struct drm_gem_object *obj; > > > > - exynos_gem_obj = dma_buf->priv; > > - obj = &exynos_gem_obj->base; > > + obj = dma_buf->priv; > > > > /* is it from our device? */ > > if (obj->dev == drm_dev) { > > -- > > 1.7.5.4 > -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx