[Intel-gfx] [PATCH] drm/i915: edp resume/On time optimization.

2016-01-11 Thread abhay . kumar
From: Abhay Kumar 

Make the resume/on codepath not wait for panel_power_cycle_delay (t11_t12)
if that time has already been spent while suspended/powered off.

v2: Use CLOCK_BOOTTIME and remove jiffies for panel power cycle
delay calculation (Ville).

v3: Address Ville's review comments.

Cc: Ville Syrjälä 
Signed-off-by: Abhay Kumar 
---
 drivers/gpu/drm/i915/intel_dp.c  | 19 ++-
 drivers/gpu/drm/i915/intel_drv.h |  2 +-
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 796e3d3..d0885bc 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1812,12 +1812,21 @@ static void wait_panel_off(struct intel_dp *intel_dp)
 
 static void wait_panel_power_cycle(struct intel_dp *intel_dp)
 {
+   static ktime_t panel_power_on_time;
+   s64 panel_power_off_duration;
+
DRM_DEBUG_KMS("Wait for panel power cycle\n");
 
+   /* Take the difference of current time and panel power off time
+    * and then make the panel wait for t11_t12 if needed. */
+   panel_power_on_time = ktime_get_boottime();
+   panel_power_off_duration = ktime_ms_delta(panel_power_on_time,
+                                  intel_dp->panel_power_off_time);
+
/* When we disable the VDD override bit last we have to do the manual
 * wait. */
-   wait_remaining_ms_from_jiffies(intel_dp->last_power_cycle,
-  intel_dp->panel_power_cycle_delay);
+   if (panel_power_off_duration < ((s64)intel_dp->panel_power_cycle_delay))
+           wait_remaining_ms_from_jiffies(jiffies,
+                          (intel_dp->panel_power_cycle_delay -
+                           panel_power_off_duration));
 
wait_panel_status(intel_dp, IDLE_CYCLE_MASK, IDLE_CYCLE_VALUE);
 }
@@ -1969,7 +1978,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
I915_READ(pp_stat_reg), I915_READ(pp_ctrl_reg));
 
if ((pp & POWER_TARGET_ON) == 0)
-   intel_dp->last_power_cycle = jiffies;
+   intel_dp->panel_power_off_time = ktime_get_boottime();
 
power_domain = intel_display_port_aux_power_domain(intel_encoder);
intel_display_power_put(dev_priv, power_domain);
@@ -2118,7 +2127,7 @@ static void edp_panel_off(struct intel_dp *intel_dp)
I915_WRITE(pp_ctrl_reg, pp);
POSTING_READ(pp_ctrl_reg);
 
-   intel_dp->last_power_cycle = jiffies;
+   intel_dp->panel_power_off_time = ktime_get_boottime();
wait_panel_off(intel_dp);
 
/* We got a reference when we enabled the VDD. */
@@ -5122,7 +5131,7 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect
 
 static void intel_dp_init_panel_power_timestamps(struct intel_dp *intel_dp)
 {
-   intel_dp->last_power_cycle = jiffies;
+   intel_dp->panel_power_off_time = ktime_get_boottime();
intel_dp->last_power_on = jiffies;
intel_dp->last_backlight_off = jiffies;
 }
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index bdfe403..06b37b8 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -793,9 +793,9 @@ struct intel_dp {
int backlight_off_delay;
struct delayed_work panel_vdd_work;
bool want_panel_vdd;
-   unsigned long last_power_cycle;
unsigned long last_power_on;
unsigned long last_backlight_off;
+   ktime_t panel_power_off_time;
 
struct notifier_block edp_notifier;
 
-- 
1.9.1
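
For reference, the remaining-wait computation in the patch above boils down
to the following self-contained sketch (plain C using the same ktime helpers;
wait_remaining_power_cycle and the msleep-based wait are illustrative
stand-ins, not the driver's own helpers):

	#include <linux/ktime.h>
	#include <linux/delay.h>

	/* Wait out whatever is left of the panel power cycle delay, given the
	 * timestamp recorded when the panel was powered off. CLOCK_BOOTTIME
	 * keeps counting across suspend, which is the point of the patch. */
	static void wait_remaining_power_cycle(ktime_t panel_power_off_time,
					       int power_cycle_delay_ms)
	{
		s64 off_ms = ktime_ms_delta(ktime_get_boottime(),
					    panel_power_off_time);

		if (off_ms < (s64)power_cycle_delay_ms)
			msleep(power_cycle_delay_ms - off_ms);
	}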



[Intel-gfx] [PATCH 1/6] drm/i915/guc: Make the GuC fw loading helper functions general

2016-01-11 Thread yu . dai
From: Alex Dai 

Rename some of the GuC fw loading functions to make them more general.
We will utilize them for HuC loading as well.
s/intel_guc_fw/intel_uc_fw/g
s/GUC_FIRMWARE/UC_FIRMWARE/g

Struct intel_guc_fw is renamed to intel_uc_fw. The 'guc' or 'guc_fw'
prefix of its members is either renamed to 'uc' or removed for the
same purpose.

Signed-off-by: Alex Dai 
Signed-off-by: Peter Antoine 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  12 +--
 drivers/gpu/drm/i915/intel_guc.h|  39 +++
 drivers/gpu/drm/i915/intel_guc_loader.c | 181 +---
 3 files changed, 122 insertions(+), 110 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e3377ab..ec667f3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2410,7 +2410,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 {
struct drm_info_node *node = m->private;
struct drm_i915_private *dev_priv = node->minor->dev->dev_private;
-   struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
+   struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw;
u32 tmp, i;
 
if (!HAS_GUC_UCODE(dev_priv->dev))
@@ -2418,15 +2418,15 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 
seq_printf(m, "GuC firmware status:\n");
seq_printf(m, "\tpath: %s\n",
-   guc_fw->guc_fw_path);
+   guc_fw->uc_fw_path);
seq_printf(m, "\tfetch: %s\n",
-   intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status));
+   intel_uc_fw_status_repr(guc_fw->fetch_status));
seq_printf(m, "\tload: %s\n",
-   intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
+   intel_uc_fw_status_repr(guc_fw->load_status));
seq_printf(m, "\tversion wanted: %d.%d\n",
-   guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted);
+   guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted);
seq_printf(m, "\tversion found: %d.%d\n",
-   guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found);
+   guc_fw->major_ver_found, guc_fw->minor_ver_found);
seq_printf(m, "\theader: offset is %d; size = %d\n",
guc_fw->header_offset, guc_fw->header_size);
seq_printf(m, "\tuCode: offset is %d; size = %d\n",
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 045b149..2324677 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -52,29 +52,29 @@ struct i915_guc_client {
int retcode;
 };
 
-enum intel_guc_fw_status {
-   GUC_FIRMWARE_FAIL = -1,
-   GUC_FIRMWARE_NONE = 0,
-   GUC_FIRMWARE_PENDING,
-   GUC_FIRMWARE_SUCCESS
+enum intel_uc_fw_status {
+   UC_FIRMWARE_FAIL = -1,
+   UC_FIRMWARE_NONE = 0,
+   UC_FIRMWARE_PENDING,
+   UC_FIRMWARE_SUCCESS
 };
 
 /*
  * This structure encapsulates all the data needed during the process
  * of fetching, caching, and loading the firmware image into the GuC.
  */
-struct intel_guc_fw {
-   struct drm_device * guc_dev;
-   const char *guc_fw_path;
-   size_t  guc_fw_size;
-   struct drm_i915_gem_object *guc_fw_obj;
-   enum intel_guc_fw_statusguc_fw_fetch_status;
-   enum intel_guc_fw_statusguc_fw_load_status;
-
-   uint16_tguc_fw_major_wanted;
-   uint16_tguc_fw_minor_wanted;
-   uint16_tguc_fw_major_found;
-   uint16_tguc_fw_minor_found;
+struct intel_uc_fw {
+   struct drm_device * uc_dev;
+   const char *uc_fw_path;
+   size_t  uc_fw_size;
+   struct drm_i915_gem_object *uc_fw_obj;
+   enum intel_uc_fw_status fetch_status;
+   enum intel_uc_fw_status load_status;
+
+   uint16_t major_ver_wanted;
+   uint16_t minor_ver_wanted;
+   uint16_t major_ver_found;
+   uint16_t minor_ver_found;
 
uint32_t header_size;
uint32_t header_offset;
@@ -85,7 +85,7 @@ struct intel_guc_fw {
 };
 
 struct intel_guc {
-   struct intel_guc_fw guc_fw;
+   struct intel_uc_fw guc_fw;
uint32_t log_flags;
struct drm_i915_gem_object *log_obj;
 
@@ -114,9 +114,10 @@ struct intel_guc {
 extern void intel_guc_ucode_init(struct drm_device *dev);
 extern int intel_guc_ucode_load(struct drm_device *dev);
 extern void intel_guc_ucode_fini(struct drm_device *dev);
-extern const char *intel_guc_fw_status_repr(enum intel_guc_fw_status status);
+extern const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status);
 extern int intel_guc_suspend(struct drm_device *dev);
 extern int intel_guc_resume(struct drm_device 

[Intel-gfx] [PATCH] drm/i915: reboot notifier delay for eDP panels

2016-01-11 Thread clinton . a . taylor
From: Clint Taylor 

Add a reboot notifier for all platforms. This guarantees T12 delay
compliance across reboot cycles when the pre-OS environment enables the
panel within 500ms.

Signed-off-by: Clint Taylor 
---
 drivers/gpu/drm/i915/intel_dp.c |   11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 796e3d3..dbbd27a 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -126,6 +126,7 @@ static struct intel_dp *intel_attached_dp(struct drm_connector *connector)
 static void intel_dp_link_down(struct intel_dp *intel_dp);
 static bool edp_panel_vdd_on(struct intel_dp *intel_dp);
 static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync);
+static void edp_panel_off(struct intel_dp *intel_dp);
 static void vlv_init_panel_power_sequencer(struct intel_dp *intel_dp);
 static void vlv_steal_power_sequencer(struct drm_device *dev,
  enum pipe pipe);
@@ -596,6 +597,10 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code,
I915_WRITE(pp_ctrl_reg, PANEL_UNLOCK_REGS | PANEL_POWER_OFF);
msleep(intel_dp->panel_power_cycle_delay);
}
+   else
+   {
+   edp_panel_off(intel_dp);
+   }
 
pps_unlock(intel_dp);
 
@@ -5796,10 +5801,10 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
}
	mutex_unlock(&dev->mode_config.mutex);
 
-   if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
-   intel_dp->edp_notifier.notifier_call = edp_notify_handler;
-   register_reboot_notifier(&intel_dp->edp_notifier);
+   intel_dp->edp_notifier.notifier_call = edp_notify_handler;
+   register_reboot_notifier(&intel_dp->edp_notifier);
 
+   if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
/*
 * Figure out the current pipe for the initial backlight setup.
 * If the current pipe isn't valid, try the PPS pipe, and if that
-- 
1.7.9.5
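
For reviewers unfamiliar with the notifier API used above: registering a
reboot notifier follows the standard kernel pattern below. This is a minimal
sketch; the handler name and body here are illustrative, the real handler in
the patch is edp_notify_handler.

	#include <linux/notifier.h>
	#include <linux/reboot.h>

	/* Called on shutdown/reboot, giving the driver a chance to run the
	 * panel power-off sequence so the T12 delay is honoured. */
	static int edp_reboot_handler(struct notifier_block *nb,
				      unsigned long code, void *unused)
	{
		/* e.g. power the panel off here, as edp_panel_off() does */
		return NOTIFY_DONE;
	}

	static struct notifier_block edp_reboot_nb = {
		.notifier_call = edp_reboot_handler,
	};

	/* register_reboot_notifier(&edp_reboot_nb) at init,
	 * unregister_reboot_notifier(&edp_reboot_nb) at teardown. */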



Re: [Intel-gfx] [PATCH v4 10/38] drm/i915: Force MMIO flips when scheduler enabled

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 06:42:39PM +, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> MMIO flips are the preferred mechanism now but more importantly,

Says who?

> pipe
> based flips cause issues for the scheduler. Specifically, submitting
> work to the rings around the side of the scheduler could cause that
> work to be lost if the scheduler generates a pre-emption event on that
> ring.

That just says that you haven't designed for the ability to schedule a
flip into the scheduler, including handling the priority bump that might
be required to hit the deadline.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH 2/7] drm/i915: Removed now redundant parameter to i915_gem_request_completed()

2016-01-11 Thread Jesse Barnes
On 01/08/2016 10:47 AM, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> The change to the implementation of i915_gem_request_completed() means
> that the lazy coherency flag is no longer used. This can now be
> removed to simplify the interface.
> 
> For: VIZ-5190
> Signed-off-by: John Harrison 
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c  |  2 +-
>  drivers/gpu/drm/i915/i915_drv.h  |  3 +--
>  drivers/gpu/drm/i915/i915_gem.c  | 18 +-
>  drivers/gpu/drm/i915/intel_display.c |  2 +-
>  drivers/gpu/drm/i915/intel_pm.c  |  4 ++--
>  5 files changed, 14 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index af41e5c..b54d99e 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -601,7 +601,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
>  
> i915_gem_request_get_seqno(work->flip_queued_req),
>  dev_priv->next_seqno,
>  ring->get_seqno(ring, true),
> -   i915_gem_request_completed(work->flip_queued_req, true));
> +   i915_gem_request_completed(work->flip_queued_req));
>   } else
>   seq_printf(m, "Flip not associated with any ring\n");
>   seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index aa5cba7..caf7897 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2263,8 +2263,7 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
>  struct drm_i915_gem_request **req_out);
>  void i915_gem_request_cancel(struct drm_i915_gem_request *req);
>  
> -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
> -   bool lazy_coherency)
> +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
>  {
>   return fence_is_signaled(&req->fence);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1138990..93d2f32 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1165,7 +1165,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *req)
>  
>   timeout = jiffies + 1;
>   while (!need_resched()) {
> - if (i915_gem_request_completed(req, true))
> + if (i915_gem_request_completed(req))
>   return 0;
>  
>   if (time_after_eq(jiffies, timeout))
> @@ -1173,7 +1173,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *req)
>  
>   cpu_relax_lowlatency();
>   }
> - if (i915_gem_request_completed(req, false))
> + if (i915_gem_request_completed(req))
>   return 0;
>  
>   return -EAGAIN;
> @@ -1217,7 +1217,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   if (list_empty(&req->list))
>   return 0;
>  
> - if (i915_gem_request_completed(req, true))
> + if (i915_gem_request_completed(req))
>   return 0;
>  
>   timeout_expire = timeout ?
> @@ -1257,7 +1257,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   break;
>   }
>  
> - if (i915_gem_request_completed(req, false)) {
> + if (i915_gem_request_completed(req)) {
>   ret = 0;
>   break;
>   }
> @@ -2759,7 +2759,7 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
>   struct drm_i915_gem_request *request;
>  
>   list_for_each_entry(request, &ring->request_list, list) {
> - if (i915_gem_request_completed(request, false))
> + if (i915_gem_request_completed(request))
>   continue;
>  
>   return request;
> @@ -2900,7 +2900,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
>  struct drm_i915_gem_request,
>  list);
>  
> - if (!i915_gem_request_completed(request, true))
> + if (!i915_gem_request_completed(request))
>   break;
>  
>   i915_gem_request_retire(request);
> @@ -2924,7 +2924,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
>   }
>  
>   if (unlikely(ring->trace_irq_req &&
> -  i915_gem_request_completed(ring->trace_irq_req, true))) {
> +  i915_gem_request_completed(ring->trace_irq_req))) {
>   ring->irq_put(ring);
>

[Intel-gfx] [PATCH 4/6] drm/i915/huc: Add HuC fw loading support

2016-01-11 Thread yu . dai
From: Alex Dai 

The HuC loading process is similar to GuC's. The intel_uc_fw_fetch()
function is used for both cases.

HuC loading must happen before GuC loading. The WOPCM setting must
be done early, before loading either of them.

Signed-off-by: Alex Dai 
Signed-off-by: Peter Antoine 
---
 drivers/gpu/drm/i915/Makefile   |   1 +
 drivers/gpu/drm/i915/i915_dma.c |   3 +
 drivers/gpu/drm/i915/i915_drv.h |   3 +
 drivers/gpu/drm/i915/i915_gem.c |   7 +
 drivers/gpu/drm/i915/i915_guc_reg.h |   3 +
 drivers/gpu/drm/i915/intel_guc_loader.c |   7 +-
 drivers/gpu/drm/i915/intel_huc.h|  44 ++
 drivers/gpu/drm/i915/intel_huc_loader.c | 262 
 8 files changed, 325 insertions(+), 5 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_huc.h
 create mode 100644 drivers/gpu/drm/i915/intel_huc_loader.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0851de07..693cc8f 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -42,6 +42,7 @@ i915-y += i915_cmd_parser.o \
 
 # general-purpose microcontroller (GuC) support
 i915-y += intel_guc_loader.o \
+ intel_huc_loader.o \
  i915_guc_submission.o
 
 # autogenerated null render state
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 44a896c..1b99dd3 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -410,6 +410,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 * working irqs for e.g. gmbus and dp aux transfers. */
intel_modeset_init(dev);
 
+   intel_huc_ucode_init(dev);
intel_guc_ucode_init(dev);
 
ret = i915_gem_init(dev);
@@ -453,6 +454,7 @@ cleanup_gem:
i915_gem_context_fini(dev);
	mutex_unlock(&dev->struct_mutex);
 cleanup_irq:
+   intel_huc_ucode_fini(dev);
intel_guc_ucode_fini(dev);
drm_irq_uninstall(dev);
 cleanup_gem_stolen:
@@ -1194,6 +1196,7 @@ int i915_driver_unload(struct drm_device *dev)
/* Flush any outstanding unpin_work. */
flush_workqueue(dev_priv->wq);
 
+   intel_huc_ucode_fini(dev);
intel_guc_ucode_fini(dev);
	mutex_lock(&dev->struct_mutex);
i915_gem_cleanup_ringbuffer(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 747d2d8..15e9e59 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -53,6 +53,7 @@
 #include 
 #include 
 #include "intel_guc.h"
+#include "intel_huc.h"
 
 /* General customization:
  */
@@ -1699,6 +1700,7 @@ struct drm_i915_private {
 
struct i915_virtual_gpu vgpu;
 
+   struct intel_huc huc;
struct intel_guc guc;
 
struct intel_csr csr;
@@ -2629,6 +2631,7 @@ struct drm_i915_cmd_table {
 
 #define HAS_GUC_UCODE(dev) (IS_GEN9(dev) && !IS_KABYLAKE(dev))
 #define HAS_GUC_SCHED(dev) (IS_GEN9(dev) && !IS_KABYLAKE(dev))
+#define HAS_HUC_UCODE(dev) (IS_GEN9(dev) && !IS_KABYLAKE(dev))
 
 #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \
INTEL_INFO(dev)->gen >= 8)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6c60e04..75de2eb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4849,6 +4849,13 @@ i915_gem_init_hw(struct drm_device *dev)
 
/* We can't enable contexts until all firmware is loaded */
if (HAS_GUC_UCODE(dev)) {
+   /* init WOPCM */
+   I915_WRITE(GUC_WOPCM_SIZE, GUC_WOPCM_SIZE_VALUE);
+   I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE |
+   HUC_LOADING_AGENT_GUC);
+
+   intel_huc_ucode_load(dev);
+
ret = intel_guc_ucode_load(dev);
if (ret) {
DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h
index e4ba582..8d27c09 100644
--- a/drivers/gpu/drm/i915/i915_guc_reg.h
+++ b/drivers/gpu/drm/i915/i915_guc_reg.h
@@ -52,9 +52,12 @@
 #define   DMA_ADDRESS_SPACE_GTT  (8 << 16)
 #define DMA_COPY_SIZE  _MMIO(0xc310)
 #define DMA_CTRL   _MMIO(0xc314)
+#define   HUC_UKERNEL(1<<9)
 #define   UOS_MOVE   (1<<4)
 #define   START_DMA  (1<<0)
 #define DMA_GUC_WOPCM_OFFSET   _MMIO(0xc340)
+#define   HUC_LOADING_AGENT_VCR  (0<<1)
+#define   HUC_LOADING_AGENT_GUC  (1<<1)
 #define   GUC_WOPCM_OFFSET_VALUE 0x8   /* 512KB */
 #define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4)
 
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index a704d80..5832792 100644
--- 

[Intel-gfx] [PATCH 2/6] drm/i915/guc: Bypass fw loading gracefully if GuC is not supported

2016-01-11 Thread yu . dai
From: Alex Dai 

This is to rework previous patch:

commit 9f9e539f90bcecfdc7b3679d337b7a62d4313205
Author: Daniel Vetter 
Date:   Fri Oct 23 11:10:59 2015 +0200

drm/i915: Shut up GuC errors when it's disabled

There is a case where GuC loading is needed even when GuC submission
is disabled. For example, HuC loading and authentication require
GuC to be loaded regardless. With this patch, the driver will try to
load the firmware only when explicitly asked to, by specifying a fw
name and version. All other cases are treated as UC_FIRMWARE_NONE
and the loading is bypassed silently.

Signed-off-by: Alex Dai 
---
 drivers/gpu/drm/i915/intel_guc_loader.c | 32 +++-
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 70dbeb5..e11e1e8 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -594,39 +594,29 @@ void intel_guc_ucode_init(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw;
-   const char *fw_path;
+   const char *fw_path = NULL;
+
+   guc_fw->uc_dev = dev;
+   guc_fw->uc_fw_path = NULL;
+   guc_fw->fetch_status = UC_FIRMWARE_NONE;
+   guc_fw->load_status = UC_FIRMWARE_NONE;
 
if (!HAS_GUC_SCHED(dev))
i915.enable_guc_submission = false;
 
-   if (!HAS_GUC_UCODE(dev)) {
-   fw_path = NULL;
-   } else if (IS_SKYLAKE(dev)) {
+   if (!HAS_GUC_UCODE(dev))
+   return;
+
+   if (IS_SKYLAKE(dev)) {
fw_path = I915_SKL_GUC_UCODE;
guc_fw->major_ver_wanted = 4;
guc_fw->minor_ver_wanted = 3;
-   } else {
-   i915.enable_guc_submission = false;
-   fw_path = "";   /* unknown device */
}
 
-   if (!i915.enable_guc_submission)
-   return;
-
-   guc_fw->uc_dev = dev;
-   guc_fw->uc_fw_path = fw_path;
-   guc_fw->fetch_status = UC_FIRMWARE_NONE;
-   guc_fw->load_status = UC_FIRMWARE_NONE;
-
if (fw_path == NULL)
return;
 
-   if (*fw_path == '\0') {
-   DRM_ERROR("No GuC firmware known for this platform\n");
-   guc_fw->fetch_status = UC_FIRMWARE_FAIL;
-   return;
-   }
-
+   guc_fw->uc_fw_path = fw_path;
guc_fw->fetch_status = UC_FIRMWARE_PENDING;
DRM_DEBUG_DRIVER("GuC firmware pending, path %s\n", fw_path);
intel_uc_fw_fetch(dev, guc_fw);
-- 
2.5.0



[Intel-gfx] [PATCH 3/6] drm/i915/huc: Unified css_header struct for GuC and HuC

2016-01-11 Thread yu . dai
From: Alex Dai 

The HuC firmware css header has almost exactly the same definition as
the GuC firmware's, except for the sw_version. Also, add a new member
fw_type to intel_uc_fw to indicate what kind of fw it is, so the loader
will pull the right sw_version from the header.

Signed-off-by: Alex Dai 
Signed-off-by: Peter Antoine 
---
 drivers/gpu/drm/i915/intel_guc.h|  4 
 drivers/gpu/drm/i915/intel_guc_fwif.h   | 16 ++---
 drivers/gpu/drm/i915/intel_guc_loader.c | 42 +
 3 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 2324677..45f4fd3 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -59,6 +59,9 @@ enum intel_uc_fw_status {
UC_FIRMWARE_SUCCESS
 };
 
+#define UC_FW_TYPE_GUC 0
+#define UC_FW_TYPE_HUC 1
+
 /*
  * This structure encapsulates all the data needed during the process
  * of fetching, caching, and loading the firmware image into the GuC.
@@ -76,6 +79,7 @@ struct intel_uc_fw {
uint16_t major_ver_found;
uint16_t minor_ver_found;
 
+   uint32_t fw_type;
uint32_t header_size;
uint32_t header_offset;
uint32_t rsa_size;
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index b4632f0..f8846d6 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -146,7 +146,7 @@
  * The GuC firmware layout looks like this:
  *
  * +---+
- * |guc_css_header |
+ * | uc_css_header |
  * | contains major/minor version  |
  * +---+
  * | uCode |
@@ -172,9 +172,16 @@
  * 3. Length info of each component can be found in header, in dwords.
  * 4. Modulus and exponent key are not required by driver. They may not appear
  * in fw. So driver will load a truncated firmware in this case.
+ *
+ * The HuC firmware layout is the same as the GuC firmware's.
+ *
+ * The HuC firmware css header is different; the only difference is where the
+ * version information is saved. The uc_css_header is unified to support both.
+ * The driver should get the HuC version from uc_css_header.huc_sw_version and
+ * the GuC version from uc_css_header.guc_sw_version.
  */
 
-struct guc_css_header {
+struct uc_css_header {
uint32_t module_type;
/* header_size includes all non-uCode bits, including css_header, rsa
 * key, modulus key and exponent data. */
@@ -205,7 +212,10 @@ struct guc_css_header {
 
char username[8];
char buildnumber[12];
-   uint32_t device_id;
+   union {
+   uint32_t device_id;
+   uint32_t huc_sw_version;
+   };
uint32_t guc_sw_version;
uint32_t prod_preprod_fw;
uint32_t reserved[12];
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index e11e1e8..a704d80 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -464,7 +464,7 @@ void intel_uc_fw_fetch(struct drm_device *dev, struct intel_uc_fw *uc_fw)
 {
struct drm_i915_gem_object *obj;
const struct firmware *fw;
-   struct guc_css_header *css;
+   struct uc_css_header *css;
size_t size;
int err;
 
@@ -481,19 +481,19 @@ void intel_uc_fw_fetch(struct drm_device *dev, struct intel_uc_fw *uc_fw)
uc_fw->uc_fw_path, fw);
 
/* Check the size of the blob before examining buffer contents */
-   if (fw->size < sizeof(struct guc_css_header)) {
+   if (fw->size < sizeof(struct uc_css_header)) {
DRM_ERROR("Firmware header is missing\n");
goto fail;
}
 
-   css = (struct guc_css_header *)fw->data;
+   css = (struct uc_css_header *)fw->data;
 
/* Firmware bits always start from header */
uc_fw->header_offset = 0;
uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw -
css->key_size_dw - css->exponent_size_dw) * sizeof(u32);
 
-   if (uc_fw->header_size != sizeof(struct guc_css_header)) {
+   if (uc_fw->header_size != sizeof(struct uc_css_header)) {
DRM_ERROR("CSS header definition mismatch\n");
goto fail;
}
@@ -517,23 +517,35 @@ void intel_uc_fw_fetch(struct drm_device *dev, struct intel_uc_fw *uc_fw)
goto fail;
}
 
-   /* Header and uCode will be loaded to WOPCM. Size of the two. */
-   size = uc_fw->header_size + uc_fw->ucode_size;
-
-   /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */
-   if (size > GUC_WOPCM_SIZE_VALUE - 0x8000) {
-   DRM_ERROR("Firmware is too large to fit in WOPCM\n");
-   goto fail;
-   }
-
/*
 
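
The message is truncated above, but the version-selection step the commit
message describes amounts to choosing the css header field by fw_type. A
sketch of that logic follows (illustrative, not the missing hunk; the 16.16
major/minor split of the version dword is an assumption):

	/* Pull the right sw_version out of the unified css header. */
	switch (uc_fw->fw_type) {
	case UC_FW_TYPE_GUC:
		uc_fw->major_ver_found = css->guc_sw_version >> 16;
		uc_fw->minor_ver_found = css->guc_sw_version & 0xFFFF;
		break;
	case UC_FW_TYPE_HUC:
		uc_fw->major_ver_found = css->huc_sw_version >> 16;
		uc_fw->minor_ver_found = css->huc_sw_version & 0xFFFF;
		break;
	}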

Re: [Intel-gfx] [PATCH 04/13] drm/i915: Fail engine initialization if LRCA is incorrectly aligned

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 04:02:09PM +, Dave Gordon wrote:
> IIRC the original version of this WARN (in intel_lr_context_descriptor()
> above) was added with the GuC submission code, because the context
> descriptor as used in execlist code is a 64-bit value, but the GuC
> requires that all the unique stuff fits in those 20 unmasked bits of
> a 32-bit value, with the low 12 bits being used for flags. So we
> wanted to check that we never got a context ID that couldn't be
> pruned down to just those 20 bits without losing information. It's
> never been seen to happen since GuC development finished, so we can
> reasonably lose the check now.

I am missing something here as the GuC doesn't use the high 32bits of
the context descriptor, i.e. it never touches the lrca portion?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH 5/7] drm/i915: Interrupt driven fences

2016-01-11 Thread Jesse Barnes
On 01/11/2016 11:10 AM, John Harrison wrote:
> On 08/01/2016 22:46, Chris Wilson wrote:
>> On Fri, Jan 08, 2016 at 06:47:26PM +, john.c.harri...@intel.com wrote:
>>> +void i915_gem_request_notify(struct intel_engine_cs *ring, bool fence_locked)
>>> +{
>>> +struct drm_i915_gem_request *req, *req_next;
>>> +unsigned long flags;
>>>   u32 seqno;
>>>   -seqno = req->ring->get_seqno(req->ring, false/*lazy_coherency*/);
>>> +if (list_empty(&ring->fence_signal_list))
>>> +return;
>>> +
>>> +if (!fence_locked)
>>> +spin_lock_irqsave(&ring->fence_lock, flags);
>>>   -return i915_seqno_passed(seqno, req->seqno);
>>> +seqno = ring->get_seqno(ring, false);
>> We really don't want to do be doing the forcewake dance from inside the
>> interrupt handler. We made that mistake years ago.
>> -Chris
>>
> What forcewake dance? Nothing in the above code mentions force wake.

get_seqno() w/o lazy_coherency set will do a POSTING_READ of the ring active 
head, which goes through our crazy read function and does forcewake.  So we may 
need something smarter here.

Jesse



Re: [Intel-gfx] [PATCH v4 06/38] drm/i915: Re-instate request->uniq because it is extremely useful

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 06:42:35PM +, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> The seqno value cannot always be used when debugging issues via trace
> points. This is because it can be reset back to start, especially
> during TDR type tests. Also, when the scheduler arrives the seqno is
> only valid while a given request is executing on the hardware. While
> the request is simply queued waiting for submission, its seqno value
> will be zero (meaning invalid).

Even with per-context seqno that can be assigned before execution as we
know that requests within a context cannot be reordered?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH] drm/i915/skl: Use proper plane dimensions for DDB and WM calculations

2016-01-11 Thread Matt Roper
On Mon, Jan 11, 2016 at 09:31:03PM +0200, Ville Syrjälä wrote:
> On Mon, Dec 21, 2015 at 07:31:17AM -0800, Matt Roper wrote:
> > In commit
> > 
> > commit 024c9045221fe45482863c47c4b4c47d37f97cbf
> > Author: Matt Roper 
> > Date:   Thu Sep 24 15:53:11 2015 -0700
> > 
> > drm/i915/skl: Eliminate usage of pipe_wm_parameters from SKL-style WM (v4)
> > 
> > I fumbled while converting the dimensions stored in the plane_parameters
> > structure to the values stored in plane state and accidentally replaced
> > the plane dimensions with the pipe dimensions in both the DDB allocation
> > function and the WM calculation function.  On the DDB side this is
> > harmless since we effectively treat all of our non-cursor planes as
> > full-screen which may not be optimal, but generally won't cause any
> > problems either (and in 99% of the cases where there's no sprite plane
> > usage or primary plane windowing, there's no effect at all).  On the WM
> > calculation side there's more potential for this fumble to cause actual
> > problems since cursors also get miscalculated.
> > 
> > Cc: Ville Syrjälä 
> > Cc: "Kondapally, Kalyan" 
> > Cc: Radhakrishna Sripada 
> > Signed-off-by: Matt Roper 
> > ---
> >  drivers/gpu/drm/i915/intel_pm.c | 24 +---
> >  1 file changed, 13 insertions(+), 11 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> > index 8d0d6f5..f4d4cc7 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -2845,25 +2845,22 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
> >  const struct drm_plane_state *pstate,
> >  int y)
> >  {
> > -   struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
> > +   struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
> > struct drm_framebuffer *fb = pstate->fb;
> > +   unsigned w = drm_rect_width(&intel_pstate->dst);
> > +   unsigned h = drm_rect_height(&intel_pstate->dst);
> 
> I think you're supposed to use the src dimensions in most places.

Hmm, just went back to double check the bspec and if I'm interpreting it
correctly, it looks like we actually need to use the larger of the two:
"Down scaling effectively increases the pixel rate. Up scaling does not
reduce the pixel rate."

Thanks for pointing that out; I'll send an updated patch.



Matt
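
In code terms, Matt's reading of the bspec ("use the larger of the two")
would look roughly like the sketch below. Names follow the patch; the 16.16
fixed-point src rectangle is an assumption about the plane state layout, and
this is not the updated patch itself:

	/* Downscaling raises the effective pixel rate and upscaling does not
	 * lower it, so size the plane by the larger of src and dst. */
	unsigned int src_w = drm_rect_width(&intel_pstate->src) >> 16;
	unsigned int dst_w = drm_rect_width(&intel_pstate->dst);
	unsigned int w = max(src_w, dst_w);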

> 
> >  
> > /* for planar format */
> > if (fb->pixel_format == DRM_FORMAT_NV12) {
> > if (y)  /* y-plane data rate */
> > -   return intel_crtc->config->pipe_src_w *
> > -   intel_crtc->config->pipe_src_h *
> > -   drm_format_plane_cpp(fb->pixel_format, 0);
> > +   return w * h * drm_format_plane_cpp(fb->pixel_format, 0);
> > else/* uv-plane data rate */
> > -   return (intel_crtc->config->pipe_src_w/2) *
> > -   (intel_crtc->config->pipe_src_h/2) *
> > +   return (w/2) * (h/2) *
> > drm_format_plane_cpp(fb->pixel_format, 1);
> > }
> >  
> > /* for packed formats */
> > -   return intel_crtc->config->pipe_src_w *
> > -   intel_crtc->config->pipe_src_h *
> > -   drm_format_plane_cpp(fb->pixel_format, 0);
> > +   return w * h * drm_format_plane_cpp(fb->pixel_format, 0);
> >  }
> >  
> >  /*
> > @@ -2960,6 +2957,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
> >  * FIXME: we may not allocate every single block here.
> >  */
> > total_data_rate = skl_get_total_relative_data_rate(cstate);
> > +   if (!total_data_rate)
> > +   return;
> >  
> > start = alloc->start;
> > for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
> > @@ -3093,12 +3092,15 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
> >  {
> > struct drm_plane *plane = &intel_plane->base;
> > struct drm_framebuffer *fb = plane->state->fb;
> > +   struct intel_plane_state *intel_pstate =
> > +   to_intel_plane_state(plane->state);
> > uint32_t latency = dev_priv->wm.skl_latency[level];
> > uint32_t method1, method2;
> > uint32_t plane_bytes_per_line, plane_blocks_per_line;
> > uint32_t res_blocks, res_lines;
> > uint32_t selected_result;
> > uint8_t bytes_per_pixel;
> > +   unsigned w = drm_rect_width(&intel_pstate->dst);
> >  
> > if (latency == 0 || !cstate->base.active || !fb)
> > return false;
> > @@ -3109,12 +3111,12 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
> >  latency);
> > method2 = skl_wm_method2(skl_pipe_pixel_rate(cstate),
> >  

Re: [Intel-gfx] [PATCH 073/190] drm/i915: Introduce i915_gem_active for request tracking

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 05:32:27PM +, Tvrtko Ursulin wrote:
> >+struct i915_gem_active {
> >+struct drm_i915_gem_request *request;
> >+};
> >+
> >+static inline void
> >+i915_gem_request_mark_active(struct drm_i915_gem_request *request,
> >+ struct i915_gem_active *active)
> >+{
> >+i915_gem_request_assign(&active->request, request);
> >+}
> 
> This function name bothers me since I think the name is misleading
> and unintuitive. It is not marking a request as active but
> associating it with the second data structure.
> 
> Maybe i915_gem_request_move_to_active to keep the mental association
> with the well established vma_move_to_active ?

That's backwards imo, since it is the i915_gem_active that gets added to
the request. (Or at least will be.)
 
> Maybe struct i915_gem_active could also be better called
> i915_gem_active_list ?

It's not a list but a node. I started with drm_i915_gem_request_node,
but that's too unwieldy and I felt even more confusing.

> It is not ideal because of the future little reversal of who is in
> who's list, so maybe there is something even better. But I think an
> intuitive name is really important for code clarity and
> maintainability.

In userspace, I have the request (which is actually a userspace fence
itself) containing a list of fences (that are identical to i915_gem_active,
they track which request contains the reference and a callback for
signalling) and those fences have a direct correspondence to,
ARB_sync_objects, for example. But we already have plenty of conflict
regarding the term fence, so that's no go.

i915_gem_active, for me, made the association with the active-reference
tracking that is ingrained into the objects and beyond. I quite like the
connection with GPU activity.

i915_gem_retire_notifier? Hmm, I still like how
i915_gem_active.request != NULL is quite self-descriptive.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
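
For readers following the naming debate: the intent is that an
i915_gem_active node embedded in a tracking structure replaces a bare
request pointer. A hypothetical usage sketch (the tracker and its fields are
made up for illustration, only the quoted helpers are from the patch):

	struct my_tracker {
		struct i915_gem_active last_read;
		struct i915_gem_active last_write;
	};

	static void track_write(struct drm_i915_gem_request *req,
				struct my_tracker *t)
	{
		/* associate the node with the request being constructed */
		i915_gem_request_mark_active(req, &t->last_write);
	}

	/* "still busy?" then becomes the self-descriptive check Chris
	 * mentions: t->last_write.request != NULL */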


Re: [Intel-gfx] [PATCH 3/7] drm/i915: Add per context timelines to fence object

2016-01-11 Thread Jesse Barnes
On 01/11/2016 11:03 AM, John Harrison wrote:
> On 08/01/2016 22:05, Chris Wilson wrote:
>> On Fri, Jan 08, 2016 at 06:47:24PM +, john.c.harri...@intel.com wrote:
>>> From: John Harrison 
>>>
>>> The fence object used inside the request structure requires a sequence
>>> number. Although this is not used by the i915 driver itself, it could
>>> potentially be used by non-i915 code if the fence is passed outside of
>>> the driver. This is the intention as it allows external kernel drivers
>>> and user applications to wait on batch buffer completion
> asynchronously via the dma-buf fence API.
>> That doesn't make any sense as they are not limited by a single
>> timeline.
> I don't understand what you mean. Who is not limited by a single timeline?  
> The point is that the current seqno values cannot be used as there is no 
> guarantee that they will increment globally once things like a scheduler and 
> pre-emption arrive. Whereas, the fence internal implementation makes various 
> assumptions about the linearity of the timeline. External users do not want 
> to care about timelines or seqnos at all, they just want the fence API to 
> work as documented.
> 
>>
>>> To ensure that such external users are not confused by strange things
>>> happening with the seqno, this patch adds in a per context timeline
>>> that can provide a guaranteed in-order seqno value for the fence. This
>>> is safe because the scheduler will not re-order batch buffers within a
>>> context - they are considered to be mutually dependent.
>> You haven't added per-context breadcrumbs. What we need for being able
>> to execute requests from parallel timelines, but with requests within a
>> timeline being ordered, is a per-context page where we can emit the
>> per-context issued breadcrumb. Then instead of looking up the current
>> HW seqno in a global page, the request just looks at the current context
>> HW seqno in the context seq, just
>> i915_seqno_passed(*req->p_context_seqno, req->seqno).
> This patch is not attempting to implement per context seqno values. That can 
> be done as future work. This patch is doing the simplest, least invasive 
> implementation in order to make external fences work.

Right.  I think we want to move to per-context seqnos, but we don't have to do 
it before this work lands.  It should be easier to do it after the rest of 
these bits land in fact, since seqno handling will be well encapsulated aiui.

Jesse
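
For reference, the per-context breadcrumb test Chris sketches would replace
the global status-page lookup with something along these lines (hypothetical
field names, following his i915_seqno_passed() snippet; this is a sketch of
the suggestion, not code from the series):

	/* Completion test against a per-context seqno page rather than the
	 * single global hardware status page. */
	static bool request_completed_per_ctx(struct drm_i915_gem_request *req)
	{
		/* p_context_seqno points into this context's seqno page */
		return i915_seqno_passed(READ_ONCE(*req->p_context_seqno),
					 req->seqno);
	}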



Re: [Intel-gfx] [PATCH v4 18/38] drm/i915: Added scheduler support to __wait_request() calls

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 06:42:47PM +, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> The scheduler can cause batch buffers, and hence requests, to be
> submitted to the ring out of order and asynchronously to their
> submission to the driver. Thus at the point of waiting for the
> completion of a given request, it is not even guaranteed that the
> request has actually been sent to the hardware yet. Even it is has
> been sent, it is possible that it could be pre-empted and thus
> 'unsent'.
> 
> This means that it is necessary to be able to submit requests to the
> hardware during the wait call itself. Unfortunately, while some
> callers of __wait_request() release the mutex lock first, others do
> not (and apparently can not). Hence there is the ability to deadlock
> as the wait stalls for submission but the asynchronous submission is
> stalled for the mutex lock.

That is a non sequitur. Do you mean to say that unless we take action
inside GEM, the request will never be submitted to hardware by the
scheduler?
 
> This change hooks the scheduler in to the __wait_request() code to
> ensure correct behaviour. That is, flush the target batch buffer
> through to the hardware and do not deadlock waiting for something that
> cannot currently be submitted.

The dependencies are known during request construction, how could we
generate a cyclic graph? The scheduler itself does not need the
struct_mutex (other than the buggy code), so GEM holding the
struct_mutex will not prevent the scheduler from eventually submitting
the request we are waiting for. So as far as I can see, you are papering
over your own bugs.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH v4 21/38] drm/i915: Added a module parameter for allowing scheduler overrides

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 06:42:50PM +, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> It can be useful to be able to disable certain features (e.g. the
> entire scheduler) via a module parameter for debugging purposes. A
> parameter has the advantage of not being a compile time switch but
> without implying that it can be changed dynamically at runtime.

> +module_param_named(scheduler_override, i915.scheduler_override, int, 0600);
> +MODULE_PARM_DESC(scheduler_override, "Scheduler override mask (0 = none, 1 = direct submission [default])");

Is this consistent with the other *enable* booleans?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH 1/7] drm/i915: Convert requests to use struct fence

2016-01-11 Thread Jesse Barnes
On 01/11/2016 11:03 AM, John Harrison wrote:
> On 08/01/2016 21:59, Chris Wilson wrote:
>> On Fri, Jan 08, 2016 at 06:47:22PM +, john.c.harri...@intel.com wrote:
>>> From: John Harrison 
>>>
>>> There is a construct in the linux kernel called 'struct fence' that is
>>> intended to keep track of work that is executed on hardware. I.e. it
>>> solves the basic problem that the drivers 'struct
>>> drm_i915_gem_request' is trying to address. The request structure does
>>> quite a lot more than simply track the execution progress so is very
>>> definitely still required. However, the basic completion status side
>>> could be updated to use the ready made fence implementation and gain
>>> all the advantages that provides.
>>>
>>> This patch makes the first step of integrating a struct fence into the
>>> request. It replaces the explicit reference count with that of the
>>> fence. It also replaces the 'is completed' test with the fence's
>>> equivalent. Currently, that simply chains on to the original request
>>> implementation. A future patch will improve this.
>> But this forces everyone to do the heavyweight polling until the request
>> is completed?
> Not sure what you mean by heavy weight polling. And as described, this is 
> only an intermediate step.

Just the lazy_coherency removal maybe?  Chris?

Jesse


Re: [Intel-gfx] [PATCH v4 08/38] drm/i915: Prepare retire_requests to handle out-of-order seqnos

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 06:42:37PM +, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> A major point of the GPU scheduler is that it re-orders batch buffers
> after they have been submitted to the driver. This leads to requests
> completing out of order. In turn, this means that the retire
> processing can no longer assume that all completed entries are at the
> front of the list. Rather than attempting to re-order the request list
> on a regular basis, it is better to simply scan the entire list.

This is a major misstep. Just think in terms of per-context timelines,
and retirement order within those timelines being consistent.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH v4 04/38] drm/i915: Split i915_gem_do_execbuffer() in half

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 06:42:33PM +, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> Split the execbuffer() function in half. The first half collects and
> validates all the information required to process the batch buffer. It
> also does all the object pinning, relocations, active list management,
> etc - basically anything that must be done upfront before the IOCTL
> returns and allows the user land side to start changing/freeing
> things. The second half does the actual ring submission.

I don't get this at all. The point of requests is that GEM constructed a
request, which could be used to pass along all the implicit GEM
synchronisation points and the explict ones, along with the ringbuffer
to execute, to an engine that could then submit it. For legacy, the request
was inline and so added immediately to the ring (but that is an
implementation detail, there is nothing stopping us from using a chained
batch to implement a ring per context), for execlists the request is
queued for future execution. A scheduler was meant to sit in the middle
and determine the order in which requests were executed, but that should
be almost transparent to the high level code tracking the requests.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH v4 38/38] drm/i915: Allow scheduler to manage inter-ring object synchronisation

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 06:43:07PM +, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> The scheduler has always tracked batch buffer dependencies based on
> DRM object usage. This means that it will not submit a batch on one
> ring that has outstanding dependencies still executing on other rings.
> This is exactly the same synchronisation performed by
> i915_gem_object_sync() using hardware semaphores where available and
> CPU stalls where not (e.g. in execlist mode and/or on Gen8 hardware).
> 
> Unfortunately, when a batch buffer is submitted to the driver the
> _object_sync() call happens first. Thus in case where hardware
> semaphores are disabled, the driver has already stalled until the
> dependency has been resolved.

But this should just add the dependency to the request in the scheduler
callback for i915_gem_object_sync_to, or better renamed as
i915_gem_request_submit_after. Without a scheduler we can do the
optimisation of doing that work inline, with a scheduler we can just
track the dependency.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH v4 20/38] drm/i915: Added scheduler flush calls to ring throttle and idle functions

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 06:42:49PM +, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> When requesting that all GPU work is completed, it is now necessary to
> get the scheduler involved in order to flush out work that is queued
> and not yet submitted.

But why is this needed over and above waiting on the request? Why do we
actually need to flush the work as the scheduler will get to the request
eventually (one hopes!)?  Why the priority bump? That would not be
intended for either the idle (wait until everybody has finished) or
throttling.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH 3/7] drm/i915: Add per context timelines to fence object

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 02:47:33PM -0800, Jesse Barnes wrote:
> On 01/11/2016 11:03 AM, John Harrison wrote:
> > On 08/01/2016 22:05, Chris Wilson wrote:
> >> On Fri, Jan 08, 2016 at 06:47:24PM +, john.c.harri...@intel.com wrote:
> >>> From: John Harrison 
> >>>
> >>> The fence object used inside the request structure requires a sequence
> >>> number. Although this is not used by the i915 driver itself, it could
> >>> potentially be used by non-i915 code if the fence is passed outside of
> >>> the driver. This is the intention as it allows external kernel drivers
> >>> and user applications to wait on batch buffer completion
> >>> asynchronously via the dma-buff fence API.
> >> That doesn't make any sense as they are not limited by a single
> >> timeline.
> > I don't understand what you mean. Who is not limited by a single timeline?  
> > The point is that the current seqno values cannot be used as there is no 
> > guarantee that they will increment globally once things like a scheduler 
> > and pre-emption arrive. Whereas, the fence internal implementation makes 
> > various assumptions about the linearity of the timeline. External users do 
> > not want to care about timelines or seqnos at all, they just want the fence 
> > API to work as documented.
> > 
> >>
> >>> To ensure that such external users are not confused by strange things
> >>> happening with the seqno, this patch adds in a per context timeline
> >>> that can provide a guaranteed in-order seqno value for the fence. This
> >>> is safe because the scheduler will not re-order batch buffers within a
> >>> context - they are considered to be mutually dependent.
> >> You haven't added per-context breadcrumbs. What we need for being able
> >> to execute requests from parallel timelines, but with requests within a
> >> timeline being ordered, is a per-context page where we can emit the
> >> per-context issued breadcrumb. Then instead of looking up the current
> >> HW seqno in a global page, the request just looks at the current context
> >> HW seqno in the context seq, just
> >> i915_seqno_passed(*req->p_context_seqno, req->seqno).
> > This patch is not attempting to implement per context seqno values. That 
> > can be done as future work. This patch is doing the simplest, least 
> > invasive implementation in order to make external fences work.
> 
> Right.  I think we want to move to per-context seqnos, but we don't have to 
> do it before this work lands.  It should be easier to do it after the rest of 
> these bits land in fact, since seqno handling will be well encapsulated aiui.

This patch is irrelevent then. I think it is actually worse because it
is encapsulating a design detail that is fundamentally wrong.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH v4 00/38] GPU scheduler for i915 driver

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 06:42:29PM +, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> Implemented a batch buffer submission scheduler for the i915 DRM driver.

I've lost track of the number of patches that are a result of not having
per-context seqno and could be eliminated.
 
> The general theory of operation is that when batch buffers are
> submitted to the driver, the execbuffer() code assigns a unique seqno
> value and then packages up all the information required to execute the
> batch buffer at a later time. This package is given over to the
> scheduler which adds it to an internal node list. The scheduler also
> scans the list of objects associated with the batch buffer and
> compares them against the objects already in use by other buffers in
> the node list. If matches are found then the new batch buffer node is
> marked as being dependent upon the matching node. The same is done for
> the context object. The scheduler also bumps up the priority of such
> matching nodes on the grounds that the more dependencies a given batch
> buffer has the more important it is likely to be.

The implicit synchronisation rules for GEM are best left for GEM.
Through the existing mechansim for synchronising requests, you can also
gather the information required to compute the dependency graph of the
new request. Adding the explicit synchronisation can then be done at the
same juncture.

> The scheduler aims to have a given (tuneable) number of batch buffers
> in flight on the hardware at any given time. If fewer than this are
> currently executing when a new node is queued, then the node is passed
> straight through to the submit function. Otherwise it is simply added
> to the queue and the driver returns back to user land.
> 
> As each batch buffer completes, it raises an interrupt which wakes up
> the scheduler. Note that it is possible for multiple buffers to
> complete before the IRQ handler gets to run. Further, the seqno values
> of the individual buffers are not necessarily incrementing as the
> scheduler may have re-ordered their submission. However, the scheduler
> keeps the list of executing buffers in order of hardware submission.
> Thus it can scan through the list until a matching seqno is found and
> then mark all in flight nodes from that point on as completed.
> 
> A deferred work queue is also poked by the interrupt handler. When
> this wakes up it can do more involved processing such as actually
> removing completed nodes from the queue and freeing up the resources
> associated with them (internal memory allocations, DRM object
> references, context reference, etc.). The work handler also checks the
> in flight count and calls the submission code if a new slot has
> appeared.

No. Leave GEM code to GEM. Just handle scheduling of requests, avoid the
struct_mutex and let GEM tidy up after the requests it is tracking. Create
a kthread for the scheduler, long running tasks are not meant to be on the
system_wq. A kthread also allows you to set a rtpriority.
 
> When the scheduler's submit code is called, it scans the queued node
> list for the highest priority node that has no unmet dependencies.
> Note that the dependency calculation is complex as it must take
> inter-ring dependencies and potential preemptions into account. Note
> also that in the future this will be extended to include external
> dependencies such as the Android Native Sync file descriptors and/or
> the linux dma-buf synchronisation scheme.

(You can skip the note since it is just checking if a dependency is a
struct fence and whether that has been signalled, that is not any more
complex than the current request checking.)

> If a suitable node is found then it is sent to execbuff_final() for
> submission to the hardware. The in flight count is then re-checked and
> a new node popped from the list if appropriate.

That was the wrong callback to break up. You just wanted an
engine->submit_request(). But then if you look at execlists, you will
see a way to marry the two such that the scheduler has neglible overhead
above and beyond the already considerable overhead of execlistss. With
legacy, you will have to introduce the cost of interrupt driven
scheduling, but you can borrow an idea or two from execlists to mitigate
that somewhat (i.e. context switch interrupts rather than reusing the
user interrupt after every batch).
 
> The scheduler also allows high priority batch buffers (e.g. from a
> desktop compositor) to jump ahead of whatever is already running if
> the underlying hardware supports pre-emption. In this situation, any
> work that was pre-empted is returned to the queued list ready to be
> resubmitted when no more high priority work is outstanding.

You could actually demonstrate that in execlists without adding a full
blown scheduler.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
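
One mechanism from the cover letter worth illustrating is the completion
scan: because the in-flight list is kept in hardware-submission order, a
single seqno match bounds everything that has completed. A sketch under
assumed node and list names (oldest-first list; all names illustrative, not
from the series):

	struct sched_node {
		struct list_head link;
		u32 seqno;
		bool completed;
	};

	/* Walk the in-flight list in submission order; every node up to and
	 * including the one matching the completed seqno has finished. */
	static void mark_completed(struct list_head *inflight, u32 hw_seqno)
	{
		struct sched_node *node;

		list_for_each_entry(node, inflight, link) {
			node->completed = true;
			if (node->seqno == hw_seqno)
				break;	/* later nodes may still be running */
		}
	}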

Re: [Intel-gfx] [PATCH v4 12/38] drm/i915: Added scheduler hook into i915_gem_request_notify()

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 06:42:41PM +, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> The scheduler needs to know when requests have completed so that it
> can keep its own internal state up to date and can submit new requests
> to the hardware from its queue.

Why would you reuse the user interrupt rather than introduce a
context-switch interrupt using the pipe_notify/dword_notify (yes, it can
be done by fixing up the current code). In the case of execlists you
wouldn't even need to add another interrupt vector as you could just
overload the execlists submission routine. For legacy, this would at
least let you reduce the interrupt rate from per batch to per context
switch, and keep the logic separate for user request tracking.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 5/6] drm/i915/huc: Add debugfs for HuC loading status check

2016-01-11 Thread yu . dai
From: Alex Dai 

Add debugfs entry for HuC loading status check.

Signed-off-by: Alex Dai 
Signed-off-by: Peter Antoine 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index ec667f3..7676f56 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2406,6 +2406,37 @@ static int i915_llc(struct seq_file *m, void *data)
return 0;
 }
 
+static int i915_huc_load_status_info(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = m->private;
+   struct drm_i915_private *dev_priv = node->minor->dev->dev_private;
+   struct intel_uc_fw *huc_fw = &dev_priv->huc.huc_fw;
+
+   if (!HAS_HUC_UCODE(dev_priv->dev))
+   return 0;
+
+   seq_printf(m, "HuC firmware status:\n");
+   seq_printf(m, "\tpath: %s\n", huc_fw->uc_fw_path);
+   seq_printf(m, "\tfetch: %s\n",
+   intel_uc_fw_status_repr(huc_fw->fetch_status));
+   seq_printf(m, "\tload: %s\n",
+   intel_uc_fw_status_repr(huc_fw->load_status));
+   seq_printf(m, "\tversion wanted: %d.%d\n",
+   huc_fw->major_ver_wanted, huc_fw->minor_ver_wanted);
+   seq_printf(m, "\tversion found: %d.%d\n",
+   huc_fw->major_ver_found, huc_fw->minor_ver_found);
+   seq_printf(m, "\theader: offset is %d; size = %d\n",
+   huc_fw->header_offset, huc_fw->header_size);
+   seq_printf(m, "\tuCode: offset is %d; size = %d\n",
+   huc_fw->ucode_offset, huc_fw->ucode_size);
+   seq_printf(m, "\tRSA: offset is %d; size = %d\n",
+   huc_fw->rsa_offset, huc_fw->rsa_size);
+
+   seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
+
+   return 0;
+}
+
 static int i915_guc_load_status_info(struct seq_file *m, void *data)
 {
struct drm_info_node *node = m->private;
@@ -5346,6 +5377,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_guc_info", i915_guc_info, 0},
{"i915_guc_load_status", i915_guc_load_status_info, 0},
{"i915_guc_log_dump", i915_guc_log_dump, 0},
+   {"i915_huc_load_status", i915_huc_load_status_info, 0},
{"i915_frequency_info", i915_frequency_info, 0},
{"i915_hangcheck_info", i915_hangcheck_info, 0},
{"i915_drpc_info", i915_drpc_info, 0},
-- 
2.5.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 6/6] drm/i915/huc: Support HuC authentication

2016-01-11 Thread yu . dai
From: Alex Dai 

The HuC authentication is done by host2guc call. The HuC RSA keys
are sent to GuC for authentication.

Signed-off-by: Alex Dai 
Signed-off-by: Peter Antoine 
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 65 ++
 drivers/gpu/drm/i915/intel_guc_fwif.h  |  1 +
 drivers/gpu/drm/i915/intel_guc_loader.c|  2 +
 3 files changed, 68 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
index 8ce4f32..096b524 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -25,6 +25,7 @@
 #include 
 #include "i915_drv.h"
 #include "intel_guc.h"
+#include "intel_huc.h"
 
 /**
  * DOC: GuC-based command submission
@@ -1059,3 +1060,67 @@ int intel_guc_resume(struct drm_device *dev)
 
return host2guc_action(guc, data, ARRAY_SIZE(data));
 }
+
+/**
+ * intel_huc_ucode_auth() - authenticate ucode
+ * @dev: the drm device
+ *
+ * Triggers a HuC fw authentication request to the GuC via host-2-guc
+ * interface.
+ */
+void intel_huc_ucode_auth(struct drm_device *dev)
+{
+   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct intel_guc *guc = &dev_priv->guc;
+   struct intel_huc *huc = &dev_priv->huc;
+   int ret;
+   u32 data[2];
+
+   /* Bypass the case where there is no HuC firmware */
+   if (huc->huc_fw.fetch_status == UC_FIRMWARE_NONE ||
+   huc->huc_fw.load_status == UC_FIRMWARE_NONE)
+   return;
+
+   if (guc->guc_fw.load_status != UC_FIRMWARE_SUCCESS) {
+   DRM_ERROR("HuC: GuC fw wasn't loaded. Can't authenticate");
+   return;
+   }
+
+   if (huc->huc_fw.load_status != UC_FIRMWARE_SUCCESS) {
+   DRM_ERROR("HuC: fw wasn't loaded. Nothing to authenticate");
+   return;
+   }
+
+   ret = i915_gem_obj_ggtt_pin(huc->huc_fw.uc_fw_obj, 0, 0);
+   if (ret) {
+   DRM_ERROR("HuC: Pin failed");
+   return;
+   }
+
+   /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
+   I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+
+   /* Specify auth action and where public signature is. It's stored
+* at the beginning of the gem object, before the fw bits
+*/
+   data[0] = HOST2GUC_ACTION_AUTHENTICATE_HUC;
+   data[1] = i915_gem_obj_ggtt_offset(huc->huc_fw.uc_fw_obj) +
+   huc->huc_fw.rsa_offset;
+
+   ret = host2guc_action(guc, data, ARRAY_SIZE(data));
+   if (ret) {
+   DRM_ERROR("HuC: GuC did not ack Auth request\n");
+   goto out;
+   }
+
+   /* Check authentication status, it should be done by now */
+   ret = wait_for_atomic(
+   (I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED) > 0, 5000);
+   if (ret) {
+   DRM_ERROR("HuC: Authentication failed\n");
+   goto out;
+   }
+
+out:
+   i915_gem_object_ggtt_unpin(huc->huc_fw.uc_fw_obj);
+}
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/intel_guc_fwif.h
index f8846d6..2974e33 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -429,6 +429,7 @@ enum host2guc_action {
HOST2GUC_ACTION_ENTER_S_STATE = 0x501,
HOST2GUC_ACTION_EXIT_S_STATE = 0x502,
HOST2GUC_ACTION_SLPC_REQUEST = 0x3003,
+   HOST2GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
HOST2GUC_ACTION_LIMIT
 };
 
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c 
b/drivers/gpu/drm/i915/intel_guc_loader.c
index 5832792..45b9c43 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -424,6 +424,8 @@ int intel_guc_ucode_load(struct drm_device *dev)
intel_uc_fw_status_repr(guc_fw->fetch_status),
intel_uc_fw_status_repr(guc_fw->load_status));
 
+   intel_huc_ucode_auth(dev);
+
if (i915.enable_guc_submission) {
/* The execbuf_client will be recreated. Release it first. */
i915_guc_submission_disable(dev);
-- 
2.5.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 0/6] Support HuC loading and authentication

2016-01-11 Thread yu . dai
From: Alex Dai 

This series of patches is to enable HuC firmware loading and authentication.
The GuC loader and css_header are unified for HuC loading.

Alex Dai (6):
  drm/i915/guc: Make the GuC fw loading helper functions general
  drm/i915/guc: Bypass fw loading gracefully if GuC is not supported
  drm/i915/huc: Unified css_header struct for GuC and HuC
  drm/i915/huc: Add HuC fw loading support
  drm/i915/huc: Add debugfs for HuC loading status check
  drm/i915/huc: Support HuC authentication

 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_debugfs.c|  44 -
 drivers/gpu/drm/i915/i915_dma.c|   3 +
 drivers/gpu/drm/i915/i915_drv.h|   3 +
 drivers/gpu/drm/i915/i915_gem.c|   7 +
 drivers/gpu/drm/i915/i915_guc_reg.h|   3 +
 drivers/gpu/drm/i915/i915_guc_submission.c |  65 +++
 drivers/gpu/drm/i915/intel_guc.h   |  45 ++---
 drivers/gpu/drm/i915/intel_guc_fwif.h  |  17 +-
 drivers/gpu/drm/i915/intel_guc_loader.c| 246 ++-
 drivers/gpu/drm/i915/intel_huc.h   |  44 +
 drivers/gpu/drm/i915/intel_huc_loader.c| 262 +
 12 files changed, 594 insertions(+), 146 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_huc.h
 create mode 100644 drivers/gpu/drm/i915/intel_huc_loader.c

-- 
2.5.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 028/190] drm/i915: On GPU reset, set the HWS breadcrumb to the last seqno

2016-01-11 Thread Chris Wilson
After a GPU reset, once we have discarded all of the incomplete requests,
mark the GPU as having advanced to the last_submitted_seqno (as having
completed the requests and being ready for fresh work). The impact of this
is negligible, as all the requests will be considered completed by this
point; it just brings the HWS into line with the expectations of external
viewers.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b956b8813307..a713e8a6cb36 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2818,6 +2818,8 @@ static void i915_gem_reset_ring_cleanup(struct 
drm_i915_private *dev_priv,
buffer->last_retired_head = buffer->tail;
intel_ring_update_space(buffer);
}
+
+   intel_ring_init_seqno(ring, ring->last_submitted_seqno);
 }
 
 void i915_gem_reset(struct drm_device *dev)
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 082/190] drm/i915: Count how many VMA are bound for an object

2016-01-11 Thread Chris Wilson
Since we may have VMA allocated for an object whose binding we then
interrupted, there is a disparity between having elements on the
obj->vma_list and being bound. i915_gem_obj_bound_any() does this check,
but it is not rigorously observed - add an explicit count to make it
easier.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  | 12 +--
 drivers/gpu/drm/i915/i915_drv.h  |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c  | 34 +---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 17 +---
 drivers/gpu/drm/i915/i915_gem_stolen.c   |  1 +
 5 files changed, 23 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 0d1f470567b0..e2b1242e369b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -164,6 +164,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
if (obj->fence_reg != I915_FENCE_REG_NONE)
seq_printf(m, " (fence: %d)", obj->fence_reg);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
+   if (!drm_mm_node_allocated(&vma->node))
+   continue;
+
seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
   vma->is_ggtt ? "g" : "pp",
   vma->node.start, vma->node.size);
@@ -331,11 +334,11 @@ static int per_file_stats(int id, void *ptr, void *data)
struct drm_i915_gem_object *obj = ptr;
struct file_stats *stats = data;
struct i915_vma *vma;
-   int bound = 0;
 
stats->count++;
stats->total += obj->base.size;
-
+   if (!obj->bind_count)
+   stats->unbound += obj->base.size;
if (obj->base.name || obj->base.dma_buf)
stats->shared += obj->base.size;
 
@@ -343,8 +346,6 @@ static int per_file_stats(int id, void *ptr, void *data)
if (!drm_mm_node_allocated(&vma->node))
continue;
 
-   bound++;
-
if (vma->is_ggtt) {
stats->global += vma->node.size;
} else {
@@ -362,9 +363,6 @@ static int per_file_stats(int id, void *ptr, void *data)
stats->inactive += vma->node.size;
}
 
-   if (!bound)
-   stats->unbound += obj->base.size;
-
return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index aa9d3782107e..8f5cf244094e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2088,6 +2088,8 @@ struct drm_i915_gem_object {
 
unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
 
+   /** Count of VMA actually bound by this object */
+   unsigned int bind_count;
unsigned int pin_display;
 
struct sg_table *pages;
@@ -2874,7 +2876,6 @@ i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o)
return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal);
 }
 
-bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o);
 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
  const struct i915_ggtt_view *view);
 bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 164ebdaa0369..ed3f306af42f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1812,7 +1812,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
if (obj->pages_pin_count)
return -EBUSY;
 
-   BUG_ON(i915_gem_obj_bound_any(obj));
+   BUG_ON(obj->bind_count);
 
/* ->put_pages might need to allocate memory for the bit17 swizzle
 * array, hence protect them from being reaped by removing them from gtt
@@ -2558,7 +2558,6 @@ static void i915_gem_object_finish_gtt(struct 
drm_i915_gem_object *obj)
 static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 {
struct drm_i915_gem_object *obj = vma->obj;
-   struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
int ret;
 
if (list_empty(&vma->obj_link))
@@ -2572,7 +2571,8 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
if (vma->pin_count)
return -EBUSY;
 
-   BUG_ON(obj->pages == NULL);
+   GEM_BUG_ON(obj->bind_count == 0);
+   GEM_BUG_ON(obj->pages == NULL);
 
if (wait) {
ret = i915_gem_object_wait_rendering(obj, false);
@@ -2610,8 +2610,9 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
 
/* Since the unbound list is global, only move to that list if
 * no more VMAs exist. */
-   if (list_empty(&obj->vma_list))
-   list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
+   if (--obj->bind_count == 0)
+   list_move_tail(&obj->global_list,
+   

[Intel-gfx] [PATCH 064/190] drm/i915: Rename intel_pin_and_map_ring()

2016-01-11 Thread Chris Wilson
For more consistent OO naming, we would use intel_ring_<verb>(), so pick
intel_ring_map().

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_lrc.c|  6 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 44 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +--
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fa4c0c0db994..3a80d9d45f5c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -889,7 +889,7 @@ static int intel_lr_context_do_pin(struct intel_engine_cs 
*ring,
if (ret)
return ret;
 
-   ret = intel_pin_and_map_ring(ring->dev, ringbuf);
+   ret = intel_ring_map(ringbuf);
if (ret)
goto unpin_ctx_obj;
 
@@ -936,7 +936,7 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
if (ctx_obj) {
WARN_ON(!mutex_is_locked(&rq->i915->dev->struct_mutex));
if (--rq->ctx->engine[engine].pin_count == 0) {
-   intel_unpin_ring(ring);
+   intel_ring_unmap(ring);
i915_gem_object_ggtt_unpin(ctx_obj);
i915_gem_context_unreference(rq->ctx);
}
@@ -2178,7 +2178,7 @@ void intel_lr_context_free(struct intel_context *ctx)
struct intel_engine_cs *engine = ring->engine;
 
if (ctx == engine->default_context) {
-   intel_unpin_ring(ring);
+   intel_ring_unmap(ring);
i915_gem_object_ggtt_unpin(ctx_obj);
}
WARN_ON(ctx->engine[engine->id].pin_count);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 95974156a1d9..74a4a54e6ca5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1845,22 +1845,12 @@ static int init_phys_status_page(struct intel_engine_cs 
*ring)
return 0;
 }
 
-void intel_unpin_ring(struct intel_ring *ringbuf)
+int intel_ring_map(struct intel_ring *ring)
 {
-   if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen)
-   i915_gem_object_unpin_vmap(ringbuf->obj);
-   else
-   iounmap(ringbuf->virtual_start);
-   i915_gem_object_ggtt_unpin(ringbuf->obj);
-}
-
-int intel_pin_and_map_ring(struct drm_device *dev, struct intel_ring *ringbuf)
-{
-   struct drm_i915_private *dev_priv = to_i915(dev);
-   struct drm_i915_gem_object *obj = ringbuf->obj;
+   struct drm_i915_gem_object *obj = ring->obj;
int ret;
 
-   if (HAS_LLC(dev_priv) && !obj->stolen) {
+   if (HAS_LLC(ring->engine->i915) && !obj->stolen) {
ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, 0);
if (ret)
return ret;
@@ -1869,10 +1859,10 @@ int intel_pin_and_map_ring(struct drm_device *dev, 
struct intel_ring *ringbuf)
if (ret)
goto unpin;
 
-   ringbuf->virtual_start = i915_gem_object_pin_vmap(obj);
-   if (IS_ERR(ringbuf->virtual_start)) {
-   ret = PTR_ERR(ringbuf->virtual_start);
-   ringbuf->virtual_start = NULL;
+   ring->virtual_start = i915_gem_object_pin_vmap(obj);
+   if (IS_ERR(ring->virtual_start)) {
+   ret = PTR_ERR(ring->virtual_start);
+   ring->virtual_start = NULL;
goto unpin;
}
} else {
@@ -1884,9 +1874,10 @@ int intel_pin_and_map_ring(struct drm_device *dev, 
struct intel_ring *ringbuf)
if (ret)
goto unpin;
 
-   ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
-   i915_gem_obj_ggtt_offset(obj), ringbuf->size);
-   if (ringbuf->virtual_start == NULL) {
+   ring->virtual_start = ioremap_wc(ring->engine->i915->gtt.mappable_base +
+i915_gem_obj_ggtt_offset(obj),
+ring->size);
+   if (ring->virtual_start == NULL) {
ret = -ENOMEM;
goto unpin;
}
@@ -1899,6 +1890,15 @@ unpin:
return ret;
 }
 
+void intel_ring_unmap(struct intel_ring *ring)
+{
+   if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen)
+   i915_gem_object_unpin_vmap(ring->obj);
+   else
+   iounmap(ring->virtual_start);
+   i915_gem_object_ggtt_unpin(ring->obj);
+}
+
 static void intel_destroy_ringbuffer_obj(struct intel_ring *ringbuf)
 {
drm_gem_object_unreference(&ringbuf->obj->base);
@@ -2012,7 +2012,7 @@ static int 

[Intel-gfx] ✓ success: Fi.CI.BAT

2016-01-11 Thread Patchwork
== Summary ==

Built on ff88655b3a5467bbc3be8c67d3e05ebf182557d3 drm-intel-nightly: 
2016y-01m-11d-07h-30m-16s UTC integration manifest

Test kms_pipe_crc_basic:
Subgroup read-crc-pipe-b:
dmesg-warn -> PASS   (byt-nuc)

bdw-ultra  total:138  pass:130  dwarn:1   dfail:0   fail:1   skip:6
bsw-nuc-2  total:141  pass:114  dwarn:3   dfail:0   fail:0   skip:24
byt-nuc  total:141  pass:119  dwarn:7   dfail:0   fail:0   skip:15 
hsw-brixbox  total:141  pass:134  dwarn:0   dfail:0   fail:0   skip:7  
hsw-gt2  total:141  pass:137  dwarn:0   dfail:0   fail:0   skip:4  
ilk-hp8440p  total:141  pass:100  dwarn:4   dfail:0   fail:0   skip:37 
skl-i5k-2  total:141  pass:132  dwarn:1   dfail:0   fail:0   skip:8
skl-i7k-2  total:141  pass:131  dwarn:2   dfail:0   fail:0   skip:8
snb-dellxps  total:141  pass:122  dwarn:5   dfail:0   fail:0   skip:14 
snb-x220ttotal:141  pass:122  dwarn:5   dfail:0   fail:1   skip:13 

Results at /archive/results/CI_IGT_test/Patchwork_1113/

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 01/13] drm/i915/bdw+: Replace list_del+list_add_tail with list_move_tail

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 11:29:40AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> Same effect for slightly less source code and resulting binary.
> 
> Signed-off-by: Tvrtko Ursulin 

Reviewed-by: Daniel Vetter 
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 15 ++-
>  1 file changed, 6 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index 23839ff04e27..8b6071fcd743 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -431,9 +431,8 @@ static void execlists_context_unqueue(struct 
> intel_engine_cs *ring)
>   /* Same ctx: ignore first request, as second request
>* will update tail past first request's workload */
>   cursor->elsp_submitted = req0->elsp_submitted;
> - list_del(&req0->execlist_link);
> - list_add_tail(&req0->execlist_link,
> - &ring->execlist_retired_req_list);
> + list_move_tail(&req0->execlist_link,
> +&ring->execlist_retired_req_list);
>   req0 = cursor;
>   } else {
>   req1 = cursor;
> @@ -485,9 +484,8 @@ static bool execlists_check_remove_request(struct 
> intel_engine_cs *ring,
>"Never submitted head request\n");
>  
>   if (--head_req->elsp_submitted <= 0) {
> - list_del(&head_req->execlist_link);
> - list_add_tail(&head_req->execlist_link,
> - &ring->execlist_retired_req_list);
> + list_move_tail(&head_req->execlist_link,
> +&ring->execlist_retired_req_list);
>   return true;

Aside: Some of this code is over-indented ...
-Daniel

>   }
>   }
> @@ -608,9 +606,8 @@ static int execlists_context_queue(struct 
> drm_i915_gem_request *request)
>   if (request->ctx == tail_req->ctx) {
>   WARN(tail_req->elsp_submitted != 0,
>   "More than 2 already-submitted reqs queued\n");
> - list_del(&tail_req->execlist_link);
> - list_add_tail(&tail_req->execlist_link,
> - &ring->execlist_retired_req_list);
> + list_move_tail(&tail_req->execlist_link,
> +&ring->execlist_retired_req_list);
>   }
>   }
>  
> -- 
> 1.9.1
> 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 07/13] drm/i915: Introduce dedicated object VMA iterator

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 01:29:14PM +, Tvrtko Ursulin wrote:
> 
> On 08/01/16 11:29, Tvrtko Ursulin wrote:
> >From: Tvrtko Ursulin 
> >
> >Purpose is to catch places which iterate the object VMA list
> >without holding the big lock.
> >
> >Implemented by open coding list_for_each_entry to make the
> >macro compatible with existing call sites.
> >
> >Signed-off-by: Tvrtko Ursulin 
> >Cc: Daniel Vetter 
> >---
> >  drivers/gpu/drm/i915/i915_debugfs.c  |  8 
> >  drivers/gpu/drm/i915/i915_drv.h  |  6 ++
> >  drivers/gpu/drm/i915/i915_gem.c  | 24 
> >  drivers/gpu/drm/i915/i915_gem_gtt.c  |  2 +-
> >  drivers/gpu/drm/i915/i915_gem_shrinker.c |  2 +-
> >  drivers/gpu/drm/i915/i915_gpu_error.c|  4 ++--
> >  6 files changed, 26 insertions(+), 20 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> >b/drivers/gpu/drm/i915/i915_debugfs.c
> >index 714a45cf8a51..d7c2a3201161 100644
> >--- a/drivers/gpu/drm/i915/i915_debugfs.c
> >+++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >@@ -117,7 +117,7 @@ static u64 i915_gem_obj_total_ggtt_size(struct 
> >drm_i915_gem_object *obj)
> > u64 size = 0;
> > struct i915_vma *vma;
> >
> >-list_for_each_entry(vma, &obj->vma_list, vma_link) {
> >+i915_gem_obj_for_each_vma(vma, obj) {
> > if (i915_is_ggtt(vma->vm) &&
> > drm_mm_node_allocated(&vma->node))
> > size += vma->node.size;
> >@@ -155,7 +155,7 @@ describe_obj(struct seq_file *m, struct 
> >drm_i915_gem_object *obj)
> >obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
> > if (obj->base.name)
> > seq_printf(m, " (name: %d)", obj->base.name);
> >-list_for_each_entry(vma, &obj->vma_list, vma_link) {
> >+i915_gem_obj_for_each_vma(vma, obj) {
> > if (vma->pin_count > 0)
> > pin_count++;
> > }
> >@@ -164,7 +164,7 @@ describe_obj(struct seq_file *m, struct 
> >drm_i915_gem_object *obj)
> > seq_printf(m, " (display)");
> > if (obj->fence_reg != I915_FENCE_REG_NONE)
> > seq_printf(m, " (fence: %d)", obj->fence_reg);
> >-list_for_each_entry(vma, &obj->vma_list, vma_link) {
> >+i915_gem_obj_for_each_vma(vma, obj) {
> > seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
> >i915_is_ggtt(vma->vm) ? "g" : "pp",
> >vma->node.start, vma->node.size);
> >@@ -342,7 +342,7 @@ static int per_file_stats(int id, void *ptr, void *data)
> > stats->shared += obj->base.size;
> >
> > if (USES_FULL_PPGTT(obj->base.dev)) {
> >-list_for_each_entry(vma, &obj->vma_list, vma_link) {
> >+i915_gem_obj_for_each_vma(vma, obj) {
> > struct i915_hw_ppgtt *ppgtt;
> >
> > if (!drm_mm_node_allocated(&vma->node))
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h 
> >b/drivers/gpu/drm/i915/i915_drv.h
> >index b77a5d84eac2..0406a020dfcc 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -2852,6 +2852,12 @@ struct drm_i915_gem_object 
> >*i915_gem_object_create_from_data(
> >  void i915_gem_free_object(struct drm_gem_object *obj);
> >  void i915_gem_vma_destroy(struct i915_vma *vma);
> >
> >+#define i915_gem_obj_for_each_vma(vma, obj) \
> >+for (WARN_ON_ONCE(!mutex_is_locked(&(obj)->base.dev->struct_mutex)), \
> >+ vma = list_first_entry(&(obj)->vma_list, typeof(*vma), vma_link);\
> >+ &vma->vma_link != (&(obj)->vma_list); \
> >+ vma = list_next_entry(vma, vma_link))
> >+
> 
> 
> Unfortunately error capture is not happy with this approach. Can't even see
> that error capture attempts to grab the mutex anywhere.
> 
> So what? Drop the idea or add a "doing error capture" flag somewhere?

Fix the bugs. No surprise at all that we've screwed this up all over the
place ;-) Afaics modeset code isn't much better either ...
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✗ warning: Fi.CI.BAT

2016-01-11 Thread Patchwork
== Summary ==

Built on ff88655b3a5467bbc3be8c67d3e05ebf182557d3 drm-intel-nightly: 
2016y-01m-11d-07h-30m-16s UTC integration manifest

Test gem_storedw_loop:
Subgroup basic-render:
dmesg-warn -> PASS   (bdw-ultra)
dmesg-warn -> PASS   (skl-i7k-2) UNSTABLE
Test kms_flip:
Subgroup basic-flip-vs-dpms:
dmesg-warn -> PASS   (ilk-hp8440p)
Subgroup basic-flip-vs-modeset:
pass   -> DMESG-WARN (ilk-hp8440p)
Test kms_pipe_crc_basic:
Subgroup read-crc-pipe-b:
dmesg-warn -> PASS   (byt-nuc)

bdw-ultra  total:138  pass:132  dwarn:0   dfail:0   fail:0   skip:6
bsw-nuc-2  total:141  pass:114  dwarn:3   dfail:0   fail:0   skip:24
byt-nuc  total:141  pass:119  dwarn:7   dfail:0   fail:0   skip:15 
hsw-brixbox  total:141  pass:134  dwarn:0   dfail:0   fail:0   skip:7  
hsw-gt2  total:141  pass:137  dwarn:0   dfail:0   fail:0   skip:4  
hsw-xps12  total:138  pass:133  dwarn:1   dfail:0   fail:0   skip:4
ilk-hp8440p  total:141  pass:100  dwarn:4   dfail:0   fail:0   skip:37 
skl-i5k-2  total:141  pass:132  dwarn:1   dfail:0   fail:0   skip:8
skl-i7k-2  total:141  pass:132  dwarn:1   dfail:0   fail:0   skip:8
snb-dellxps  total:141  pass:122  dwarn:5   dfail:0   fail:0   skip:14 
snb-x220ttotal:141  pass:122  dwarn:5   dfail:0   fail:1   skip:13 

Results at /archive/results/CI_IGT_test/Patchwork_1110/

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✗ failure: Fi.CI.BAT

2016-01-11 Thread Patchwork
== Summary ==

HEAD is now at ff88655 drm-intel-nightly: 2016y-01m-11d-07h-30m-16s UTC 
integration manifest
Applying: drm/i915: Use passed plane state for sprite planes, v4.
Using index info to reconstruct a base tree...
M   drivers/gpu/drm/i915/intel_drv.h
M   drivers/gpu/drm/i915/intel_sprite.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/intel_sprite.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/intel_sprite.c
Patch failed at 0001 drm/i915: Use passed plane state for sprite planes, v4.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✗ failure: Fi.CI.BAT

2016-01-11 Thread Patchwork
== Summary ==

Built on ff88655b3a5467bbc3be8c67d3e05ebf182557d3 drm-intel-nightly: 
2016y-01m-11d-07h-30m-16s UTC integration manifest

Test gem_storedw_loop:
Subgroup basic-render:
pass   -> DMESG-WARN (skl-i5k-2) UNSTABLE
dmesg-warn -> PASS   (bdw-ultra)
Test kms_flip:
Subgroup basic-flip-vs-dpms:
dmesg-warn -> PASS   (ilk-hp8440p)
Test kms_pipe_crc_basic:
Subgroup read-crc-pipe-b:
pass   -> DMESG-WARN (ilk-hp8440p)
dmesg-warn -> PASS   (byt-nuc)

bdw-ultra  total:138  pass:132  dwarn:0   dfail:0   fail:0   skip:6
bsw-nuc-2  total:141  pass:114  dwarn:3   dfail:0   fail:0   skip:24
byt-nuc  total:141  pass:119  dwarn:7   dfail:0   fail:0   skip:15 
hsw-brixbox  total:141  pass:134  dwarn:0   dfail:0   fail:0   skip:7  
hsw-gt2  total:141  pass:137  dwarn:0   dfail:0   fail:0   skip:4  
hsw-xps12  total:138  pass:133  dwarn:1   dfail:0   fail:0   skip:4
ilk-hp8440p  total:141  pass:100  dwarn:4   dfail:0   fail:0   skip:37 
skl-i5k-2  total:141  pass:131  dwarn:2   dfail:0   fail:0   skip:8
skl-i7k-2  total:141  pass:131  dwarn:2   dfail:0   fail:0   skip:8
snb-dellxps  total:141  pass:122  dwarn:5   dfail:0   fail:0   skip:14 
snb-x220ttotal:141  pass:122  dwarn:5   dfail:0   fail:1   skip:13 

HANGED ivb-t430s in igt@kms_pipe_crc_basic@nonblocking-crc-pipe-b

Results at /archive/results/CI_IGT_test/Patchwork_1112/

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 007/190] drm/i915: Hide the atomic_read(reset_counter) behind a helper

2016-01-11 Thread Chris Wilson
This is principally a little bit of syntactic sugar to hide the
atomic_read()s throughout the code to retrieve the current reset_counter.
It also provides the other utility functions to check the reset state on the
already read reset_counter, so that (in later patches) we can read it once
and do multiple tests rather than risk the value changing between tests.

v2: Be strict about converting existing i915_reset_in_progress() users over to
the more verbose i915_reset_in_progress_or_wedged().
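
For illustration, the pattern these helpers are intended to enable is to
sample the counter once and run several tests on that single read (a
sketch only, not part of the diff below):

	unsigned int reset = i915_reset_counter(&dev_priv->gpu_error);

	if (__i915_terminally_wedged(reset))
		return -EIO;
	if (__i915_reset_in_progress(reset))
		return -EAGAIN;

	/* ... do work, then detect a reset that happened in between: */
	if (reset != i915_reset_counter(&dev_priv->gpu_error))
		return -EAGAIN;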

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  4 ++--
 drivers/gpu/drm/i915/i915_drv.h | 32 
 drivers/gpu/drm/i915/i915_gem.c | 16 
 drivers/gpu/drm/i915/i915_irq.c |  2 +-
 drivers/gpu/drm/i915/intel_display.c| 18 +++---
 drivers/gpu/drm/i915/intel_lrc.c|  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  4 ++--
 7 files changed, 53 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index e3377abc0d4d..932af05b8eec 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4696,7 +4696,7 @@ i915_wedged_get(void *data, u64 *val)
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = dev->dev_private;
 
-   *val = atomic_read(&dev_priv->gpu_error.reset_counter);
+   *val = i915_reset_counter(&dev_priv->gpu_error);
 
return 0;
 }
@@ -4715,7 +4715,7 @@ i915_wedged_set(void *data, u64 val)
 * while it is writing to 'i915_wedged'
 */
 
-   if (i915_reset_in_progress(&dev_priv->gpu_error))
+   if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error))
return -EAGAIN;
 
intel_runtime_pm_get(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1a6168affadd..b274237726de 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2983,20 +2983,44 @@ void i915_gem_retire_requests_ring(struct 
intel_engine_cs *ring);
 int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
  bool interruptible);
 
+static inline u32 i915_reset_counter(struct i915_gpu_error *error)
+{
+   return atomic_read(&error->reset_counter);
+}
+
+static inline bool __i915_reset_in_progress(u32 reset)
+{
+   return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG);
+}
+
+static inline bool __i915_reset_in_progress_or_wedged(u32 reset)
+{
+   return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
+}
+
+static inline bool __i915_terminally_wedged(u32 reset)
+{
+   return unlikely(reset & I915_WEDGED);
+}
+
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
-   return unlikely(atomic_read(&error->reset_counter)
-   & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
+   return __i915_reset_in_progress(i915_reset_counter(error));
+}
+
+static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error 
*error)
+{
+   return __i915_reset_in_progress_or_wedged(i915_reset_counter(error));
 }
 
 static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 {
-   return atomic_read(&error->reset_counter) & I915_WEDGED;
+   return __i915_terminally_wedged(i915_reset_counter(error));
 }
 
 static inline u32 i915_reset_count(struct i915_gpu_error *error)
 {
-   return ((atomic_read(&error->reset_counter) & ~I915_WEDGED) + 1) / 2;
+   return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
 }
 
 static inline bool i915_stop_ring_allow_ban(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 99fd6aa4dd62..78bf980a69bf 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -83,7 +83,7 @@ i915_gem_wait_for_error(struct i915_gpu_error *error)
 {
int ret;
 
-#define EXIT_COND (!i915_reset_in_progress(error) || \
+#define EXIT_COND (!i915_reset_in_progress_or_wedged(error) || \
   i915_terminally_wedged(error))
if (EXIT_COND)
return 0;
@@ -,7 +,7 @@ int
 i915_gem_check_wedge(struct i915_gpu_error *error,
 bool interruptible)
 {
-   if (i915_reset_in_progress(error)) {
+   if (i915_reset_in_progress_or_wedged(error)) {
/* Non-interruptible callers can't handle -EAGAIN, hence return
 * -EIO unconditionally for these. */
if (!interruptible)
@@ -1295,7 +1295,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 
/* We need to check whether any gpu reset happened in between
 * the caller grabbing the seqno and now ... */
-   if (reset_counter != 

[Intel-gfx] [PATCH 011/190] drm/i915: Simplify reset_counter handling during atomic modesetting

2016-01-11 Thread Chris Wilson
Now that the reset_counter is stored on the request, we can rearrange
the code to handle reading the counter versus waiting during the atomic
modesetting for readability (by deleting the hairiest of the code).

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/intel_display.c | 18 +++---
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 4f36313f31ac..ee0ec72b16b4 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -13504,9 +13504,9 @@ static int intel_atomic_prepare_commit(struct 
drm_device *dev,
return ret;
 
ret = drm_atomic_helper_prepare_planes(dev, state);
-   if (!ret && !async && !i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) {
-   mutex_unlock(&dev->struct_mutex);
+   mutex_unlock(&dev->struct_mutex);
 
+   if (!ret && !async) {
for_each_plane_in_state(state, plane, plane_state, i) {
struct intel_plane_state *intel_plane_state =
to_intel_plane_state(plane_state);
@@ -13520,19 +13520,15 @@ static int intel_atomic_prepare_commit(struct 
drm_device *dev,
/* Swallow -EIO errors to allow updates during hw lockup. */
if (ret == -EIO)
ret = 0;
-
-   if (ret)
+   if (ret) {
+   mutex_lock(&dev->struct_mutex);
+   drm_atomic_helper_cleanup_planes(dev, state);
+   mutex_unlock(&dev->struct_mutex);
break;
+   }
}
-
-   if (!ret)
-   return 0;
-
-   mutex_lock(&dev->struct_mutex);
-   drm_atomic_helper_cleanup_planes(dev, state);
}
 
-   mutex_unlock(&dev->struct_mutex);
return ret;
 }
 
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 005/190] drm/i915: Force clean compilation with -Werror

2016-01-11 Thread Chris Wilson
Our driver compiles clean (nowadays thanks to 0day) but for me, at least,
it would be beneficial if the compiler threw an error rather than a
warning when it found a piece of suspect code. (I use this to
compile-check patch series and want to break on the first compiler error
in order to fix the patch.)

v2: Kick off a new "Debugging" submenu for i915.ko

At this point, we applied it to the kernel and promptly kicked it out
again as it broke buildbots (due to a compiler warning on 32bits):

commit 908d759b210effb33d927a8cb6603a16448474e4
Author: Daniel Vetter 
Date:   Tue May 26 07:46:21 2015 +0200

Revert "drm/i915: Force clean compilation with -Werror"

v3: Avoid enabling -Werror for allyesconfig/allmodconfig builds, using
COMPILE_TEST as a suitable proxy suggested by Andrew Morton. (Damien)
Only make the option available for EXPERT to reinforce that the option
should not be casually enabled.

Signed-off-by: Chris Wilson 
Cc: Jani Nikula 
Cc: Damien Lespiau 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/Kconfig   |  6 ++
 drivers/gpu/drm/i915/Kconfig.debug | 12 
 drivers/gpu/drm/i915/Makefile  |  2 ++
 3 files changed, 20 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/Kconfig.debug

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index b979295aab82..33e8563c2f99 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -59,3 +59,9 @@ config DRM_I915_USERPTR
  selected to enabled full userptr support.
 
  If in doubt, say "Y".
+
+menu "drm/i915 Debugging"
+depends on DRM_I915
+depends on EXPERT
+source drivers/gpu/drm/i915/Kconfig.debug
+endmenu
diff --git a/drivers/gpu/drm/i915/Kconfig.debug 
b/drivers/gpu/drm/i915/Kconfig.debug
new file mode 100644
index ..1f10ee228eda
--- /dev/null
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -0,0 +1,12 @@
+config DRM_I915_WERROR
+   bool "Force GCC to throw an error instead of a warning when compiling"
+   default n
+   # As this may inadvertently break the build, only allow the user
+   # to shoot oneself in the foot iff they aim really hard
+   depends on EXPERT
+   # We use the dependency on !COMPILE_TEST to not be enabled in
+   # allmodconfig or allyesconfig configurations
+   depends on !COMPILE_TEST
+   ---help---
+ Add -Werror to the build flags for (and only for) i915.ko.
+ Do not enable this unless you are writing code for the i915.ko module.
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0851de07bd13..1e9895b9a546 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -2,6 +2,8 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
+subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror
+
 # Please keep these build lists sorted!
 
 # core driver code
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 041/190] drm/i915: Allow userspace to request no-error-capture upon GPU hangs

2016-01-11 Thread Chris Wilson
igt likes to inject GPU hangs into its command streams. However, as we
expect these hangs, we don't actually want them recorded in the dmesg
output or stored in the i915_error_state (usually). To accommodate this,
allow userspace to set a flag on the context that any hang emanating
from that context will not be recorded. We still do the error capture
(otherwise how do we find the guilty context and know its intent?), as
part of the reason for random GPU hang injection is to exercise the race
conditions between the error capture and normal execution.

v2: Split out the request->ringbuf error capture changes.
v3: Move the flag defines next to the intel_context->flags definition
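
For illustration, a test might set the flag from userspace like this
(sketch only; I915_CONTEXT_PARAM_NO_ERROR_CAPTURE is the value added by
this patch, error handling elided):

	#include <stdint.h>
	#include <xf86drm.h>
	#include <i915_drm.h>

	static void context_disable_error_capture(int fd, uint32_t ctx_id)
	{
		struct drm_i915_gem_context_param p = {
			.ctx_id = ctx_id,
			.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE,
			.value = 1,
		};

		drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
	}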

Signed-off-by: Chris Wilson 
Acked-by: Daniel Vetter 
Reviewed-by: Dave Gordon 
---
 drivers/gpu/drm/i915/i915_drv.h |  7 +--
 drivers/gpu/drm/i915/i915_gem_context.c | 13 +
 drivers/gpu/drm/i915/i915_gpu_error.c   | 14 +-
 include/uapi/drm/i915_drm.h |  1 +
 4 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c3b795f1566b..57e450e25ad6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -486,6 +486,7 @@ struct drm_i915_error_state {
struct timeval time;
 
char error_msg[128];
+   bool simulated;
int iommu;
u32 reset_count;
u32 suspend_count;
@@ -842,7 +843,6 @@ struct i915_ctx_hang_stats {
 /* This must match up with the value previously used for execbuf2.rsvd1. */
 #define DEFAULT_CONTEXT_HANDLE 0
 
-#define CONTEXT_NO_ZEROMAP (1<<0)
 /**
  * struct intel_context - as the name implies, represents a context.
  * @ref: reference count.
@@ -867,11 +867,14 @@ struct intel_context {
int user_handle;
uint8_t remap_slice;
struct drm_i915_private *i915;
-   int flags;
struct drm_i915_file_private *file_priv;
struct i915_ctx_hang_stats hang_stats;
struct i915_hw_ppgtt *ppgtt;
 
+   unsigned flags;
+#define CONTEXT_NO_ZEROMAP (1<<0)
+#define CONTEXT_NO_ERROR_CAPTURE   (1<<1)
+
/* Legacy ring buffer submission */
struct {
struct drm_i915_gem_object *rcs_state;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index e5e9a8918f19..0aea5ccf6d68 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -939,6 +939,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, 
void *data,
else
args->value = to_i915(dev)->gtt.base.total;
break;
+   case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+   args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE);
+   break;
default:
ret = -EINVAL;
break;
@@ -984,6 +987,16 @@ int i915_gem_context_setparam_ioctl(struct drm_device 
*dev, void *data,
ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
}
break;
+   case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+   if (args->size) {
+   ret = -EINVAL;
+   } else {
+   if (args->value)
+   ctx->flags |= CONTEXT_NO_ERROR_CAPTURE;
+   else
+   ctx->flags &= ~CONTEXT_NO_ERROR_CAPTURE;
+   }
+   break;
default:
ret = -EINVAL;
break;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 93da2c7581f6..4f17d6847569 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1040,6 +1040,8 @@ static void i915_gem_record_rings(struct drm_device *dev,
rcu_read_unlock();
}
 
+   error->simulated |= request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE;
+
rb = request->ringbuf;
error->ring[i].cpu_ring_head = rb->head;
error->ring[i].cpu_ring_tail = rb->tail;
@@ -1333,12 +1335,14 @@ void i915_capture_error_state(struct drm_device *dev, 
bool wedged,
i915_error_capture_msg(dev, error, wedged, error_msg);
DRM_INFO("%s\n", error->error_msg);
 
-   spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
-   if (dev_priv->gpu_error.first_error == NULL) {
-   dev_priv->gpu_error.first_error = error;
-   error = NULL;
+   if (!error->simulated) {
+   spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
+   if (dev_priv->gpu_error.first_error == NULL) {
+   dev_priv->gpu_error.first_error = error;
+   error = NULL;
+  

[Intel-gfx] [PATCH 032/190] drm/i915: Remove debug noise on detecting fault-injection of missed interrupts

2016-01-11 Thread Chris Wilson
Since the tests can and do explicitly check debugfs/i915_ring_missed_irqs
for the handling of a "missed interrupt", adding it to the dmesg at INFO
is just noise. When it happens for real, we still class it as an ERROR.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_irq.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b3942dec7de4..502663f13cd8 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3083,9 +3083,6 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
if (!test_bit(ring->id, &dev_priv->gpu_error.test_irq_rings))
DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
  ring->name);
-   else
-   DRM_INFO("Fake missed 
irq on %s\n",
-ring->name);
 

intel_engine_enable_fake_irq(ring);
}
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 030/190] drm/i915: Move the get/put irq locking into the caller

2016-01-11 Thread Chris Wilson
With only a single callsite for intel_engine_cs->irq_get and ->irq_put,
we can reduce the code size by moving the common preamble into the
caller, and we can also eliminate the reference counting.

For completeness, as we are no longer doing reference counting on irq,
rename the get/put vfunctions to enable/disable respectively.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c |   8 +-
 drivers/gpu/drm/i915/intel_lrc.c |  53 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 302 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   5 +-
 4 files changed, 125 insertions(+), 243 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index cf9cbcc2d5d7..0ea01bd6811c 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -51,12 +51,16 @@ static void irq_enable(struct intel_engine_cs *engine)
 */
engine->irq_posted = true;
 
-   WARN_ON(!engine->irq_get(engine));
+   spin_lock_irq(&engine->i915->irq_lock);
+   engine->irq_enable(engine);
+   spin_unlock_irq(&engine->i915->irq_lock);
 }
 
 static void irq_disable(struct intel_engine_cs *engine)
 {
-   engine->irq_put(engine);
+   spin_lock_irq(&engine->i915->irq_lock);
+   engine->irq_disable(engine);
+   spin_unlock_irq(&engine->i915->irq_lock);
 
engine->irq_posted = false;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 27d91f1ceb2b..b1ede2e9b372 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1640,37 +1640,20 @@ static int gen8_emit_bb_start(struct 
drm_i915_gem_request *req,
return 0;
 }
 
-static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring)
+static void gen8_logical_ring_enable_irq(struct intel_engine_cs *ring)
 {
-   struct drm_device *dev = ring->dev;
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   unsigned long flags;
-
-   if (WARN_ON(!intel_irqs_enabled(dev_priv)))
-   return false;
-
-   spin_lock_irqsave(&dev_priv->irq_lock, flags);
-   if (ring->irq_refcount++ == 0) {
-   I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
-   POSTING_READ(RING_IMR(ring->mmio_base));
-   }
-   spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+   struct drm_i915_private *dev_priv = ring->i915;
 
-   return true;
+   I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
+   POSTING_READ(RING_IMR(ring->mmio_base));
 }
 
-static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
+static void gen8_logical_ring_disable_irq(struct intel_engine_cs *ring)
 {
-   struct drm_device *dev = ring->dev;
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   unsigned long flags;
+   struct drm_i915_private *dev_priv = ring->i915;
 
-   spin_lock_irqsave(&dev_priv->irq_lock, flags);
-   if (--ring->irq_refcount == 0) {
-   I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
-   POSTING_READ(RING_IMR(ring->mmio_base));
-   }
-   spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+   I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
+   POSTING_READ(RING_IMR(ring->mmio_base));
 }
 
 static int gen8_emit_flush(struct drm_i915_gem_request *request,
@@ -1993,8 +1976,8 @@ static int logical_render_ring_init(struct drm_device 
*dev)
ring->irq_seqno_barrier = gen6_seqno_barrier;
ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush_render;
-   ring->irq_get = gen8_logical_ring_get_irq;
-   ring->irq_put = gen8_logical_ring_put_irq;
+   ring->irq_enable = gen8_logical_ring_enable_irq;
+   ring->irq_disable = gen8_logical_ring_disable_irq;
ring->emit_bb_start = gen8_emit_bb_start;
 
ring->dev = dev;
@@ -2039,8 +2022,8 @@ static int logical_bsd_ring_init(struct drm_device *dev)
ring->irq_seqno_barrier = gen6_seqno_barrier;
ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush;
-   ring->irq_get = gen8_logical_ring_get_irq;
-   ring->irq_put = gen8_logical_ring_put_irq;
+   ring->irq_enable = gen8_logical_ring_enable_irq;
+   ring->irq_disable = gen8_logical_ring_disable_irq;
ring->emit_bb_start = gen8_emit_bb_start;
 
return logical_ring_init(dev, ring);
@@ -2063,8 +2046,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
ring->irq_seqno_barrier = gen6_seqno_barrier;
ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush;
-   ring->irq_get = gen8_logical_ring_get_irq;
-   ring->irq_put = gen8_logical_ring_put_irq;
+   ring->irq_enable = gen8_logical_ring_enable_irq;
+   ring->irq_disable = gen8_logical_ring_disable_irq;
ring->emit_bb_start = gen8_emit_bb_start;
 

[Intel-gfx] [PATCH 023/190] drm/i915: Only apply one barrier after a breadcrumb interrupt is posted

2016-01-11 Thread Chris Wilson
If we flag the seqno as potentially stale upon receiving an interrupt,
we can use that information to reduce the frequency with which we apply the
heavyweight coherent seqno read (i.e. if we wake up a chain of waiters).

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h  | 15 ++-
 drivers/gpu/drm/i915/i915_irq.c  |  1 +
 drivers/gpu/drm/i915/intel_breadcrumbs.c |  8 
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  1 +
 4 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c2ee8efdd928..8940b8d3fa59 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3649,7 +3649,20 @@ static inline bool __i915_request_irq_complete(struct 
drm_i915_gem_request *req)
 * but it is easier and safer to do it every time the waiter
 * is woken.
 */
-   if (engine->irq_seqno_barrier) {
+   if (engine->irq_seqno_barrier && READ_ONCE(engine->irq_posted)) {
+   /* The ordering of irq_posted versus applying the barrier
+* is crucial. The clearing of the current irq_posted must
+* be visible before we perform the barrier operation,
+* such that if a subsequent interrupt arrives, irq_posted
+* is reasserted and our task rewoken (which causes us to
+* do another __i915_request_irq_complete() immediately
+* and reapply the barrier). Conversely, if the clear
+* occurs after the barrier, then an interrupt that arrived
+* whilst we waited on the barrier would not trigger a
+* barrier on the next pass, and the read may not see the
+* seqno update.
+*/
+   WRITE_ONCE(engine->irq_posted, false);
engine->irq_seqno_barrier(engine);
if (i915_gem_request_completed(req))
return true;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 627c7fb6aa9b..738edd7fbf8d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1000,6 +1000,7 @@ static void notify_ring(struct intel_engine_cs *ring)
return;
 
trace_i915_gem_request_notify(ring);
+   ring->irq_posted = true; /* paired with mb() in wake_up_process() */
intel_engine_wakeup(ring);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index f66acf820c40..d689bd61534e 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -43,12 +43,20 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
 
 static void irq_enable(struct intel_engine_cs *engine)
 {
+   /* Enabling the IRQ may miss the generation of the interrupt, but
+* we still need to force the barrier before reading the seqno,
+* just in case.
+*/
+   engine->irq_posted = true;
+
WARN_ON(!engine->irq_get(engine));
 }
 
 static void irq_disable(struct intel_engine_cs *engine)
 {
engine->irq_put(engine);
+
+   engine->irq_posted = false;
 }
 
 static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 28ab07b38c05..6cc8e9c5f8d6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -198,6 +198,7 @@ struct  intel_engine_cs {
struct i915_ctx_workarounds wa_ctx;
 
unsigned irq_refcount; /* protected by dev_priv->irq_lock */
+   boolirq_posted;
u32 irq_enable_mask;/* bitmask to enable ring 
interrupt */
struct drm_i915_gem_request *trace_irq_req;
bool __must_check (*irq_get)(struct intel_engine_cs *ring);
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 012/190] drm/i915: Prevent leaking of -EIO from i915_wait_request()

2016-01-11 Thread Chris Wilson
Reporting -EIO from i915_wait_request() has proven very troublesome
over the years, with numerous hard-to-reproduce bugs cropping up in the
corner case of where a reset occurs and the code wasn't expecting such
an error.

If we reset the GPU, or have detected a hang and wish to reset the
GPU, the request is forcibly completed and the wait broken. Currently, we
report either -EAGAIN or -EIO in order for the caller to retreat and
restart the wait (if appropriate) after dropping and then reacquiring
the struct_mutex (essential to allow the GPU reset to proceed). However,
if we take the view that the request is complete (no further work will
be done on it by the GPU because it is dead and soon to be reset), then
we can proceed with the task at hand and then drop the struct_mutex
allowing the reset to occur. This transfers the burden of checking
whether it is safe to proceed to the caller - and in all but one
instance it is safe - completely eliminating the source of all spurious
-EIO.

Of note, we only have two API entry points where we expect that
userspace can observe an EIO. First is when submitting an execbuf, if
the GPU is terminally wedged, then the operation cannot succeed and an
-EIO is reported. Secondly, existing userspace uses the throttle ioctl
to detect an already wedged GPU before starting to use HW acceleration
(or to confirm that the GPU is wedged after an error condition). So if
the GPU is wedged when the user calls throttle, also report -EIO.
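
For illustration, the userspace probe referred to above amounts to
something like (sketch only, not part of this patch):

	#include <errno.h>
	#include <stdbool.h>
	#include <xf86drm.h>
	#include <i915_drm.h>

	/* returns false only once the GPU is terminally wedged */
	static bool gpu_usable(int fd)
	{
		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL) == 0)
			return true;

		return errno != EIO;
	}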

v2: Split more carefully the change to i915_wait_request() and assorted
ABI from the reset handling.
v3: Add a couple of WARN_ON(EIO) to the interruptible modesetting code
so that we don't start to leak EIO there in future (and break our hang
resistant modesetting).

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_drv.h |  2 --
 drivers/gpu/drm/i915/i915_gem.c | 44 -
 drivers/gpu/drm/i915/i915_gem_userptr.c |  6 ++---
 drivers/gpu/drm/i915/intel_display.c| 13 +-
 drivers/gpu/drm/i915/intel_lrc.c|  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  2 +-
 6 files changed, 32 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f74bca326b79..bbdb056d2a8e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2978,8 +2978,6 @@ i915_gem_find_active_request(struct intel_engine_cs 
*ring);
 
 bool i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
-int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
- bool interruptible);
 
 static inline u32 i915_reset_counter(struct i915_gpu_error *error)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 56069bdada85..f570990f03e0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -206,11 +206,10 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object 
*obj)
BUG_ON(obj->madv == __I915_MADV_PURGED);
 
ret = i915_gem_object_set_to_cpu_domain(obj, true);
-   if (ret) {
+   if (WARN_ON(ret)) {
/* In the event of a disaster, abandon all caches and
 * hope for the best.
 */
-   WARN_ON(ret != -EIO);
obj->base.read_domains = obj->base.write_domain = 
I915_GEM_DOMAIN_CPU;
}
 
@@ -1104,15 +1103,13 @@ put_rpm:
return ret;
 }
 
-int
-i915_gem_check_wedge(struct i915_gpu_error *error,
-bool interruptible)
+static int
+i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
 {
-   if (i915_reset_in_progress_or_wedged(error)) {
-   /* Recovery complete, but the reset failed ... */
-   if (i915_terminally_wedged(error))
-   return -EIO;
+   if (__i915_terminally_wedged(reset_counter))
+   return -EIO;
 
+   if (__i915_reset_in_progress(reset_counter)) {
/* Non-interruptible callers can't handle -EAGAIN, hence return
 * -EIO unconditionally for these. */
if (!interruptible)
@@ -1283,13 +1280,14 @@ int __i915_wait_request(struct drm_i915_gem_request 
*req,
prepare_to_wait(&ring->irq_queue, &wait, state);
 
/* We need to check whether any gpu reset happened in between
-* the caller grabbing the seqno and now ... */
+* the request being submitted and now. If a reset has occurred,
+* the request is effectively complete (we either are in the
+* process of or have discarded the rendering and completely
+* reset the GPU. The results of the request are lost and we
+* are free to continue on 

[Intel-gfx] [PATCH 019/190] drm/i915: Separate out the seqno-barrier from engine->get_seqno

2016-01-11 Thread Chris Wilson
In order to simplify the next couple of patches, extract the
lazy_coherency optimisation out of the engine->get_seqno() vfunc into
its own callback.

v2: Rename the barrier to engine->irq_seqno_barrier to try and better
reflect that the barrier is only required after the user interrupt before
reading the seqno (to ensure that the seqno update lands in time as we
do not have strict seqno-irq ordering on all platforms).

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  6 ++---
 drivers/gpu/drm/i915/i915_drv.h  | 12 ++
 drivers/gpu/drm/i915/i915_gpu_error.c|  2 +-
 drivers/gpu/drm/i915/i915_irq.c  |  4 ++--
 drivers/gpu/drm/i915/i915_trace.h|  2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c |  4 ++--
 drivers/gpu/drm/i915/intel_lrc.c | 39 
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 36 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 ++--
 9 files changed, 53 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 9396597b136d..1499e2337e5d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -600,7 +600,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
   ring->name,
   
i915_gem_request_get_seqno(work->flip_queued_req),
   dev_priv->next_seqno,
-  ring->get_seqno(ring, true),
+  ring->get_seqno(ring),
   
i915_gem_request_completed(work->flip_queued_req, true));
} else
seq_printf(m, "Flip not associated with any 
ring\n");
@@ -734,7 +734,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
 
if (ring->get_seqno) {
seq_printf(m, "Current sequence (%s): %x\n",
-  ring->name, ring->get_seqno(ring, false));
+  ring->name, ring->get_seqno(ring));
}
 
spin_lock(&ring->breadcrumbs.lock);
@@ -1354,7 +1354,7 @@ static int i915_hangcheck_info(struct seq_file *m, void 
*unused)
intel_runtime_pm_get(dev_priv);
 
for_each_ring(ring, dev_priv, i) {
-   seqno[i] = ring->get_seqno(ring, false);
+   seqno[i] = ring->get_seqno(ring);
acthd[i] = intel_ring_get_active_head(ring);
}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a9e8de57e848..9762aa76bb0a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2972,15 +2972,19 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
   bool lazy_coherency)
 {
-   u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
-   return i915_seqno_passed(seqno, req->previous_seqno);
+   if (!lazy_coherency && req->ring->irq_seqno_barrier)
+   req->ring->irq_seqno_barrier(req->ring);
+   return i915_seqno_passed(req->ring->get_seqno(req->ring),
+req->previous_seqno);
 }
 
 static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
  bool lazy_coherency)
 {
-   u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
-   return i915_seqno_passed(seqno, req->seqno);
+   if (!lazy_coherency && req->ring->irq_seqno_barrier)
+   req->ring->irq_seqno_barrier(req->ring);
+   return i915_seqno_passed(req->ring->get_seqno(req->ring),
+req->seqno);
 }
 
 int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index f805d117f3d1..01d0206ca4dd 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -902,8 +902,8 @@ static void i915_record_ring_state(struct drm_device *dev,
 
ering->waiting = intel_engine_has_waiter(ring);
ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
-   ering->seqno = ring->get_seqno(ring, false);
ering->acthd = intel_ring_get_active_head(ring);
+   ering->seqno = ring->get_seqno(ring);
ering->start = I915_READ_START(ring);
ering->head = I915_READ_HEAD(ring);
ering->tail = I915_READ_TAIL(ring);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 95b997a57da8..d73669783045 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2903,7 +2903,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)

[Intel-gfx] [PATCH 017/190] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+

2016-01-11 Thread Chris Wilson
In order to ensure seqno/irq coherency, we currently read a ring register.
We are not quite sure how it works, only that it does. Experiments show
that e.g. doing a clflush(seqno) instead is not sufficient, but we can
remove the forcewake dance from the mmio access.

v2: Baytrail wants a clflush too.

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 99780b674311..a1d43b2c7077 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1490,10 +1490,21 @@ gen6_ring_get_seqno(struct intel_engine_cs *ring, bool 
lazy_coherency)
 {
/* Workaround to force correct ordering between irq and seqno writes on
 * ivb (and maybe also on snb) by reading from a CS register (like
-* ACTHD) before reading the status page. */
+* ACTHD) before reading the status page.
+*
+* Note that this effectively stalls the read by the time
+* it takes to do a memory transaction, which more or less ensures
+* that the write from the GPU has sufficient time to invalidate
+* the CPU cacheline. Alternatively we could delay the interrupt from
+* the CS ring to give the write time to land, but that would incur
+* a delay after every batch i.e. much more frequent than a delay
+* when waiting for the interrupt (with the same net latency).
+*/
if (!lazy_coherency) {
struct drm_i915_private *dev_priv = ring->dev->dev_private;
-   POSTING_READ(RING_ACTHD(ring->mmio_base));
+   POSTING_READ_FW(RING_ACTHD(ring->mmio_base));
+
+   intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
}
 
return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 010/190] drm/i915: Store the reset counter when constructing a request

2016-01-11 Thread Chris Wilson
As the request is only valid during the same global reset epoch, we can
record the current reset_counter when constructing the request and reuse
it when waiting upon that request in future. This removes a very hairy
atomic check serialised by the struct_mutex at the time of waiting and
allows us to transfer those waits to a central dispatcher for all
waiters and all requests.

PS: With per-engine resets, we obviously cannot assume a global reset
epoch for the requests - a per-engine epoch makes the most sense. The
challenge then is how to handle checking in the waiter for when to break
the wait, as the fine-grained reset may also want to requeue the
request (i.e. the assumption that just because the epoch changes the
request is completed may be broken - or we just avoid breaking that
assumption with the fine-grained resets).
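
A rough sketch of the resulting scheme (names taken from the hunks below;
error handling abbreviated):

        /* at request construction, while struct_mutex is held */
        req->reset_counter = i915_reset_counter(&dev_priv->gpu_error);

        /* in __i915_wait_request(), no locked recheck required */
        if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
                /* a reset ran since the request was constructed: -EAGAIN,
                 * upgraded to -EIO if the GPU is terminally wedged */
                ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
                if (ret == 0)
                        ret = -EAGAIN;
        }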

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_gem.c | 40 +++--
 drivers/gpu/drm/i915/intel_display.c|  7 +-
 drivers/gpu/drm/i915/intel_lrc.c|  7 --
 drivers/gpu/drm/i915/intel_ringbuffer.c |  6 -
 5 files changed, 15 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 60531df3844c..f74bca326b79 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2191,6 +2191,7 @@ struct drm_i915_gem_request {
/** On Which ring this request was generated */
struct drm_i915_private *i915;
struct intel_engine_cs *ring;
+   unsigned reset_counter;
 
 /** GEM sequence number associated with the previous request,
  * when the HWS breadcrumb is equal to this the GPU is processing
@@ -3050,7 +3051,6 @@ void __i915_add_request(struct drm_i915_gem_request *req,
 #define i915_add_request_no_flush(req) \
__i915_add_request(req, NULL, false)
 int __i915_wait_request(struct drm_i915_gem_request *req,
-   unsigned reset_counter,
bool interruptible,
s64 *timeout,
struct intel_rps_client *rps);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2cdd20b3aeaf..56069bdada85 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1212,7 +1212,6 @@ static int __i915_spin_request(struct 
drm_i915_gem_request *req, int state)
 /**
  * __i915_wait_request - wait until execution of request has finished
  * @req: duh!
- * @reset_counter: reset sequence associated with the given request
  * @interruptible: do an interruptible wait (normally yes)
  * @timeout: in - how long to wait (NULL forever); out - how much time 
remaining
  *
@@ -1227,7 +1226,6 @@ static int __i915_spin_request(struct 
drm_i915_gem_request *req, int state)
  * errno with remaining time filled in timeout argument.
  */
 int __i915_wait_request(struct drm_i915_gem_request *req,
-   unsigned reset_counter,
bool interruptible,
s64 *timeout,
struct intel_rps_client *rps)
@@ -1286,7 +1284,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 
/* We need to check whether any gpu reset happened in between
 * the caller grabbing the seqno and now ... */
-   if (reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
+   if (req->reset_counter !=
i915_reset_counter(&dev_priv->gpu_error)) {
/* ... but upgrade the -EAGAIN to an -EIO if the gpu
* is truly gone. */
ret = i915_gem_check_wedge(&dev_priv->gpu_error, 
interruptible);
@@ -1459,13 +1457,7 @@ i915_wait_request(struct drm_i915_gem_request *req)
 
BUG_ON(!mutex_is_locked(>struct_mutex));
 
-   ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
-   if (ret)
-   return ret;
-
-   ret = __i915_wait_request(req,
- i915_reset_counter(&dev_priv->gpu_error),
- interruptible, NULL, NULL);
+   ret = __i915_wait_request(req, interruptible, NULL, NULL);
if (ret)
return ret;
 
@@ -1540,7 +1532,6 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_request *requests[I915_NUM_RINGS];
-   unsigned reset_counter;
int ret, i, n = 0;
 
BUG_ON(!mutex_is_locked(>struct_mutex));
@@ -1549,12 +1540,6 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
if (!obj->active)
return 0;
 
-   ret = 

[Intel-gfx] [PATCH 020/190] drm/i915: Remove the lazy_coherency parameter from request-completed?

2016-01-11 Thread Chris Wilson
Now that we have split out the seqno-barrier from the
engine->get_seqno() callback itself, we can move the users of the
seqno-barrier to the required callsites, simplifying the common code and
making the required workaround handling much more explicit.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  4 ++--
 drivers/gpu/drm/i915/i915_drv.h  | 17 -
 drivers/gpu/drm/i915/i915_gem.c  | 24 
 drivers/gpu/drm/i915/intel_display.c |  2 +-
 drivers/gpu/drm/i915/intel_pm.c  |  4 ++--
 5 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 1499e2337e5d..d09e48455dcb 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -601,7 +601,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
   
i915_gem_request_get_seqno(work->flip_queued_req),
   dev_priv->next_seqno,
   ring->get_seqno(ring),
-  
i915_gem_request_completed(work->flip_queued_req, true));
+  
i915_gem_request_completed(work->flip_queued_req));
} else
seq_printf(m, "Flip not associated with any 
ring\n");
seq_printf(m, "Flip queued on frame %d, (was ready on 
frame %d), now %d\n",
@@ -1354,8 +1354,8 @@ static int i915_hangcheck_info(struct seq_file *m, void 
*unused)
intel_runtime_pm_get(dev_priv);
 
for_each_ring(ring, dev_priv, i) {
-   seqno[i] = ring->get_seqno(ring);
acthd[i] = intel_ring_get_active_head(ring);
+   seqno[i] = ring->get_seqno(ring);
}
 
i915_get_extra_instdone(dev, instdone);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9762aa76bb0a..44d46018ee13 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2969,20 +2969,14 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
return (int32_t)(seq1 - seq2) >= 0;
 }
 
-static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
-  bool lazy_coherency)
+static inline bool i915_gem_request_started(struct drm_i915_gem_request *req)
 {
-   if (!lazy_coherency && req->ring->irq_seqno_barrier)
-   req->ring->irq_seqno_barrier(req->ring);
return i915_seqno_passed(req->ring->get_seqno(req->ring),
 req->previous_seqno);
 }
 
-static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
- bool lazy_coherency)
+static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
 {
-   if (!lazy_coherency && req->ring->irq_seqno_barrier)
-   req->ring->irq_seqno_barrier(req->ring);
return i915_seqno_passed(req->ring->get_seqno(req->ring),
 req->seqno);
 }
@@ -3636,6 +3630,8 @@ static inline void i915_trace_irq_get(struct 
intel_engine_cs *ring,
 
 static inline bool __i915_request_irq_complete(struct drm_i915_gem_request 
*req)
 {
+   struct intel_engine_cs *engine = req->ring;
+
/* Ensure our read of the seqno is coherent so that we
 * do not "miss an interrupt" (i.e. if this is the last
 * request and the seqno write from the GPU is not visible
@@ -3647,7 +3643,10 @@ static inline bool __i915_request_irq_complete(struct 
drm_i915_gem_request *req)
 * but it is easier and safer to do it every time the waiter
 * is woken.
 */
-   if (i915_gem_request_completed(req, false))
+   if (engine->irq_seqno_barrier)
+   engine->irq_seqno_barrier(engine);
+
+   if (i915_gem_request_completed(req))
return true;
 
/* We need to check whether any gpu reset happened in between
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4b26529f1f44..d125820c6309 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1171,12 +1171,12 @@ static bool __i915_spin_request(struct 
drm_i915_gem_request *req,
 */
 
/* Only spin if we know the GPU is processing this request */
-   if (!i915_gem_request_started(req, true))
+   if (!i915_gem_request_started(req))
return false;
 
timeout = local_clock_us() + 5;
do {
-   if (i915_gem_request_completed(req, true))
+   if (i915_gem_request_completed(req))
return true;
 
if (signal_pending_state(state, wait->task))
@@ -1228,7 +1228,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
if 

[Intel-gfx] [PATCH 026/190] drm/i915: Stop setting wraparound seqno on initialisation

2016-01-11 Thread Chris Wilson
We have testcases to ensure that seqno wraparound works fine, so we can
forgo forcing everyone to encounter seqno wraparound during early
uptime. seqno wraparound incurs a full GPU stall so not forcing it
will eliminate one source of jitter from the early system. The testcases
give us deterministic coverage and, given how difficult it would be to
debug an issue (GPU hang) stemming from a wraparound using pure
postmortem analysis, I see no value in forcing a wrap during boot.

Advancing the global next_seqno after a GPU reset is equally pointless.
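
Wraparound safety rests on the signed-delta comparison the driver already
uses everywhere; a worked example against the existing helper:

        static inline bool i915_seqno_passed(uint32_t seq1, uint32_t seq2)
        {
                return (int32_t)(seq1 - seq2) >= 0;
        }

        /* just after a u32 wrap: seq1 = 2, seq2 = 0xfffffffe;
         * seq1 - seq2 == 4 (mod 2^32) and (int32_t)4 >= 0, so
         * seqno 2 correctly "passes" 0xfffffffe */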

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 16 +---
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d125820c6309..a0744626a110 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4814,14 +4814,6 @@ i915_gem_init_hw(struct drm_device *dev)
}
}
 
-   /*
-* Increment the next seqno by 0x100 so we have a visible break
-* on re-initialisation
-*/
-   ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
-   if (ret)
-   goto out;
-
/* Now it is safe to go back round and do everything else: */
for_each_ring(ring, dev_priv, i) {
struct drm_i915_gem_request *req;
@@ -5001,13 +4993,7 @@ i915_gem_load(struct drm_device *dev)
dev_priv->num_fence_regs =
I915_READ(vgtif_reg(avail_rs.fence_num));
 
-   /*
-* Set initial sequence number for requests.
-* Using this number allows the wraparound to happen early,
-* catching any obvious problems.
-*/
-   dev_priv->next_seqno = ((u32)~0 - 0x1100);
-   dev_priv->last_seqno = ((u32)~0 - 0x1101);
+   dev_priv->next_seqno = 1;
 
/* Initialize fence registers to zero */
INIT_LIST_HEAD(_priv->mm.fence_list);
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 031/190] drm/i915: Harden detection of missed interrupts

2016-01-11 Thread Chris Wilson
Only declare a missed interrupt if we find that the GPU is idle with
waiters and a hangcheck interval has passed in which no new user
interrupts have been raised.
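
In pseudo-form, the test added below amounts to the following, with
missed_irq() standing in for the existing missed_irq_rings/DRM_ERROR path
(a sketch, not a hunk from the patch):

        user_interrupts = READ_ONCE(ring->user_interrupts);
        if (ring_idle(ring, seqno) &&
            intel_engine_has_waiter(ring) &&
            ring->hangcheck.user_interrupts == user_interrupts)
                missed_irq(ring);       /* flag it and kick the waiter */
        ring->hangcheck.user_interrupts = user_interrupts;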

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  6 ++
 drivers/gpu/drm/i915/i915_irq.c | 10 --
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 ++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 5a706c700684..567f8db4c70a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -735,6 +735,9 @@ static void i915_ring_seqno_info(struct seq_file *m,
seq_printf(m, "Current sequence (%s): %x\n",
   ring->name, intel_ring_get_seqno(ring));
 
+   seq_printf(m, "Current user interrupts (%s): %x\n",
+  ring->name, READ_ONCE(ring->user_interrupts));
+
spin_lock(&ring->breadcrumbs.lock);
for (rb = rb_first(&ring->breadcrumbs.waiters);
 rb != NULL;
@@ -1372,6 +1375,9 @@ static int i915_hangcheck_info(struct seq_file *m, void 
*unused)
seq_printf(m, "\tseqno = %x [current %x], waiters? %d\n",
   ring->hangcheck.seqno, seqno[i],
   intel_engine_has_waiter(ring));
+   seq_printf(m, "\tuser interrupts = %x [current %x]\n",
+  ring->hangcheck.user_interrupts,
+  ring->user_interrupts);
seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
   (long long)ring->hangcheck.acthd,
   (long long)acthd[i]);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index bf48fa63127a..b3942dec7de4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -997,8 +997,10 @@ static void ironlake_rps_change_irq_handler(struct 
drm_device *dev)
 static void notify_ring(struct intel_engine_cs *ring)
 {
ring->irq_posted = true; /* paired with mb() in wake_up_process() */
-   if (intel_engine_wakeup(ring))
+   if (intel_engine_wakeup(ring)) {
trace_i915_gem_request_notify(ring);
+   ring->user_interrupts++;
+   }
 }
 
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
@@ -3061,12 +3063,14 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
for_each_ring(ring, dev_priv, i) {
u64 acthd;
u32 seqno;
+   unsigned user_interrupts;
bool busy = true;
 
semaphore_clear_deadlocks(dev_priv);
 
acthd = intel_ring_get_active_head(ring);
seqno = intel_ring_get_seqno(ring);
+   user_interrupts = READ_ONCE(ring->user_interrupts);
 
if (ring->hangcheck.seqno == seqno) {
if (ring_idle(ring, seqno)) {
@@ -3074,7 +3078,8 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
 
if (intel_engine_has_waiter(ring)) {
/* Issue a wake-up to catch stuck h/w. 
*/
-   if (!test_and_set_bit(ring->id, 
&dev_priv->gpu_error.missed_irq_rings)) {
+   if (ring->hangcheck.user_interrupts == 
user_interrupts &&
+   !test_and_set_bit(ring->id, 
&dev_priv->gpu_error.missed_irq_rings)) {
if (!test_bit(ring->id, 
&dev_priv->gpu_error.test_irq_rings))
DRM_ERROR("Hangcheck 
timer elapsed... %s idle\n",
  ring->name);
@@ -3142,6 +3147,7 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
 
ring->hangcheck.seqno = seqno;
ring->hangcheck.acthd = acthd;
+   ring->hangcheck.user_interrupts = user_interrupts;
busy_count += busy;
}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 3364bcebd456..73da75fa47c1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -90,6 +90,7 @@ struct intel_ring_hangcheck {
u64 acthd;
u64 max_acthd;
u32 seqno;
+   unsigned user_interrupts;
int score;
enum intel_ring_hangcheck_action action;
int deadlock;
@@ -328,6 +329,7 @@ struct  intel_engine_cs {
 * inspecting request list.
 */
u32 last_submitted_seqno;
+   unsigned user_interrupts;
 
bool gpu_caches_dirty;
 
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 013/190] drm/i915: Suppress error message when GPU resets are disabled

2016-01-11 Thread Chris Wilson
If we do not have low-level support for resetting the GPU, or if the user
has explicitly disabled resetting the device, the failure is expected.
Since it is an expected failure, we should be using a lower priority
message than *ERROR*, perhaps NOTICE. In the absence of DRM_NOTICE, just
emit the expected failure as a DEBUG message.

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_drv.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 2f03379cdb4b..5160f1414de4 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -910,7 +910,10 @@ int i915_reset(struct drm_device *dev)
pr_notice("drm/i915: Resetting chip after gpu hang\n");
 
if (ret) {
-   DRM_ERROR("Failed to reset chip: %i\n", ret);
+   if (ret != -ENODEV)
+   DRM_ERROR("Failed to reset chip: %i\n", ret);
+   else
+   DRM_DEBUG_DRIVER("GPU reset disabled\n");
goto error;
}
 
-- 
2.7.0.rc3



Re: [Intel-gfx] [PATCH 07/13] drm/i915: Introduce dedicated object VMA iterator

2016-01-11 Thread Tvrtko Ursulin

On 11/01/16 08:43, Daniel Vetter wrote:
> On Fri, Jan 08, 2016 at 01:29:14PM +, Tvrtko Ursulin wrote:
>>
>> On 08/01/16 11:29, Tvrtko Ursulin wrote:
>>> From: Tvrtko Ursulin 
>>>
>>> Purpose is to catch places which iterate the object VMA list
>>> without holding the big lock.
>>>
>>> Implemented by open coding list_for_each_entry to make the
>>> macro compatible with existing call sites.
>>>
>>> Signed-off-by: Tvrtko Ursulin 
>>> Cc: Daniel Vetter 
>>> ---
>>>   drivers/gpu/drm/i915/i915_debugfs.c  |  8 
>>>   drivers/gpu/drm/i915/i915_drv.h  |  6 ++
>>>   drivers/gpu/drm/i915/i915_gem.c  | 24 
>>>   drivers/gpu/drm/i915/i915_gem_gtt.c  |  2 +-
>>>   drivers/gpu/drm/i915/i915_gem_shrinker.c |  2 +-
>>>   drivers/gpu/drm/i915/i915_gpu_error.c|  4 ++--
>>>   6 files changed, 26 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 714a45cf8a51..d7c2a3201161 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -117,7 +117,7 @@ static u64 i915_gem_obj_total_ggtt_size(struct 
>>> drm_i915_gem_object *obj)
>>> u64 size = 0;
>>> struct i915_vma *vma;
>>>
>>> -   list_for_each_entry(vma, &obj->vma_list, vma_link) {
>>> +   i915_gem_obj_for_each_vma(vma, obj) {
>>> if (i915_is_ggtt(vma->vm) &&
>>> drm_mm_node_allocated(>node))
>>> size += vma->node.size;
>>> @@ -155,7 +155,7 @@ describe_obj(struct seq_file *m, struct 
>>> drm_i915_gem_object *obj)
>>>obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
>>> if (obj->base.name)
>>> seq_printf(m, " (name: %d)", obj->base.name);
>>> -   list_for_each_entry(vma, &obj->vma_list, vma_link) {
>>> +   i915_gem_obj_for_each_vma(vma, obj) {
>>> if (vma->pin_count > 0)
>>> pin_count++;
>>> }
>>> @@ -164,7 +164,7 @@ describe_obj(struct seq_file *m, struct 
>>> drm_i915_gem_object *obj)
>>> seq_printf(m, " (display)");
>>> if (obj->fence_reg != I915_FENCE_REG_NONE)
>>> seq_printf(m, " (fence: %d)", obj->fence_reg);
>>> -   list_for_each_entry(vma, &obj->vma_list, vma_link) {
>>> +   i915_gem_obj_for_each_vma(vma, obj) {
>>> seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
>>>i915_is_ggtt(vma->vm) ? "g" : "pp",
>>>vma->node.start, vma->node.size);
>>> @@ -342,7 +342,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>>> stats->shared += obj->base.size;
>>>
>>> if (USES_FULL_PPGTT(obj->base.dev)) {
>>> -   list_for_each_entry(vma, &obj->vma_list, vma_link) {
>>> +   i915_gem_obj_for_each_vma(vma, obj) {
>>> struct i915_hw_ppgtt *ppgtt;
>>>
>>> if (!drm_mm_node_allocated(>node))
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index b77a5d84eac2..0406a020dfcc 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -2852,6 +2852,12 @@ struct drm_i915_gem_object 
>>> *i915_gem_object_create_from_data(
>>>   void i915_gem_free_object(struct drm_gem_object *obj);
>>>   void i915_gem_vma_destroy(struct i915_vma *vma);
>>>
>>> +#define i915_gem_obj_for_each_vma(vma, obj) \
>>> +   for (WARN_ON_ONCE(!mutex_is_locked(&(obj)->base.dev->struct_mutex)), \
>>> +vma = list_first_entry(&(obj)->vma_list, typeof(*vma), vma_link);\
>>> +    &vma->vma_link != (&(obj)->vma_list); \
>>> +vma = list_next_entry(vma, vma_link))
>>> +
>>
>>
>> Unfortunately error capture is not happy with this approach. Can't even see
>> that error capture attempts to grab the mutex anywhere.
>>
>> So what? Drop the idea or add a "doing error capture" flag somewhere?
> 
> Fix the bugs. No surprise at all that we've screwed this up all over the
> place ;-) Afaics modeset code isn't much better either ...

Ok I'll drop this patch then, since the series contains fixes to all but
one of the related issues. The remaining one is then:

[   17.370366] [ cut here ]
[   17.375633] WARNING: CPU: 0 PID: 1128 at 
drivers/gpu/drm/i915/i915_gem.c:5166 i915_gem_obj_ggtt_offset_view+0x10f/0x120 
[i915]()
[   17.388879] WARN_ON_ONCE(!mutex_is_locked(&(o)->base.dev->struct_mutex))
[   17.396364] Modules linked in: hid_generic usbhid coretemp asix usbnet 
libphy mii i915 gpio_lynxpoint i2c_hid hid video i2c_algo_bit drm_kms_helper 
acpi_pad drm lpc_ich mfd_core nls_iso8859_1 e1000e ptp ahci libahci pps_core
[   17.419484] CPU: 0 PID: 1128 Comm: Xorg Tainted: G U  
4.4.0-rc8-160107+ #105
[   17.428771] Hardware name: Intel Corporation Broadwell Client 
platform/WhiteTip Mountain 1, BIOS BDW-E1R1.86C.0080.R01.1406120446 06/12/2014
[   17.443161]  

[Intel-gfx] ✗ failure: Fi.CI.BAT

2016-01-11 Thread Patchwork
== Summary ==

Built on ff88655b3a5467bbc3be8c67d3e05ebf182557d3 drm-intel-nightly: 
2016y-01m-11d-07h-30m-16s UTC integration manifest

Test gem_storedw_loop:
Subgroup basic-render:
pass   -> DMESG-WARN (bdw-nuci7)
dmesg-warn -> PASS   (bdw-ultra)
Test kms_flip:
Subgroup basic-flip-vs-dpms:
dmesg-warn -> PASS   (ilk-hp8440p)
Test kms_pipe_crc_basic:
Subgroup nonblocking-crc-pipe-b-frame-sequence:
pass   -> DMESG-WARN (bdw-ultra)
Subgroup read-crc-pipe-a-frame-sequence:
pass   -> FAIL   (snb-x220t)
Subgroup read-crc-pipe-b:
dmesg-warn -> PASS   (byt-nuc)
Subgroup read-crc-pipe-c:
dmesg-warn -> PASS   (bsw-nuc-2)

bdw-nuci7total:138  pass:128  dwarn:1   dfail:0   fail:0   skip:9  
bdw-ultratotal:138  pass:131  dwarn:1   dfail:0   fail:0   skip:6  
bsw-nuc-2total:141  pass:115  dwarn:2   dfail:0   fail:0   skip:24 
byt-nuc  total:141  pass:119  dwarn:7   dfail:0   fail:0   skip:15 
hsw-brixbox  total:141  pass:134  dwarn:0   dfail:0   fail:0   skip:7  
hsw-gt2  total:141  pass:137  dwarn:0   dfail:0   fail:0   skip:4  
ilk-hp8440p  total:141  pass:101  dwarn:3   dfail:0   fail:0   skip:37 
ivb-t430stotal:135  pass:122  dwarn:3   dfail:4   fail:0   skip:6  
skl-i5k-2total:141  pass:132  dwarn:1   dfail:0   fail:0   skip:8  
skl-i7k-2total:141  pass:131  dwarn:2   dfail:0   fail:0   skip:8  
snb-dellxps  total:141  pass:122  dwarn:5   dfail:0   fail:0   skip:14 
snb-x220ttotal:141  pass:121  dwarn:5   dfail:0   fail:2   skip:13 

Results at /archive/results/CI_IGT_test/Patchwork_1120/



Re: [Intel-gfx] [PATCH 07/13] drm/i915: Introduce dedicated object VMA iterator

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 11:44:04AM +, Chris Wilson wrote:
> On Fri, Jan 08, 2016 at 11:29:46AM +, Tvrtko Ursulin wrote:
> > From: Tvrtko Ursulin 
> > 
> > Purpose is to catch places which iterate the object VMA list
> > without holding the big lock.
> > 
> > Implemented by open coding list_for_each_entry to make the
> > macro compatible with existing call sites.
> > 
> > Signed-off-by: Tvrtko Ursulin 
> > Cc: Daniel Vetter 
> > +#define i915_gem_obj_for_each_vma(vma, obj) \
> > +   for (WARN_ON_ONCE(!mutex_is_locked(&(obj)->base.dev->struct_mutex)), \
> 
> Let's not go around adding WARN(!mutex_locked) to GEM code when
> lockdep_assert_held doesn't add overhead outside of testing.

Hm yeah I still prefer WARN_ON for modeset code (where it doesn't matter)
because of increased test coverage. But for gem it indeed makes more sense
to only do this for lockdep-enabled builds. CI runs with lockdep, so we're
good.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
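
A minimal sketch of the lockdep-flavoured variant suggested here: since
lockdep_assert_held() expands to a statement, it cannot sit in the
for-initialiser the way WARN_ON_ONCE() does, but it can live in a small
helper that compiles away entirely without CONFIG_LOCKDEP:

        static inline struct list_head *
        i915_gem_obj_vma_list(struct drm_i915_gem_object *obj)
        {
                lockdep_assert_held(&obj->base.dev->struct_mutex);
                return &obj->vma_list;
        }

        #define i915_gem_obj_for_each_vma(vma, obj) \
                list_for_each_entry(vma, i915_gem_obj_vma_list(obj), vma_link)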


[Intel-gfx] [PATCH 040/190] drm/i915: Record the ringbuffer associated with the request

2016-01-11 Thread Chris Wilson
The request tells us where to read the ringbuf from, so use that
information to simplify the error capture. If no request was active at
the time of the hang, the ring is idle and there is no information
inside the ring pertaining to the hang.

Note carefully that this will reduce the amount of information stored in
the error state - any ring without an active request will not be
recorded.

Signed-off-by: Chris Wilson 
Reviewed-by: Dave Gordon 
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 28 
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 3e137fc701cf..93da2c7581f6 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -995,7 +995,6 @@ static void i915_gem_record_rings(struct drm_device *dev,
 
for (i = 0; i < I915_NUM_RINGS; i++) {
struct intel_engine_cs *ring = &dev_priv->ring[i];
-   struct intel_ringbuffer *rbuf;
 
error->ring[i].pid = -1;
 
@@ -1009,6 +1008,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
request = i915_gem_find_active_request(ring);
if (request) {
struct i915_address_space *vm;
+   struct intel_ringbuffer *rb;
 
vm = request->ctx && request->ctx->ppgtt ?
&request->ctx->ppgtt->base :
@@ -1039,26 +1039,14 @@ static void i915_gem_record_rings(struct drm_device 
*dev,
}
rcu_read_unlock();
}
-   }
 
-   if (i915.enable_execlists) {
-   /* TODO: This is only a small fix to keep basic error
-* capture working, but we need to add more information
-* for it to be useful (e.g. dump the context being
-* executed).
-*/
-   if (request)
-   rbuf = request->ctx->engine[ring->id].ringbuf;
-   else
-   rbuf = 
ring->default_context->engine[ring->id].ringbuf;
-   } else
-   rbuf = ring->buffer;
-
-   error->ring[i].cpu_ring_head = rbuf->head;
-   error->ring[i].cpu_ring_tail = rbuf->tail;
-
-   error->ring[i].ringbuffer =
-   i915_error_ggtt_object_create(dev_priv, rbuf->obj);
+   rb = request->ringbuf;
+   error->ring[i].cpu_ring_head = rb->head;
+   error->ring[i].cpu_ring_tail = rb->tail;
+   error->ring[i].ringbuffer =
+   i915_error_ggtt_object_create(dev_priv,
+ rb->obj);
+   }
 
error->ring[i].hws_page =
i915_error_ggtt_object_create(dev_priv, 
ring->status_page.obj);
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 043/190] drm/i915: Skip capturing an error state if we already have one

2016-01-11 Thread Chris Wilson
As we only ever keep the first error state around, we can avoid some
work that can be quite intrusive if we don't record the error the second
time around. This does move the race whereby the user could discard one
error state as the second is being captured, but that race exists in the
current code and we hope that recapturing error state is only done for
debugging.

Note that as we discard the error state for simulated errors, the igt
tests that exercise error capture continue to function.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 4f17d6847569..86f582115313 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1312,6 +1312,9 @@ void i915_capture_error_state(struct drm_device *dev, 
bool wedged,
struct drm_i915_error_state *error;
unsigned long flags;
 
+   if (READ_ONCE(dev_priv->gpu_error.first_error))
+   return;
+
/* Account for pipe specific data like PIPE*STAT */
error = kzalloc(sizeof(*error), GFP_ATOMIC);
if (!error) {
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 058/190] drm/i915: Rename request->ring to request->engine

2016-01-11 Thread Chris Wilson
In order to disambiguate between the pointer to the intel_engine_cs
(called ring) and the intel_ringbuffer (called ringbuf), rename
s/ring/engine/.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  11 +--
 drivers/gpu/drm/i915/i915_drv.h  |   2 +-
 drivers/gpu/drm/i915/i915_gem.c  |  32 +++
 drivers/gpu/drm/i915/i915_gem_context.c  |  70 +++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |   8 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c  |  47 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c |  18 ++--
 drivers/gpu/drm/i915/i915_gem_request.c  |  53 ---
 drivers/gpu/drm/i915/i915_gem_request.h  |  10 +-
 drivers/gpu/drm/i915/i915_gpu_error.c|   3 +-
 drivers/gpu/drm/i915/i915_guc_submission.c   |   8 +-
 drivers/gpu/drm/i915/i915_trace.h|  32 +++
 drivers/gpu/drm/i915/intel_breadcrumbs.c |   2 +-
 drivers/gpu/drm/i915/intel_display.c |  10 +-
 drivers/gpu/drm/i915/intel_lrc.c | 134 +--
 drivers/gpu/drm/i915/intel_mocs.c|  13 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  62 ++---
 17 files changed, 240 insertions(+), 275 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 387ae77d3c29..018076c89247 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -185,8 +185,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
seq_printf(m, " (%s mappable)", s);
}
if (obj->last_write_req != NULL)
-   seq_printf(m, " (%s)",
-  
i915_gem_request_get_ring(obj->last_write_req)->name);
+   seq_printf(m, " (%s)", obj->last_write_req->engine->name);
if (obj->frontbuffer_bits)
seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
@@ -593,14 +592,14 @@ static int i915_gem_pageflip_info(struct seq_file *m, 
void *data)
   pipe, plane);
}
if (work->flip_queued_req) {
-   struct intel_engine_cs *ring =
-   
i915_gem_request_get_ring(work->flip_queued_req);
+   struct intel_engine_cs *engine =
+   work->flip_queued_req->engine;
 
seq_printf(m, "Flip queued on %s at seqno %x, 
next seqno %x [current breadcrumb %x], completed? %d\n",
-  ring->name,
+  engine->name,
   
i915_gem_request_get_seqno(work->flip_queued_req),
   dev_priv->next_seqno,
-  intel_ring_get_seqno(ring),
+  intel_ring_get_seqno(engine),
   
i915_gem_request_completed(work->flip_queued_req));
} else
seq_printf(m, "Flip not associated with any 
ring\n");
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 58e9e5e50769..baede4517c70 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3410,7 +3410,7 @@ wait_remaining_ms_from_jiffies(unsigned long 
timestamp_jiffies, int to_wait_ms)
 }
 static inline bool __i915_request_irq_complete(struct drm_i915_gem_request 
*req)
 {
-   struct intel_engine_cs *engine = req->ring;
+   struct intel_engine_cs *engine = req->engine;
 
/* Before we do the heavier coherent read of the seqno,
 * check the value (hopefully) in the CPU cacheline.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 247731672cb1..6622c9bb3af8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1122,7 +1122,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object 
*obj,
if (ret)
return ret;
 
-   i = obj->last_write_req->ring->id;
+   i = obj->last_write_req->engine->id;
if (obj->last_read_req[i] == obj->last_write_req)
i915_gem_object_retire__read(obj, i);
else
@@ -1149,7 +1149,7 @@ static void
 i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
   struct drm_i915_gem_request *req)
 {
-   int ring = req->ring->id;
+   int ring = req->engine->id;
 
if (obj->last_read_req[ring] == req)
i915_gem_object_retire__read(obj, ring);
@@ -2062,17 +2062,15 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 

[Intel-gfx] [PATCH 055/190] drm/i915: Unify intel_logical_ring_emit and intel_ring_emit

2016-01-11 Thread Chris Wilson
Both perform the same actions with more or less indirection, so just
unify the code.
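
A sketch of the unified calling convention (the emit helpers now take the
intel_ringbuffer from the request on both the legacy and execlists paths;
error handling trimmed):

        struct intel_ringbuffer *ring = req->ringbuf;
        int ret;

        ret = intel_ring_begin(req, 2);
        if (ret)
                return ret;

        intel_ring_emit(ring, MI_NOOP);
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_advance(ring);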

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c|   2 +-
 drivers/gpu/drm/i915/i915_gem_context.c|   8 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  34 -
 drivers/gpu/drm/i915/i915_gem_gtt.c|  26 +++
 drivers/gpu/drm/i915/intel_display.c   |  26 +++
 drivers/gpu/drm/i915/intel_lrc.c   | 114 ++---
 drivers/gpu/drm/i915/intel_lrc.h   |  26 ---
 drivers/gpu/drm/i915/intel_mocs.c  |  30 
 drivers/gpu/drm/i915/intel_overlay.c   |  42 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c| 101 -
 drivers/gpu/drm/i915/intel_ringbuffer.h|  21 ++
 11 files changed, 194 insertions(+), 236 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c2a1ec8abc11..247731672cb1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4068,7 +4068,7 @@ err:
 
 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
 {
-   struct intel_engine_cs *ring = req->ring;
+   struct intel_ringbuffer *ring = req->ringbuf;
struct drm_i915_private *dev_priv = req->i915;
u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
int i, ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 3e3b4bf3fed1..d58de7e084dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -519,7 +519,7 @@ i915_gem_context_get(struct drm_i915_file_private 
*file_priv, u32 id)
 static inline int
 mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 {
-   struct intel_engine_cs *ring = req->ring;
+   struct intel_ringbuffer *ring = req->ringbuf;
u32 flags = hw_flags | MI_MM_SPACE_GTT;
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
@@ -534,7 +534,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 * itlb_before_ctx_switch.
 */
if (IS_GEN6(req->i915)) {
-   ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0);
+   ret = req->ring->flush(req, I915_GEM_GPU_DOMAINS, 0);
if (ret)
return ret;
}
@@ -562,7 +562,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
for_each_ring(signaller, req->i915, i) {
-   if (signaller == ring)
+   if (signaller == req->ring)
continue;
 
intel_ring_emit_reg(ring, 
RING_PSMI_CTL(signaller->mmio_base));
@@ -587,7 +587,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
for_each_ring(signaller, req->i915, i) {
-   if (signaller == ring)
+   if (signaller == req->ring)
continue;
 
intel_ring_emit_reg(ring, 
RING_PSMI_CTL(signaller->mmio_base));
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 78b462956c78..603a247ac333 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1146,14 +1146,12 @@ i915_gem_execbuffer_retire_commands(struct 
i915_execbuffer_params *params)
 }
 
 static int
-i915_reset_gen7_sol_offsets(struct drm_device *dev,
-   struct drm_i915_gem_request *req)
+i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 {
-   struct intel_engine_cs *ring = req->ring;
-   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct intel_ringbuffer *ring = req->ringbuf;
int ret, i;
 
if (!IS_GEN7(req->i915) || req->ring != &dev_priv->ring[RCS]) {
+   if (!IS_GEN7(req->i915) || req->ring->id != RCS) {
DRM_DEBUG("sol reset is gen7/rcs only\n");
return -EINVAL;
}
@@ -1231,9 +1229,8 @@ i915_gem_ringbuffer_submission(struct 
i915_execbuffer_params *params,
   struct drm_i915_gem_execbuffer2 *args,
   struct list_head *vmas)
 {
-   struct drm_device *dev = params->dev;
-   struct intel_engine_cs *ring = params->ring;
-   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct intel_ringbuffer *ring = params->request->ringbuf;
+   struct drm_i915_private *dev_priv = params->request->i915;
u64 exec_start, exec_len;
int instp_mode;
u32 instp_mask;
@@ -1247,34 

[Intel-gfx] [PATCH 037/190] drm/i915: Add background commentary to "waitboosting"

2016-01-11 Thread Chris Wilson
Describe the intent of boosting the GPU frequency to maximum before
waiting on the GPU.

RPS waitboosting was introduced with

commit b29c19b645287f7062e17d70fa4e9781a01a5d88
Author: Chris Wilson 
Date:   Wed Sep 25 17:34:56 2013 +0100

drm/i915: Boost RPS frequency for CPU stalls

but lacked a concise comment in the code to explain itself.

Signed-off-by: Chris Wilson 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_gem.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3fea582768e9..3948e85eaa48 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1244,6 +1244,22 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
}
 
trace_i915_gem_request_wait_begin(req);
+
+   /* This client is about to stall waiting for the GPU. In many cases
+* this is undesirable and limits the throughput of the system, as
+* many clients cannot continue processing user input/output whilst
+* blocked. RPS autotuning may take tens of milliseconds to respond
+* to the GPU load and thus incurs additional latency for the client.
+* We can circumvent that by promoting the GPU frequency to maximum
+* before we wait. This makes the GPU throttle up much more quickly
+* (good for benchmarks and user experience, e.g. window animations),
+* but at a cost of spending more power processing the workload
+* (bad for battery). Not all clients even want their results
+* immediately and for them we should just let the GPU select its own
+* frequency to maximise efficiency. To prevent a single client from
+* forcing the clocks too high for the whole system, we only allow
+* each client to waitboost once in a busy period.
+*/
if (INTEL_INFO(req->i915)->gen >= 6)
gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 036/190] drm/i915: Restore waitboost credit to the synchronous waiter

2016-01-11 Thread Chris Wilson
Ideally, we want to automagically have the GPU respond to the
instantaneous load by reclocking itself. However, reclocking occurs
relatively slowly, and to the client waiting for a result from the GPU,
too late. To compensate and reduce the client latency, we allow the
first wait from a client to boost the GPU clocks to maximum. This
overcomes the lag in autoreclocking, at the expense of forcing the GPU
clocks too high. So to offset the excessive power usage, we currently
allow a client to only boost the clocks once before we detect the GPU
is idle again. This works reasonably for say the first frame in a
benchmark, but for many more synchronous workloads (like OpenCL) we find
the GPU clocks remain too low. By noting a wait which would idle the GPU
(i.e. we just waited upon the last known request), we can give that
client the idle boost credit (for their next wait) without the 100ms
delay required for us to detect the GPU idle state. The intention is to
boost clients that are stalling in the process of feeding the GPU more
work (and who in doing so let the GPU idle), without granting boost
credits to clients that are throttling themselves (such as compositors).

Signed-off-by: Chris Wilson 
Cc: "Zou, Nanhai" 
Cc: Jesse Barnes 
Reviewed-by: Jesse Barnes 
---
 drivers/gpu/drm/i915/i915_gem.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e9f5ca7ea835..3fea582768e9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1314,6 +1314,22 @@ complete:
*timeout = 0;
}
 
+   if (ret == 0 && rps && req->seqno == req->ring->last_submitted_seqno) {
+   /* The GPU is now idle and this client has stalled.
+* Since no other client has submitted a request in the
+* meantime, assume that this client is the only one
+* supplying work to the GPU but is unable to keep that
+* work supplied because it is waiting. Since the GPU is
+* then never kept fully busy, RPS autoclocking will
+* keep the clocks relatively low, causing further delays.
+* Compensate by giving the synchronous client credit for
+* a waitboost next time.
+*/
+   spin_lock(&req->i915->rps.client_lock);
+   list_del_init(&rps->link);
+   spin_unlock(&req->i915->rps.client_lock);
+   }
+
return ret;
 }
 
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 050/190] drm/i915: Refactor duplicate object vmap functions

2016-01-11 Thread Chris Wilson
We now have two implementations for vmapping a whole object, one for
dma-buf and one for the ringbuffer. If we couple the vmapping into the
obj->pages lifetime, then we can reuse an obj->vmapping for both and at
the same time couple it into the shrinker.

v2: Mark the failable kmalloc() as __GFP_NOWARN (vsyrjala)
v3: Call unpin_vmap from the right dmabuf unmapper
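
A usage sketch for the shared helper (caller holds struct_mutex; data and
size are placeholders):

        void *vaddr;

        vaddr = i915_gem_object_pin_vmap(obj);
        if (IS_ERR(vaddr))
                return PTR_ERR(vaddr);

        memcpy(vaddr, data, size);       /* CPU access through the vmapping */

        i915_gem_object_unpin_vmap(obj); /* drops only the pages pin; the
                                          * mapping itself is torn down in
                                          * put_pages(), i.e. under shrinker
                                          * pressure */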

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h | 12 +---
 drivers/gpu/drm/i915/i915_gem.c | 41 +
 drivers/gpu/drm/i915/i915_gem_dmabuf.c  | 53 -
 drivers/gpu/drm/i915/intel_ringbuffer.c | 53 ++---
 4 files changed, 71 insertions(+), 88 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 49a151126b2a..56cf2ffc1eac 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2114,10 +2114,7 @@ struct drm_i915_gem_object {
struct scatterlist *sg;
int last;
} get_page;
-
-   /* prime dma-buf support */
-   void *dma_buf_vmapping;
-   int vmapping_count;
+   void *vmapping;
 
/** Breadcrumb of last rendering to the buffer.
 * There can only be one writer, but we allow for multiple readers.
@@ -2774,12 +2771,19 @@ static inline void i915_gem_object_pin_pages(struct 
drm_i915_gem_object *obj)
BUG_ON(obj->pages == NULL);
obj->pages_pin_count++;
 }
+
 static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 {
BUG_ON(obj->pages_pin_count == 0);
obj->pages_pin_count--;
 }
 
+void *__must_check i915_gem_object_pin_vmap(struct drm_i915_gem_object *obj);
+static inline void i915_gem_object_unpin_vmap(struct drm_i915_gem_object *obj)
+{
+   i915_gem_object_unpin_pages(obj);
+}
+
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
 struct intel_engine_cs *to,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9df00e694cd9..2912e8714f5b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1854,6 +1854,11 @@ i915_gem_object_put_pages(struct drm_i915_gem_object 
*obj)
ops->put_pages(obj);
obj->pages = NULL;
 
+   if (obj->vmapping) {
+   vunmap(obj->vmapping);
+   obj->vmapping = NULL;
+   }
+
i915_gem_object_invalidate(obj);
 
return 0;
@@ -2019,6 +2024,42 @@ i915_gem_object_get_pages(struct drm_i915_gem_object 
*obj)
return 0;
 }
 
+void *i915_gem_object_pin_vmap(struct drm_i915_gem_object *obj)
+{
+   int ret;
+
+   ret = i915_gem_object_get_pages(obj);
+   if (ret)
+   return ERR_PTR(ret);
+
+   i915_gem_object_pin_pages(obj);
+
+   if (obj->vmapping == NULL) {
+   struct sg_page_iter sg_iter;
+   struct page **pages;
+   int n;
+
+   n = obj->base.size >> PAGE_SHIFT;
+   pages = kmalloc(n*sizeof(*pages), GFP_TEMPORARY | __GFP_NOWARN);
+   if (pages == NULL)
+   pages = drm_malloc_ab(n, sizeof(*pages));
+   if (pages != NULL) {
+   n = 0;
+   for_each_sg_page(obj->pages->sgl, &sg_iter, 
obj->pages->nents, 0)
+   pages[n++] = sg_page_iter_page(&sg_iter);
+
+   obj->vmapping = vmap(pages, n, 0, PAGE_KERNEL);
+   drm_free_large(pages);
+   }
+   if (obj->vmapping == NULL) {
+   i915_gem_object_unpin_pages(obj);
+   return ERR_PTR(-ENOMEM);
+   }
+   }
+
+   return obj->vmapping;
+}
+
 void i915_vma_move_to_active(struct i915_vma *vma,
 struct drm_i915_gem_request *req)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index e9c2bfd85b52..8894648acee0 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -95,14 +95,12 @@ static void i915_gem_unmap_dma_buf(struct 
dma_buf_attachment *attachment,
 {
struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
 
-   mutex_lock(&obj->base.dev->struct_mutex);
-
dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir);
sg_free_table(sg);
kfree(sg);
 
+   mutex_lock(&obj->base.dev->struct_mutex);
i915_gem_object_unpin_pages(obj);
-
mutex_unlock(&obj->base.dev->struct_mutex);
 }
 
@@ -110,51 +108,17 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 {
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
struct drm_device *dev = obj->base.dev;
-   struct sg_page_iter sg_iter;
-   struct page **pages;
-   int 

[Intel-gfx] [PATCH 048/190] drm/i915: Disable waitboosting for fence_wait()

2016-01-11 Thread Chris Wilson
We want to restrict waitboosting to known process contexts, where we can
track which clients are receiving waitboosts and prevent excessive power
wasting. For fence_wait() we do not have any client tracking and so that
leaves it open to abuse.
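
The sentinel gives the rps argument three distinct states; file_rps below
is a stand-in for a real client's intel_rps_client:

        struct intel_rps_client *rps;

        rps = file_rps;         /* tracked client, may receive a waitboost */
        rps = NULL;             /* anonymous wait, no client bookkeeping */
        rps = NO_WAITBOOST;     /* fence_wait(): never boost */

        /* the boost site then filters on IS_ERR(), per the diff below */
        if (!IS_ERR(rps) && INTEL_INFO(req->i915)->gen >= 6)
                gen6_rps_boost(req->i915, rps, req->emitted_jiffies);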

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_request.c | 6 +++---
 drivers/gpu/drm/i915/i915_gem_request.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index a796dbd1b0e4..01893d847dfd 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -68,7 +68,7 @@ static signed long i915_fence_wait(struct fence *fence,
 
ret = __i915_wait_request(to_i915_request(fence),
  interruptible, timeout,
- NULL);
+ NO_WAITBOOST);
if (ret == -ETIME)
return 0;
 
@@ -621,7 +621,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 * forcing the clocks too high for the whole system, we only allow
 * each client to waitboost once in a busy period.
 */
-   if (INTEL_INFO(req->i915)->gen >= 6)
+   if (!IS_ERR(rps) && INTEL_INFO(req->i915)->gen >= 6)
gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
+   intel_wait_init(&wait, req->fence.seqno);
@@ -691,7 +691,7 @@ complete:
*timeout = 0;
}
 
-   if (ret == 0 && rps &&
+   if (ret == 0 && !IS_ERR_OR_NULL(rps) &&
req->fence.seqno == req->ring->last_submitted_seqno) {
/* The GPU is now idle and this client has stalled.
 * Since no other client has submitted a request in the
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h 
b/drivers/gpu/drm/i915/i915_gem_request.h
index 0ab14fd0fce0..6b3de827929a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -179,6 +179,7 @@ void __i915_add_request(struct drm_i915_gem_request *req,
__i915_add_request(req, NULL, false)
 
 struct intel_rps_client;
+#define NO_WAITBOOST ERR_PTR(-1)
 
 int __i915_wait_request(struct drm_i915_gem_request *req,
bool interruptible,
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 015/190] drm/i915: Remove the dedicated hangcheck workqueue

2016-01-11 Thread Chris Wilson
The queue only ever contains at most one item and has no special flags.
It is just a very simple wrapper around the system-wq - a complication
with no benefits.
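
The replacement relies on schedule_delayed_work() being exactly that
wrapper: the two calls below queue onto the same system_wq.

        queue_delayed_work(system_wq, &dev_priv->gpu_error.hangcheck_work, delay);
        /* is equivalent to */
        schedule_delayed_work(&dev_priv->gpu_error.hangcheck_work, delay);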

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_dma.c | 11 ---
 drivers/gpu/drm/i915/i915_drv.h |  1 -
 drivers/gpu/drm/i915/i915_irq.c |  6 +++---
 3 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 44a896ce32e6..9e49e304dd8e 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1016,14 +1016,6 @@ int i915_driver_load(struct drm_device *dev, unsigned 
long flags)
goto out_freewq;
}
 
-   dev_priv->gpu_error.hangcheck_wq =
-   alloc_ordered_workqueue("i915-hangcheck", 0);
-   if (dev_priv->gpu_error.hangcheck_wq == NULL) {
-   DRM_ERROR("Failed to create our hangcheck workqueue.\n");
-   ret = -ENOMEM;
-   goto out_freedpwq;
-   }
-
intel_irq_init(dev_priv);
intel_uncore_sanitize(dev);
 
@@ -1105,8 +1097,6 @@ out_gem_unload:
intel_teardown_gmbus(dev);
intel_teardown_mchbar(dev);
pm_qos_remove_request(_priv->pm_qos);
-   destroy_workqueue(dev_priv->gpu_error.hangcheck_wq);
-out_freedpwq:
destroy_workqueue(dev_priv->hotplug.dp_wq);
 out_freewq:
destroy_workqueue(dev_priv->wq);
@@ -1209,7 +1199,6 @@ int i915_driver_unload(struct drm_device *dev)
 
destroy_workqueue(dev_priv->hotplug.dp_wq);
destroy_workqueue(dev_priv->wq);
-   destroy_workqueue(dev_priv->gpu_error.hangcheck_wq);
pm_qos_remove_request(_priv->pm_qos);
 
i915_global_gtt_cleanup(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d9d411919779..188bed933f11 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1330,7 +1330,6 @@ struct i915_gpu_error {
/* Hang gpu twice in this window and your context gets banned */
 #define DRM_I915_CTX_BAN_PERIOD DIV_ROUND_UP(8*DRM_I915_HANGCHECK_PERIOD, 1000)
 
-   struct workqueue_struct *hangcheck_wq;
struct delayed_work hangcheck_work;
 
/* For reset and error_state handling. */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 94f5f4e99446..8939438d747d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3175,7 +3175,7 @@ out:
 
 void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
 {
-   struct i915_gpu_error *e = &dev_priv->gpu_error;
+   unsigned long delay;
 
if (!i915.enable_hangcheck)
return;
@@ -3185,8 +3185,8 @@ void i915_queue_hangcheck(struct drm_i915_private 
*dev_priv)
 * we will ignore a hung ring if a second ring is kept busy.
 */
 
-   queue_delayed_work(e->hangcheck_wq, &e->hangcheck_work,
-  
round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES));
+   delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
+   schedule_delayed_work(&dev_priv->gpu_error.hangcheck_work, delay);
 }
 
 static void ibx_irq_reset(struct drm_device *dev)
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 045/190] drm/i915: Move releasing of the GEM request from free to retire/cancel

2016-01-11 Thread Chris Wilson
If we move the release of the GEM request (i.e. decoupling it from the
various lists used for client and context tracking) after it is complete
(either by the GPU retiring the request, or by the caller cancelling the
request), we can remove the requirement that the final unreference of
the GEM request be performed under the struct_mutex.

v2: Execlists, as always, is badly asymmetric, and year-old patches still
haven't landed to fix it up.
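
A sketch of what this permits, modelled on the throttle path below: take
the reference under the lock, drop the final reference outside it:

        mutex_lock(&dev->struct_mutex);
        /* ... look up the target request ... */
        i915_gem_request_reference(target);
        mutex_unlock(&dev->struct_mutex);

        ret = __i915_wait_request(target, true, NULL, NULL);
        i915_gem_request_unreference(target);   /* final unref no longer
                                                 * needs struct_mutex */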

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c  |  4 +--
 drivers/gpu/drm/i915/i915_gem_request.c  | 50 ++--
 drivers/gpu/drm/i915/i915_gem_request.h  | 14 -
 drivers/gpu/drm/i915/intel_breadcrumbs.c |  2 +-
 drivers/gpu/drm/i915/intel_display.c |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c |  6 ++--
 drivers/gpu/drm/i915/intel_pm.c  |  2 +-
 7 files changed, 30 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 68a25617ca7a..6d8d65304abf 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2502,7 +2502,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file)
ret = __i915_wait_request(req[i], true,
  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
  to_rps_client(file));
-   i915_gem_request_unreference__unlocked(req[i]);
+   i915_gem_request_unreference(req[i]);
}
return ret;
 
@@ -3505,7 +3505,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct 
drm_file *file)
return 0;
 
ret = __i915_wait_request(target, true, NULL, NULL);
-   i915_gem_request_unreference__unlocked(target);
+   i915_gem_request_unreference(target);
 
return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index b4ede6dd7b20..1c4f4d83a3c2 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -184,13 +184,6 @@ err:
return ret;
 }
 
-void i915_gem_request_cancel(struct drm_i915_gem_request *req)
-{
-   intel_ring_reserved_space_cancel(req->ringbuf);
-
-   i915_gem_request_unreference(req);
-}
-
 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
   struct drm_file *file)
 {
@@ -235,9 +228,28 @@ i915_gem_request_remove_from_client(struct 
drm_i915_gem_request *request)
request->pid = NULL;
 }
 
+static void __i915_gem_request_release(struct drm_i915_gem_request *request)
+{
+   i915_gem_request_remove_from_client(request);
+
+   i915_gem_context_unreference(request->ctx);
+   i915_gem_request_unreference(request);
+}
+
+void i915_gem_request_cancel(struct drm_i915_gem_request *req)
+{
+   intel_ring_reserved_space_cancel(req->ringbuf);
+   if (i915.enable_execlists) {
+   if (req->ctx != req->ring->default_context)
+   intel_lr_context_unpin(req);
+   }
+   __i915_gem_request_release(req);
+}
+
 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 {
trace_i915_gem_request_retire(request);
+   list_del_init(&request->list);
 
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
@@ -248,11 +260,7 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
 * completion order.
 */
request->ringbuf->last_retired_head = request->postfix;
-
-   list_del_init(&request->list);
-   i915_gem_request_remove_from_client(request);
-
-   i915_gem_request_unreference(request);
+   __i915_gem_request_release(request);
 }
 
 void
@@ -639,21 +647,7 @@ i915_wait_request(struct drm_i915_gem_request *req)
 
 void i915_gem_request_free(struct kref *req_ref)
 {
-   struct drm_i915_gem_request *req = container_of(req_ref,
-typeof(*req), ref);
-   struct intel_context *ctx = req->ctx;
-
-   if (req->file_priv)
-   i915_gem_request_remove_from_client(req);
-
-   if (ctx) {
-   if (i915.enable_execlists) {
-   if (ctx != req->ring->default_context)
-   intel_lr_context_unpin(req);
-   }
-
-   i915_gem_context_unreference(ctx);
-   }
-
+   struct drm_i915_gem_request *req =
+   container_of(req_ref, typeof(*req), ref);
kmem_cache_free(req->i915->requests, req);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h 
b/drivers/gpu/drm/i915/i915_gem_request.h
index d46f22f30b0a..af1b825fce50 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -154,23 +154,9 @@ i915_gem_request_reference(struct 

[Intel-gfx] [PATCH 059/190] drm/i915: Rename request->ringbuf to request->ring

2016-01-11 Thread Chris Wilson
Now that we have disambiguated ring and engine, we can use the clearer
and more consistent name for the intel_ringbuffer pointer in the
request.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c|   8 +-
 drivers/gpu/drm/i915/i915_gem_context.c|   2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   4 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c|   6 +-
 drivers/gpu/drm/i915/i915_gem_request.c|  20 ++--
 drivers/gpu/drm/i915/i915_gem_request.h|   2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c  |  31 +++---
 drivers/gpu/drm/i915/i915_guc_submission.c |   4 +-
 drivers/gpu/drm/i915/intel_display.c   |  10 +-
 drivers/gpu/drm/i915/intel_lrc.c   | 152 ++---
 drivers/gpu/drm/i915/intel_mocs.c  |  34 +++
 drivers/gpu/drm/i915/intel_overlay.c   |  42 
 drivers/gpu/drm/i915/intel_ringbuffer.c|  86 
 13 files changed, 198 insertions(+), 203 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6622c9bb3af8..430c439ece26 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4083,11 +4083,11 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, 
int slice)
 * at initialization time.
 */
for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
-   intel_ring_emit(req->ringbuf, MI_LOAD_REGISTER_IMM(1));
-   intel_ring_emit_reg(req->ringbuf, GEN7_L3LOG(slice, i));
-   intel_ring_emit(req->ringbuf, remap_info[i]);
+   intel_ring_emit(req->ring, MI_LOAD_REGISTER_IMM(1));
+   intel_ring_emit_reg(req->ring, GEN7_L3LOG(slice, i));
+   intel_ring_emit(req->ring, remap_info[i]);
}
-   intel_ring_advance(req->ringbuf);
+   intel_ring_advance(req->ring);
 
return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index dece033cf604..5b4e77a80c19 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -519,7 +519,7 @@ i915_gem_context_get(struct drm_i915_file_private 
*file_priv, u32 id)
 static inline int
 mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
u32 flags = hw_flags | MI_MM_SPACE_GTT;
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e7df91f9a51f..a0f5a997c2f2 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1148,7 +1148,7 @@ i915_gem_execbuffer_retire_commands(struct 
i915_execbuffer_params *params)
 static int
 i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
int ret, i;
 
if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
@@ -1229,7 +1229,7 @@ i915_gem_ringbuffer_submission(struct 
i915_execbuffer_params *params,
   struct drm_i915_gem_execbuffer2 *args,
   struct list_head *vmas)
 {
-   struct intel_ringbuffer *ring = params->request->ringbuf;
+   struct intel_ringbuffer *ring = params->request->ring;
struct drm_i915_private *dev_priv = params->request->i915;
u64 exec_start, exec_len;
int instp_mode;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index cb7cb59d4c4a..38c109cda904 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -656,7 +656,7 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req,
  unsigned entry,
  dma_addr_t addr)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
int ret;
 
BUG_ON(entry >= 4);
@@ -1648,7 +1648,7 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 struct drm_i915_gem_request *req)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
int ret;
 
/* NB: TLBs must be flushed and invalidated before a switch */
@@ -1686,7 +1686,7 @@ static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
  struct drm_i915_gem_request *req)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
int ret;
 
/* NB: TLBs must be flushed and invalidated before a switch */
diff --git 

[Intel-gfx] [PATCH 056/190] drm/i915: Unify intel_ring_begin()

2016-01-11 Thread Chris Wilson
Combine the near identical implementations of intel_logical_ring_begin()
and intel_ring_begin() - the only difference is that the logical wait
has to check for a matching ring (which is assumed by legacy).

Signed-off-by: Chris Wilson 
---
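
For reference, the space accounting the unified intel_ring_begin() has
to perform, as a simplified sketch (reserved-space bookkeeping omitted;
wait_for_space() and wrap_to_start() are illustrative helpers, not the
driver's functions):

	static int ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
	{
		int remain_usable = ringbuf->effective_size - ringbuf->tail;
		int wait_bytes = 0;
		bool need_wrap = false;

		if (bytes > remain_usable) {
			/* Cannot fit before the end of the ring: flush the
			 * remainder and wait for space from the start. */
			wait_bytes = (ringbuf->size - ringbuf->tail) + bytes;
			need_wrap = true;
		} else if (bytes > ringbuf->space) {
			wait_bytes = bytes;	/* fits in place, just wait */
		}

		if (wait_bytes && wait_for_space(ringbuf, wait_bytes))
			return -ENOSPC;
		if (need_wrap)
			wrap_to_start(ringbuf);	/* zero-fill tail, tail = 0 */
		return 0;
	}
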
 drivers/gpu/drm/i915/intel_lrc.c| 141 ++--
 drivers/gpu/drm/i915/intel_lrc.h|   1 -
 drivers/gpu/drm/i915/intel_mocs.c   |  12 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 111 +
 4 files changed, 69 insertions(+), 196 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index dc4fc9d8612c..3d14b69632e8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -698,48 +698,6 @@ int intel_logical_ring_alloc_request_extras(struct 
drm_i915_gem_request *request
return 0;
 }
 
-static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
-  int bytes)
-{
-   struct intel_ringbuffer *ringbuf = req->ringbuf;
-   struct intel_engine_cs *ring = req->ring;
-   struct drm_i915_gem_request *target;
-   unsigned space;
-   int ret;
-
-   if (intel_ring_space(ringbuf) >= bytes)
-   return 0;
-
-   /* The whole point of reserving space is to not wait! */
-   WARN_ON(ringbuf->reserved_in_use);
-
-   list_for_each_entry(target, &ring->request_list, list) {
-   /*
-* The request queue is per-engine, so can contain requests
-* from multiple ringbuffers. Here, we must ignore any that
-* aren't from the ringbuffer we're considering.
-*/
-   if (target->ringbuf != ringbuf)
-   continue;
-
-   /* Would completion of this request free enough space? */
-   space = __intel_ring_space(target->postfix, ringbuf->tail,
-  ringbuf->size);
-   if (space >= bytes)
-   break;
-   }
-
-   if (WARN_ON(&target->list == &ring->request_list))
-   return -ENOSPC;
-
-   ret = i915_wait_request(target);
-   if (ret)
-   return ret;
-
-   ringbuf->space = space;
-   return 0;
-}
-
 /*
  * intel_logical_ring_advance_and_submit() - advance the tail and submit the 
workload
  * @request: Request to advance the logical ringbuffer of.
@@ -763,89 +721,6 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
execlists_context_queue(request);
 }
 
-static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
-{
-   int rem = ringbuf->size - ringbuf->tail;
-   memset(ringbuf->virtual_start + ringbuf->tail, 0, rem);
-
-   ringbuf->tail = 0;
-   intel_ring_update_space(ringbuf);
-}
-
-static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
-{
-   struct intel_ringbuffer *ringbuf = req->ringbuf;
-   int remain_usable = ringbuf->effective_size - ringbuf->tail;
-   int remain_actual = ringbuf->size - ringbuf->tail;
-   int ret, total_bytes, wait_bytes = 0;
-   bool need_wrap = false;
-
-   if (ringbuf->reserved_in_use)
-   total_bytes = bytes;
-   else
-   total_bytes = bytes + ringbuf->reserved_size;
-
-   if (unlikely(bytes > remain_usable)) {
-   /*
-* Not enough space for the basic request. So need to flush
-* out the remainder and then wait for base + reserved.
-*/
-   wait_bytes = remain_actual + total_bytes;
-   need_wrap = true;
-   } else {
-   if (unlikely(total_bytes > remain_usable)) {
-   /*
-* The base request will fit but the reserved space
-* falls off the end. So only need to wait for the
-* reserved size after flushing out the remainder.
-*/
-   wait_bytes = remain_actual + ringbuf->reserved_size;
-   need_wrap = true;
-   } else if (total_bytes > ringbuf->space) {
-   /* No wrapping required, just waiting. */
-   wait_bytes = total_bytes;
-   }
-   }
-
-   if (wait_bytes) {
-   ret = logical_ring_wait_for_space(req, wait_bytes);
-   if (unlikely(ret))
-   return ret;
-
-   if (need_wrap)
-   __wrap_ring_buffer(ringbuf);
-   }
-
-   return 0;
-}
-
-/**
- * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some 
commands
- *
- * @req: The request to start some new work for
- * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
- *
- * The ringbuffer might not be ready to accept the commands right away (maybe 
it needs to
- * be 

[Intel-gfx] [PATCH 067/190] drm/i915: Unify legacy/execlists emission of MI_BATCHBUFFER_START

2016-01-11 Thread Chris Wilson
Both the ->dispatch_execbuffer and ->emit_bb_start callbacks do exactly
the same thing: add MI_BATCHBUFFER_START to the request's ringbuffer.
We need only one vfunc.

Signed-off-by: Chris Wilson 
---
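
The resulting shape, sketched with abbreviated declarations: one hook
with the legacy signature extended by the batch length, so both
submission paths go through the same callback.

	/* The single hook both legacy and execlists engines implement: */
	int (*emit_bb_start)(struct drm_i915_gem_request *req,
			     u64 offset, u32 len,
			     unsigned int dispatch_flags);

	/* Callers no longer care which mode the engine is driving. */
	static int submit_batch(struct drm_i915_gem_request *req,
				u64 exec_start, u32 exec_len,
				unsigned int flags)
	{
		return req->engine->emit_bb_start(req, exec_start,
						  exec_len, flags);
	}
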
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  6 +--
 drivers/gpu/drm/i915/i915_gem_render_state.c | 16 +++
 drivers/gpu/drm/i915/intel_lrc.c |  9 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 67 +---
 drivers/gpu/drm/i915/intel_ringbuffer.h  | 12 +++--
 5 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3956d74d8c8c..3e6384deca65 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1297,9 +1297,9 @@ i915_gem_ringbuffer_submission(struct 
i915_execbuffer_params *params,
exec_start = params->batch_obj_vm_offset +
 params->args_batch_start_offset;
 
-   ret = params->ring->dispatch_execbuffer(params->request,
-   exec_start, exec_len,
-   params->dispatch_flags);
+   ret = params->ring->emit_bb_start(params->request,
+ exec_start, exec_len,
+ params->dispatch_flags);
if (ret)
return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index bee3f0ccd0cd..ccc988c2b226 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -205,18 +205,18 @@ int i915_gem_render_state_init(struct 
drm_i915_gem_request *req)
if (so.rodata == NULL)
return 0;
 
-   ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset,
-  so.rodata->batch_items * 4,
-  I915_DISPATCH_SECURE);
+   ret = req->engine->emit_bb_start(req, so.ggtt_offset,
+so.rodata->batch_items * 4,
+I915_DISPATCH_SECURE);
if (ret)
goto out;
 
if (so.aux_batch_size > 8) {
-   ret = req->engine->dispatch_execbuffer(req,
-  (so.ggtt_offset +
-   so.aux_batch_offset),
-  so.aux_batch_size,
-  I915_DISPATCH_SECURE);
+   ret = req->engine->emit_bb_start(req,
+(so.ggtt_offset +
+ so.aux_batch_offset),
+so.aux_batch_size,
+I915_DISPATCH_SECURE);
if (ret)
goto out;
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 82b21a883732..30effca91184 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -783,7 +783,9 @@ int intel_execlists_submission(struct 
i915_execbuffer_params *params,
exec_start = params->batch_obj_vm_offset +
 args->batch_start_offset;
 
-   ret = engine->emit_bb_start(params->request, exec_start, 
params->dispatch_flags);
+   ret = engine->emit_bb_start(params->request,
+   exec_start, args->batch_len,
+   params->dispatch_flags);
if (ret)
return ret;
 
@@ -1409,7 +1411,8 @@ static int intel_logical_ring_emit_pdps(struct 
drm_i915_gem_request *req)
 }
 
 static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
- u64 offset, unsigned dispatch_flags)
+ u64 offset, u32 len,
+ unsigned dispatch_flags)
 {
struct intel_ring *ring = req->ring;
bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
@@ -1637,12 +1640,14 @@ static int intel_lr_context_render_state_init(struct 
drm_i915_gem_request *req)
return 0;
 
ret = req->engine->emit_bb_start(req, so.ggtt_offset,
+so.rodata->batch_items * 4,
 I915_DISPATCH_SECURE);
if (ret)
goto out;
 
ret = req->engine->emit_bb_start(req,
 (so.ggtt_offset + so.aux_batch_offset),
+so.aux_batch_size,
 I915_DISPATCH_SECURE);
if (ret)
goto out;
diff --git 

[Intel-gfx] [PATCH 038/190] drm/i915: Flush the RPS bottom-half when the GPU idles

2016-01-11 Thread Chris Wilson
Make sure that the RPS bottom-half is flushed before we set the idle
frequency when we decide the GPU is idle. This should prevent any races
between the bottom-half and setting the idle frequency, and ensures that
the bottom-half is bounded by the GPU's rpm reference taken for when it
is active (i.e. between gen6_rps_busy() and gen6_rps_idle()).

v2: Avoid recursively using the i915->wq - RPS does not touch the
struct_mutex so has no place being on the ordered i915->wq.
v3: Enable/disable interrupts for RPS busy/idle in order to prevent
further HW access from RPS outside of the wakeref.

Signed-off-by: Chris Wilson 
Cc: Imre Deak 
Cc: Jesse Barnes 
---
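
The teardown ordering this enforces, distilled from the hunks below
(mask_rps_interrupts() stands in for the masking writes shown in the
diff):

	/* 1. Mark RPS disabled and mask the interrupt under the lock,
	 *    so the IRQ handler queues no new work.
	 * 2. synchronize_irq() lets a handler already running finish.
	 * 3. cancel_work_sync() flushes or cancels the bottom half.
	 * 4. Only then reset state: the worker cannot run behind us. */
	spin_lock_irq(&dev_priv->irq_lock);
	dev_priv->rps.interrupts_enabled = false;
	mask_rps_interrupts(dev_priv);
	spin_unlock_irq(&dev_priv->irq_lock);

	synchronize_irq(dev_priv->dev->irq);
	cancel_work_sync(&dev_priv->rps.work);
	gen6_reset_rps_interrupts(dev_priv);
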
 drivers/gpu/drm/i915/i915_drv.c  |  1 -
 drivers/gpu/drm/i915/i915_irq.c  | 45 +++-
 drivers/gpu/drm/i915/intel_display.c |  1 +
 drivers/gpu/drm/i915/intel_drv.h |  6 ++---
 drivers/gpu/drm/i915/intel_pm.c  | 23 +-
 5 files changed, 34 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 4c090f1cf69c..442e1217e442 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1492,7 +1492,6 @@ static int intel_runtime_suspend(struct device *device)
 
intel_guc_suspend(dev);
 
-   intel_suspend_gt_powersave(dev);
intel_runtime_pm_disable_interrupts(dev_priv);
 
ret = intel_suspend_complete(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 8866e981bcba..d9757d227c86 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -336,9 +336,8 @@ void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, 
uint32_t mask)
__gen6_disable_pm_irq(dev_priv, mask);
 }
 
-void gen6_reset_rps_interrupts(struct drm_device *dev)
+void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv)
 {
-   struct drm_i915_private *dev_priv = dev->dev_private;
i915_reg_t reg = gen6_pm_iir(dev_priv);
 
	spin_lock_irq(&dev_priv->irq_lock);
@@ -349,14 +348,14 @@ void gen6_reset_rps_interrupts(struct drm_device *dev)
	spin_unlock_irq(&dev_priv->irq_lock);
 }
 
-void gen6_enable_rps_interrupts(struct drm_device *dev)
+void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv)
 {
-   struct drm_i915_private *dev_priv = dev->dev_private;
+   if (dev_priv->rps.interrupts_enabled)
+   return;
 
	spin_lock_irq(&dev_priv->irq_lock);
-
-   WARN_ON(dev_priv->rps.pm_iir);
-   WARN_ON(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
+   WARN_ON_ONCE(dev_priv->rps.pm_iir);
+   WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & 
dev_priv->pm_rps_events);
dev_priv->rps.interrupts_enabled = true;
I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) |
dev_priv->pm_rps_events);
@@ -382,17 +381,13 @@ u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private 
*dev_priv, u32 mask)
return mask;
 }
 
-void gen6_disable_rps_interrupts(struct drm_device *dev)
+void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv)
 {
-   struct drm_i915_private *dev_priv = dev->dev_private;
+   if (!dev_priv->rps.interrupts_enabled)
+   return;
 
	spin_lock_irq(&dev_priv->irq_lock);
dev_priv->rps.interrupts_enabled = false;
-   spin_unlock_irq(&dev_priv->irq_lock);
-
-   cancel_work_sync(&dev_priv->rps.work);
-
-   spin_lock_irq(&dev_priv->irq_lock);
 
I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 
@@ -401,8 +396,15 @@ void gen6_disable_rps_interrupts(struct drm_device *dev)
~dev_priv->pm_rps_events);
 
	spin_unlock_irq(&dev_priv->irq_lock);
+   synchronize_irq(dev_priv->dev->irq);
 
-   synchronize_irq(dev->irq);
+   /* Now that we will not be generating any more work, flush any
+* outstanding tasks. As we are called on the RPS idle path,
+* we will reset the GPU to minimum frequencies, so the current
+* state of the worker can be discarded.
+*/
+   cancel_work_sync(&dev_priv->rps.work);
+   gen6_reset_rps_interrupts(dev_priv);
 }
 
 /**
@@ -1103,13 +1105,6 @@ static void gen6_pm_rps_work(struct work_struct *work)
return;
}
 
-   /*
-* The RPS work is synced during runtime suspend, we don't require a
-* wakeref. TODO: instead of disabling the asserts make sure that we
-* always hold an RPM reference while the work is running.
-*/
-   DISABLE_RPM_WAKEREF_ASSERTS(dev_priv);
-
pm_iir = dev_priv->rps.pm_iir;
dev_priv->rps.pm_iir = 0;
/* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
@@ -1122,7 +1117,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
WARN_ON(pm_iir & 

[Intel-gfx] [PATCH 047/190] drm/i915: Rename request reference/unreference to get/put

2016-01-11 Thread Chris Wilson
Now that we derive requests from struct fence, swap over to its
nomenclature for references. It's shorter and more idiomatic across the
kernel.

s/i915_gem_request_reference/i915_gem_request_get/
s/i915_gem_request_unreference/i915_gem_request_put/

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c  | 14 +++---
 drivers/gpu/drm/i915/i915_gem_request.c  |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.h  |  8 
 drivers/gpu/drm/i915/intel_breadcrumbs.c |  4 ++--
 drivers/gpu/drm/i915/intel_display.c |  4 ++--
 drivers/gpu/drm/i915/intel_lrc.c |  4 ++--
 drivers/gpu/drm/i915/intel_pm.c  |  5 ++---
 7 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6d8d65304abf..fd61e722b595 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1185,7 +1185,7 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
if (req == NULL)
return 0;
 
-   requests[n++] = i915_gem_request_reference(req);
+   requests[n++] = i915_gem_request_get(req);
} else {
for (i = 0; i < I915_NUM_RINGS; i++) {
struct drm_i915_gem_request *req;
@@ -1194,7 +1194,7 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
if (req == NULL)
continue;
 
-   requests[n++] = i915_gem_request_reference(req);
+   requests[n++] = i915_gem_request_get(req);
}
}
 
@@ -1207,7 +1207,7 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
for (i = 0; i < n; i++) {
if (ret == 0)
i915_gem_object_retire_request(obj, requests[i]);
-   i915_gem_request_unreference(requests[i]);
+   i915_gem_request_put(requests[i]);
}
 
return ret;
@@ -2492,7 +2492,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file)
if (obj->last_read_req[i] == NULL)
continue;
 
-   req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
+   req[n++] = i915_gem_request_get(obj->last_read_req[i]);
}
 
mutex_unlock(>struct_mutex);
@@ -2502,7 +2502,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file)
ret = __i915_wait_request(req[i], true,
  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
  to_rps_client(file));
-   i915_gem_request_unreference(req[i]);
+   i915_gem_request_put(req[i]);
}
return ret;
 
@@ -3498,14 +3498,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct 
drm_file *file)
target = request;
}
if (target)
-   i915_gem_request_reference(target);
+   i915_gem_request_get(target);
	spin_unlock(&file_priv->mm.lock);
 
if (target == NULL)
return 0;
 
ret = __i915_wait_request(target, true, NULL, NULL);
-   i915_gem_request_unreference(target);
+   i915_gem_request_put(target);
 
return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index e366ca0dcd99..a796dbd1b0e4 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -326,7 +326,7 @@ static void __i915_gem_request_release(struct 
drm_i915_gem_request *request)
i915_gem_request_remove_from_client(request);
 
i915_gem_context_unreference(request->ctx);
-   i915_gem_request_unreference(request);
+   i915_gem_request_put(request);
 }
 
 void i915_gem_request_cancel(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h 
b/drivers/gpu/drm/i915/i915_gem_request.h
index b55d0b7c7f2a..0ab14fd0fce0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -147,13 +147,13 @@ to_request(struct fence *fence)
 }
 
 static inline struct drm_i915_gem_request *
-i915_gem_request_reference(struct drm_i915_gem_request *req)
+i915_gem_request_get(struct drm_i915_gem_request *req)
 {
	return to_request(fence_get(&req->fence));
 }
 
 static inline void
-i915_gem_request_unreference(struct drm_i915_gem_request *req)
+i915_gem_request_put(struct drm_i915_gem_request *req)
 {
	fence_put(&req->fence);
 }
@@ -162,10 +162,10 @@ static inline void i915_gem_request_assign(struct 
drm_i915_gem_request **pdst,
   struct drm_i915_gem_request *src)
 {
if (src)
-   i915_gem_request_reference(src);
+   

[Intel-gfx] [PATCH 008/190] drm/i915: Simplify checking of GPU reset_counter in display pageflips

2016-01-11 Thread Chris Wilson
If, when we store the reset_counter for the operation, we ensure that
it is neither wedged nor in the middle of a reset, we can then assert that
if any reset occurs the reset_counter must change. Later we can just
compare the operation's reset epoch against the current counter to see
if we need to abort the operation (to handle the hang).

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Reviewed-by: Daniel Vetter 
---
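
This is the familiar generation-counter idiom; a minimal standalone
sketch (illustrative only, assuming the in-progress state is encoded in
the counter's low bit):

	#include <stdatomic.h>
	#include <stdbool.h>

	static atomic_uint reset_counter;	/* bumped on every reset */

	/* Refuse to start while a reset is in flight; otherwise record
	 * the epoch the operation belongs to. */
	static bool begin_op(unsigned int *epoch)
	{
		*epoch = atomic_load(&reset_counter);
		return (*epoch & 1) == 0;
	}

	/* Any reset in between must have changed the counter, so one
	 * comparison tells us whether to abort the operation. */
	static bool op_survived(unsigned int epoch)
	{
		return atomic_load(&reset_counter) == epoch;
	}
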
 drivers/gpu/drm/i915/intel_display.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 0933bdbaa935..183c05bdb220 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3288,14 +3288,12 @@ void intel_finish_reset(struct drm_device *dev)
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
 {
struct drm_device *dev = crtc->dev;
-   struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
unsigned reset_counter;
bool pending;
 
-   reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-   if (intel_crtc->reset_counter != reset_counter ||
-   __i915_reset_in_progress_or_wedged(reset_counter))
+   reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
+   if (intel_crtc->reset_counter != reset_counter)
return false;
 
	spin_lock_irq(&dev->event_lock);
@@ -11011,8 +11009,7 @@ static bool page_flip_finished(struct intel_crtc *crtc)
unsigned reset_counter;
 
	reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-   if (crtc->reset_counter != reset_counter ||
-   __i915_reset_in_progress_or_wedged(reset_counter))
+   if (crtc->reset_counter != reset_counter)
return true;
 
/*
@@ -11668,8 +11665,13 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
if (ret)
goto cleanup;
 
-   atomic_inc(&intel_crtc->unpin_work_count);
	intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
+   if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
+   ret = -EIO;
+   goto cleanup;
+   }
+
+   atomic_inc(&intel_crtc->unpin_work_count);
 
if (INTEL_INFO(dev)->gen >= 5 || IS_G4X(dev))
work->flip_count = I915_READ(PIPE_FLIPCOUNT_G4X(pipe)) + 1;
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 021/190] drm/i915: Use HWS for seqno tracking everywhere

2016-01-11 Thread Chris Wilson
By using the same address for storing the HWS on every platform, we can
remove the platform specific vfuncs and reduce the get-seqno routine to
a single read of a cached memory location.

Signed-off-by: Chris Wilson 
---
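
The end result, sketched: one coherent read of the status page plus the
usual wraparound-safe comparison (I915_GEM_HWS_INDEX as in the driver;
treat this as a sketch of the shape, not the exact hunk):

	/* The HWS is a CPU-cached page the GPU writes its seqno into,
	 * so "current seqno" becomes a single load. */
	static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
	{
		return ring->status_page.page_addr[I915_GEM_HWS_INDEX];
	}

	/* Seqnos wrap, so compare with signed subtraction. */
	static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
	{
		return (s32)(seq1 - seq2) >= 0;
	}
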
 drivers/gpu/drm/i915/i915_debugfs.c  | 10 ++--
 drivers/gpu/drm/i915/i915_drv.h  |  4 +-
 drivers/gpu/drm/i915/i915_gpu_error.c|  2 +-
 drivers/gpu/drm/i915/i915_irq.c  |  4 +-
 drivers/gpu/drm/i915/i915_trace.h|  2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c |  4 +-
 drivers/gpu/drm/i915/intel_lrc.c | 46 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 86 
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  7 +--
 9 files changed, 43 insertions(+), 122 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index d09e48455dcb..5a706c700684 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -600,7 +600,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
   ring->name,
   
i915_gem_request_get_seqno(work->flip_queued_req),
   dev_priv->next_seqno,
-  ring->get_seqno(ring),
+  intel_ring_get_seqno(ring),
   
i915_gem_request_completed(work->flip_queued_req));
} else
seq_printf(m, "Flip not associated with any 
ring\n");
@@ -732,10 +732,8 @@ static void i915_ring_seqno_info(struct seq_file *m,
 {
struct rb_node *rb;
 
-   if (ring->get_seqno) {
-   seq_printf(m, "Current sequence (%s): %x\n",
-  ring->name, ring->get_seqno(ring));
-   }
+   seq_printf(m, "Current sequence (%s): %x\n",
+  ring->name, intel_ring_get_seqno(ring));
 
	spin_lock(&ring->breadcrumbs.lock);
	for (rb = rb_first(&ring->breadcrumbs.waiters);
@@ -1355,7 +1353,7 @@ static int i915_hangcheck_info(struct seq_file *m, void 
*unused)
 
for_each_ring(ring, dev_priv, i) {
acthd[i] = intel_ring_get_active_head(ring);
-   seqno[i] = ring->get_seqno(ring);
+   seqno[i] = intel_ring_get_seqno(ring);
}
 
i915_get_extra_instdone(dev, instdone);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 44d46018ee13..fcedcbc50834 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2971,13 +2971,13 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 
 static inline bool i915_gem_request_started(struct drm_i915_gem_request *req)
 {
-   return i915_seqno_passed(req->ring->get_seqno(req->ring),
+   return i915_seqno_passed(intel_ring_get_seqno(req->ring),
 req->previous_seqno);
 }
 
 static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
 {
-   return i915_seqno_passed(req->ring->get_seqno(req->ring),
+   return i915_seqno_passed(intel_ring_get_seqno(req->ring),
 req->seqno);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 01d0206ca4dd..3e137fc701cf 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -903,7 +903,7 @@ static void i915_record_ring_state(struct drm_device *dev,
ering->waiting = intel_engine_has_waiter(ring);
ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
ering->acthd = intel_ring_get_active_head(ring);
-   ering->seqno = ring->get_seqno(ring);
+   ering->seqno = intel_ring_get_seqno(ring);
ering->start = I915_READ_START(ring);
ering->head = I915_READ_HEAD(ring);
ering->tail = I915_READ_TAIL(ring);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d73669783045..627c7fb6aa9b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2903,7 +2903,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)
if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
return -1;
 
-   if (i915_seqno_passed(signaller->get_seqno(signaller), seqno))
+   if (i915_seqno_passed(intel_ring_get_seqno(signaller), seqno))
return 1;
 
/* cursory check for an unkickable deadlock */
@@ -3068,7 +3068,7 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
semaphore_clear_deadlocks(dev_priv);
 
acthd = intel_ring_get_active_head(ring);
-   seqno = ring->get_seqno(ring);
+   seqno = intel_ring_get_seqno(ring);
 
if (ring->hangcheck.seqno == seqno) {
if 

[Intel-gfx] [PATCH 027/190] drm/i915: Only query timestamp when measuring elapsed time

2016-01-11 Thread Chris Wilson
Avoid the two calls to ktime_get_raw_ns() (at best it reads the TSC) as
we only need to compute the elapsed time for a timed wait.

Signed-off-by: Chris Wilson 
---
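
The transformation is plain deadline arithmetic: with t0 read at entry
and t1 at exit, the old "*timeout - (t1 - t0)" equals "(*timeout + t0)
- t1", so folding the deadline into *timeout costs no clock read at all
on the untimed path. Sketched:

	if (timeout) {
		if (*timeout == 0)
			return -ETIME;
		*timeout += ktime_get_raw_ns();	/* now holds the deadline */
	}

	/* ... sleep/wait on the request ... */

	if (timeout) {
		*timeout -= ktime_get_raw_ns();	/* back to ns remaining */
		if (*timeout < 0)
			*timeout = 0;
	}
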
 drivers/gpu/drm/i915/i915_gem.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a0744626a110..b956b8813307 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1220,7 +1220,6 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
struct intel_wait wait;
unsigned long timeout_remain;
-   s64 before, now;
int ret = 0;
 
might_sleep();
@@ -1239,13 +1238,12 @@ int __i915_wait_request(struct drm_i915_gem_request 
*req,
if (*timeout == 0)
return -ETIME;
 
+   /* Record current time in case interrupted, or wedged */
timeout_remain = nsecs_to_jiffies_timeout(*timeout);
+   *timeout += ktime_get_raw_ns();
}
 
-   /* Record current time in case interrupted by signal, or wedged */
trace_i915_gem_request_wait_begin(req);
-   before = ktime_get_raw_ns();
-
if (INTEL_INFO(req->i915)->gen >= 6)
gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
@@ -1298,13 +1296,12 @@ wakeup:
 complete:
intel_engine_remove_wait(req->ring, );
__set_task_state(wait.task, TASK_RUNNING);
-   now = ktime_get_raw_ns();
trace_i915_gem_request_wait_end(req);
 
if (timeout) {
-   s64 tres = *timeout - (now - before);
-
-   *timeout = tres < 0 ? 0 : tres;
+   *timeout -= ktime_get_raw_ns();
+   if (*timeout < 0)
+   *timeout = 0;
 
/*
 * Apparently ktime isn't accurate enough and occasionally has a
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 073/190] drm/i915: Introduce i915_gem_active for request tracking

2016-01-11 Thread Chris Wilson
In the next patch, request tracking is made more generic, and for that
we need a new expanded struct. To separate the logic changes from the
mechanical churn, we split out the structure renaming into this patch.

v2: Writer's block. Add some spiel about why we track requests.
v3: Now i915_gem_active.

Signed-off-by: Chris Wilson 
---
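
A sketch of the expanded tracker the rename prepares for (the request
pointer as today, with room for the per-slot link and retirement
callback used later in the series):

	struct i915_gem_active {
		struct drm_i915_gem_request *request;
		struct list_head link;	/* on the request's active list */
		void (*retire)(struct i915_gem_active *active,
			       struct drm_i915_gem_request *request);
	};

Each obj->last_read[]/last_write/last_fence slot becomes one of these,
so waiting and retirement can act on the slot rather than on a bare
request pointer.
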
 drivers/gpu/drm/i915/i915_debugfs.c| 10 +++---
 drivers/gpu/drm/i915/i915_drv.h|  9 +++--
 drivers/gpu/drm/i915/i915_gem.c| 56 +++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 +--
 drivers/gpu/drm/i915/i915_gem_fence.c  |  6 ++--
 drivers/gpu/drm/i915/i915_gem_request.h| 38 
 drivers/gpu/drm/i915/i915_gem_tiling.c |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c  |  6 ++--
 drivers/gpu/drm/i915/intel_display.c   | 10 +++---
 9 files changed, 89 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 8de944ed3369..65cb1d6a5d64 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -146,10 +146,10 @@ describe_obj(struct seq_file *m, struct 
drm_i915_gem_object *obj)
   obj->base.write_domain);
for_each_ring(ring, dev_priv, i)
seq_printf(m, "%x ",
-   
i915_gem_request_get_seqno(obj->last_read_req[i]));
+   
i915_gem_request_get_seqno(obj->last_read[i].request));
seq_printf(m, "] %x %x%s%s%s",
-  i915_gem_request_get_seqno(obj->last_write_req),
-  i915_gem_request_get_seqno(obj->last_fenced_req),
+  i915_gem_request_get_seqno(obj->last_write.request),
+  i915_gem_request_get_seqno(obj->last_fence.request),
   i915_cache_level_str(to_i915(obj->base.dev), 
obj->cache_level),
   obj->dirty ? " dirty" : "",
   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -184,8 +184,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
*t = '\0';
seq_printf(m, " (%s mappable)", s);
}
-   if (obj->last_write_req != NULL)
-   seq_printf(m, " (%s)", obj->last_write_req->engine->name);
+   if (obj->last_write.request != NULL)
+   seq_printf(m, " (%s)", obj->last_write.request->engine->name);
if (obj->frontbuffer_bits)
seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cae448e238ca..c577f86d94f8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2110,11 +2110,10 @@ struct drm_i915_gem_object {
 * requests on one ring where the write request is older than the
 * read request. This allows for the CPU to read from an active
 * buffer by only waiting for the write to complete.
-* */
-   struct drm_i915_gem_request *last_read_req[I915_NUM_RINGS];
-   struct drm_i915_gem_request *last_write_req;
-   /** Breadcrumb of last fenced GPU access to the buffer. */
-   struct drm_i915_gem_request *last_fenced_req;
+*/
+   struct i915_gem_active last_read[I915_NUM_RINGS];
+   struct i915_gem_active last_write;
+   struct i915_gem_active last_fence;
 
/** Current tiling stride for the object, if it's tiled. */
uint32_t stride;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b0230e7151ce..77c253ddf060 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1117,23 +1117,23 @@ i915_gem_object_wait_rendering(struct 
drm_i915_gem_object *obj,
return 0;
 
if (readonly) {
-   if (obj->last_write_req != NULL) {
-   ret = i915_wait_request(obj->last_write_req);
+   if (obj->last_write.request != NULL) {
+   ret = i915_wait_request(obj->last_write.request);
if (ret)
return ret;
 
-   i = obj->last_write_req->engine->id;
-   if (obj->last_read_req[i] == obj->last_write_req)
+   i = obj->last_write.request->engine->id;
+   if (obj->last_read[i].request == 
obj->last_write.request)
i915_gem_object_retire__read(obj, i);
else
i915_gem_object_retire__write(obj);
}
} else {
for (i = 0; i < I915_NUM_RINGS; i++) {
-   if (obj->last_read_req[i] == NULL)
+   if (obj->last_read[i].request == NULL)
continue;
 
-   ret = 

[Intel-gfx] [PATCH 051/190] drm,i915: Introduce drm_malloc_gfp()

2016-01-11 Thread Chris Wilson
I have instances where I want to use drm_malloc_ab() but with a custom
gfp mask. And with those, where I want a temporary allocation, I want to
try a high-order kmalloc() before using a vmalloc().

So refactor my usage into drm_malloc_gfp().

Signed-off-by: Chris Wilson 
Cc: dri-de...@lists.freedesktop.org
Cc: Ville Syrjälä 
Reviewed-by: Ville Syrjälä 
Acked-by: Dave Airlie 
---
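
A sketch of the shape such a helper can take (illustrative, not the
exact hunk; __vmalloc() is used so the caller's gfp mask is honoured):

	static __inline__ void *drm_malloc_gfp(size_t nmemb, size_t size,
					       gfp_t gfp)
	{
		void *ptr;

		if (size != 0 && nmemb > SIZE_MAX / size)
			return NULL;

		if (nmemb * size <= PAGE_SIZE)
			return kmalloc(nmemb * size, gfp);

		/* Try a cheap high-order allocation first, but do not
		 * let it retry or warn; fall back to vmalloc space. */
		ptr = kmalloc(nmemb * size,
			      gfp | __GFP_NOWARN | __GFP_NORETRY);
		if (ptr)
			return ptr;

		return __vmalloc(nmemb * size, gfp, PAGE_KERNEL);
	}
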
 drivers/gpu/drm/i915/i915_gem.c|  4 +---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 +++-
 drivers/gpu/drm/i915/i915_gem_gtt.c|  5 +++--
 drivers/gpu/drm/i915/i915_gem_userptr.c| 15 ---
 include/drm/drm_mem_util.h | 19 +++
 5 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2912e8714f5b..a4f9c5bbb883 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2040,9 +2040,7 @@ void *i915_gem_object_pin_vmap(struct drm_i915_gem_object 
*obj)
int n;
 
n = obj->base.size >> PAGE_SHIFT;
-   pages = kmalloc(n*sizeof(*pages), GFP_TEMPORARY | __GFP_NOWARN);
-   if (pages == NULL)
-   pages = drm_malloc_ab(n, sizeof(*pages));
+   pages = drm_malloc_gfp(n, sizeof(*pages), GFP_TEMPORARY);
if (pages != NULL) {
n = 0;
for_each_sg_page(obj->pages->sgl, _iter, 
obj->pages->nents, 0)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index da1c6fe5b40e..dfabeee2ff0b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1766,11 +1766,9 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
return -EINVAL;
}
 
-   exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
-GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
-   if (exec2_list == NULL)
-   exec2_list = drm_malloc_ab(sizeof(*exec2_list),
-  args->buffer_count);
+   exec2_list = drm_malloc_gfp(sizeof(*exec2_list),
+   args->buffer_count,
+   GFP_TEMPORARY);
if (exec2_list == NULL) {
DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
  args->buffer_count);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 56f4f2e58d53..224fe89baca3 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3376,8 +3376,9 @@ intel_rotate_fb_obj_pages(struct i915_ggtt_view 
*ggtt_view,
int ret = -ENOMEM;
 
/* Allocate a temporary list of source pages for random access. */
-   page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
-  sizeof(dma_addr_t));
+   page_addr_list = drm_malloc_gfp(obj->base.size / PAGE_SIZE,
+   sizeof(dma_addr_t),
+   GFP_TEMPORARY);
if (!page_addr_list)
return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 1a5f89dba4af..251e81c4b0ea 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -573,10 +573,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct 
*_work)
ret = -ENOMEM;
pinned = 0;
 
-   pvec = kmalloc(npages*sizeof(struct page *),
-  GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
-   if (pvec == NULL)
-   pvec = drm_malloc_ab(npages, sizeof(struct page *));
+   pvec = drm_malloc_gfp(npages, sizeof(struct page *), GFP_TEMPORARY);
if (pvec != NULL) {
struct mm_struct *mm = obj->userptr.mm->mm;
 
@@ -713,14 +710,10 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object 
*obj)
pvec = NULL;
pinned = 0;
if (obj->userptr.mm->mm == current->mm) {
-   pvec = kmalloc(num_pages*sizeof(struct page *),
-  GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+   pvec = drm_malloc_gfp(num_pages, sizeof(struct page *), 
GFP_TEMPORARY);
if (pvec == NULL) {
-   pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
-   if (pvec == NULL) {
-   __i915_gem_userptr_set_active(obj, false);
-   return -ENOMEM;
-   }
+   __i915_gem_userptr_set_active(obj, false);
+   return -ENOMEM;
}
 
pinned = 

[Intel-gfx] [PATCH 054/190] drm/i915: Use the new rq->i915 field where appropriate

2016-01-11 Thread Chris Wilson
In a few frequent cases, having a direct pointer to the drm_i915_private
from the request is very useful.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c|  7 +++---
 drivers/gpu/drm/i915/i915_gem_context.c| 21 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +--
 drivers/gpu/drm/i915/i915_gem_request.c|  2 +-
 drivers/gpu/drm/i915/intel_lrc.c   |  6 ++
 drivers/gpu/drm/i915/intel_pm.c|  3 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c| 34 --
 7 files changed, 32 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 31926a4fb42a..c2a1ec8abc11 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2568,7 +2568,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
return 0;
 
if (!i915.semaphores) {
-   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct drm_i915_private *i915 = from_req->i915;
ret = __i915_wait_request(from_req,
  i915->mm.interruptible,
  NULL,
@@ -4069,12 +4069,11 @@ err:
 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
 {
struct intel_engine_cs *ring = req->ring;
-   struct drm_device *dev = ring->dev;
-   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct drm_i915_private *dev_priv = req->i915;
u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
int i, ret;
 
-   if (!HAS_L3_DPF(dev) || !remap_info)
+   if (!HAS_L3_DPF(dev_priv) || !remap_info)
return 0;
 
ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 361be1085a18..3e3b4bf3fed1 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -524,7 +524,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
i915.semaphores ?
-   hweight32(INTEL_INFO(ring->dev)->ring_mask) - 1 :
+   hweight32(INTEL_INFO(req->i915)->ring_mask) - 1 :
0;
int len, i, ret;
 
@@ -533,21 +533,21 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 * explicitly, so we rely on the value at ring init, stored in
 * itlb_before_ctx_switch.
 */
-   if (IS_GEN6(ring->dev)) {
+   if (IS_GEN6(req->i915)) {
ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0);
if (ret)
return ret;
}
 
/* These flags are for resource streamer on HSW+ */
-   if (IS_HASWELL(ring->dev) || INTEL_INFO(ring->dev)->gen >= 8)
+   if (IS_HASWELL(req->i915) || INTEL_INFO(req->i915)->gen >= 8)
flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
-   else if (INTEL_INFO(ring->dev)->gen < 8)
+   else if (INTEL_INFO(req->i915)->gen < 8)
flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
 
 
len = 4;
-   if (INTEL_INFO(ring->dev)->gen >= 7)
+   if (INTEL_INFO(req->i915)->gen >= 7)
len += 2 + (num_rings ? 4*num_rings + 2 : 0);
 
ret = intel_ring_begin(req, len);
@@ -555,13 +555,13 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
return ret;
 
/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
-   if (INTEL_INFO(ring->dev)->gen >= 7) {
+   if (INTEL_INFO(req->i915)->gen >= 7) {
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
if (num_rings) {
struct intel_engine_cs *signaller;
 
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
-   for_each_ring(signaller, to_i915(ring->dev), i) {
+   for_each_ring(signaller, req->i915, i) {
if (signaller == ring)
continue;
 
@@ -581,12 +581,12 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 */
intel_ring_emit(ring, MI_NOOP);
 
-   if (INTEL_INFO(ring->dev)->gen >= 7) {
+   if (INTEL_INFO(req->i915)->gen >= 7) {
if (num_rings) {
struct intel_engine_cs *signaller;
 
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
-   for_each_ring(signaller, to_i915(ring->dev), i) {
+   for_each_ring(signaller, req->i915, i) {
if (signaller == ring)
continue;
 

[Intel-gfx] [PATCH 107/190] drm/i915: Record allocated vma size

2016-01-11 Thread Chris Wilson
Tracking the size of the VMA as allocated allows us to dramatically
reduce the complexity of later functions (like inserting the VMA into
the drm_mm range manager).

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h |  10 +--
 drivers/gpu/drm/i915/i915_gem.c | 117 +++-
 drivers/gpu/drm/i915/i915_gem_gtt.c |  56 +
 drivers/gpu/drm/i915/i915_gem_gtt.h |   6 +-
 4 files changed, 70 insertions(+), 119 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 49b126e4191e..7df6cfabe7fa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2853,11 +2853,11 @@ int i915_gem_object_attach_phys(struct 
drm_i915_gem_object *obj,
 int i915_gem_open(struct drm_device *dev, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
-uint32_t
-i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode);
-uint32_t
-i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
-   int tiling_mode, bool fenced);
+uint64_t
+i915_gem_get_gtt_size(struct drm_device *dev, uint64_t size, int tiling_mode);
+uint64_t
+i915_gem_get_gtt_alignment(struct drm_device *dev, uint64_t size,
+  int tiling_mode, bool fenced);
 
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
enum i915_cache_level cache_level);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9c159e64a9a0..0d4f358f4067 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1589,11 +1589,13 @@ i915_gem_release_all_mmaps(struct drm_i915_private 
*dev_priv)
i915_gem_release_mmap(obj);
 }
 
-uint32_t
-i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
+uint64_t
+i915_gem_get_gtt_size(struct drm_device *dev, uint64_t size, int tiling_mode)
 {
uint32_t gtt_size;
 
+   GEM_BUG_ON(size == 0);
+
if (INTEL_INFO(dev)->gen >= 4 ||
tiling_mode == I915_TILING_NONE)
return size;
@@ -1617,10 +1619,12 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t 
size, int tiling_mode)
  * Return the required GTT alignment for an object, taking into account
  * potential fence register mapping.
  */
-uint32_t
-i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
+uint64_t
+i915_gem_get_gtt_alignment(struct drm_device *dev, uint64_t size,
   int tiling_mode, bool fenced)
 {
+   GEM_BUG_ON(size == 0);
+
/*
 * Minimum alignment is 4k (GTT page size), but might be greater
 * if a fence register is needed for the object.
@@ -2747,68 +2751,51 @@ i915_gem_object_insert_into_vm(struct 
drm_i915_gem_object *obj,
   struct i915_address_space *vm,
   const struct i915_ggtt_view *ggtt_view,
   uint64_t size,
-  unsigned alignment,
+  uint64_t alignment,
   uint64_t flags)
 {
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
-   u64 start, end;
-   u32 search_flag, alloc_flag;
struct i915_vma *vma;
+   u64 start, end;
+   u64 min_alignment;
int ret;
 
-   if (i915_is_ggtt(vm)) {
-   u32 fence_size, fence_alignment, unfenced_alignment;
-   u64 view_size;
-
-   if (WARN_ON(!ggtt_view))
-   return ERR_PTR(-EINVAL);
-
-   view_size = i915_ggtt_view_size(obj, ggtt_view);
-
-   fence_size = i915_gem_get_gtt_size(dev,
-  view_size,
-  obj->tiling_mode);
-   fence_alignment = i915_gem_get_gtt_alignment(dev,
-view_size,
-obj->tiling_mode,
-true);
-   unfenced_alignment = i915_gem_get_gtt_alignment(dev,
-   view_size,
-   
obj->tiling_mode,
-   false);
-   size = max(size, view_size);
-   if (flags & PIN_MAPPABLE)
-   size = max_t(u64, size, fence_size);
-
-   if (alignment == 0)
-   alignment = flags & PIN_MAPPABLE ? fence_alignment :
-   unfenced_alignment;
-   if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
-

[Intel-gfx] [PATCH 139/190] drm/i915: Move fence tracking from object to vma

2016-01-11 Thread Chris Wilson
In order to handle tiled partial GTT mmappings, we need to associate the
fence with an individual vma.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c|  15 +-
 drivers/gpu/drm/i915/i915_drv.h|  81 --
 drivers/gpu/drm/i915/i915_gem.c|  34 ++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  21 +-
 drivers/gpu/drm/i915/i915_gem_fence.c  | 381 +++--
 drivers/gpu/drm/i915/i915_gem_gtt.c|   7 +
 drivers/gpu/drm/i915/i915_gem_gtt.h|   9 +
 drivers/gpu/drm/i915/i915_gem_tiling.c |  65 +++--
 drivers/gpu/drm/i915/i915_gpu_error.c  |   2 +-
 drivers/gpu/drm/i915/intel_display.c   |  57 ++---
 drivers/gpu/drm/i915/intel_fbc.c   |  30 ++-
 drivers/gpu/drm/i915/intel_fbdev.c |   4 +-
 drivers/gpu/drm/i915/intel_overlay.c   |   2 +-
 13 files changed, 324 insertions(+), 384 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 418b80de5246..f15ed7793969 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -133,9 +133,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
for_each_ring(ring, dev_priv, i)
seq_printf(m, "%x ",

i915_gem_request_get_seqno(obj->last_read[i].request));
-   seq_printf(m, "] %x %x%s%s%s",
+   seq_printf(m, "] %x %s%s%s",
   i915_gem_request_get_seqno(obj->last_write.request),
-  i915_gem_request_get_seqno(obj->last_fence.request),
   i915_cache_level_str(to_i915(obj->base.dev), 
obj->cache_level),
   obj->dirty ? " dirty" : "",
   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -148,8 +147,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
seq_printf(m, " (pinned x %d)", pin_count);
if (obj->pin_display)
seq_printf(m, " (display)");
-   if (obj->fence_reg != I915_FENCE_REG_NONE)
-   seq_printf(m, " (fence: %d)", obj->fence_reg);
	list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (!drm_mm_node_allocated(>node))
continue;
@@ -159,6 +156,10 @@ describe_obj(struct seq_file *m, struct 
drm_i915_gem_object *obj)
   vma->node.start, vma->node.size);
if (vma->is_ggtt)
seq_printf(m, ", type: %u", vma->ggtt_view.type);
+   if (vma->fence)
+   seq_printf(m, " , fence: %d%s",
+  vma->fence->id,
+  vma->last_fence.request ? "*" : "");
seq_puts(m, ")");
}
if (obj->stolen)
@@ -948,14 +949,14 @@ static int i915_gem_fence_regs_info(struct seq_file *m, 
void *data)
 
seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
for (i = 0; i < dev_priv->num_fence_regs; i++) {
-   struct drm_i915_gem_object *obj = dev_priv->fence_regs[i].obj;
+   struct i915_vma *vma = dev_priv->fence_regs[i].vma;
 
seq_printf(m, "Fence %d, pin count = %d, object = ",
   i, dev_priv->fence_regs[i].pin_count);
-   if (obj == NULL)
+   if (vma == NULL)
seq_puts(m, "unused");
else
-   describe_obj(m, obj);
+   describe_obj(m, vma->obj);
seq_putc(m, '\n');
}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cfc4430d3b50..bb0f750bb5b5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -458,15 +458,21 @@ struct intel_opregion {
 struct intel_overlay;
 struct intel_overlay_error_state;
 
-#define I915_FENCE_REG_NONE -1
-#define I915_MAX_NUM_FENCES 32
-/* 32 fences + sign bit for FENCE_REG_NONE */
-#define I915_MAX_NUM_FENCE_BITS 6
-
 struct drm_i915_fence_reg {
struct list_head lru_list;
-   struct drm_i915_gem_object *obj;
+   struct drm_i915_private *i915;
+   struct i915_vma *vma;
int pin_count;
+   int id;
+   /**
+* Whether the tiling parameters for the currently
+* associated fence register have changed. Note that
+* for the purposes of tracking tiling changes we also
+* treat the unfenced register, the register slot that
+* the object occupies whilst it executes a fenced
+* command (such as BLT on gen2/3), as a "fence".
+*/
+   bool dirty;
 };
 
 struct sdvo_device_mapping {
@@ -2053,13 +2059,6 @@ struct drm_i915_gem_object {
unsigned int dirty:1;
 
/**
-* Fence register bits (if any) for this object.  Will be set
-* as needed when mapped into the GTT.
-* Protected by dev->struct_mutex.
-*/

[Intel-gfx] [PATCH 123/190] drm/i915: Mark unmappable GGTT entries as PIN_HIGH

2016-01-11 Thread Chris Wilson
We allocate a few objects into the GGTT that we never need to access via
the mappable aperture (such as contexts, status pages). We can request
that these are bound high in the VM to increase the amount of mappable
aperture available.

Signed-off-by: Chris Wilson 
---
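
Plumbing-wise, PIN_HIGH just selects a top-down drm_mm search when the
node is inserted; a sketch, assuming the drm_mm flag names of this era:

	u32 search_flag = DRM_MM_SEARCH_DEFAULT;
	u32 alloc_flag = DRM_MM_CREATE_DEFAULT;

	if (flags & PIN_HIGH) {
		search_flag = DRM_MM_SEARCH_BELOW;	/* scan top down */
		alloc_flag = DRM_MM_CREATE_TOP;	/* place at the high end */
	}

	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
						  size, alignment, 0,
						  start, end,
						  search_flag, alloc_flag);

Objects that are never CPU-mapped then cluster at the top of the GGTT,
leaving the low, mappable range free for buffers that need the aperture.
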
 drivers/gpu/drm/i915/i915_gem_context.c |  4 ++--
 drivers/gpu/drm/i915/intel_lrc.c|  3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 13 +
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 9250a7405807..c54c17944796 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -389,7 +389,7 @@ int i915_gem_context_init(struct drm_device *dev)
 * context.
 */
ret = i915_gem_object_ggtt_pin(ctx->legacy_hw_ctx.rcs_state,
-  NULL, 0, alignment, 0);
+  NULL, 0, alignment, PIN_HIGH);
if (ret) {
DRM_ERROR("Failed to pinned default global context 
(error %d)\n",
  ret);
@@ -677,7 +677,7 @@ static int do_switch(struct drm_i915_gem_request *req)
if (engine->id == RCS) {
u32 alignment = get_context_alignment(engine->dev);
ret = i915_gem_object_ggtt_pin(to->legacy_hw_ctx.rcs_state,
-  NULL, 0, alignment, 0);
+  NULL, 0, alignment, PIN_HIGH);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 86fa41770ff1..206311b55e71 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -583,7 +583,8 @@ static int intel_lr_context_pin(struct intel_context *ctx,
ctx_obj = ctx->engine[engine->id].state;
ret = i915_gem_object_ggtt_pin(ctx_obj, NULL,
   0, GEN8_LR_CONTEXT_ALIGN,
-  PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
+  PIN_OFFSET_BIAS | GUC_WOPCM_TOP |
+  PIN_HIGH);
if (ret)
goto err;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ba3631d216fe..6db7f93a3c1d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -649,7 +649,8 @@ intel_init_pipe_control(struct intel_engine_cs *ring)
if (ret)
goto err_unref;
 
-   ret = i915_gem_object_ggtt_pin(ring->scratch.obj, NULL, 0, 4096, 0);
+   ret = i915_gem_object_ggtt_pin(ring->scratch.obj, NULL,
+  0, 4096, PIN_HIGH);
if (ret)
goto err_unref;
 
@@ -1891,7 +1892,9 @@ int intel_ring_map(struct intel_ring *ring)
int ret;
 
if (HAS_LLC(ring->engine->i915) && !obj->stolen) {
-   ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, 0);
+   ret = i915_gem_object_ggtt_pin(obj, NULL,
+  0, PAGE_SIZE,
+  PIN_HIGH);
if (ret)
return ret;
 
@@ -1906,7 +1909,8 @@ int intel_ring_map(struct intel_ring *ring)
goto unpin;
}
} else {
-   ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
+   ret = i915_gem_object_ggtt_pin(obj, NULL,
+  0, PAGE_SIZE,
   PIN_MAPPABLE);
if (ret)
return ret;
@@ -2505,7 +2509,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
} else {
i915_gem_object_set_cache_level(obj, 
I915_CACHE_LLC);
ret = i915_gem_object_ggtt_pin(obj, NULL,
-  0, 0, 0);
+  0, 0,
+  PIN_HIGH);
if (ret != 0) {
drm_gem_object_unreference(>base);
DRM_ERROR("Failed to pin semaphore bo. 
Disabling semaphores\n");
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 113/190] drm/i915: Enable lockless lookup of request tracking via RCU

2016-01-11 Thread Chris Wilson
If we enable RCU for the requests (providing a grace period where we can
inspect a "dead" request before it is freed), we can allow callers to
carefully perform lockless lookup of an active request.

However, by enabling deferred freeing of requests, we can potentially
hog a lot of memory when dealing with tens of thousands of requests per
second - with a quick insertion of a synchronize_rcu() inside our
shrinker callback, that issue disappears.

v2: Currently, it is our responsibility to handle reclaim i.e. to avoid
hogging memory with the delayed slab frees. At the moment, we wait for a
grace period in the shrinker, and block for all RCU callbacks on oom.
Suggested alternatives focus on flushing our RCU callback when we have a
certain number of outstanding request frees, and blocking on that flush
after a second high watermark. (So rather than wait for the system to
run out of memory, we stop issuing requests - both are nondeterministic.)

Paul E. McKenney wrote:

Another approach is synchronize_rcu() after some largish number of
requests.  The advantage of this approach is that it throttles the
production of callbacks at the source.  The corresponding disadvantage
is that it slows things up.

Another approach is to use call_rcu(), but if the previous call_rcu()
is still in flight, block waiting for it.  Yet another approach is
the get_state_synchronize_rcu() / cond_synchronize_rcu() pair.  The
idea is to do something like this:

cond_synchronize_rcu(cookie);
cookie = get_state_synchronize_rcu();

You would of course do an initial get_state_synchronize_rcu() to
get things going.  This would not block unless there was less than
one grace period's worth of time between invocations.  But this
assumes a busy system, where there is almost always a grace period
in flight.  But you can make that happen as follows:

cond_synchronize_rcu(cookie);
cookie = get_state_synchronize_rcu();
call_rcu(&my_rcu_head, noop_function);

Note that you need additional code to make sure that the old callback
has completed before doing a new one.  Setting and clearing a flag
with appropriate memory ordering control suffices (e.g,. smp_load_acquire()
and smp_store_release()).
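
For concreteness, the cookie-based throttle described above might look
like the following minimal sketch (the gfx_* names are hypothetical and
not part of this patch; cond_synchronize_rcu(), get_state_synchronize_rcu()
and call_rcu() are the real kernel APIs):

/* Minimal sketch of the cookie-based throttle; the gfx_* names are
 * hypothetical, the RCU calls are the real kernel API. */
static unsigned long gfx_rcu_cookie;
static struct rcu_head gfx_rcu_head;

static void gfx_rcu_noop(struct rcu_head *head)
{
	/* Intentionally empty: only keeps a grace period in flight. */
}

static void gfx_throttle_frees(void)
{
	/* Blocks only if less than one grace period has elapsed since
	 * the cookie was captured; usually a no-op on a busy system. */
	cond_synchronize_rcu(gfx_rcu_cookie);
	gfx_rcu_cookie = get_state_synchronize_rcu();

	/* Keep a grace period in flight for the next invocation. As noted
	 * above, a real version must ensure the previous callback has
	 * completed before reusing the rcu_head. */
	call_rcu(&gfx_rcu_head, gfx_rcu_noop);
}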

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c  |  3 ++-
 drivers/gpu/drm/i915/i915_gem_request.c  |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.h  | 24 +++-
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 15 +++
 4 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6712ecf1239b..ee715558ecea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4215,7 +4215,8 @@ i915_gem_load(struct drm_device *dev)
dev_priv->requests =
kmem_cache_create("i915_gem_request",
  sizeof(struct drm_i915_gem_request), 0,
- SLAB_HWCACHE_ALIGN,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_DESTROY_BY_RCU,
  NULL);
 
INIT_LIST_HEAD(&dev_priv->context_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 61be8dda4a14..be24bde2e602 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -336,7 +336,7 @@ static void __i915_gem_request_retire_active(struct 
drm_i915_gem_request *req)
 */
list_for_each_entry_safe(active, next, &req->active_list, link) {
INIT_LIST_HEAD(>link);
-   active->request = NULL;
+   rcu_assign_pointer(active->request, NULL);
 
active->retire(active, req);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h 
b/drivers/gpu/drm/i915/i915_gem_request.h
index c2e83584f8a2..f035db7c97cd 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -138,6 +138,12 @@ i915_gem_request_get(struct drm_i915_gem_request *req)
return to_request(fence_get(&req->fence));
 }
 
+static inline struct drm_i915_gem_request *
+i915_gem_request_get_rcu(struct drm_i915_gem_request *req)
+{
+   return to_request(fence_get_rcu(&req->fence));
+}
+
 static inline void
 i915_gem_request_put(struct drm_i915_gem_request *req)
 {
@@ -242,7 +248,23 @@ i915_gem_request_mark_active(struct drm_i915_gem_request 
*request,
 struct i915_gem_active *active)
 {
list_move(&active->link, &request->active_list);
-   active->request = request;
+   rcu_assign_pointer(active->request, request);
+}
+
+static inline struct drm_i915_gem_request *
+i915_gem_active_get_request_rcu(struct i915_gem_active *active)
+{
+   do {
+   struct drm_i915_gem_request *request;
+
+   request = rcu_dereference(active->request);
+   if (request == 
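
[The archive truncates this hunk mid-function. For reference, a sketch
of how such a SLAB_DESTROY_BY_RCU lookup loop typically completes,
reconstructed from the pattern described above rather than copied from
the original patch:]

/* Reconstruction sketch, not the original hunk. The caller must hold
 * rcu_read_lock(); with SLAB_DESTROY_BY_RCU the request may be freed
 * and recycled under us, so revalidate after taking the reference. */
static inline struct drm_i915_gem_request *
i915_gem_active_get_request_rcu(struct i915_gem_active *active)
{
	do {
		struct drm_i915_gem_request *request;

		request = rcu_dereference(active->request);
		if (request == NULL)
			return NULL;

		/* fence_get_rcu() succeeds only if the refcount was still
		 * nonzero, i.e. the request had not yet been recycled. */
		request = i915_gem_request_get_rcu(request);
		if (!request)
			continue;

		/* The slot may have been reassigned whilst we took the
		 * reference; if so, drop it and retry. */
		if (request == rcu_access_pointer(active->request))
			return request;

		i915_gem_request_put(request);
	} while (1);
}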

[Intel-gfx] [PATCH 100/190] drm/i915: Remove request retirement before each batch

2016-01-11 Thread Chris Wilson
This reimplements the denial-of-service protection against igt from

commit 227f782e4667fc622810bce8be8ccdeee45f89c2
Author: Chris Wilson 
Date:   Thu May 15 10:41:42 2014 +0100

drm/i915: Retire requests before creating a new one

and transfers the stall from before each batch into the close handler.
The issue is that the stall increases latency between batches, which is
detrimental in some cases (especially coupled with execlists) to
keeping the GPU well fed. We have also observed that retiring requests
can itself free objects (and requests) and therefore makes a good first
step when shrinking.

v2: Recycle objects prior to i915_gem_object_get_pages()

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h|  1 -
 drivers/gpu/drm/i915/i915_gem.c| 23 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 --
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index be63eaf8764a..5711ae3a22a1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2780,7 +2780,6 @@ struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring);
 
 void i915_gem_retire_requests(struct drm_device *dev);
-void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
 
 static inline u32 i915_reset_counter(struct i915_gpu_error *error)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a0207b9d1aea..d705005ca26e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1986,7 +1986,6 @@ err_pages:
 int
 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 {
-   struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
const struct drm_i915_gem_object_ops *ops = obj->ops;
int ret;
 
@@ -2000,11 +1999,15 @@ i915_gem_object_get_pages(struct drm_i915_gem_object 
*obj)
 
BUG_ON(obj->pages_pin_count);
 
+   /* Recycle as many active objects as possible first */
+   i915_gem_retire_requests(obj->base.dev);
+
ret = ops->get_pages(obj);
if (ret)
return ret;
 
-   list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
+   list_add_tail(&obj->global_list,
+ &to_i915(obj->base.dev)->mm.unbound_list);
 
obj->get_page.sg = obj->pages->sgl;
obj->get_page.last = 0;
@@ -2259,7 +2262,7 @@ void i915_gem_reset(struct drm_device *dev)
 /**
  * This function clears the request list as sequence numbers are passed.
  */
-void
+static bool
 i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 {
while (!list_empty(&ring->request_list)) {
@@ -2270,10 +2273,12 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
*ring)
   link);
 
if (!i915_gem_request_completed(request))
-   break;
+   return false;
 
i915_gem_request_retire_upto(request);
}
+
+   return true;
 }
 
 void
@@ -2281,19 +2286,18 @@ i915_gem_retire_requests(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring;
-   bool idle = true;
+   bool idle;
int i;
 
if (!dev_priv->mm.busy)
return;
 
+   idle = true;
for_each_ring(ring, dev_priv, i) {
-   i915_gem_retire_requests_ring(ring);
-   idle &= list_empty(&ring->request_list);
+   idle &= i915_gem_retire_requests_ring(ring);
if (i915.enable_execlists)
idle &= intel_execlists_retire_requests(ring);
}
-
if (idle)
queue_delayed_work(dev_priv->wq,
   &dev_priv->mm.idle_work,
@@ -2399,6 +2403,7 @@ void i915_gem_close_object(struct drm_gem_object *gem,
list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
if (vma->vm->file == fpriv)
i915_vma_close(vma);
+   i915_gem_object_flush_active(obj);
mutex_unlock(&obj->base.dev->struct_mutex);
 }
 
@@ -4235,7 +4240,9 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev)
 static void
 init_ring_lists(struct intel_engine_cs *ring)
 {
+   /* Early initialisation so that core GEM works during engine setup */
INIT_LIST_HEAD(&ring->request_list);
+   INIT_LIST_HEAD(&ring->execlist_completed);
 }
 
 void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7a9d3f4732e9..90c5341506be 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -741,8 +741,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
int retry;
 
-   

[Intel-gfx] [PATCH 116/190] drm/i915: Reduce locking inside swfinish ioctl

2016-01-11 Thread Chris Wilson
We only need to take the struct_mutex if the object is pinned to the
display engine and so requires checking for clflush. (The race with
userspace pinning the object to a framebuffer is irrelevant.)

v2: Use access once for compiler hints (or not as it is a bitfield)

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_gem.c | 29 -
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 95d4d2460f6a..f87e558a7233 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1282,25 +1282,28 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void 
*data,
 {
struct drm_i915_gem_sw_finish *args = data;
struct drm_i915_gem_object *obj;
-   int ret = 0;
-
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   return ret;
+   int ret;
 
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
-   if (&obj->base == NULL) {
-   ret = -ENOENT;
-   goto unlock;
-   }
+   if (&obj->base == NULL)
+   return -ENOENT;
 
/* Pinned buffers may be scanout, so flush the cache */
-   if (obj->pin_display)
+   if (obj->pin_display) {
+   ret = i915_mutex_lock_interruptible(dev);
+   if (ret)
+   goto unref;
+
i915_gem_object_flush_cpu_write_domain(obj);
 
-   drm_gem_object_unreference(&obj->base);
-unlock:
-   mutex_unlock(&dev->struct_mutex);
+   drm_gem_object_unreference(&obj->base);
+   mutex_unlock(&dev->struct_mutex);
+   } else {
+   ret = 0;
+unref:
+   drm_gem_object_unreference_unlocked(&obj->base);
+   }
+
return ret;
 }
 
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 141/190] drm/i915: Choose not to evict faultable objects from the GGTT

2016-01-11 Thread Chris Wilson
Oftentimes we do not want to evict mapped objects from the GGTT, as
these are quite expensive to tear down and are frequently reused
(incurring an equally, if not more, expensive setup). In particular,
when faulting in a new object we want to avoid evicting an active
object, or else we may trigger a page-fault-of-doom as we ping-pong
between evicting two objects.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h   | 7 ---
 drivers/gpu/drm/i915/i915_gem.c   | 4 +++-
 drivers/gpu/drm/i915/i915_gem_evict.c | 7 +--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bb0f750bb5b5..45b8cbdfab55 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2709,9 +2709,10 @@ i915_vma_pin(struct i915_vma *vma,
 #define PIN_MAPPABLE   (1<<3)
 #define PIN_ZONE_4G(1<<4)
 #define PIN_NONBLOCK   (1<<5)
-#define PIN_HIGH   (1<<6)
-#define PIN_OFFSET_BIAS(1<<7)
-#define PIN_OFFSET_FIXED (1<<8)
+#define PIN_NOFAULT(1<<6)
+#define PIN_HIGH   (1<<7)
+#define PIN_OFFSET_BIAS(1<<8)
+#define PIN_OFFSET_FIXED (1<<9)
 #define PIN_OFFSET_MASK (~4095)
 
 static inline void __i915_vma_unpin(struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a8f4d4633bdb..60dfee56f6ef 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1483,7 +1483,9 @@ int i915_gem_fault(struct vm_area_struct *vma, struct 
vm_fault *vmf)
/* Use a partial view if the object is bigger than the aperture. */
/* Now pin it into the GTT if needed */
ggtt = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-   PIN_MAPPABLE | PIN_NONBLOCK);
+   PIN_MAPPABLE |
+   PIN_NONBLOCK |
+   PIN_NOFAULT);
if (IS_ERR(ggtt)) {
static const unsigned int chunk_size = 256; // 1 MiB
struct i915_ggtt_view partial;
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
b/drivers/gpu/drm/i915/i915_gem_evict.c
index 679b7dd3a312..fdc4941be15a 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -64,7 +64,7 @@ static int switch_to_pinned_context(struct drm_i915_private 
*dev_priv)
 }
 
 static bool
-mark_free(struct i915_vma *vma, struct list_head *unwind)
+mark_free(struct i915_vma *vma, unsigned flags, struct list_head *unwind)
 {
if (vma->pin_count)
return false;
@@ -72,6 +72,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
if (WARN_ON(!list_empty(&vma->exec_list)))
return false;
 
+   if (flags & PIN_NOFAULT && vma->obj->fault_mappable)
+   return false;
+
list_add(&vma->exec_list, unwind);
return drm_mm_scan_add_block(&vma->node);
 }
@@ -146,7 +149,7 @@ search_again:
phase = phases;
do {
list_for_each_entry(vma, *phase, vm_link)
-   if (mark_free(vma, &eviction_list))
+   if (mark_free(vma, flags, &eviction_list))
goto found;
} while (*++phase);
 
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 129/190] drm/i915: Before accessing an object via the cpu, flush GTT writes

2016-01-11 Thread Chris Wilson
If we want to read the pages directly via the CPU, we have to be sure
to first flush any writes via the GTT (as the CPU cannot see the
address aliasing).

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e18c0d4d24ad..c12bda7a4277 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -500,6 +500,8 @@ int i915_gem_obj_prepare_shmem_read(struct 
drm_i915_gem_object *obj,
if (!obj->base.filp)
return -EINVAL;
 
+   i915_gem_object_flush_gtt_write_domain(obj);
+
if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
ret = i915_gem_object_wait_rendering(obj, true);
if (ret)
@@ -540,6 +542,8 @@ int i915_gem_obj_prepare_shmem_write(struct 
drm_i915_gem_object *obj,
if (!obj->base.filp)
return -EINVAL;
 
+   i915_gem_object_flush_gtt_write_domain(obj);
+
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
ret = i915_gem_object_wait_rendering(obj, false);
if (ret)
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 188/190] drm/i915: Use VMA for ringbuffer tracking

2016-01-11 Thread Chris Wilson
Use the GGTT VMA as the primary cookie for handling ring objects, as
the most common action upon the ring is mapping and unmapping, which
act upon the VMA itself. By restructuring the code to work with the
ring VMA, we can shrink the code and remove a few cycles from context
pinning.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 135 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |   2 +-
 3 files changed, 61 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 7fb4088b3966..af2ec70dd7ab 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -379,7 +379,7 @@ static int per_file_ctx_stats(int id, void *ptr, void *data)
if (ctx->engine[n].state)
per_file_stats(0, ctx->engine[n].state->obj, data);
if (ctx->engine[n].ring)
-   per_file_stats(0, ctx->engine[n].ring->obj, data);
+   per_file_stats(0, ctx->engine[n].ring->vma->obj, data);
}
 
return 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 41c52cdcbe4a..512841df2527 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1899,108 +1899,91 @@ static int init_phys_status_page(struct 
intel_engine_cs *ring)
 
 int intel_ring_map(struct intel_ring *ring)
 {
-   struct drm_i915_gem_object *obj = ring->obj;
-   struct i915_vma *vma;
+   void *ptr;
int ret;
 
-   if (HAS_LLC(ring->engine->i915) && !obj->stolen) {
-   vma = i915_gem_object_ggtt_pin(obj, NULL,
-  0, PAGE_SIZE,
-  PIN_HIGH);
-   if (IS_ERR(vma))
-   return PTR_ERR(vma);
+   GEM_BUG_ON(ring->virtual_start);
 
-   ret = i915_gem_object_set_to_cpu_domain(obj, true);
-   if (ret)
-   goto unpin;
-
-   ring->virtual_start = i915_gem_object_pin_vmap(obj);
-   if (IS_ERR(ring->virtual_start)) {
-   ret = PTR_ERR(ring->virtual_start);
-   ring->virtual_start = NULL;
-   goto unpin;
-   }
-   } else {
-   vma = i915_gem_object_ggtt_pin(obj, NULL,
-  0, PAGE_SIZE,
-  PIN_MAPPABLE);
-   if (IS_ERR(vma))
-   return PTR_ERR(vma);
+   ret = i915_vma_pin(ring->vma, 0, PAGE_SIZE,
+  PIN_GLOBAL | (ring->vmap ? PIN_HIGH : PIN_MAPPABLE));
+   if (unlikely(ret))
+   return ret;
 
-   ret = i915_gem_object_set_to_gtt_domain(obj, true);
-   if (ret)
-   goto unpin;
-
-   ring->virtual_start = 
ioremap_wc(ring->engine->i915->gtt.mappable_base +
-vma->node.start,
-ring->size);
-   if (ring->virtual_start == NULL) {
-   ret = -ENOMEM;
-   goto unpin;
-   }
+   if (ring->vmap)
+   ptr = i915_gem_object_pin_vmap(ring->vma->obj);
+   else
+   ptr = i915_vma_iomap(ring->engine->i915, ring->vma);
+   if (IS_ERR(ptr)) {
+   i915_vma_unpin(ring->vma);
+   return PTR_ERR(ptr);
}
 
-   ring->vma = vma;
+   ring->virtual_start = ptr;
return 0;
-
-unpin:
-   i915_vma_unpin(vma);
-   return ret;
 }
 
 void intel_ring_unmap(struct intel_ring *ring)
 {
-   if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen)
-   i915_gem_object_unpin_vmap(ring->obj);
-   else
-   iounmap(ring->virtual_start);
+   GEM_BUG_ON(ring->virtual_start == NULL);
 
-   i915_vma_unpin(ring->vma);
-   ring->vma = NULL;
-}
+   if (ring->vmap)
+   i915_gem_object_unpin_vmap(ring->vma->obj);
+   ring->virtual_start = NULL;
 
-static void intel_destroy_ringbuffer_obj(struct intel_ring *ringbuf)
-{
-   __i915_gem_object_release_unless_active(ringbuf->obj);
-   ringbuf->obj = NULL;
+   i915_vma_unpin(ring->vma);
 }
 
-static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
- struct intel_ring *ringbuf)
+static struct i915_vma *
+intel_ring_create_vma(struct drm_device *dev, int size)
 {
struct drm_i915_gem_object *obj;
+   struct i915_vma *vma;
+   int ret;
 
obj = NULL;
if (!HAS_LLC(dev))
-   obj = i915_gem_object_create_stolen(dev, ringbuf->size);
+   obj = 

[Intel-gfx] [PATCH 175/190] drm/i915: Remove superfluous i915_add_request_no_flush() helper

2016-01-11 Thread Chris Wilson
The only time we need to emit a flush inside request emission is after
an execbuffer, for which we can use the full __i915_add_request(). All
other instances want the simpler i915_add_request() without flushing, so
remove the useless helper.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_evict.c   | 2 +-
 drivers/gpu/drm/i915/i915_gem_request.h | 2 --
 drivers/gpu/drm/i915/intel_display.c| 4 ++--
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
b/drivers/gpu/drm/i915/i915_gem_evict.c
index e71b89bac168..56b57bdf22ab 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -53,7 +53,7 @@ static int switch_to_pinned_context(struct drm_i915_private 
*dev_priv)
return PTR_ERR(req);
 
ret = i915_switch_context(req);
-   i915_add_request_no_flush(req);
+   i915_add_request(req);
if (ret)
return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h 
b/drivers/gpu/drm/i915/i915_gem_request.h
index 1e7c4fff5257..434e028f0411 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -148,8 +148,6 @@ static inline void i915_gem_request_assign(struct 
drm_i915_gem_request **pdst,
 
 void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
-   __i915_add_request(req, true)
-#define i915_add_request_no_flush(req) \
__i915_add_request(req, false)
 
 struct intel_rps_client;
diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index e518d3300a3e..b1fb43fcfeea 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11698,7 +11698,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
if (ret)
goto cleanup_unpin;
 
-   i915_add_request_no_flush(request);
+   i915_add_request(request);
i915_gem_request_assign(&work->flip_queued_req, request);
}
 
@@ -11721,7 +11721,7 @@ cleanup_unpin:
intel_unpin_fb_obj(fb, crtc->primary->state);
 cleanup_request:
if (request)
-   i915_add_request_no_flush(request);
+   i915_add_request(request);
 cleanup_pending:
atomic_dec(&intel_crtc->unpin_work_count);
mutex_unlock(&dev->struct_mutex);
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 176/190] drm/i915: Use the MRU stack search after evicting

2016-01-11 Thread Chris Wilson
When we evict from the GTT to make room for an object, the hole we
create is put onto the MRU stack inside the drm_mm range manager. On the
next search pass, we can speed up a PIN_HIGH allocation by referencing
that stack for the new hole.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0bd6db4e83d9..a7cad2c7c034 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2873,8 +2873,10 @@ search_free:
   obj->cache_level,
   start, end,
   flags);
-   if (ret == 0)
+   if (ret == 0) {
+   search_flag = DRM_MM_SEARCH_DEFAULT;
goto search_free;
+   }
 
goto err_unpin;
}
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 165/190] drm/i915: Use the precomputed value for whether to enable command parsing

2016-01-11 Thread Chris Wilson
As i915.enable_cmd_parser is an unsafe option, make it read-only at
runtime. Now that it is constant, we can use the value determined
during initialisation to decide whether we need the cmdparser at
execbuffer time.
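
The diff removes i915_needs_cmd_parser() but the replacement helper in
intel_ringbuffer.h is not shown; presumably it reduces to the
precomputed flag, along these lines (a sketch, not the verbatim hunk):

/* Sketch: with i915.enable_cmd_parser read-only after module load,
 * the execbuffer-time check collapses to a flag set once at init. */
static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
{
	return engine->needs_cmd_parser;
}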

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 36 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 +-
 drivers/gpu/drm/i915/i915_params.c |  6 ++---
 drivers/gpu/drm/i915/i915_params.h |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h| 15 +
 5 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index fae127166e2c..84340eb42e1b 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -696,12 +696,18 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs 
*ring)
int cmd_table_count;
int ret;
 
-   if (!IS_GEN7(ring->dev))
+   if (!i915.enable_cmd_parser)
+   return 0;
+
+   if (!USES_PPGTT(ring->i915))
+   return 0;
+
+   if (!IS_GEN7(ring->i915))
return 0;
 
switch (ring->id) {
case RCS:
-   if (IS_HASWELL(ring->dev)) {
+   if (IS_HASWELL(ring->i915)) {
cmd_tables = hsw_render_ring_cmds;
cmd_table_count =
ARRAY_SIZE(hsw_render_ring_cmds);
@@ -713,7 +719,7 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
ring->reg_table = gen7_render_regs;
ring->reg_count = ARRAY_SIZE(gen7_render_regs);
 
-   if (IS_HASWELL(ring->dev)) {
+   if (IS_HASWELL(ring->i915)) {
ring->master_reg_table = hsw_master_regs;
ring->master_reg_count = ARRAY_SIZE(hsw_master_regs);
} else {
@@ -729,7 +735,7 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
case BCS:
-   if (IS_HASWELL(ring->dev)) {
+   if (IS_HASWELL(ring->i915)) {
cmd_tables = hsw_blt_ring_cmds;
cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds);
} else {
@@ -740,7 +746,7 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
ring->reg_table = gen7_blt_regs;
ring->reg_count = ARRAY_SIZE(gen7_blt_regs);
 
-   if (IS_HASWELL(ring->dev)) {
+   if (IS_HASWELL(ring->i915)) {
ring->master_reg_table = hsw_master_regs;
ring->master_reg_count = ARRAY_SIZE(hsw_master_regs);
} else {
@@ -954,26 +960,6 @@ unpin_src:
return ret ? ERR_PTR(ret) : dst;
 }
 
-/**
- * i915_needs_cmd_parser() - should a given ring use software command parsing?
- * @ring: the ring in question
- *
- * Only certain platforms require software batch buffer command parsing, and
- * only when enabled via module parameter.
- *
- * Return: true if the ring requires software command parsing
- */
-bool i915_needs_cmd_parser(struct intel_engine_cs *ring)
-{
-   if (!ring->needs_cmd_parser)
-   return false;
-
-   if (!USES_PPGTT(ring->dev))
-   return false;
-
-   return (i915.enable_cmd_parser == 1);
-}
-
 static bool check_cmd(const struct intel_engine_cs *ring,
  const struct drm_i915_cmd_descriptor *desc,
  const u32 *cmd, u32 length,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 185fbf45a5d2..e60f559696d9 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1616,7 +1616,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
 
params->args_batch_start_offset = args->batch_start_offset;
-   if (i915_needs_cmd_parser(ring) && args->batch_len) {
+   if (intel_engine_needs_cmd_parser(ring) && args->batch_len) {
struct i915_vma *vma;
 
vma = i915_gem_execbuffer_parse(ring, _exec_entry,
diff --git a/drivers/gpu/drm/i915/i915_params.c 
b/drivers/gpu/drm/i915/i915_params.c
index 8d90c256520a..e6998efd9cae 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -48,7 +48,7 @@ struct i915_params i915 __read_mostly = {
.reset = true,
.invert_brightness = 0,
.disable_display = 0,
-   .enable_cmd_parser = 1,
+   .enable_cmd_parser = true,
.disable_vtd_wa = 0,
.use_mmio_flip = 0,
.mmio_debug = 0,
@@ -169,9 +169,9 @@ MODULE_PARM_DESC(disable_display, "Disable display 
(default: false)");
 module_param_named_unsafe(disable_vtd_wa, i915.disable_vtd_wa, bool, 0600);
 

[Intel-gfx] [PATCH 150/190] drm/i915: Embed the scratch page struct into each VM

2016-01-11 Thread Chris Wilson
As the scratch page is no longer shared between all VMs, and each VM
has its own, forgo the small allocation and simply embed the scratch
page struct into the i915_address_space.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 83 +++--
 drivers/gpu/drm/i915/i915_gem_gtt.h |  6 +--
 2 files changed, 35 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4fea8d221ba7..fa7dedd395ee 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -392,29 +392,16 @@ static void fill_page_dma_32(struct drm_device *dev, 
struct i915_page_dma *p,
fill_page_dma(dev, p, v);
 }
 
-static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
+static int
+setup_scratch_page(struct drm_device *dev, struct i915_page_dma *scratch)
 {
-   struct i915_page_scratch *sp;
-   int ret;
-
-   sp = kzalloc(sizeof(*sp), GFP_KERNEL);
-   if (sp == NULL)
-   return ERR_PTR(-ENOMEM);
-
-   ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
-   if (ret) {
-   kfree(sp);
-   return ERR_PTR(ret);
-   }
-
-   return sp;
+   return __setup_page_dma(dev, scratch, GFP_DMA32 | __GFP_ZERO);
 }
 
-static void free_scratch_page(struct drm_device *dev,
- struct i915_page_scratch *sp)
+static void cleanup_scratch_page(struct drm_device *dev,
+struct i915_page_dma *scratch)
 {
-   cleanup_px(dev, sp);
-   kfree(sp);
+   cleanup_page_dma(dev, scratch);
 }
 
 static struct i915_page_table *alloc_pt(struct drm_device *dev)
@@ -460,7 +447,7 @@ static void gen8_initialize_pt(struct i915_address_space 
*vm,
 {
gen8_pte_t scratch_pte;
 
-   scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
+   scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
  I915_CACHE_LLC, true);
 
fill_px(vm->dev, pt, scratch_pte);
@@ -471,9 +458,9 @@ static void gen6_initialize_pt(struct i915_address_space 
*vm,
 {
gen6_pte_t scratch_pte;
 
-   WARN_ON(px_dma(vm->scratch_page) == 0);
+   WARN_ON(vm->scratch_page.daddr == 0);
 
-   scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
+   scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
 I915_CACHE_LLC, true, 0);
 
fill32_px(vm->dev, pt, scratch_pte);
@@ -756,7 +743,7 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
   bool use_scratch)
 {
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-   gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
+   gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
 I915_CACHE_LLC, use_scratch);
 
if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
@@ -860,21 +847,22 @@ static void gen8_free_page_tables(struct drm_device *dev,
 static int gen8_init_scratch(struct i915_address_space *vm)
 {
struct drm_device *dev = vm->dev;
+   int ret;
 
-   vm->scratch_page = alloc_scratch_page(dev);
-   if (IS_ERR(vm->scratch_page))
-   return PTR_ERR(vm->scratch_page);
+   ret = setup_scratch_page(dev, &vm->scratch_page);
+   if (ret)
+   return ret;
 
vm->scratch_pt = alloc_pt(dev);
if (IS_ERR(vm->scratch_pt)) {
-   free_scratch_page(dev, vm->scratch_page);
+   cleanup_scratch_page(dev, &vm->scratch_page);
return PTR_ERR(vm->scratch_pt);
}
 
vm->scratch_pd = alloc_pd(dev);
if (IS_ERR(vm->scratch_pd)) {
free_pt(dev, vm->scratch_pt);
-   free_scratch_page(dev, vm->scratch_page);
+   cleanup_scratch_page(dev, &vm->scratch_page);
return PTR_ERR(vm->scratch_pd);
}
 
@@ -883,7 +871,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
if (IS_ERR(vm->scratch_pdp)) {
free_pd(dev, vm->scratch_pd);
free_pt(dev, vm->scratch_pt);
-   free_scratch_page(dev, vm->scratch_page);
+   cleanup_scratch_page(dev, &vm->scratch_page);
return PTR_ERR(vm->scratch_pdp);
}
}
@@ -936,7 +924,7 @@ static void gen8_free_scratch(struct i915_address_space *vm)
free_pdp(dev, vm->scratch_pdp);
free_pd(dev, vm->scratch_pd);
free_pt(dev, vm->scratch_pt);
-   free_scratch_page(dev, vm->scratch_page);
+   cleanup_scratch_page(dev, &vm->scratch_page);
 }
 
 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
@@ -1433,7 +1421,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, 
struct seq_file *m)
struct 

[Intel-gfx] [PATCH 172/190] drm/i915: Eliminate lots of iterations over the execobjects array

2016-01-11 Thread Chris Wilson
The major scaling bottleneck in execbuffer is the processing of the
execobjects. Creating an auxiliary list is inefficient when compared to
using the execobject array we already have allocated.

Reservation is then split into phases. As we look up the VMA, we try to
bind it back into its active location. Only if that fails do we add it
to the unbound list for phase 2. In phase 2, we try to add all those
objects that could not fit into their previous location, with fallback
to retrying all objects and evicting the VM in case of severe
fragmentation. (This is the same as before, except that phase 1 is now
done inline with looking up the VMA, avoiding an iteration over the
execobject array. In the ideal case, we eliminate the separate
reservation phase.) During the reservation phase, we only evict from
the VM between passes (rather than, as currently, every time we try to
fit a new VMA). In testing with Unreal Engine's Atlantis demo, which
stresses the eviction logic on gen7 class hardware, this speeds up the
framerate by a factor of 2.

The second loop amalgamation is between move_to_gpu and move_to_active.
As we always submit the request, even if incomplete, we can use the
current request to track active VMAs as we perform the flushes and
synchronisation required.

The next big advancement is to avoid copying back to the user any
execobjects and relocations that are not changed.
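
In outline, the two-phase reservation then reads roughly as follows (a
control-flow sketch only; the eb_* helpers are illustrative stand-ins,
not the exact functions in this patch):

/* Control-flow sketch of the two-phase reservation. */
static int eb_reserve_sketch(struct i915_execbuffer *eb)
{
	struct i915_vma *vma;
	int ret;

	/* Phase 1, done inline with lookup: try to reuse the old binding;
	 * only the failures are queued for the second pass. */
	list_for_each_entry(vma, &eb->vmas, exec_link)
		if (eb_reserve_vma(eb, vma))
			list_move_tail(&vma->exec_link, &eb->unbound);

	if (list_empty(&eb->unbound))
		return 0;

	/* Phase 2: fit only the leftovers, evicting between passes. */
	ret = eb_reserve_unbound(eb);
	if (ret != -ENOSPC)
		return ret;

	/* Severe fragmentation: purge the whole VM and retry everything. */
	ret = i915_gem_evict_vm(eb->vm);
	if (ret)
		return ret;

	return eb_reserve_all(eb);
}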

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h|3 +-
 drivers/gpu/drm/i915/i915_gem.c|4 +-
 drivers/gpu/drm/i915/i915_gem_evict.c  |   71 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 1310 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.c|2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h|4 +-
 6 files changed, 713 insertions(+), 681 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2ceefce0e731..601ef7412cf9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2733,6 +2733,7 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void 
*data,
 void i915_gem_load(struct drm_device *dev);
 void *i915_gem_object_alloc(struct drm_device *dev);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
+bool i915_gem_object_flush_active(struct drm_i915_gem_object *obj);
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 const struct drm_i915_gem_object_ops *ops);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
@@ -3078,7 +3079,7 @@ int __must_check i915_gem_evict_something(struct 
drm_device *dev,
  unsigned long end,
  unsigned flags);
 int __must_check i915_gem_evict_for_vma(struct i915_vma *vma, unsigned flags);
-int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
+int i915_gem_evict_vm(struct i915_address_space *vm);
 
 /* belongs in i915_gem_gtt.h */
 static inline void i915_gem_chipset_flush(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3eeca1fb89d2..0bd6db4e83d9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2393,7 +2393,7 @@ out:
  * write domains, emitting any outstanding lazy request and retiring and
  * completed requests.
  */
-static bool i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
+bool i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 {
int i;
 
@@ -2821,7 +2821,7 @@ i915_vma_insert(struct i915_vma *vma,
  size, obj->base.size,
  flags & PIN_MAPPABLE ? "mappable" : "total",
  end);
-   return -E2BIG;
+   return -ENOSPC;
}
 
ret = i915_gem_object_get_pages(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
b/drivers/gpu/drm/i915/i915_gem_evict.c
index d40bcb81c922..e71b89bac168 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -72,7 +72,7 @@ mark_free(struct i915_vma *vma, unsigned flags, struct 
list_head *unwind)
if (flags & PIN_NOFAULT && vma->obj->fault_mappable)
return false;
 
-   list_add(&vma->exec_list, unwind);
+   list_add(&vma->evict_link, unwind);
return drm_mm_scan_add_block(&vma->node);
 }
 
@@ -154,11 +154,11 @@ search_again:
while (!list_empty(_list)) {
vma = list_first_entry(&eviction_list,
   struct i915_vma,
-  exec_list);
+  evict_link);
ret = drm_mm_scan_remove_block(&vma->node);
BUG_ON(ret);
 
-   list_del(&vma->exec_list);
+   list_del(&vma->evict_link);
}
 
/* Can we unpin some objects such as idle hw contents,
@@ -201,16 +201,16 @@ found:
 * calling 

[Intel-gfx] [PATCH 147/190] drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass

2016-01-11 Thread Chris Wilson
On an Ivybridge i7-3720qm with 1600MHz DDR3, with 32 fences,
Upload rate for 2 linear surfaces:  8134MiB/s -> 8154MiB/s
Upload rate for 2 tiled surfaces:   8625MiB/s -> 8632MiB/s
Upload rate for 4 linear surfaces:  8127MiB/s -> 8134MiB/s
Upload rate for 4 tiled surfaces:   8602MiB/s -> 8629MiB/s
Upload rate for 8 linear surfaces:  8124MiB/s -> 8137MiB/s
Upload rate for 8 tiled surfaces:   8603MiB/s -> 8624MiB/s
Upload rate for 16 linear surfaces: 8123MiB/s -> 8128MiB/s
Upload rate for 16 tiled surfaces:  8606MiB/s -> 8618MiB/s
Upload rate for 32 linear surfaces: 8121MiB/s -> 8128MiB/s
Upload rate for 32 tiled surfaces:  8605MiB/s -> 8614MiB/s
Upload rate for 64 linear surfaces: 8121MiB/s -> 8127MiB/s
Upload rate for 64 tiled surfaces:  3017MiB/s -> 5202MiB/s
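
The gain comes from replacing the removed per-page vm_insert_pfn() loop
in i915_gem_fault() with a single remap_io_mapping() call; that call
site is truncated from the diff below, but given the new declaration it
would be along these lines (a sketch, not the verbatim hunk):

	/* Prefault the whole mapping in one pass over the page tables. */
	ret = remap_io_mapping(vma,
			       vma->vm_start,
			       (dev_priv->gtt.mappable_base +
				ggtt->node.start) >> PAGE_SHIFT,
			       min_t(unsigned long,
				     vma->vm_end - vma->vm_start,
				     obj->base.size),
			       dev_priv->gtt.mappable);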

Signed-off-by: Chris Wilson 
Testcase: igt/gem_fence_upload/performance
Testcase: igt/gem_mmap_gtt
---
 drivers/gpu/drm/Makefile   |   2 +-
 drivers/gpu/drm/i915/Makefile  |   5 +-
 drivers/gpu/drm/i915/i915_drv.h|   4 ++
 drivers/gpu/drm/i915/i915_gem.c|  46 +++---
 drivers/gpu/drm/i915/i915_memory.c | 122 +
 5 files changed, 138 insertions(+), 41 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_memory.c

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index f858aa25fbb2..6834d0e33741 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -43,7 +43,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
 obj-$(CONFIG_DRM_MGA)  += mga/
 obj-$(CONFIG_DRM_I810) += i810/
-obj-$(CONFIG_DRM_I915)  += i915/
+obj-y += i915/
 obj-$(CONFIG_DRM_MGAG200) += mgag200/
 obj-$(CONFIG_DRM_VC4)  += vc4/
 obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 79d657f29241..a362425ef862 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -100,6 +100,9 @@ i915-y += i915_vgpu.o
 # legacy horrors
 i915-y += i915_dma.o
 
-obj-$(CONFIG_DRM_I915)  += i915.o
+obj-$(CONFIG_DRM_I915) += i915.o
+ifdef CONFIG_DRM_I915
+obj-y += i915_memory.o
+endif
 
 CFLAGS_i915_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 45b8cbdfab55..e6f49175af1b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3447,4 +3447,8 @@ static inline bool __i915_request_irq_complete(struct 
drm_i915_gem_request *req)
return false;
 }
 
+int remap_io_mapping(struct vm_area_struct *vma,
+unsigned long addr, unsigned long pfn, unsigned long size,
+struct io_mapping *iomap);
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7e321fdd90d2..1fa4752682d6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1449,7 +1449,6 @@ int i915_gem_fault(struct vm_area_struct *vma, struct 
vm_fault *vmf)
struct drm_i915_private *dev_priv = dev->dev_private;
struct i915_vma *ggtt;
pgoff_t page_offset;
-   unsigned long pfn;
int ret = 0;
bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 
@@ -1517,44 +1516,13 @@ int i915_gem_fault(struct vm_area_struct *vma, struct 
vm_fault *vmf)
goto unpin;
 
/* Finally, remap it using the new GTT offset */
-   pfn = dev_priv->gtt.mappable_base + ggtt->node.start;
-   pfn >>= PAGE_SHIFT;
-
-   if (ggtt->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
-   if (!obj->fault_mappable) {
-   unsigned long size = min_t(unsigned long,
-  vma->vm_end - vma->vm_start,
-  obj->base.size);
-   int i;
-
-   for (i = 0; i < size >> PAGE_SHIFT; i++) {
-   ret = vm_insert_pfn(vma,
-   (unsigned 
long)vma->vm_start + i * PAGE_SIZE,
-   pfn + i);
-   if (ret)
-   break;
-   }
-   } else
-   ret = vm_insert_pfn(vma,
-   (unsigned long)vmf->virtual_address,
-   pfn + page_offset);
-   } else {
-   /* Overriding existing pages in partial view does not cause
-* us any trouble as TLBs are still valid because the fault
-* is due to userspace losing part of the mapping or never
-* having accessed it before (at this partials' range).
-*/
const struct i915_ggtt_view *view = &ggtt->ggtt_view;
-   unsigned long base = vma->vm_start +
-(view->params.partial.offset << 

[Intel-gfx] [PATCH 157/190] drm/i915: Tidy execlists by using intel_context_engine locals

2016-01-11 Thread Chris Wilson
No functional changes, just less typing.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_lrc.c | 63 
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c2a45f48da66..62f19ed51fb2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -485,6 +485,7 @@ static int execlists_context_queue(struct 
drm_i915_gem_request *request)
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request 
*request)
 {
struct intel_engine_cs *engine = request->engine;
+   struct intel_context_engine *ce = &request->ctx->engine[engine->id];
int ret;
 
if (i915.enable_guc_submission) {
@@ -498,25 +499,25 @@ int intel_logical_ring_alloc_request_extras(struct 
drm_i915_gem_request *request
ret = i915_guc_wq_check_space(guc->execbuf_client);
}
 
-   if (request->ctx->engine[engine->id].state == NULL) {
+   if (ce->state == NULL) {
ret = execlists_context_deferred_alloc(request->ctx, engine);
if (ret)
return ret;
}
 
-   request->ring = request->ctx->engine[engine->id].ring;
+   request->ring = ce->ring;
 
ret = intel_lr_context_pin(request->ctx, engine);
if (ret)
return ret;
 
-   if (!request->ctx->engine[engine->id].initialised) {
+   if (!ce->initialised) {
ret = engine->init_context(request);
if (ret) {
intel_lr_context_unpin(request->ctx, engine);
return ret;
}
-   request->ctx->engine[engine->id].initialised = true;
+   ce->initialised = true;
}
 
return 0;
@@ -569,18 +570,18 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
 static int intel_lr_context_pin(struct intel_context *ctx,
struct intel_engine_cs *engine)
 {
-   struct drm_i915_private *dev_priv = engine->i915;
+   struct intel_context_engine *ce = &ctx->engine[engine->id];
struct i915_vma *vma;
struct intel_ring *ring;
u32 ggtt_offset;
int ret;
 
-   if (ctx->engine[engine->id].pin_count++)
+   if (ce->pin_count++)
return 0;
 
lockdep_assert_held(&engine->dev->struct_mutex);
 
-   vma = i915_gem_object_ggtt_pin(ctx->engine[engine->id].state, NULL,
+   vma = i915_gem_object_ggtt_pin(ce->state, NULL,
   0, GEN8_LR_CONTEXT_ALIGN,
   PIN_OFFSET_BIAS | GUC_WOPCM_TOP |
   PIN_HIGH);
@@ -589,13 +590,13 @@ static int intel_lr_context_pin(struct intel_context *ctx,
goto err;
}
 
-   ring = ctx->engine[engine->id].ring;
+   ring = ce->ring;
ret = intel_ring_map(ring);
if (ret)
goto unpin;
 
i915_gem_context_reference(ctx);
-   ctx->engine[engine->id].vma = vma;
+   ce->vma = vma;
vma->obj->dirty = true;
 
ggtt_offset = vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE;
@@ -607,30 +608,33 @@ static int intel_lr_context_pin(struct intel_context *ctx,
ring->registers[CTX_RING_BUFFER_START+1] = ring->vma->node.start;
 
/* Invalidate GuC TLB. */
-   if (i915.enable_guc_submission)
+   if (i915.enable_guc_submission) {
+   struct drm_i915_private *dev_priv = engine->i915;
I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+   }
 
return 0;
 
 unpin:
__i915_vma_unpin(vma);
 err:
-   ctx->engine[engine->id].pin_count = 0;
+   ce->pin_count = 0;
return ret;
 }
 
 void intel_lr_context_unpin(struct intel_context *ctx,
struct intel_engine_cs *engine)
 {
+   struct intel_context_engine *ce = &ctx->engine[engine->id];
struct i915_vma *vma;
 
lockdep_assert_held(&engine->dev->struct_mutex);
-   if (--ctx->engine[engine->id].pin_count)
+   if (--ce->pin_count)
return;
 
-   intel_ring_unmap(ctx->engine[engine->id].ring);
+   intel_ring_unmap(ce->ring);
 
-   vma = ctx->engine[engine->id].vma;
+   vma = ce->vma;
kunmap(i915_gem_object_get_page(vma->obj, LRC_STATE_PN));
i915_vma_unpin(vma);
 
@@ -1929,12 +1933,13 @@ static void lrc_setup_hardware_status_page(struct 
intel_engine_cs *ring,
 static int execlists_context_deferred_alloc(struct intel_context *ctx,
struct intel_engine_cs *engine)
 {
+   struct intel_context_engine *ce = &ctx->engine[engine->id];
struct drm_i915_gem_object *ctx_obj;
uint32_t context_size;
struct intel_ring *ring;
int ret;
 
-   WARN_ON(ctx->engine[engine->id].state);
+   WARN_ON(ce->state);
 

[Intel-gfx] [PATCH 160/190] drm: Track drm_mm nodes with an interval tree

2016-01-11 Thread Chris Wilson
In addition to the last-in/first-out stack for accessing drm_mm nodes,
we occasionally, and in the future often, want to find a drm_mm_node by
an address. To do so efficiently we need to track the nodes in an
interval tree: lookups for a particular address will then be O(lg(N)),
where N is the number of nodes in the range manager, as opposed to
O(N). Insertion, however, gains an extra O(lg(N)) step for all nodes,
irrespective of whether the interval tree is in use. For future i915
patches, eliminating the linear walk is a significant improvement.

v2: Use generic interval-tree template for u64 and faster insertion.
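
With the two helpers exported below, walking every node that overlaps a
given range becomes, for example (a usage sketch, not part of the diff):

/* Usage sketch: visit every drm_mm_node overlapping [start, last]. */
static void show_overlaps(struct drm_mm *mm, u64 start, u64 last)
{
	struct drm_mm_node *node;

	for (node = drm_mm_interval_first(mm, start, last);
	     node != NULL;
	     node = drm_mm_interval_next(node, start, last))
		DRM_DEBUG("node [%llx + %llx]\n", node->start, node->size);
}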

Signed-off-by: Chris Wilson 
Cc: dri-de...@lists.freedesktop.org
---
 drivers/gpu/drm/drm_mm.c | 124 ++-
 include/drm/drm_mm.h |  12 +
 2 files changed, 113 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 04de6fd88f8c..fff084f266f9 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -46,6 +46,7 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/export.h>
+#include <linux/interval_tree_generic.h>
 
 /**
  * DOC: Overview
@@ -103,6 +104,63 @@ static struct drm_mm_node 
*drm_mm_search_free_in_range_generic(const struct drm_
u64 end,
enum drm_mm_search_flags flags);
 
+#define START(node) ((node)->start)
+#define LAST(node)  ((node)->start + (node)->size - 1)
+
+INTERVAL_TREE_DEFINE(struct drm_mm_node, rb,
+u64, __subtree_last,
+START, LAST, static inline, drm_mm_interval_tree)
+
+struct drm_mm_node *
+drm_mm_interval_first(struct drm_mm *mm, u64 start, u64 last)
+{
+   return drm_mm_interval_tree_iter_first(&mm->interval_tree,
+  start, last);
+}
+EXPORT_SYMBOL(drm_mm_interval_first);
+
+struct drm_mm_node *
+drm_mm_interval_next(struct drm_mm_node *node, u64 start, u64 last)
+{
+   return drm_mm_interval_tree_iter_next(node, start, last);
+}
+EXPORT_SYMBOL(drm_mm_interval_next);
+
+static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
+ struct drm_mm_node *node)
+{
+   struct drm_mm *mm = hole_node->mm;
+   struct rb_node **link, *rb_parent;
+   struct drm_mm_node *parent;
+
+   node->__subtree_last = LAST(node);
+
+   if (hole_node->allocated) {
+   hole_node->__subtree_last = node->__subtree_last;
+   rb_parent = &hole_node->rb;
+   link = &hole_node->rb.rb_right;
+   } else {
+   rb_parent = NULL;
+   link = &mm->interval_tree.rb_node;
+   }
+
+   while (*link) {
+   rb_parent = *link;
+   parent = rb_entry(rb_parent, struct drm_mm_node, rb);
+   if (parent->__subtree_last < node->__subtree_last)
+   parent->__subtree_last = node->__subtree_last;
+   if (node->start < parent->start)
+   link = &parent->rb.rb_left;
+   else
+   link = &parent->rb.rb_right;
+   }
+
+   rb_link_node(&node->rb, rb_parent, link);
+   rb_insert_augmented(&node->rb,
+   &mm->interval_tree,
+   &drm_mm_interval_tree_augment);
+}
+
 static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 struct drm_mm_node *node,
 u64 size, unsigned alignment,
@@ -153,6 +211,8 @@ static void drm_mm_insert_helper(struct drm_mm_node 
*hole_node,
INIT_LIST_HEAD(&node->hole_stack);
list_add(&node->node_list, &hole_node->node_list);
 
+   drm_mm_interval_tree_add_node(hole_node, node);
+
BUG_ON(node->start + node->size > adj_end);
 
node->hole_follows = 0;
@@ -178,39 +238,50 @@ static void drm_mm_insert_helper(struct drm_mm_node 
*hole_node,
  */
 int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
 {
-   struct drm_mm_node *hole;
u64 end = node->start + node->size;
-   u64 hole_start;
-   u64 hole_end;
-
-   BUG_ON(node == NULL);
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end;
 
/* Find the relevant hole to add our node to */
-   drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
-   if (hole_start > node->start || hole_end < end)
-   continue;
+   hole = drm_mm_interval_tree_iter_first(&mm->interval_tree,
+  node->start, ~(u64)0);
+   if (hole) {
+   if (hole->start <= node->start)
+   return -ENOSPC;
+   } else {
+   hole = list_entry(&mm->head_node.node_list,
+ typeof(*hole), node_list);
+   }
 
-   node->mm = mm;
-   node->allocated = 1;
+   hole = list_last_entry(&hole->node_list, typeof(*hole), node_list);
+   if (!hole->hole_follows)
+   

[Intel-gfx] [PATCH 177/190] drm/i915: Use VMA as the primary object for context state

2016-01-11 Thread Chris Wilson
When working with contexts, what we want first and foremost is the GGTT
VMA for the context state. Since the object is available via the VMA,
we then need only store the VMA.
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c| 13 ++--
 drivers/gpu/drm/i915/i915_drv.h|  3 +-
 drivers/gpu/drm/i915/i915_gem_context.c| 99 --
 drivers/gpu/drm/i915/i915_gpu_error.c  |  2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |  6 +-
 drivers/gpu/drm/i915/intel_lrc.c   | 53 
 6 files changed, 93 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index f8ca00ce986e..7fb4088b3966 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -377,7 +377,7 @@ static int per_file_ctx_stats(int id, void *ptr, void *data)
 
for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) {
if (ctx->engine[n].state)
-   per_file_stats(0, ctx->engine[n].state, data);
+   per_file_stats(0, ctx->engine[n].state->obj, data);
if (ctx->engine[n].ring)
per_file_stats(0, ctx->engine[n].ring->obj, data);
}
@@ -2002,7 +2002,7 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
seq_printf(m, "%s: ", ring->name);
seq_putc(m, ctx->engine[i].initialised ? 'I' : 'i');
if (ctx->engine[i].state)
-   describe_obj(m, ctx->engine[i].state);
+   describe_obj(m, ctx->engine[i].state->obj);
if (ctx->engine[i].ring)
describe_ctx_ring(m, ctx->engine[i].ring);
seq_putc(m, '\n');
@@ -2025,14 +2025,13 @@ static void i915_dump_lrc_obj(struct seq_file *m,
  struct intel_engine_cs *ring,
  struct intel_context *ctx)
 {
-   struct drm_i915_gem_object *obj = ctx->engine[ring->id].state;
-   struct i915_vma *vma = ctx->engine[ring->id].vma;
+   struct i915_vma *vma = ctx->engine[ring->id].state;
struct page *page;
int j;
 
seq_printf(m, "CONTEXT: %s\n", ring->name);
 
-   if (obj == NULL) {
+   if (vma == NULL) {
seq_printf(m, "\tUnallocated\n\n");
return;
}
@@ -2045,12 +2044,12 @@ static void i915_dump_lrc_obj(struct seq_file *m,
   lower_32_bits(vma->node.start));
}
 
-   if (i915_gem_object_get_pages(obj)) {
+   if (i915_gem_object_get_pages(vma->obj)) {
seq_puts(m, "\tFailed to get pages for context object\n\n");
return;
}
 
-   page = i915_gem_object_get_page(obj, LRC_STATE_PN);
+   page = i915_gem_object_get_page(vma->obj, LRC_STATE_PN);
if (page != NULL) {
uint32_t *reg_state = kmap_atomic(page);
for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a4311e2d2140..6827e26b5681 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -886,8 +886,7 @@ struct intel_context {
 #define CONTEXT_NO_ERROR_CAPTURE   (1<<1)
 
struct intel_context_engine {
-   struct drm_i915_gem_object *state;
-   struct i915_vma *vma;
+   struct i915_vma *state;
struct intel_ring *ring;
int pin_count;
bool initialised;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 4e0c5e161e84..0c4864eca5f6 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -214,7 +214,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
if (ce->ring)
intel_ring_free(ce->ring);
 
-   __i915_gem_object_release_unless_active(ce->state);
+   __i915_gem_object_release_unless_active(ce->state->obj);
}
 
decouple_vma(ctx);
@@ -322,13 +322,26 @@ __create_hw_context(struct drm_device *dev,
INIT_WORK(&ctx->vma_ht_resize, resize_vma_ht);
 
if (dev_priv->hw_context_size) {
-   struct drm_i915_gem_object *obj =
-   i915_gem_alloc_context_obj(dev, 
dev_priv->hw_context_size);
+   struct drm_i915_gem_object *obj;
+   struct i915_vma *vma;
+
+   obj = i915_gem_alloc_context_obj(dev,
+dev_priv->hw_context_size);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
goto err_out;
}
-   ctx->engine[RCS].state = obj;
+
+

[Intel-gfx] [PATCH 164/190] drm/i915: Move obj->dirty:1 to obj->flags

2016-01-11 Thread Chris Wilson
The obj->dirty bit is a companion to the obj->active bits that were
moved to the obj->flags bitmask. Since we also update this bit inside
the i915_vma_move_to_active() hotpath, we can aid gcc by also moving
the obj->dirty bit to the obj->flags bitmask.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c|  2 +-
 drivers/gpu/drm/i915/i915_drv.h| 21 -
 drivers/gpu/drm/i915/i915_gem.c| 18 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +--
 drivers/gpu/drm/i915/i915_gem_userptr.c|  6 +++---
 drivers/gpu/drm/i915/i915_gpu_error.c  |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c   |  2 +-
 7 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 558d79b63e6c..8a59630fe5fb 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -136,7 +136,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
seq_printf(m, "] %x %s%s%s",
   i915_gem_request_get_seqno(obj->last_write.request),
   i915_cache_level_str(to_i915(obj->base.dev), 
obj->cache_level),
-  obj->dirty ? " dirty" : "",
+  i915_gem_object_is_dirty(obj) ? " dirty" : "",
   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
if (obj->base.name)
seq_printf(m, " (name: %d)", obj->base.name);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 62a024a7225b..d664a67cda7b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2058,7 +2058,8 @@ struct drm_i915_gem_object {
 * This is set if the object has been written to since last bound
 * to the GTT
 */
-   unsigned int dirty:1;
+#define I915_BO_DIRTY_SHIFT (I915_BO_ACTIVE_REF_SHIFT + 1)
+#define I915_BO_DIRTY_BIT (1 << I915_BO_DIRTY_SHIFT)
 
/**
 * Advice: are the backing pages purgeable?
@@ -2189,6 +2190,24 @@ i915_gem_object_unset_active_reference(struct 
drm_i915_gem_object *obj)
 }
 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
 
+static inline bool
+i915_gem_object_is_dirty(const struct drm_i915_gem_object *obj)
+{
+   return obj->flags & I915_BO_DIRTY_BIT;
+}
+
+static inline void
+i915_gem_object_set_dirty(struct drm_i915_gem_object *obj)
+{
+   obj->flags |= I915_BO_DIRTY_BIT;
+}
+
+static inline void
+i915_gem_object_unset_dirty(struct drm_i915_gem_object *obj)
+{
+   obj->flags &= ~I915_BO_DIRTY_BIT;
+}
+
 void i915_gem_track_fb(struct drm_i915_gem_object *old,
   struct drm_i915_gem_object *new,
   unsigned frontbuffer_bits);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 497b68849d09..5347469bbea1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -209,9 +209,9 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object 
*obj)
}
 
if (obj->madv == I915_MADV_DONTNEED)
-   obj->dirty = 0;
+   i915_gem_object_unset_dirty(obj);
 
-   if (obj->dirty) {
+   if (i915_gem_object_is_dirty(obj)) {
struct address_space *mapping = 
file_inode(obj->base.filp)->i_mapping;
char *vaddr = obj->phys_handle->vaddr;
int i;
@@ -235,7 +235,7 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object 
*obj)
page_cache_release(page);
vaddr += PAGE_SIZE;
}
-   obj->dirty = 0;
+   i915_gem_object_unset_dirty(obj);
}
 
sg_free_table(obj->pages);
@@ -589,7 +589,7 @@ int i915_gem_obj_prepare_shmem_write(struct 
drm_i915_gem_object *obj,
 
 out:
intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-   obj->dirty = 1;
+   i915_gem_object_set_dirty(obj);
/* return with the pages pinned */
return 0;
 
@@ -1836,12 +1836,12 @@ i915_gem_object_put_pages_gtt(struct 
drm_i915_gem_object *obj)
i915_gem_object_save_bit_17_swizzle(obj);
 
if (obj->madv == I915_MADV_DONTNEED)
-   obj->dirty = 0;
+   i915_gem_object_unset_dirty(obj);
 
for_each_sg_page(obj->pages->sgl, _iter, obj->pages->nents, 0) {
struct page *page = sg_page_iter_page(_iter);
 
-   if (obj->dirty)
+   if (i915_gem_object_is_dirty(obj))
set_page_dirty(page);
 
if (obj->madv == I915_MADV_WILLNEED)
@@ -1849,7 +1849,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object 
*obj)
 
page_cache_release(page);
}
-   obj->dirty = 0;
+   i915_gem_object_unset_dirty(obj);
 
sg_free_table(obj->pages);
kfree(obj->pages);
@@ -3029,7 +3029,7 @@ 

[Intel-gfx] [PATCH 179/190] drm/i915: Skip MI_SET_CONTEXT for the same context

2016-01-11 Thread Chris Wilson
Fixes regression from

commit 71b7e54f71b899db9f8def67a0e976969384e699
Author: Daniel Vetter 
Date:   Tue Apr 14 17:35:18 2015 +0200

drm/i915: Don't look at pg_dirty_rings for aliasing ppgtt

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 0c4864eca5f6..060e902afd1c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -680,7 +680,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 
intel_ring_advance(ring);
 
-   return ret;
+   return 0;
 }
 
 static inline bool should_skip_switch(struct intel_engine_cs *ring,
@@ -690,9 +690,13 @@ static inline bool should_skip_switch(struct 
intel_engine_cs *ring,
if (to->remap_slice)
return false;
 
-   if (to->ppgtt && from == to &&
-   !(intel_engine_flag(ring) & to->ppgtt->pd_dirty_rings))
-   return true;
+   if (from == to) {
+   if (to->ppgtt == NULL)
+   return true;
+
+   if (!(intel_engine_flag(ring) & to->ppgtt->pd_dirty_rings))
+   return true;
+   }
 
return false;
 }
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 173/190] drm/i915: Wait upon userptr get-user-pages within execbuffer

2016-01-11 Thread Chris Wilson
This simply hides the EAGAIN returned by userptr when userspace causes
resource contention.
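
The mechanism, visible in the diff below, is a driver-private workqueue
for the get-user-pages workers which execbuffer can flush wholesale before
retrying. A self-contained sketch of that pattern (illustrative names, not
the driver's; error handling trimmed):

#include <linux/workqueue.h>

static struct workqueue_struct *userptr_wq;     /* private, flushable */

struct gup_work {
        struct work_struct work;
        /* pages being faulted in, result slot, etc. */
};

static void gup_worker(struct work_struct *work)
{
        /* the slow get_user_pages() runs here, outside driver locks */
}

static int gup_init(void)
{
        userptr_wq = alloc_workqueue("example-userptr", WQ_HIGHPRI, 0);
        return userptr_wq ? 0 : -ENOMEM;
}

static void gup_submit(struct gup_work *w)
{
        INIT_WORK(&w->work, gup_worker);
        queue_work(userptr_wq, &w->work);       /* not schedule_work() */
}

static void gup_sync(void)
{
        flush_workqueue(userptr_wq);            /* all queued lookups done */
}

static void gup_fini(void)
{
        destroy_workqueue(userptr_wq);
}

Submitting to the system workqueue would have worked, but only a dedicated
queue makes the targeted flush in execbuffer possible without stalling
unrelated work.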

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_dma.c|  1 +
 drivers/gpu/drm/i915/i915_drv.h|  8 
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +++
 drivers/gpu/drm/i915/i915_gem_userptr.c| 16 +---
 4 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 7d85c3bea02a..c1afbd873197 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1192,6 +1192,7 @@ int i915_driver_unload(struct drm_device *dev)
mutex_unlock(&dev->struct_mutex);
intel_fbc_cleanup_cfb(dev_priv);
i915_gem_cleanup_stolen(dev);
+   i915_gem_cleanup_userptr(dev);
 
intel_csr_ucode_fini(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 601ef7412cf9..a4311e2d2140 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1295,6 +1295,13 @@ struct i915_gem_mm {
struct delayed_work idle_work;
 
/**
+* Workqueue to fault in userptr pages, flushed by the execbuf
+* when required but otherwise left to userspace to try again
+* on EAGAIN.
+*/
+   struct workqueue_struct *userptr_wq;
+
+   /**
 * Are we in a non-interruptible section of code like
 * modesetting?
 */
@@ -2724,6 +2731,7 @@ int i915_gem_set_tiling(struct drm_device *dev, void 
*data,
 int i915_gem_get_tiling(struct drm_device *dev, void *data,
struct drm_file *file_priv);
 int i915_gem_init_userptr(struct drm_device *dev);
+void i915_gem_cleanup_userptr(struct drm_device *dev);
 int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
   struct drm_file *file);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f40d3254249a..733250afa139 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1133,6 +1133,9 @@ repeat:
}
}
 
+   /* A frequent cause of EAGAIN is currently unavailable client pages */
+   flush_workqueue(eb->i915->mm.userptr_wq);
+
ret = i915_mutex_lock_interruptible(dev);
if (ret) {
mutex_lock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 232ce85b39db..54385f6c7e14 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -102,7 +102,8 @@ static unsigned long cancel_userptr(struct i915_mmu_object 
*mo)
 * is freed and then double free it.
 */
if (mo->active && kref_get_unless_zero(&mo->obj->base.refcount)) {
-   schedule_work(&mo->work);
+   queue_work(to_i915(mo->obj->base.dev)->mm.userptr_wq,
+  &mo->work);
/* only schedule one work packet to avoid the refleak */
mo->active = false;
}
@@ -450,7 +451,7 @@ __i915_mm_struct_free(struct kref *kref)
mutex_unlock(&to_i915(mm->dev)->mm_lock);
 
INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
-   schedule_work(&mm->work);
+   queue_work(to_i915(mm->dev)->mm.userptr_wq, &mm->work);
 }
 
 static void
@@ -664,7 +665,7 @@ __i915_gem_userptr_get_pages_schedule(struct 
drm_i915_gem_object *obj,
get_task_struct(work->task);
 
INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
-   schedule_work(&work->work);
+   queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);
 
*active = true;
return -EAGAIN;
@@ -886,5 +887,14 @@ i915_gem_init_userptr(struct drm_device *dev)
struct drm_i915_private *dev_priv = to_i915(dev);
mutex_init(&dev_priv->mm_lock);
hash_init(dev_priv->mm_structs);
+   dev_priv->mm.userptr_wq =
+   alloc_workqueue("i915-userptr", WQ_HIGHPRI, 0);
return 0;
 }
+
+void
+i915_gem_cleanup_userptr(struct drm_device *dev)
+{
+   struct drm_i915_private *dev_priv = to_i915(dev);
+   destroy_workqueue(dev_priv->mm.userptr_wq);
+}
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 180/190] drm/i915: Micro-optimise i915_gem_object_get_dirty_page()

2016-01-11 Thread Chris Wilson
We can skip the set_page_dirty() calls if we already know that the
entire object is dirty. Furthermore, the WARN is redundant (we'll crash
shortly afterwards anyway) yet adds substantial overhead to the function
(roughly increasing the per-page relocation cost by 10%).
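
The object-level flag makes the per-page call redundant: the put-pages
path already calls set_page_dirty() on every backing page of a dirty
object, so marking individual pages again here buys nothing. The new
function therefore reduces to (restated with comments, matching the hunk
below):

struct page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
                               unsigned int n)
{
        struct page *page = i915_gem_object_get_page(obj, n);

        /* If the whole object is already marked dirty, every page will
         * be set dirty when the pages are released; only track the
         * individual page while the object-level flag is clear.
         */
        if (!i915_gem_object_is_dirty(obj))
                set_page_dirty(page);
        return page;
}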

Fixes regression from
commit 033908aed5a596f6202c848c6bbc8a40fb1a8490
Author: Dave Gordon 
Date:   Thu Dec 10 18:51:23 2015 +

drm/i915: mark GEM object pages dirty when mapped & written by the CPU

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h |  8 +---
 drivers/gpu/drm/i915/i915_gem.c | 14 +-
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6827e26b5681..2f8b5e7f9320 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2807,16 +2807,18 @@ int i915_gem_obj_prepare_shmem_write(struct 
drm_i915_gem_object *obj,
 
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 
-static inline int __sg_page_count(struct scatterlist *sg)
+static inline int __sg_page_count(const struct scatterlist *sg)
 {
return sg->length >> PAGE_SHIFT;
 }
 
 struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n);
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+  unsigned int n);
 
 static inline struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
+i915_gem_object_get_page(struct drm_i915_gem_object *obj,
+unsigned int n)
 {
if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
return NULL;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d452499ae5a9..9cd161645041 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4388,16 +4388,12 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 
 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
 struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+  unsigned int n)
 {
-   struct page *page;
-
-   /* Only default objects have per-page dirty tracking */
-   if (WARN_ON(obj->ops != &i915_gem_object_ops))
-   return NULL;
-
-   page = i915_gem_object_get_page(obj, n);
-   set_page_dirty(page);
+   struct page *page = i915_gem_object_get_page(obj, n);
+   if (!i915_gem_object_is_dirty(obj))
+   set_page_dirty(page);
return page;
 }
 
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 181/190] drm/i915: Introduce an internal allocator for disposable private objects

2016-01-11 Thread Chris Wilson
Quite a few of our objects used for internal hardware programming do not
benefit from being swappable or from being zero initialised. As such
they do not benefit from using a shmemfs backing storage and, since they
are internal and never directly exposed to the user, we do not need to
worry about providing a filp. For these we can use a
drm_i915_gem_object wrapper around an sg_table of plain struct page. They
are not swap backed and not automatically pinned. If they are reaped
by the shrinker, the pages are released and the contents discarded. For
the internal use case this is fine, as, for example, ringbuffers are
pinned from the time they are written by a request until they have been
read by the hardware. Once they are idle, they can be discarded entirely.
As such they are a good match for execlist ringbuffers and a small
variety of other internal objects.

In the first iteration, this is limited to the scratch batch buffers we
use (for command parsing and state initialisation).
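
The core of such an allocator is just an sg_table populated with raw
pages, with no file behind them. A minimal sketch under those assumptions
(error unwinding abbreviated; the real object must also cooperate with
the shrinker):

#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/gfp.h>

/* Illustrative: gather 'npages' plain pages into a scatterlist. */
static struct sg_table *example_alloc_internal(unsigned int npages)
{
        struct sg_table *st;
        struct scatterlist *sg;
        unsigned int i;

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                return NULL;
        if (sg_alloc_table(st, npages, GFP_KERNEL)) {
                kfree(st);
                return NULL;
        }

        for_each_sg(st->sgl, sg, npages, i) {
                struct page *page = alloc_page(GFP_KERNEL);

                if (!page)
                        goto err;       /* real code frees prior pages too */
                sg_set_page(sg, page, PAGE_SIZE, 0);
        }
        return st;

err:
        sg_free_table(st);
        kfree(st);
        return NULL;
}

Because nothing is written back when the pages are released, the contents
are genuinely disposable: a good fit for ringbuffers and batch pools,
wrong for anything user-visible.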

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Makefile|   1 +
 drivers/gpu/drm/i915/i915_drv.h  |   8 ++
 drivers/gpu/drm/i915/i915_gem.c  |   9 +-
 drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  27 ++---
 drivers/gpu/drm/i915/i915_gem_internal.c | 157 +++
 drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
 6 files changed, 180 insertions(+), 24 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a362425ef862..8d0fae65a5bd 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -28,6 +28,7 @@ i915-y += i915_cmd_parser.o \
  i915_gem_execbuffer.o \
  i915_gem_fence.o \
  i915_gem_gtt.o \
+ i915_gem_internal.o \
  i915_gem.o \
  i915_gem_render_state.o \
  i915_gem_request.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2f8b5e7f9320..e3c77d245a6b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1989,6 +1989,9 @@ enum hdmi_force_audio {
 #define I915_GTT_OFFSET_NONE ((u32)-1)
 
 struct drm_i915_gem_object_ops {
+   unsigned int flags;
+#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1
+
/* Interface between the GEM object and its backing storage.
 * get_pages() is called once prior to the use of the associated set
 * of pages before to binding them into the GTT, and put_pages() is
@@ -3117,6 +3120,11 @@ i915_gem_object_create_stolen_for_preallocated(struct 
drm_device *dev,
   u32 gtt_offset,
   u32 size);
 
+/* i915_gem_internal.c */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_device *dev,
+   unsigned size);
+
 /* i915_gem_shrinker.c */
 unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
  unsigned long target,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9cd161645041..cca45f60d0bd 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -497,7 +497,7 @@ int i915_gem_obj_prepare_shmem_read(struct 
drm_i915_gem_object *obj,
int ret;
 
*needs_clflush = 0;
-   if (!obj->base.filp)
+   if ((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)
return -EINVAL;
 
ret = i915_gem_object_get_pages(obj);
@@ -547,7 +547,7 @@ int i915_gem_obj_prepare_shmem_write(struct 
drm_i915_gem_object *obj,
int ret;
 
*needs_clflush = 0;
-   if (!obj->base.filp)
+   if ((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)
return -EINVAL;
 
ret = i915_gem_object_get_pages(obj);
@@ -800,7 +800,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
/* prime objects have no backing filp to GEM pread/pwrite
 * pages from.
 */
-   if (!obj->base.filp) {
+   if ((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0) {
ret = -EINVAL;
goto out;
}
@@ -1131,7 +1131,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
/* prime objects have no backing filp to GEM pread/pwrite
 * pages from.
 */
-   if (!obj->base.filp) {
+   if ((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0) {
ret = -EINVAL;
goto out;
}
@@ -3750,6 +3750,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
+   .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
.get_pages = i915_gem_object_get_pages_gtt,
.put_pages = i915_gem_object_put_pages_gtt,
 };
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c 

[Intel-gfx] [PATCH 163/190] drm/i915: Fix i915_gem_evict_for_vma (soft-pinning)

2016-01-11 Thread Chris Wilson
Soft-pinning depends upon being able to check the availability of an
interval and evict overlapping objects from a drm_mm range manager very
quickly. Currently it walks a linear list, which makes performance dire
and soft-pinning not a suitable replacement.

It also helps if the routine reports the correct error codes as expected
by its callers and emits a tracepoint upon use.
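
The operation that needs to be fast: given a target interval in the
address space, visit only the nodes that could overlap it. With nodes
sorted by start address the scan can terminate early. A toy standalone
model of that check (hypothetical, far simpler than drm_mm itself):

#include <stdio.h>

struct node { unsigned long start, size; };

/* Nodes sorted by start: stop as soon as one begins at/after 'end'. */
static int first_overlap(const struct node *v, int n,
                         unsigned long start, unsigned long end)
{
        int i;

        for (i = 0; i < n; i++) {
                if (v[i].start >= end)
                        break;                  /* past the interval: done */
                if (v[i].start + v[i].size > start)
                        return i;               /* overlaps [start, end) */
        }
        return -1;                              /* interval is free */
}

int main(void)
{
        const struct node vm[] = { { 0, 4096 }, { 8192, 4096 }, { 16384, 8192 } };

        printf("%d\n", first_overlap(vm, 3, 4096, 12288));     /* -> 1 */
        return 0;
}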

For posterity, since the wrong patch was pushed (i.e. one that missed
these key points), this is the changelog that should have been on

commit 506a8e87d8d2746b9e9d2433503fe237c54e4750
Author: Chris Wilson 
Date:   Tue Dec 8 11:55:07 2015 +

drm/i915: Add soft-pinning API for execbuffer

Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.

This extends the DRM_IOCTL_I915_GEM_EXECBUFFER2 to do the following:
* if the user supplies a virtual address via the execobject->offset
  *and* sets the EXEC_OBJECT_PINNED flag in execobject->flags, then
  that object is placed at that offset in the address space selected
  by the context specifier in execbuffer.
* the location must be aligned to the GTT page size, 4096 bytes
* as the object is placed exactly as specified, it may be used by this
  execbuffer call without relocations pointing to it

It may fail to do so if:
* EINVAL is returned if the object does not have a 4096 byte aligned
  address
* the object conflicts with another pinned object (either pinned by
  hardware in that address space, e.g. scanouts in the aliasing ppgtt)
  or within the same batch.
  EBUSY is returned if the location is pinned by hardware
  EINVAL is returned if the location is already in use by the batch
* EINVAL is returned if the object conflicts with its own alignment (as meets
  the hardware requirements) or if the placement of the object does not fit
  within the address space

All other execbuffer errors apply.

Presence of this execbuf extension may be queried by passing
I915_PARAM_HAS_EXEC_SOFTPIN to DRM_IOCTL_I915_GETPARAM and checking for
a reported value of 1 (or greater).
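
From userspace the flow is: query the parameter, then set both the offset
and the PINNED flag on the exec object. Roughly (illustrative; assumes the
uapi additions from this series are present in i915_drm.h, a valid fd and
handle, and omits error handling):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int exec_softpinned(int fd, uint32_t handle, uint32_t batch_len)
{
        struct drm_i915_getparam gp;
        struct drm_i915_gem_exec_object2 obj;
        struct drm_i915_gem_execbuffer2 execbuf;
        int has_softpin = 0;

        memset(&gp, 0, sizeof(gp));
        gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
        gp.value = &has_softpin;
        if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) || !has_softpin)
                return -1;                      /* kernel too old */

        memset(&obj, 0, sizeof(obj));
        obj.handle = handle;
        obj.offset = 0x100000;                  /* 4096-byte aligned, chosen
                                                 * by userspace's allocator */
        obj.flags = EXEC_OBJECT_PINNED;         /* place exactly at ->offset */

        memset(&execbuf, 0, sizeof(execbuf));
        execbuf.buffers_ptr = (uintptr_t)&obj;
        execbuf.buffer_count = 1;
        execbuf.batch_len = batch_len;

        return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}

A failure with one of the errnos listed above means the requested
placement could not be honoured and userspace must adjust its own layout.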

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h   |  2 +-
 drivers/gpu/drm/i915/i915_gem.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_evict.c | 69 ++-
 drivers/gpu/drm/i915/i915_trace.h | 23 
 4 files changed, 69 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index addd33bbc847..62a024a7225b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3052,7 +3052,7 @@ int __must_check i915_gem_evict_something(struct 
drm_device *dev,
  unsigned long start,
  unsigned long end,
  unsigned flags);
-int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
+int __must_check i915_gem_evict_for_vma(struct i915_vma *vma, unsigned flags);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 
 /* belongs in i915_gem_gtt.h */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 962fd81ce26c..497b68849d09 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2813,7 +2813,7 @@ i915_vma_insert(struct i915_vma *vma,
vma->node.color = obj->cache_level;
ret = drm_mm_reserve_node(>vm->mm, >node);
if (ret) {
-   ret = i915_gem_evict_for_vma(vma);
+   ret = i915_gem_evict_for_vma(vma, flags);
if (ret == 0)
ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
b/drivers/gpu/drm/i915/i915_gem_evict.c
index fdc4941be15a..b48839fc2996 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -229,42 +229,61 @@ found:
 }
 
 int
-i915_gem_evict_for_vma(struct i915_vma *target)
+i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags)
 {
-   struct drm_mm_node *node, *next;
+   struct list_head eviction_list;
+   struct drm_mm_node *node;
+   u64 end = target->node.start + target->node.size;
+   struct i915_vma *vma, *next;
+   int ret;
 
-   list_for_each_entry_safe(node, next,
-   &target->vm->mm.head_node.node_list,
-   node_list) {
-   struct i915_vma *vma;
-   int ret;
+   trace_i915_gem_evict_vma(target, flags);
 
-   if (node->start + node->size <= target->node.start)
-   continue;
- 

[Intel-gfx] [PATCH 153/190] drm/i915: Record the position of the start of the request

2016-01-11 Thread Chris Wilson
Not only does it make for good documentation and a debugging aid, but
it is also vital for when we want to unwind requests - such as when
throwing away an incomplete request.
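
A toy model of the bookkeeping makes the unwind property obvious
(standalone, hypothetical structures):

#include <stdio.h>

/* Recording 'head' before emitting anything means [head, tail) always
 * brackets exactly what this request added to the ring.
 */
struct ring { unsigned int tail; };
struct request { unsigned int head, tail; };

static void emit(struct ring *r, unsigned int dwords)
{
        r->tail += dwords;      /* commands advance the ring tail */
}

int main(void)
{
        struct ring ring = { .tail = 64 };
        struct request rq;

        rq.head = ring.tail;    /* == intel_ring_get_tail() at allocation */
        emit(&ring, 16);        /* flushes, batch, breadcrumb, ... */
        rq.tail = ring.tail;

        ring.tail = rq.head;    /* discard the incomplete request */
        printf("unwound to %u\n", ring.tail);
        return 0;
}

Sampling at allocation rather than in __i915_add_request() also covers
the commands emitted while building the request, not just the final
flush and breadcrumb.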

v2: Rebase

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem_request.c | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c   |  4 +++-
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c460dc0c14e1..84693d4c4e52 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -566,6 +566,7 @@ struct drm_i915_error_state {
struct drm_i915_error_request {
long jiffies;
u32 seqno;
+   u32 head;
u32 tail;
} *requests;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 9e8e594ce2bd..74be71e7d113 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -244,6 +244,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
goto err;
}
 
+   /* Record the position of the start of the request so that
+* should we detect the updated seqno part-way through the
+* GPU processing the request, we never over-estimate the
+* position of the head.
+*/
+   req->head = intel_ring_get_tail(req->ring);
+
/*
 * Reserve space in the ring buffer for all the commands required to
 * eventually emit this request. This is to guarantee that the
@@ -421,7 +428,6 @@ static void i915_gem_mark_busy(struct drm_i915_private 
*dev_priv)
 void __i915_add_request(struct drm_i915_gem_request *request, bool 
flush_caches)
 {
struct intel_ring *ring = request->ring;
-   u32 request_start;
int ret;
 
/*
@@ -431,7 +437,6 @@ void __i915_add_request(struct drm_i915_gem_request 
*request, bool flush_caches)
 */
intel_ring_reserved_space_use(ring);
 
-   request_start = intel_ring_get_tail(ring);
/*
 * Emit any outstanding flushes - execbuf can fail to emit the flush
 * after having emitted the batchbuffer command. Hence we need to fix
@@ -451,13 +456,6 @@ void __i915_add_request(struct drm_i915_gem_request 
*request, bool flush_caches)
/* Not allowed to fail! */
WARN(ret, "emit|add_request failed: %d!\n", ret);
 
-   /* Record the position of the start of the request so that
-* should we detect the updated seqno part-way through the
-* GPU processing the request, we never over-estimate the
-* position of the head.
-*/
-   request->head = request_start;
-
request->emitted_jiffies = jiffies;
request->previous_seqno = request->engine->last_submitted_seqno;
request->engine->last_submitted_seqno = request->fence.seqno;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index a2935d7e9278..494dee1f724d 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -457,9 +457,10 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
   dev_priv->ring[i].name,
   error->ring[i].num_requests);
for (j = 0; j < error->ring[i].num_requests; j++) {
-   err_printf(m, "  seqno 0x%08x, emitted %ld, 
tail 0x%08x\n",
+   err_printf(m, "  seqno 0x%08x, emitted %ld, 
head 0x%08x tail 0x%08x\n",
   error->ring[i].requests[j].seqno,
   error->ring[i].requests[j].jiffies,
+  error->ring[i].requests[j].head,
   error->ring[i].requests[j].tail);
}
}
@@ -1067,6 +1068,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
erq = &error->ring[i].requests[count++];
erq->seqno = request->fence.seqno;
erq->jiffies = request->emitted_jiffies;
+   erq->head = request->head;
erq->tail = request->tail;
}
}
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

