[Intel-gfx] [PATCH v1 1/2] drm/i915/icl: Mind the SFC units when resetting VD or VEBox engines

2018-10-31 Thread Tomasz Lis
From: Oscar Mateo 

SFC (Scaler & Format Converter) units are shared between VD and VEBoxes.
They also happen to have separate reset bits. So, whenever we want to reset
one or more of the media engines, we have to make sure the SFCs do not
change owner in the process and, if this owner happens to be one of the
engines being reset, we need to reset the SFC as well.

This happens in 4 steps:

1) Tell the engine that a software reset is going to happen. The engine
will then try to force lock the SFC (if currently locked, it will
remain so; if currently unlocked, it will ignore this and all new lock
requests).

2) Poll the ack bit to make sure the hardware has received the forced
lock from the driver. Once this bit is set, it indicates SFC status
(lock or unlock) will not change anymore (until we tell the engine it
is safe to unlock again).

3) Check the usage bit to see if the SFC has ended up being locked to
the engine we want to reset. If this is the case, we have to reset
the SFC as well.

4) Unlock all the SFCs once the reset sequence is completed.

Obviously, if we are resetting the whole GPU, we don't have to worry
about all of this.

BSpec: 10989
BSpec: 10990
BSpec: 10954
BSpec: 10955
BSpec: 10956
BSpec: 19212

Signed-off-by: Tomasz Lis 
Signed-off-by: Oscar Mateo 
Signed-off-by: Michel Thierry 
Cc: Michel Thierry 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/i915_reg.h |  18 +++
 drivers/gpu/drm/i915/intel_uncore.c | 105 ++--
 2 files changed, 119 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8d089ef..7b4dffa 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -330,6 +330,24 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define  GEN11_GRDOM_MEDIA4(1 << 8)
 #define  GEN11_GRDOM_VECS  (1 << 13)
 #define  GEN11_GRDOM_VECS2 (1 << 14)
+#define  GEN11_GRDOM_SFC0  (1 << 17)
+#define  GEN11_GRDOM_SFC1  (1 << 18)
+
+#define  GEN11_VCS_SFC_RESET_BIT(instance) (GEN11_GRDOM_SFC0 << 
((instance) >> 1))
+#define  GEN11_VECS_SFC_RESET_BIT(instance)(GEN11_GRDOM_SFC0 << (instance))
+
+#define GEN11_VCS_SFC_FORCED_LOCK(engine)  _MMIO((engine)->mmio_base + 
0x88C)
+#define   GEN11_VCS_SFC_FORCED_LOCK_BIT(1 << 0)
+#define GEN11_VCS_SFC_LOCK_STATUS(engine)  _MMIO((engine)->mmio_base + 
0x890)
+#define   GEN11_VCS_SFC_USAGE_BIT  (1 << 0)
+#define   GEN11_VCS_SFC_LOCK_ACK_BIT   (1 << 1)
+
+#define GEN11_VECS_SFC_FORCED_LOCK(engine) _MMIO((engine)->mmio_base + 
0x201C)
+#define   GEN11_VECS_SFC_FORCED_LOCK_BIT   (1 << 0)
+#define GEN11_VECS_SFC_LOCK_ACK(engine)
_MMIO((engine)->mmio_base + 0x2018)
+#define   GEN11_VECS_SFC_LOCK_ACK_BIT  (1 << 0)
+#define GEN11_VECS_SFC_USAGE(engine)   _MMIO((engine)->mmio_base + 
0x2014)
+#define   GEN11_VECS_SFC_USAGE_BIT (1 << 0)
 
 #define RING_PP_DIR_BASE(engine)   _MMIO((engine)->mmio_base + 0x228)
 #define RING_PP_DIR_BASE_READ(engine)  _MMIO((engine)->mmio_base + 0x518)
diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index 9289515..481e70e 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1931,6 +1931,95 @@ static int gen6_reset_engines(struct drm_i915_private 
*dev_priv,
return gen6_hw_domain_reset(dev_priv, hw_mask);
 }
 
+static u32 gen11_lock_sfc(struct drm_i915_private *dev_priv,
+ struct intel_engine_cs *engine)
+{
+   u8 vdbox_sfc_access = INTEL_INFO(dev_priv)->vdbox_sfc_access;
+   i915_reg_t sfc_forced_lock;
+   u32 sfc_forced_lock_bit;
+   i915_reg_t sfc_forced_lock_ack;
+   u32 sfc_forced_lock_ack_bit;
+   i915_reg_t sfc_usage;
+   u32 sfc_usage_bit;
+   u32 sfc_reset_bit;
+
+   switch (engine->class) {
+   case VIDEO_DECODE_CLASS:
+   if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
+   return 0;
+   sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
+   sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
+   sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine);
+   sfc_forced_lock_ack_bit  = GEN11_VCS_SFC_LOCK_ACK_BIT;
+   sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine);
+   sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT;
+   sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
+   break;
+   case VIDEO_ENHANCEMENT_CLASS:
+   sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
+   sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
+  

[Intel-gfx] [PATCH v1 2/2] drm/i915/icl: Record the valid VDBoxes with SFC capability

2018-10-31 Thread Tomasz Lis
From: Oscar Mateo 

In Gen11, only even numbered "logical" VDBoxes are hooked up to an SFC
(Scaler & Format Converter) unit. We will use this information to decide
when the SFC units need to be reset.

BSpec: 20189

Signed-off-by: Tomasz Lis 
Signed-off-by: Oscar Mateo 
Signed-off-by: Michel Thierry 
Signed-off-by: Daniele Ceraolo Spurio 
Cc: Michel Thierry 
Cc: Daniele Ceraolo Spurio 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_device_info.c | 9 +
 drivers/gpu/drm/i915/intel_device_info.h | 3 +++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
b/drivers/gpu/drm/i915/intel_device_info.c
index 89ed3a8..e2454a7 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -881,6 +881,7 @@ void intel_device_info_init_mmio(struct drm_i915_private 
*dev_priv)
 {
struct intel_device_info *info = mkwrite_device_info(dev_priv);
u32 media_fuse;
+   uint logical_vdbox = 0;
unsigned int i;
 
if (INTEL_GEN(dev_priv) < 11)
@@ -900,7 +901,15 @@ void intel_device_info_init_mmio(struct drm_i915_private 
*dev_priv)
if (!(BIT(i) & info->vdbox_enable)) {
info->ring_mask &= ~ENGINE_MASK(_VCS(i));
DRM_DEBUG_DRIVER("vcs%u fused off\n", i);
+   continue;
}
+
+   /*
+* In Gen11, only even numbered logical VDBOXes are
+* hooked up to an SFC (Scaler & Format Converter) unit.
+*/
+   if (logical_vdbox++ % 2 == 0)
+   info->vdbox_sfc_access |= BIT(i);
}
 
DRM_DEBUG_DRIVER("vebox enable: %04x\n", info->vebox_enable);
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index b4c2c4e..1eda80f 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -189,6 +189,9 @@ struct intel_device_info {
u8 vdbox_enable;
u8 vebox_enable;
 
+   /* Media engine access to SFC per instance */
+   u8 vdbox_sfc_access;
+
struct color_luts {
u16 degamma_lut_size;
u16 gamma_lut_size;
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v4 1/2] drm/i915/skl: Rework MOCS tables to keep common part in a define

2018-11-05 Thread Tomasz Lis
The MOCS tables are going to be very similar across platforms.

To reduce the amount of copied code, this patch rips the common part and
puts it into a definition valid for all gen9 platforms.

Signed-off-by: Tomasz Lis 
Suggested-by: Lucas De Marchi 
Reviewed-by: Lucas De Marchi 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Daniele Ceraolo Spurio 
Cc: Lucas De Marchi 
---
 drivers/gpu/drm/i915/intel_mocs.c | 61 ++-
 1 file changed, 22 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 77e9871..76aed59 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -96,26 +96,29 @@ struct drm_i915_mocs_table {
  *   may only be updated incrementally by adding entries at the
  *   end.
  */
-static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
-   [I915_MOCS_UNCACHED] = {
- /* 0x0009 */
- .control_value = LE_CACHEABILITY(LE_UC) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
 
- /* 0x0010 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
-   },
-   [I915_MOCS_PTE] = {
- /* 0x0038 */
- .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
- /* 0x0030 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+#define GEN9_MOCS_TABLE \
+   [I915_MOCS_UNCACHED] = { \
+ /* 0x0009 */ \
+ .control_value = LE_CACHEABILITY(LE_UC) | \
+  LE_TGT_CACHE(LE_TC_LLC_ELLC) | \
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | \
+  LE_PFM(0) | LE_SCF(0), \
+ /* 0x0010 */ \
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), \
+   }, \
+   [I915_MOCS_PTE] = { \
+ /* 0x0038 */ \
+ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | \
+  LE_TGT_CACHE(LE_TC_LLC_ELLC) | \
+  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | \
+  LE_PFM(0) | LE_SCF(0), \
+ /* 0x0030 */ \
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), \
},
+
+static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
+   GEN9_MOCS_TABLE
[I915_MOCS_CACHED] = {
  /* 0x003b */
  .control_value = LE_CACHEABILITY(LE_WB) |
@@ -129,33 +132,13 @@ static const struct drm_i915_mocs_entry 
skylake_mocs_table[] = {
 
 /* NOTE: the LE_TGT_CACHE is not used on Broxton */
 static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
-   [I915_MOCS_UNCACHED] = {
- /* 0x0009 */
- .control_value = LE_CACHEABILITY(LE_UC) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
-
- /* 0x0010 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
-   },
-   [I915_MOCS_PTE] = {
- /* 0x0038 */
- .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
-
- /* 0x0030 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
-   },
+   GEN9_MOCS_TABLE
[I915_MOCS_CACHED] = {
  /* 0x0039 */
  .control_value = LE_CACHEABILITY(LE_UC) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
   LE_PFM(0) | LE_SCF(0),
-
  /* 0x0030 */
  .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
},
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v4 2/2] drm/i915/icl: Define MOCS table for Icelake

2018-11-05 Thread Tomasz Lis
The table has been unified across OSes to minimize virtualization overhead.

The MOCS table is now published as part of bspec, and versioned. Entries
are supposed to never be modified, but new ones can be added. Adding
entries increases table version. The patch includes version 1 entries.

Meaning of each entry is now explained in bspec, and user mode clients
are expected to know what each entry means. The 3 entries used for previous
platforms are still compatible with their legacy definitions, but that is
not guaranteed to be true for future platforms.

v2: Fixed SCC values, improved commit comment (Daniele)
v3: Improved MOCS table comment (Daniele)
v4: Moved new entries below gen9 ones. Put common entries into
definition to be used in multiple arrays. (Lucas)

BSpec: 34007
BSpec: 560
Signed-off-by: Tomasz Lis 
Reviewed-by: Daniele Ceraolo Spurio  (v3)
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Daniele Ceraolo Spurio 
Cc: Zhenyu Wang 
Cc: Zhi A Wang 
Cc: Anuj Phogat 
Cc: Adam Cetnerowski 
Cc: Piotr Rozenfeld 
Cc: Lucas De Marchi 
---
 drivers/gpu/drm/i915/intel_mocs.c | 249 +++---
 1 file changed, 235 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 76aed59..2a1e5f0 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
 #define LE_SCC(value)  ((value) << 8)
 #define LE_PFM(value)  ((value) << 11)
 #define LE_SCF(value)  ((value) << 14)
+#define LE_CoS(value)  ((value) << 15)
+#define LE_SSE(value)  ((value) << 17)
 
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)  ((value) << 0)
@@ -80,21 +82,21 @@ struct drm_i915_mocs_table {
  * LNCFCMOCS0 - LNCFCMOCS32 registers.
  *
  * These tables are intended to be kept reasonably consistent across
- * platforms. However some of the fields are not applicable to all of
- * them.
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, for Icelake and above, list of entries is published as part
+ * of bspec.
  *
  * Entries not part of the following tables are undefined as far as
- * userspace is concerned and shouldn't be relied upon.  For the time
- * being they will be implicitly initialized to the strictest caching
- * configuration (uncached) to guarantee forwards compatibility with
- * userspace programs written against more recent kernels providing
- * additional MOCS entries.
+ * userspace is concerned and shouldn't be relied upon.
  *
- * NOTE: These tables MUST start with being uncached and the length
- *   MUST be less than 63 as the last two registers are reserved
- *   by the hardware.  These tables are part of the kernel ABI and
- *   may only be updated incrementally by adding entries at the
- *   end.
+ * The last two entries are reserved by the hardware. For ICL+ they
+ * should be initialized according to bspec and never used, for older
+ * platforms they should never be written to.
+ *
+ * NOTE: These tables are part of bspec and defined as part of hardware
+ *   interface for ICL+. For older platforms, they are part of kernel
+ *   ABI. It is expected that existing entries will remain constant
+ *   and the tables will only be updated by adding new entries.
  */
 
 #define GEN9_MOCS_TABLE \
@@ -144,6 +146,222 @@ static const struct drm_i915_mocs_entry 
broxton_mocs_table[] = {
},
 };
 
+#define GEN11_MOCS_TABLE \
+   [0] = { \
+ /* Base - Uncached (Deprecated) */ \
+ .control_value = LE_CACHEABILITY(LE_UC) | \
+  LE_TGT_CACHE(LE_TC_LLC) | \
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | \
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0), \
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), \
+   }, \
+   [1] = { \
+ /* Base - L3 + LeCC:PAT (Deprecated) */ \
+ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | \
+  LE_TGT_CACHE(LE_TC_LLC) | \
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | \
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0), \
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), \
+   }, \
+   [2] = { \
+ /* Base - L3 + LLC */ \
+ .control_value = LE_CACHEABILITY(LE_WB) | \
+  LE_TGT_CACHE(LE_TC_LLC) | \
+  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | \
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0), \
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), \
+   }, \
+   [3] = { \
+ /* Base - Uncached */ \
+ .control_value = LE_CACHEABILITY(LE_U

[Intel-gfx] [PATCH v5 1/2] drm/i915/skl: Rework MOCS tables to keep common part in a define

2018-11-06 Thread Tomasz Lis
The MOCS tables are going to be very similar across platforms.

To reduce the amount of copied code, this patch rips the common part and
puts it into a definition valid for all gen9 platforms.

v2: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
to MOCS_ENTRIES. (Joonas)

Signed-off-by: Tomasz Lis 
Suggested-by: Lucas De Marchi 
Reviewed-by: Lucas De Marchi 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Daniele Ceraolo Spurio 
Cc: Lucas De Marchi 
---
 drivers/gpu/drm/i915/intel_mocs.c | 86 ---
 1 file changed, 36 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 77e9871..8d08a7b 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -96,71 +96,57 @@ struct drm_i915_mocs_table {
  *   may only be updated incrementally by adding entries at the
  *   end.
  */
-static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
-   [I915_MOCS_UNCACHED] = {
- /* 0x0009 */
- .control_value = LE_CACHEABILITY(LE_UC) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
-
- /* 0x0010 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
-   },
-   [I915_MOCS_PTE] = {
- /* 0x0038 */
- .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
- /* 0x0030 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+
+#define MOCS_CONTROL_VALUE(lecc, tc, lrum, daom, ersc, scc, pfm, scf) \
+   (LE_CACHEABILITY(lecc) | LE_TGT_CACHE(tc) | \
+   LE_LRUM(lrum) | LE_AOM(daom) | LE_RSC(ersc) | LE_SCC(scc) | \
+   LE_PFM(pfm) | LE_SCF(scf))
+
+#define MOCS_L3CC_VALUE(esc, scc, l3cc) \
+   (L3_ESC(esc) | L3_SCC(scc) | L3_CACHEABILITY(l3cc))
+
+#define GEN9_MOCS_ENTRIES \
+   [I915_MOCS_UNCACHED] = { \
+ /* 0x0009 */ \
+ .control_value = MOCS_CONTROL_VALUE(LE_UC, LE_TC_LLC_ELLC, \
+ 0, 0, 0, 0, 0, 0), \
+ /* 0x0010 */ \
+ .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
+   }, \
+   [I915_MOCS_PTE] = { \
+ /* 0x0038 */ \
+ .control_value = MOCS_CONTROL_VALUE(LE_PAGETABLE, LE_TC_LLC_ELLC, \
+ 3, 0, 0, 0, 0, 0), \
+ /* 0x0030 */ \
+ .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
},
+
+static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
+   GEN9_MOCS_ENTRIES
[I915_MOCS_CACHED] = {
  /* 0x003b */
- .control_value = LE_CACHEABILITY(LE_WB) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
+ .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC_ELLC,
+ 3, 0, 0, 0, 0, 0),
  /* 0x0030 */
- .l3cc_value =   L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+ .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB),
},
 };
 
 /* NOTE: the LE_TGT_CACHE is not used on Broxton */
 static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
-   [I915_MOCS_UNCACHED] = {
- /* 0x0009 */
- .control_value = LE_CACHEABILITY(LE_UC) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
-
- /* 0x0010 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
-   },
-   [I915_MOCS_PTE] = {
- /* 0x0038 */
- .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
-
- /* 0x0030 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
-   },
+   GEN9_MOCS_ENTRIES
[I915_MOCS_CACHED] = {
  /* 0x0039 */
- .control_value = LE_CACHEABILITY(LE_UC) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
-
+ .control_value = MOCS_CONTROL_VALUE(LE_UC, LE_TC_LLC_ELLC,
+ 3, 0, 0, 0, 0, 0),
  /* 0x0030 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+ .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB

[Intel-gfx] [PATCH v5 2/2] drm/i915/icl: Define MOCS table for Icelake

2018-11-06 Thread Tomasz Lis
The table has been unified across OSes to minimize virtualization overhead.

The MOCS table is now published as part of bspec, and versioned. Entries
are supposed to never be modified, but new ones can be added. Adding
entries increases table version. The patch includes version 1 entries.

Meaning of each entry is now explained in bspec, and user mode clients
are expected to know what each entry means. The 3 entries used for previous
platforms are still compatible with their legacy definitions, but that is
not guaranteed to be true for future platforms.

v2: Fixed SCC values, improved commit comment (Daniele)
v3: Improved MOCS table comment (Daniele)
v4: Moved new entries below gen9 ones. Put common entries into
definition to be used in multiple arrays. (Lucas)
v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)

BSpec: 34007
BSpec: 560
Signed-off-by: Tomasz Lis 
Reviewed-by: Daniele Ceraolo Spurio  (v4)
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Daniele Ceraolo Spurio 
Cc: Zhenyu Wang 
Cc: Zhi A Wang 
Cc: Anuj Phogat 
Cc: Adam Cetnerowski 
Cc: Piotr Rozenfeld 
Cc: Lucas De Marchi 
---
 drivers/gpu/drm/i915/intel_mocs.c | 206 +++---
 1 file changed, 192 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 8d08a7b..030a61d 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
 #define LE_SCC(value)  ((value) << 8)
 #define LE_PFM(value)  ((value) << 11)
 #define LE_SCF(value)  ((value) << 14)
+#define LE_COS(value)  ((value) << 15)
+#define LE_SSE(value)  ((value) << 17)
 
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)  ((value) << 0)
@@ -80,21 +82,21 @@ struct drm_i915_mocs_table {
  * LNCFCMOCS0 - LNCFCMOCS32 registers.
  *
  * These tables are intended to be kept reasonably consistent across
- * platforms. However some of the fields are not applicable to all of
- * them.
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, for Icelake and above, list of entries is published as part
+ * of bspec.
  *
  * Entries not part of the following tables are undefined as far as
- * userspace is concerned and shouldn't be relied upon.  For the time
- * being they will be implicitly initialized to the strictest caching
- * configuration (uncached) to guarantee forwards compatibility with
- * userspace programs written against more recent kernels providing
- * additional MOCS entries.
+ * userspace is concerned and shouldn't be relied upon.
  *
- * NOTE: These tables MUST start with being uncached and the length
- *   MUST be less than 63 as the last two registers are reserved
- *   by the hardware.  These tables are part of the kernel ABI and
- *   may only be updated incrementally by adding entries at the
- *   end.
+ * The last two entries are reserved by the hardware. For ICL+ they
+ * should be initialized according to bspec and never used, for older
+ * platforms they should never be written to.
+ *
+ * NOTE: These tables are part of bspec and defined as part of hardware
+ *   interface for ICL+. For older platforms, they are part of kernel
+ *   ABI. It is expected that existing entries will remain constant
+ *   and the tables will only be updated by adding new entries.
  */
 
 #define MOCS_CONTROL_VALUE(lecc, tc, lrum, daom, ersc, scc, pfm, scf) \
@@ -147,6 +149,179 @@ static const struct drm_i915_mocs_entry 
broxton_mocs_table[] = {
 #undef MOCS_CONTROL_VALUE
 #undef MOCS_L3CC_VALUE
 
+#define MOCS_CONTROL_VALUE(lecc, tc, lrum, daom, ersc, scc, pfm, scf, cos, 
sse) \
+   (LE_CACHEABILITY(lecc) | LE_TGT_CACHE(tc) | \
+   LE_LRUM(lrum) | LE_AOM(daom) | LE_RSC(ersc) | LE_SCC(scc) | \
+   LE_PFM(pfm) | LE_SCF(scf) | LE_COS(cos) | LE_SSE(sse))
+
+#define MOCS_L3CC_VALUE(esc, scc, l3cc) \
+   (L3_ESC(esc) | L3_SCC(scc) | L3_CACHEABILITY(l3cc))
+
+#define GEN11_MOCS_ENTRIES \
+   [0] = { \
+ /* Base - Uncached (Deprecated) */ \
+ .control_value = MOCS_CONTROL_VALUE(LE_UC, LE_TC_LLC, \
+ 0, 0, 0, 0, 0, 0, 0, 0), \
+ .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
+   }, \
+   [1] = { \
+ /* Base - L3 + LeCC:PAT (Deprecated) */ \
+ .control_value = MOCS_CONTROL_VALUE(LE_PAGETABLE, LE_TC_LLC, \
+ 0, 0, 0, 0, 0, 0, 0, 0), \
+ .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+   }, \
+   [2] = { \
+ /* Base - L3 + LLC */ \
+ .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+ 3, 0, 0, 0, 0, 0, 0, 0), \
+

[Intel-gfx] [PATCH v6 1/2] drm/i915/skl: Rework MOCS tables to keep common part in a define

2018-11-07 Thread Tomasz Lis
The MOCS tables are going to be very similar across platforms.

To reduce the amount of copied code, this patch rips the common part and
puts it into a definition valid for all gen9 platforms.

v2: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
to MOCS_ENTRIES. (Joonas)

Signed-off-by: Tomasz Lis 
Suggested-by: Lucas De Marchi 
Reviewed-by: Lucas De Marchi  (v1)
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Daniele Ceraolo Spurio 
Cc: Lucas De Marchi 
---
 drivers/gpu/drm/i915/intel_mocs.c | 86 ---
 1 file changed, 36 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 77e9871..8d08a7b 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -96,71 +96,57 @@ struct drm_i915_mocs_table {
  *   may only be updated incrementally by adding entries at the
  *   end.
  */
-static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
-   [I915_MOCS_UNCACHED] = {
- /* 0x0009 */
- .control_value = LE_CACHEABILITY(LE_UC) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
-
- /* 0x0010 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
-   },
-   [I915_MOCS_PTE] = {
- /* 0x0038 */
- .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
- /* 0x0030 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+
+#define MOCS_CONTROL_VALUE(lecc, tc, lrum, daom, ersc, scc, pfm, scf) \
+   (LE_CACHEABILITY(lecc) | LE_TGT_CACHE(tc) | \
+   LE_LRUM(lrum) | LE_AOM(daom) | LE_RSC(ersc) | LE_SCC(scc) | \
+   LE_PFM(pfm) | LE_SCF(scf))
+
+#define MOCS_L3CC_VALUE(esc, scc, l3cc) \
+   (L3_ESC(esc) | L3_SCC(scc) | L3_CACHEABILITY(l3cc))
+
+#define GEN9_MOCS_ENTRIES \
+   [I915_MOCS_UNCACHED] = { \
+ /* 0x0009 */ \
+ .control_value = MOCS_CONTROL_VALUE(LE_UC, LE_TC_LLC_ELLC, \
+ 0, 0, 0, 0, 0, 0), \
+ /* 0x0010 */ \
+ .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
+   }, \
+   [I915_MOCS_PTE] = { \
+ /* 0x0038 */ \
+ .control_value = MOCS_CONTROL_VALUE(LE_PAGETABLE, LE_TC_LLC_ELLC, \
+ 3, 0, 0, 0, 0, 0), \
+ /* 0x0030 */ \
+ .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
},
+
+static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
+   GEN9_MOCS_ENTRIES
[I915_MOCS_CACHED] = {
  /* 0x003b */
- .control_value = LE_CACHEABILITY(LE_WB) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
+ .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC_ELLC,
+ 3, 0, 0, 0, 0, 0),
  /* 0x0030 */
- .l3cc_value =   L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+ .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB),
},
 };
 
 /* NOTE: the LE_TGT_CACHE is not used on Broxton */
 static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
-   [I915_MOCS_UNCACHED] = {
- /* 0x0009 */
- .control_value = LE_CACHEABILITY(LE_UC) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
-
- /* 0x0010 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
-   },
-   [I915_MOCS_PTE] = {
- /* 0x0038 */
- .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
-
- /* 0x0030 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
-   },
+   GEN9_MOCS_ENTRIES
[I915_MOCS_CACHED] = {
  /* 0x0039 */
- .control_value = LE_CACHEABILITY(LE_UC) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
-
+ .control_value = MOCS_CONTROL_VALUE(LE_UC, LE_TC_LLC_ELLC,
+ 3, 0, 0, 0, 0, 0),
  /* 0x0030 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+ .l3cc_value = MOCS_L3CC_VALUE(0, 0

[Intel-gfx] [PATCH v6 2/2] drm/i915/icl: Define MOCS table for Icelake

2018-11-07 Thread Tomasz Lis
The table has been unified across OSes to minimize virtualization overhead.

The MOCS table is now published as part of bspec, and versioned. Entries
are supposed to never be modified, but new ones can be added. Adding
entries increases table version. The patch includes version 1 entries.

Meaning of each entry is now explained in bspec, and user mode clients
are expected to know what each entry means. The 3 entries used for previous
platforms are still compatible with their legacy definitions, but that is
not guaranteed to be true for future platforms.

v2: Fixed SCC values, improved commit comment (Daniele)
v3: Improved MOCS table comment (Daniele)
v4: Moved new entries below gen9 ones. Put common entries into
definition to be used in multiple arrays. (Lucas)
v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
v6: Removed definitions of reserved entries. (Michal)
Increased limit of entries sent to the hardware on gen11+.

BSpec: 34007
BSpec: 560
Signed-off-by: Tomasz Lis 
Reviewed-by: Daniele Ceraolo Spurio  (v4)
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Daniele Ceraolo Spurio 
Cc: Zhenyu Wang 
Cc: Zhi A Wang 
Cc: Anuj Phogat 
Cc: Adam Cetnerowski 
Cc: Piotr Rozenfeld 
Cc: Lucas De Marchi 
---
 drivers/gpu/drm/i915/intel_mocs.c | 222 +-
 1 file changed, 197 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 8d08a7b..4eb05c6 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
 #define LE_SCC(value)  ((value) << 8)
 #define LE_PFM(value)  ((value) << 11)
 #define LE_SCF(value)  ((value) << 14)
+#define LE_COS(value)  ((value) << 15)
+#define LE_SSE(value)  ((value) << 17)
 
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)  ((value) << 0)
@@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
 
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES  62  /* 62 out of 64 - 63 & 64 are reserved. */
+#define GEN11_NUM_MOCS_ENTRIES 64  /* 63-64 are reserved, but configured. */
+
+#define NUM_MOCS_ENTRIES(i915) \
+   (INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
 
 /* (e)LLC caching options */
 #define LE_PAGETABLE   0
@@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
  * LNCFCMOCS0 - LNCFCMOCS32 registers.
  *
  * These tables are intended to be kept reasonably consistent across
- * platforms. However some of the fields are not applicable to all of
- * them.
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, for Icelake and above, list of entries is published as part
+ * of bspec.
  *
  * Entries not part of the following tables are undefined as far as
- * userspace is concerned and shouldn't be relied upon.  For the time
- * being they will be implicitly initialized to the strictest caching
- * configuration (uncached) to guarantee forwards compatibility with
- * userspace programs written against more recent kernels providing
- * additional MOCS entries.
+ * userspace is concerned and shouldn't be relied upon.
+ *
+ * The last two entries are reserved by the hardware. For ICL+ they
+ * should be initialized according to bspec and never used, for older
+ * platforms they should never be written to.
  *
- * NOTE: These tables MUST start with being uncached and the length
- *   MUST be less than 63 as the last two registers are reserved
- *   by the hardware.  These tables are part of the kernel ABI and
- *   may only be updated incrementally by adding entries at the
- *   end.
+ * NOTE: These tables are part of bspec and defined as part of hardware
+ *   interface for ICL+. For older platforms, they are part of kernel
+ *   ABI. It is expected that existing entries will remain constant
+ *   and the tables will only be updated by adding new entries.
  */
 
 #define MOCS_CONTROL_VALUE(lecc, tc, lrum, daom, ersc, scc, pfm, scf) \
@@ -147,6 +153,167 @@ static const struct drm_i915_mocs_entry 
broxton_mocs_table[] = {
 #undef MOCS_CONTROL_VALUE
 #undef MOCS_L3CC_VALUE
 
+#define MOCS_CONTROL_VALUE(lecc, tc, lrum, daom, ersc, scc, pfm, scf, cos, 
sse) \
+   (LE_CACHEABILITY(lecc) | LE_TGT_CACHE(tc) | \
+   LE_LRUM(lrum) | LE_AOM(daom) | LE_RSC(ersc) | LE_SCC(scc) | \
+   LE_PFM(pfm) | LE_SCF(scf) | LE_COS(cos) | LE_SSE(sse))
+
+#define MOCS_L3CC_VALUE(esc, scc, l3cc) \
+   (L3_ESC(esc) | L3_SCC(scc) | L3_CACHEABILITY(l3cc))
+
+#define GEN11_MOCS_ENTRIES \
+   [0] = { \
+ /* Base - Uncached (Deprecated) */ \
+ .control_value = MOCS_CONTROL_VALUE(LE_UC, LE_TC_LLC, \
+ 0, 0, 0, 0, 0, 0, 0, 0), \
+ .l3cc_value = MO

[Intel-gfx] [PATCH v6] drm/i915/icl: Preempt-to-idle support in execlists.

2018-11-09 Thread Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

The advantage of this new preemption path is that one less context switch is
needed, and returning information about preempion being complete is received
earlier. This leads to significant improvement in our IGT latency test.

Test performed: `gem_exec_latency --run-subtest render-preemption`, executed
100 times, on the same platform, same kernel, without and with this patch.
Then taken average of the execution latency times:

subcase old preempt.icl preempt.
render-render   853.2036840.1176
render-bsd  2328.8708   2083.2576
render-blt  2080.1501   1852.0792
render-vebox1553.5134   1428.762

Improvement observed:

subcase improvement
render-render1.53%
render-bsd  10.55%
render-blt  10.96%
render-vebox 8.03%

v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-dle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed conyext state tonot assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

v5: Renamed inject_preempt_context(). (Daniele)
Removed duplicated GEM_BUG_ON() on HWACK (Daniele)

v6: Added performance test results.

Bspec: 18922
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Daniele Ceraolo Spurio 
Cc: Michal Winiarski 
Cc: Mika Kuoppala 
Reviewed-by: Daniele Ceraolo Spurio 
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.h  |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
 drivers/gpu/drm/i915/i915_pci.c  |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c | 109 +--
 drivers/gpu/drm/i915/intel_lrc.h |   1 +
 6 files changed, 84 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 08d25aa..d2cc9f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2579,6 +2579,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+   ((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index b97963d..10b1d61 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -529,7 +529,8 @@ static void init_contexts(struct drm_i915_private *i915)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-   return HAS_LOGICAL_RING_PREEMPTION(i915);
+   return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+  !HAS_HW_PREEMPT_TO_IDLE(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4ccab83..82125cf 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -600,7 +600,8 @@ static const struct intel_device_info intel_cannonlake_info 
= {
   TRANSCODER_DSI0_OFFSET, TRANSCODER_DSI1_OFFSET}, \
GEN(11), \
.ddb_size = 2048, \
-   .has_logical_ring_elsq = 1
+   .has_logical_ring_elsq = 1, \
+   .has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index 86ce1db..a2ee278 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -104,6 +104,7 @@ enum intel_ppgtt {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+   func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/dri

[Intel-gfx] [PATCH v1] drm/i915: Add Exec param to control data port coherency.

2018-03-30 Thread Tomasz Lis
ently with CPU, some fine grain SVM resources
(CL_MEM_SVM_FINE_GRAIN_BUFFER). These resources contain descriptors of
computational work that needs to be executed. kern_master analyzes incoming
work descriptors and populates a plain OCL buffer (non-fine-grain) with payload
for kern_worker. Once kern_master is done, kern_worker kicks-in and processes
the payload that kern_master produced. These two kernels work in a loop, one
after another. Since only kern_master requires coherency, kern_worker should
not be forced to pay for it. This means that we need to have the ability to
toggle coherency switch on or off per each GPU submission:
(ENABLE COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> (ENABLE
COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> ...

4. Why the execlist flag approach was chosen?

There are two other ways of providing the functionality to UMDs, besides the
execlist flag:

a) Chicken bit register whitelisting.

This approach would allow adding the functionality without any change to
KMDs interface. Also, it has been determined that whitelisting is safe for
gen10 and gen11. The issue is with gen9, where hardware whitelisting cannot
be used, and OCL driver needs support for it. A workaround there would be to
use command parser, which verifies buffers before execution. But such parsing
comes at a considerable performance cost.

b) Providing the flag as context IOCTL setting.

The data port coherency switch could be implemented as a context parameter,
which would schedule submission of a buffer to switch the coherency flag.
That is an elegant solution with bounds the flag to context, which matches
the hardware placement of the feature. This solution was not accepted
because of OCL driver performance concerns. The OCL driver is constructed
with emphasis on creating small, but very frequent submissions. With such
architecture, adding IOCTL setparam call before submission has considerable
impact on the performance.

Bspec: 11419
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.c|  3 ++
 drivers/gpu/drm/i915/i915_gem_context.h|  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 17 ++
 drivers/gpu/drm/i915/intel_lrc.c   | 53 ++
 drivers/gpu/drm/i915/intel_lrc.h   |  3 ++
 include/uapi/drm/i915_drm.h| 12 ++-
 6 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d354627..030854e 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -436,6 +436,9 @@ static int i915_getparam_ioctl(struct drm_device *dev, void 
*data,
case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
value = 1000 * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz;
break;
+   case I915_PARAM_HAS_EXEC_DATA_PORT_COHERENCY:
+   value = (INTEL_GEN(dev_priv) >= 9);
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h 
b/drivers/gpu/drm/i915/i915_gem_context.h
index 7854262..00aa309 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -118,6 +118,7 @@ struct i915_gem_context {
 #define CONTEXT_BANNABLE   3
 #define CONTEXT_BANNED 4
 #define CONTEXT_FORCE_SINGLE_SUBMISSION5
+#define CONTEXT_DATA_PORT_COHERENT 6
 
/**
 * @hw_id: - unique identifier for the context
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8c170db..e3a2f9e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2245,6 +2245,18 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.batch_flags |= I915_DISPATCH_RS;
}
 
+   if (args->flags & I915_EXEC_DATA_PORT_COHERENT) {
+   if (INTEL_GEN(eb.i915) < 9) {
+   DRM_DEBUG("Data Port Coherency is only allowed for Gen9 
and above\n");
+   return -EINVAL;
+   }
+   if (eb.engine->class != RENDER_CLASS) {
+   DRM_DEBUG("Data Port Coherency is not available on 
%s\n",
+eb.engine->name);
+   return -EINVAL;
+   }
+   }
+
if (args->flags & I915_EXEC_FENCE_IN) {
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!in_fence)
@@ -2371,6 +2383,11 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_batch_unpin;
}
 
+   /* Emit the switch of data port coherency state if needed */
+   err = intel_lr_context_modify_data_port_coherency(eb.request,
+   (

[Intel-gfx] [PATCH v2] drm/i915: Add Exec param to control data port coherency.

2018-04-11 Thread Tomasz Lis
ently with CPU, some fine grain SVM resources
(CL_MEM_SVM_FINE_GRAIN_BUFFER). These resources contain descriptors of
computational work that needs to be executed. kern_master analyzes incoming
work descriptors and populates a plain OCL buffer (non-fine-grain) with payload
for kern_worker. Once kern_master is done, kern_worker kicks-in and processes
the payload that kern_master produced. These two kernels work in a loop, one
after another. Since only kern_master requires coherency, kern_worker should
not be forced to pay for it. This means that we need to have the ability to
toggle coherency switch on or off per each GPU submission:
(ENABLE COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> (ENABLE
COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> ...

4. Why the execlist flag approach was chosen?

There are two other ways of providing the functionality to UMDs, besides the
execlist flag:

a) Chicken bit register whitelisting.

This approach would allow adding the functionality without any change to
KMDs interface. Also, it has been determined that whitelisting is safe for
gen10 and gen11. The issue is with gen9, where hardware whitelisting cannot
be used, and OCL driver needs support for it. A workaround there would be to
use command parser, which verifies buffers before execution. But such parsing
comes at a considerable performance cost.

b) Providing the flag as context IOCTL setting.

The data port coherency switch could be implemented as a context parameter,
which would schedule submission of a buffer to switch the coherency flag.
That is an elegant solution with bounds the flag to context, which matches
the hardware placement of the feature. This solution was not accepted
because of OCL driver performance concerns. The OCL driver is constructed
with emphasis on creating small, but very frequent submissions. With such
architecture, adding IOCTL setparam call before submission has considerable
impact on the performance.

v2: Fixed compilation warning.

Bspec: 11419
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.c|  3 ++
 drivers/gpu/drm/i915/i915_gem_context.h|  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 18 ++
 drivers/gpu/drm/i915/intel_lrc.c   | 53 ++
 drivers/gpu/drm/i915/intel_lrc.h   |  3 ++
 include/uapi/drm/i915_drm.h| 12 ++-
 6 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f770be1..19493b0 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -436,6 +436,9 @@ static int i915_getparam_ioctl(struct drm_device *dev, void 
*data,
case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
value = 1000 * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz;
break;
+   case I915_PARAM_HAS_EXEC_DATA_PORT_COHERENCY:
+   value = (INTEL_GEN(dev_priv) >= 9);
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h 
b/drivers/gpu/drm/i915/i915_gem_context.h
index 7854262..00aa309 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -118,6 +118,7 @@ struct i915_gem_context {
 #define CONTEXT_BANNABLE   3
 #define CONTEXT_BANNED 4
 #define CONTEXT_FORCE_SINGLE_SUBMISSION5
+#define CONTEXT_DATA_PORT_COHERENT 6
 
/**
 * @hw_id: - unique identifier for the context
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c74f5df..ada376c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2274,6 +2274,18 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.batch_flags |= I915_DISPATCH_RS;
}
 
+   if (args->flags & I915_EXEC_DATA_PORT_COHERENT) {
+   if (INTEL_GEN(eb.i915) < 9) {
+   DRM_DEBUG("Data Port Coherency is only allowed for Gen9 
and above\n");
+   return -EINVAL;
+   }
+   if (eb.engine->class != RENDER_CLASS) {
+   DRM_DEBUG("Data Port Coherency is not available on 
%s\n",
+eb.engine->name);
+   return -EINVAL;
+   }
+   }
+
if (args->flags & I915_EXEC_FENCE_IN) {
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!in_fence)
@@ -2400,6 +2412,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_batch_unpin;
}
 
+   /* Emit the switch of data port coherency state if needed */
+   err = intel_lr_context_modify_data_port_coh

[Intel-gfx] [PATCH v2] drm/i915/gen11: Preempt-to-idle support in execlists.

2018-04-19 Thread Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-dle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

Bspec: 18922
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 ++
 drivers/gpu/drm/i915/i915_gem_context.c  |  4 ++-
 drivers/gpu/drm/i915/i915_pci.c  |  3 +-
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 drivers/gpu/drm/i915/intel_lrc.c | 47 
 drivers/gpu/drm/i915/intel_lrc.h |  1 +
 6 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0286911..f445340 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2518,6 +2518,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+   ((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 74435af..d65f469 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,7 +454,9 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-   return HAS_LOGICAL_RING_PREEMPTION(i915);
+   return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+  !HAS_HW_PREEMPT_TO_IDLE(i915) &&
+  !USES_GUC_SUBMISSION(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info 
= {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
-   .has_logical_ring_elsq = 1
+   .has_logical_ring_elsq = 1, \
+   .has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+   func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 029901a..4c94488 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE   (1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -552,6 +553,25 @@ static void inject_preempt_context(struct intel_engine_cs 
*engine)
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+   struct intel_engine_execlists *execlists = &engine->execlists;
+
+   GEM_TRACE("%s\n", engine->name);
+
+   /*
+* hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+* HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+*/
+   GEM_BUG_ON(execlists->ctrl_reg == NULL);
+
+   /* trigger preemption to idle */
+   writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+
+   execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+   execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
struct intel_engine_

[Intel-gfx] [PATCH v1] Second implementation of Data Port Coherency.

2018-06-20 Thread Tomasz Lis
The OCL Team agreed to use IOCTL instead of Exec flag to switch coherency
settings.

Also:
1. I will follow this patch with IGT test for the new functionality.
2. The OCL Team will publish UMD patch for it.

Tomasz Lis (1):
  drm/i915: Add IOCTL Param to control data port coherency.

 drivers/gpu/drm/i915/i915_gem_context.c | 41 ++
 drivers/gpu/drm/i915/i915_gem_context.h |  6 
 drivers/gpu/drm/i915/intel_lrc.c| 51 +
 drivers/gpu/drm/i915/intel_lrc.h|  4 +++
 include/uapi/drm/i915_drm.h |  1 +
 5 files changed, 103 insertions(+)

-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v1] drm/i915: Add IOCTL Param to control data port coherency.

2018-06-20 Thread Tomasz Lis
master uses, concurrently with CPU, some fine grain SVM resources
(CL_MEM_SVM_FINE_GRAIN_BUFFER). These resources contain descriptors of
computational work that needs to be executed. kern_master analyzes incoming
work descriptors and populates a plain OCL buffer (non-fine-grain) with payload
for kern_worker. Once kern_master is done, kern_worker kicks-in and processes
the payload that kern_master produced. These two kernels work in a loop, one
after another. Since only kern_master requires coherency, kern_worker should
not be forced to pay for it. This means that we need to have the ability to
toggle coherency switch on or off per each GPU submission:
(ENABLE COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> (ENABLE
COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> ...

Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Michal Winiarski 

Bspec: 11419
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 41 ++
 drivers/gpu/drm/i915/i915_gem_context.h |  6 
 drivers/gpu/drm/i915/intel_lrc.c| 51 +
 drivers/gpu/drm/i915/intel_lrc.h|  4 +++
 include/uapi/drm/i915_drm.h |  1 +
 5 files changed, 103 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index ccf463a..ea65ae6 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -711,6 +711,24 @@ static bool client_is_banned(struct drm_i915_file_private 
*file_priv)
return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
 }
 
+static int i915_gem_context_set_data_port_coherent(struct i915_gem_context 
*ctx)
+{
+   int ret;
+   ret = intel_lr_context_modify_data_port_coherency(ctx, true);
+   if (!GEM_WARN_ON(ret))
+   __set_bit(CONTEXT_DATA_PORT_COHERENT, &ctx->flags);
+   return ret;
+}
+
+static int i915_gem_context_clear_data_port_coherent(struct i915_gem_context 
*ctx)
+{
+   int ret;
+   ret = intel_lr_context_modify_data_port_coherency(ctx, false);
+   if (!GEM_WARN_ON(ret))
+   __clear_bit(CONTEXT_DATA_PORT_COHERENT, &ctx->flags);
+   return ret;
+}
+
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
  struct drm_file *file)
 {
@@ -784,6 +802,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
+   struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct i915_gem_context *ctx;
@@ -818,6 +837,16 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
case I915_CONTEXT_PARAM_PRIORITY:
args->value = ctx->sched.priority;
break;
+   case I915_CONTEXT_PARAM_COHERENCY:
+   /*
+* ENODEV if the feature is not supported. This removes the need
+* of separate IS_SUPPORTED parameter.
+*/
+   if (INTEL_GEN(dev_priv) < 9)
+   ret = -ENODEV;
+   else
+   args->value = 
i915_gem_context_is_data_port_coherent(ctx);
+   break;
default:
ret = -EINVAL;
break;
@@ -830,6 +859,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
+   struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct i915_gem_context *ctx;
@@ -893,6 +923,17 @@ int i915_gem_context_setparam_ioctl(struct drm_device 
*dev, void *data,
}
break;
 
+   case I915_CONTEXT_PARAM_COHERENCY:
+   if (args->size)
+   ret = -EINVAL;
+   else if (INTEL_GEN(dev_priv) < 9)
+   ret = -ENODEV;
+   else if (args->value)
+   ret = i915_gem_context_set_data_port_coherent(ctx);
+   else
+   ret = i915_gem_context_clear_data_port_coherent(ctx);
+   break;
+
default:
ret = -EINVAL;
break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h 
b/drivers/gpu/drm/i915/i915_gem_context.h
index b116e49..e8ccb70 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -126,6 +126,7 @@ struct i915_gem_context {
 #define CONTEXT_BANNABLE 

[Intel-gfx] [PATCH v5] drm/i915/gen11: Preempt-to-idle support in execlists.

2018-07-06 Thread Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-dle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed conyext state tonot assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

v5: Renamed inject_preempt_context(). (Daniele)
Removed duplicated GEM_BUG_ON() on HWACK (Daniele)

Bspec: 18922
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Daniele Ceraolo Spurio 
Cc: Michal Winiarski 
Cc: Mika Kuoppala 
Reviewed-by: Daniele Ceraolo Spurio 
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.h  |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
 drivers/gpu/drm/i915/i915_pci.c  |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c | 114 ---
 drivers/gpu/drm/i915/intel_lrc.h |   1 +
 6 files changed, 84 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 91a7e4f..c84a66a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2533,6 +2533,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+   ((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index b10770c..bf7faa7 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -464,7 +464,8 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-   return HAS_LOGICAL_RING_PREEMPTION(i915);
+   return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+  !HAS_HW_PREEMPT_TO_IDLE(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 55543f1..2da7e77 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -593,7 +593,8 @@ static const struct intel_device_info intel_cannonlake_info 
= {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
-   .has_logical_ring_elsq = 1
+   .has_logical_ring_elsq = 1, \
+   .has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index 633f9fb..0be7e03 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+   func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ab89dab..aed4aeb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -155,6 +155,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE   (1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -525,34 +526,49 @@ static void port_assign(struct execlist_port *port, 
struct i915_request *rq)
port_set(port, port_pack(i915_request_get(rq), port_count(port)));
 }
 
-static void inject_preempt_context(struct intel_engine_cs *engine)
+static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
 {
struct intel_engine_execlists *execlists = &engine

[Intel-gfx] [PATCH v4] drm/i915: Add IOCTL Param to control data port coherency.

2018-07-09 Thread Tomasz Lis
master uses, concurrently with CPU, some fine grain SVM resources
(CL_MEM_SVM_FINE_GRAIN_BUFFER). These resources contain descriptors of
computational work that needs to be executed. kern_master analyzes incoming
work descriptors and populates a plain OCL buffer (non-fine-grain) with payload
for kern_worker. Once kern_master is done, kern_worker kicks-in and processes
the payload that kern_master produced. These two kernels work in a loop, one
after another. Since only kern_master requires coherency, kern_worker should
not be forced to pay for it. This means that we need to have the ability to
toggle coherency switch on or off per each GPU submission:
(ENABLE COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> (ENABLE
COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> ...

v2: Fixed compilation warning.
v3: Refactored the patch to add IOCTL instead of exec flag.
v4: Renamed and documented the API flag. Used strict values.
Removed redundant GEM_WARN_ON()s. Improved to coding standard.
Introduced a macro for checking whether hardware supports the feature.

Cc: Joonas Lahtinen 
Cc: Tvrtko Ursulin 
Cc: Chris Wilson 
Cc: Michal Winiarski 

Bspec: 11419
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem_context.c | 41 +++
 drivers/gpu/drm/i915/i915_gem_context.h |  6 
 drivers/gpu/drm/i915/intel_lrc.c| 49 +
 drivers/gpu/drm/i915/intel_lrc.h|  4 +++
 include/uapi/drm/i915_drm.h |  6 
 6 files changed, 107 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 09ab124..7d4bbd5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2524,6 +2524,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_EDRAM(dev_priv)(!!((dev_priv)->edram_cap & EDRAM_ENABLED))
 #define HAS_WT(dev_priv)   ((IS_HASWELL(dev_priv) || \
 IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
+#define HAS_DATA_PORT_COHERENCY(dev_priv)  (INTEL_GEN(dev_priv) >= 9)
 
 #define HWS_NEEDS_PHYSICAL(dev_priv)   ((dev_priv)->info.hws_needs_physical)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index b10770c..6db352e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -711,6 +711,26 @@ static bool client_is_banned(struct drm_i915_file_private 
*file_priv)
return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
 }
 
+static int i915_gem_context_set_data_port_coherent(struct i915_gem_context 
*ctx)
+{
+   int ret;
+
+   ret = intel_lr_context_modify_data_port_coherency(ctx, true);
+   if (!ret)
+   __set_bit(CONTEXT_DATA_PORT_COHERENT, &ctx->flags);
+   return ret;
+}
+
+static int i915_gem_context_clear_data_port_coherent(struct i915_gem_context 
*ctx)
+{
+   int ret;
+
+   ret = intel_lr_context_modify_data_port_coherency(ctx, false);
+   if (!ret)
+   __clear_bit(CONTEXT_DATA_PORT_COHERENT, &ctx->flags);
+   return ret;
+}
+
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
  struct drm_file *file)
 {
@@ -784,6 +804,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
+   struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct i915_gem_context *ctx;
@@ -818,6 +839,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
case I915_CONTEXT_PARAM_PRIORITY:
args->value = ctx->sched.priority;
break;
+   case I915_CONTEXT_PARAM_DATA_PORT_COHERENCY:
+   if (!HAS_DATA_PORT_COHERENCY(dev_priv))
+   ret = -ENODEV;
+   else
+   args->value = 
i915_gem_context_is_data_port_coherent(ctx);
+   break;
default:
ret = -EINVAL;
break;
@@ -830,6 +857,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
+   struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct i915_gem_context *ctx;
@@ -893,6 +921,19 @@ int i915_gem_context_setparam_ioctl(struct drm_device 
*dev, void *data,
 

[Intel-gfx] [PATCH v5] drm/i915: Add IOCTL Param to control data port coherency.

2018-07-12 Thread Tomasz Lis
master uses, concurrently with CPU, some fine grain SVM resources
(CL_MEM_SVM_FINE_GRAIN_BUFFER). These resources contain descriptors of
computational work that needs to be executed. kern_master analyzes incoming
work descriptors and populates a plain OCL buffer (non-fine-grain) with payload
for kern_worker. Once kern_master is done, kern_worker kicks-in and processes
the payload that kern_master produced. These two kernels work in a loop, one
after another. Since only kern_master requires coherency, kern_worker should
not be forced to pay for it. This means that we need to have the ability to
toggle coherency switch on or off per each GPU submission:
(ENABLE COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> (ENABLE
COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> ...

v2: Fixed compilation warning.
v3: Refactored the patch to add IOCTL instead of exec flag.
v4: Renamed and documented the API flag. Used strict values.
Removed redundant GEM_WARN_ON()s. Improved to coding standard.
Introduced a macro for checking whether hardware supports the feature.
v5: Renamed some locals. Made the flag write to be lazy.
Updated comments to remove misconceptions. Added gen11 support.

Cc: Joonas Lahtinen 
Cc: Tvrtko Ursulin 
Cc: Chris Wilson 
Cc: Michal Winiarski 

Bspec: 11419
Bspec: 19175
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.h|  1 +
 drivers/gpu/drm/i915/i915_gem_context.c| 29 +---
 drivers/gpu/drm/i915/i915_gem_context.h| 17 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  6 
 drivers/gpu/drm/i915/intel_lrc.c   | 55 ++
 drivers/gpu/drm/i915/intel_lrc.h   |  4 +++
 include/uapi/drm/i915_drm.h|  7 
 7 files changed, 115 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 01dd298..73192e1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2524,6 +2524,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_EDRAM(dev_priv)(!!((dev_priv)->edram_cap & EDRAM_ENABLED))
 #define HAS_WT(dev_priv)   ((IS_HASWELL(dev_priv) || \
 IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
+#define HAS_DATA_PORT_COHERENCY(dev_priv)  (INTEL_GEN(dev_priv) >= 9)
 
 #define HWS_NEEDS_PHYSICAL(dev_priv)   ((dev_priv)->info.hws_needs_physical)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index b10770c..b5b63ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -784,6 +784,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
+   struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct i915_gem_context *ctx;
@@ -804,10 +805,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
case I915_CONTEXT_PARAM_GTT_SIZE:
if (ctx->ppgtt)
args->value = ctx->ppgtt->vm.total;
-   else if (to_i915(dev)->mm.aliasing_ppgtt)
-   args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
+   else if (i915->mm.aliasing_ppgtt)
+   args->value = i915->mm.aliasing_ppgtt->vm.total;
else
-   args->value = to_i915(dev)->ggtt.vm.total;
+   args->value = i915->ggtt.vm.total;
break;
case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
args->value = i915_gem_context_no_error_capture(ctx);
@@ -818,6 +819,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
case I915_CONTEXT_PARAM_PRIORITY:
args->value = ctx->sched.priority;
break;
+   case I915_CONTEXT_PARAM_DATA_PORT_COHERENCY:
+   if (!HAS_DATA_PORT_COHERENCY(i915))
+   ret = -ENODEV;
+   else
+   args->value = 
i915_gem_context_is_data_port_coherent_requested(ctx);
+   break;
default:
ret = -EINVAL;
break;
@@ -830,6 +837,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
+   struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct i915_gem_con

[Intel-gfx] [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency.

2018-07-16 Thread Tomasz Lis
master uses, concurrently with CPU, some fine grain SVM resources
(CL_MEM_SVM_FINE_GRAIN_BUFFER). These resources contain descriptors of
computational work that needs to be executed. kern_master analyzes incoming
work descriptors and populates a plain OCL buffer (non-fine-grain) with payload
for kern_worker. Once kern_master is done, kern_worker kicks-in and processes
the payload that kern_master produced. These two kernels work in a loop, one
after another. Since only kern_master requires coherency, kern_worker should
not be forced to pay for it. This means that we need to have the ability to
toggle coherency switch on or off per each GPU submission:
(ENABLE COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> (ENABLE
COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> ...

v2: Fixed compilation warning.
v3: Refactored the patch to add IOCTL instead of exec flag.
v4: Renamed and documented the API flag. Used strict values.
Removed redundant GEM_WARN_ON()s. Improved to coding standard.
Introduced a macro for checking whether hardware supports the feature.
v5: Renamed some locals. Made the flag write to be lazy.
Updated comments to remove misconceptions. Added gen11 support.
v6: Moved the flag write to gen8_enit_flush_render(). Renamed some functions.
Moved all flags checking to one place. Added mutex check.

Cc: Joonas Lahtinen 
Cc: Tvrtko Ursulin 
Cc: Chris Wilson 
Cc: Michal Winiarski 

Bspec: 11419
Bspec: 19175
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem_context.c | 29 +--
 drivers/gpu/drm/i915/i915_gem_context.h | 17 +
 drivers/gpu/drm/i915/intel_lrc.c| 66 -
 include/uapi/drm/i915_drm.h |  7 
 5 files changed, 115 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4fb9373..bae3999 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2524,6 +2524,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_EDRAM(dev_priv)(!!((dev_priv)->edram_cap & EDRAM_ENABLED))
 #define HAS_WT(dev_priv)   ((IS_HASWELL(dev_priv) || \
 IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
+#define HAS_DATA_PORT_COHERENCY(dev_priv)  (INTEL_GEN(dev_priv) >= 9)
 
 #define HWS_NEEDS_PHYSICAL(dev_priv)   ((dev_priv)->info.hws_needs_physical)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index b10770c..44ebc31 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -784,6 +784,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
+   struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct i915_gem_context *ctx;
@@ -804,10 +805,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
case I915_CONTEXT_PARAM_GTT_SIZE:
if (ctx->ppgtt)
args->value = ctx->ppgtt->vm.total;
-   else if (to_i915(dev)->mm.aliasing_ppgtt)
-   args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
+   else if (i915->mm.aliasing_ppgtt)
+   args->value = i915->mm.aliasing_ppgtt->vm.total;
else
-   args->value = to_i915(dev)->ggtt.vm.total;
+   args->value = i915->ggtt.vm.total;
break;
case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
args->value = i915_gem_context_no_error_capture(ctx);
@@ -818,6 +819,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
case I915_CONTEXT_PARAM_PRIORITY:
args->value = ctx->sched.priority;
break;
+   case I915_CONTEXT_PARAM_DATA_PORT_COHERENCY:
+   if (!HAS_DATA_PORT_COHERENCY(i915))
+   ret = -ENODEV;
+   else
+   args->value = 
i915_gem_context_is_data_port_coherent(ctx);
+   break;
default:
ret = -EINVAL;
break;
@@ -830,6 +837,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
+   struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct 

[Intel-gfx] [PATCH v6] drm/i915: Add IOCTL Param to control data port coherency.

2018-10-09 Thread Tomasz Lis
master uses, concurrently with CPU, some fine grain SVM resources
(CL_MEM_SVM_FINE_GRAIN_BUFFER). These resources contain descriptors of
computational work that needs to be executed. kern_master analyzes incoming
work descriptors and populates a plain OCL buffer (non-fine-grain) with payload
for kern_worker. Once kern_master is done, kern_worker kicks-in and processes
the payload that kern_master produced. These two kernels work in a loop, one
after another. Since only kern_master requires coherency, kern_worker should
not be forced to pay for it. This means that we need to have the ability to
toggle coherency switch on or off per each GPU submission:
(ENABLE COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> (ENABLE
COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> ...

v2: Fixed compilation warning.
v3: Refactored the patch to add IOCTL instead of exec flag.
v4: Renamed and documented the API flag. Used strict values.
Removed redundant GEM_WARN_ON()s. Improved to coding standard.
Introduced a macro for checking whether hardware supports the feature.
v5: Renamed some locals. Made the flag write to be lazy.
Updated comments to remove misconceptions. Added gen11 support.
v6: Moved the flag write to gen8_enit_flush_render(). Renamed some functions.
Moved all flags checking to one place. Added mutex check.
v7: Removed 2 comments, improved API comment. (Joonas)

Cc: Joonas Lahtinen 
Cc: Tvrtko Ursulin 
Cc: Chris Wilson 
Cc: Michal Winiarski 

Bspec: 11419
Bspec: 19175
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem_context.c | 29 ---
 drivers/gpu/drm/i915/i915_gem_context.h | 17 +
 drivers/gpu/drm/i915/intel_lrc.c| 64 -
 include/uapi/drm/i915_drm.h | 10 ++
 5 files changed, 116 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 794a8a0..e1ea5cb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2588,6 +2588,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_EDRAM(dev_priv)(!!((dev_priv)->edram_cap & EDRAM_ENABLED))
 #define HAS_WT(dev_priv)   ((IS_HASWELL(dev_priv) || \
 IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
+#define HAS_DATA_PORT_COHERENCY(dev_priv)  (INTEL_GEN(dev_priv) >= 9)
 
 #define HWS_NEEDS_PHYSICAL(dev_priv)   ((dev_priv)->info.hws_needs_physical)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 8cbe580..718ede9 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -847,6 +847,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
+   struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct i915_gem_context *ctx;
@@ -867,10 +868,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
case I915_CONTEXT_PARAM_GTT_SIZE:
if (ctx->ppgtt)
args->value = ctx->ppgtt->vm.total;
-   else if (to_i915(dev)->mm.aliasing_ppgtt)
-   args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
+   else if (i915->mm.aliasing_ppgtt)
+   args->value = i915->mm.aliasing_ppgtt->vm.total;
else
-   args->value = to_i915(dev)->ggtt.vm.total;
+   args->value = i915->ggtt.vm.total;
break;
case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
args->value = i915_gem_context_no_error_capture(ctx);
@@ -881,6 +882,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
case I915_CONTEXT_PARAM_PRIORITY:
args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
break;
+   case I915_CONTEXT_PARAM_DATA_PORT_COHERENCY:
+   if (!HAS_DATA_PORT_COHERENCY(i915))
+   ret = -ENODEV;
+   else
+   args->value = 
i915_gem_context_is_data_port_coherent(ctx);
+   break;
default:
ret = -EINVAL;
break;
@@ -893,6 +900,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
+   struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_file_private *f

[Intel-gfx] [PATCH v8] drm/i915: Add IOCTL Param to control data port coherency.

2018-10-12 Thread Tomasz Lis
master uses, concurrently with CPU, some fine grain SVM resources
(CL_MEM_SVM_FINE_GRAIN_BUFFER). These resources contain descriptors of
computational work that needs to be executed. kern_master analyzes incoming
work descriptors and populates a plain OCL buffer (non-fine-grain) with payload
for kern_worker. Once kern_master is done, kern_worker kicks-in and processes
the payload that kern_master produced. These two kernels work in a loop, one
after another. Since only kern_master requires coherency, kern_worker should
not be forced to pay for it. This means that we need to have the ability to
toggle coherency switch on or off per each GPU submission:
(ENABLE COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> (ENABLE
COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> ...

v2: Fixed compilation warning.
v3: Refactored the patch to add IOCTL instead of exec flag.
v4: Renamed and documented the API flag. Used strict values.
Removed redundant GEM_WARN_ON()s. Improved to coding standard.
Introduced a macro for checking whether hardware supports the feature.
v5: Renamed some locals. Made the flag write to be lazy.
Updated comments to remove misconceptions. Added gen11 support.
v6: Moved the flag write to gen8_enit_flush_render(). Renamed some functions.
Moved all flags checking to one place. Added mutex check.
v7: Removed 2 comments, improved API comment. (Joonas)
v8: Use non-GEM WARN_ON when in statements. (Tvrtko)

Cc: Joonas Lahtinen 
Cc: Tvrtko Ursulin 
Cc: Chris Wilson 
Cc: Michal Winiarski 

Bspec: 11419
Bspec: 19175
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem_context.c | 29 ---
 drivers/gpu/drm/i915/i915_gem_context.h | 17 +
 drivers/gpu/drm/i915/intel_lrc.c| 64 -
 include/uapi/drm/i915_drm.h | 10 ++
 5 files changed, 116 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3017ef0..90b3a0ff 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2588,6 +2588,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_EDRAM(dev_priv)(!!((dev_priv)->edram_cap & EDRAM_ENABLED))
 #define HAS_WT(dev_priv)   ((IS_HASWELL(dev_priv) || \
 IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
+#define HAS_DATA_PORT_COHERENCY(dev_priv)  (INTEL_GEN(dev_priv) >= 9)
 
 #define HWS_NEEDS_PHYSICAL(dev_priv)   ((dev_priv)->info.hws_needs_physical)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 8cbe580..718ede9 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -847,6 +847,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
+   struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
struct i915_gem_context *ctx;
@@ -867,10 +868,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
case I915_CONTEXT_PARAM_GTT_SIZE:
if (ctx->ppgtt)
args->value = ctx->ppgtt->vm.total;
-   else if (to_i915(dev)->mm.aliasing_ppgtt)
-   args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
+   else if (i915->mm.aliasing_ppgtt)
+   args->value = i915->mm.aliasing_ppgtt->vm.total;
else
-   args->value = to_i915(dev)->ggtt.vm.total;
+   args->value = i915->ggtt.vm.total;
break;
case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
args->value = i915_gem_context_no_error_capture(ctx);
@@ -881,6 +882,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
case I915_CONTEXT_PARAM_PRIORITY:
args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
break;
+   case I915_CONTEXT_PARAM_DATA_PORT_COHERENCY:
+   if (!HAS_DATA_PORT_COHERENCY(i915))
+   ret = -ENODEV;
+   else
+   args->value = 
i915_gem_context_is_data_port_coherent(ctx);
+   break;
default:
ret = -EINVAL;
break;
@@ -893,6 +900,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
+   struct drm_i915_private *i915 = to_i91

[Intel-gfx] [PATCH v5] drm/i915/icl: Preempt-to-idle support in execlists.

2018-10-15 Thread Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-dle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed conyext state tonot assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

v5: Renamed inject_preempt_context(). (Daniele)
Removed duplicated GEM_BUG_ON() on HWACK (Daniele)

Bspec: 18922
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Daniele Ceraolo Spurio 
Cc: Michal Winiarski 
Cc: Mika Kuoppala 
Reviewed-by: Daniele Ceraolo Spurio 
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.h  |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
 drivers/gpu/drm/i915/i915_pci.c  |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c | 109 +--
 drivers/gpu/drm/i915/intel_lrc.h |   1 +
 6 files changed, 84 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3017ef0..4817438 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2597,6 +2597,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+   ((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 8cbe580..98ca20e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -529,7 +529,8 @@ static void init_contexts(struct drm_i915_private *i915)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-   return HAS_LOGICAL_RING_PREEMPTION(i915);
+   return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+  !HAS_HW_PREEMPT_TO_IDLE(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 0a05cc7..f708d97 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -597,7 +597,8 @@ static const struct intel_device_info intel_cannonlake_info 
= {
GEN10_FEATURES, \
GEN(11), \
.ddb_size = 2048, \
-   .has_logical_ring_elsq = 1
+   .has_logical_ring_elsq = 1, \
+   .has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index af70026..7dcf0fd 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -104,6 +104,7 @@ enum intel_ppgtt {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+   func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ff0e2b3..4c2bfed 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -155,6 +155,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE   (1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -488,29 +489,49 @@ static void port_assign(struct execlist_port *port, 
struct i915_request *rq)
port_set(port, port_pack(i915_request_get(rq), port_count(port)));
 }
 
-static void inject_preempt_context(struct intel_engine_cs *engine)
+static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
 {
struct intel_engine_execlists *execlists = &engine

[Intel-gfx] [PATCH v1] drm/i915/icl: Define MOCS table for Icelake

2018-10-19 Thread Tomasz Lis
The table has been unified across OSes to minimize virtualization overhead.

The MOCS table is now versioned; the patch includes version 1 entries.

BSpec: 34007
BSpec: 560
Signed-off-by: Tomasz Lis 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Zhenyu Wang 
---
 drivers/gpu/drm/i915/intel_mocs.c | 246 +-
 1 file changed, 244 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 77e9871..b76d6db 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
 #define LE_SCC(value)  ((value) << 8)
 #define LE_PFM(value)  ((value) << 11)
 #define LE_SCF(value)  ((value) << 14)
+#define LE_CoS(value)  ((value) << 15)
+#define LE_SSE(value)  ((value) << 17)
 
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)  ((value) << 0)
@@ -96,6 +98,243 @@ struct drm_i915_mocs_table {
  *   may only be updated incrementally by adding entries at the
  *   end.
  */
+static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
+   [0] = {
+ /* Base - Uncached (Deprecated) */
+ .control_value = LE_CACHEABILITY(LE_UC) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+   },
+   [1] = {
+ /* Base - L3 + LeCC:PAT (Deprecated) */
+ .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+   },
+   [2] = {
+ /* Base - L3 + LLC */
+ .control_value = LE_CACHEABILITY(LE_WB) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+   },
+   [3] = {
+ /* Base - Uncached */
+ .control_value = LE_CACHEABILITY(LE_UC) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+   },
+   [4] = {
+ /* Base - L3 */
+ .control_value = LE_CACHEABILITY(LE_UC) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+   },
+   [5] = {
+ /* Base - LLC */
+ .control_value = LE_CACHEABILITY(LE_WB) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+   },
+   [6] = {
+ /* Age 0 - LLC */
+ .control_value = LE_CACHEABILITY(LE_WB) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+   },
+   [7] = {
+ /* Age 0 - L3 + LLC */
+ .control_value = LE_CACHEABILITY(LE_WB) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+   },
+   [8] = {
+ /* Age: Don't Chg. - LLC */
+ .control_value = LE_CACHEABILITY(LE_WB) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(2) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+   },
+   [9] = {
+ /* Age: Don't Chg. - L3 + LLC */
+ .control_value = LE_CACHEABILITY(LE_WB) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+ 

[Intel-gfx] [PATCH v2 1/2] drm/i915/icl: Define MOCS table for Icelake

2018-10-22 Thread Tomasz Lis
The table has been unified across OSes to minimize virtualization overhead.

The MOCS table is now published as part of bspec, and versioned. Entries
are supposed to never be modified, but new ones can be added. Adding
entries increases table version. The patch includes version 1 entries.

Meaning of each entry is now explained in bspec, and user mode clients
are expected to know what each entry means. The 3 entries used for previous
platforms are still compatible with their legacy definitions, but that is
not guaranteed to be true for future platforms.

BSpec: 34007
BSpec: 560
Signed-off-by: Tomasz Lis 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Daniele Ceraolo Spurio 
Cc: Zhenyu Wang 
Cc: Zhi A Wang 
Cc: Anuj Phogat 
---
 drivers/gpu/drm/i915/intel_mocs.c | 246 +-
 1 file changed, 244 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 77e9871..dc34e83 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
 #define LE_SCC(value)  ((value) << 8)
 #define LE_PFM(value)  ((value) << 11)
 #define LE_SCF(value)  ((value) << 14)
+#define LE_CoS(value)  ((value) << 15)
+#define LE_SSE(value)  ((value) << 17)
 
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)  ((value) << 0)
@@ -96,6 +98,243 @@ struct drm_i915_mocs_table {
  *   may only be updated incrementally by adding entries at the
  *   end.
  */
+static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
+   [0] = {
+ /* Base - Uncached (Deprecated) */
+ .control_value = LE_CACHEABILITY(LE_UC) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+   },
+   [1] = {
+ /* Base - L3 + LeCC:PAT (Deprecated) */
+ .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+   },
+   [2] = {
+ /* Base - L3 + LLC */
+ .control_value = LE_CACHEABILITY(LE_WB) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+   },
+   [3] = {
+ /* Base - Uncached */
+ .control_value = LE_CACHEABILITY(LE_UC) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+   },
+   [4] = {
+ /* Base - L3 */
+ .control_value = LE_CACHEABILITY(LE_UC) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+   },
+   [5] = {
+ /* Base - LLC */
+ .control_value = LE_CACHEABILITY(LE_WB) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+   },
+   [6] = {
+ /* Age 0 - LLC */
+ .control_value = LE_CACHEABILITY(LE_WB) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+   },
+   [7] = {
+ /* Age 0 - L3 + LLC */
+ .control_value = LE_CACHEABILITY(LE_WB) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+   },
+   [8] = {
+ /* Age: Don't Chg. - LLC */
+ .control_value = LE_CACHEABILITY(LE_WB) 

[Intel-gfx] [PATCH v2 2/2] drm/i915/icl: Add IOCTL for getting MOCS table version

2018-10-22 Thread Tomasz Lis
For Icelake and above, MOCS table for each platform is published
within bspec. The table is versioned, and new entries are assigned
a version number. Existing entries do not change and their version
is constant.

This introduces a parameter which allows getting max version number
of the MOCS entries currently supported, ie. value of 2 would mean
only version 1 and version 2 entries are initialized and can be used
by the user mode clients.

BSpec: 34007
Signed-off-by: Tomasz Lis 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Daniele Ceraolo Spurio 
Cc: Zhenyu Wang 
Cc: Zhi A Wang 
Cc: Anuj Phogat 
---
 drivers/gpu/drm/i915/i915_drv.c   |  6 ++
 drivers/gpu/drm/i915/intel_mocs.c | 12 
 drivers/gpu/drm/i915/intel_mocs.h |  1 +
 include/uapi/drm/i915_drm.h   | 11 +++
 4 files changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index baac35f..92fa8fd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -53,6 +53,7 @@
 #include "i915_vgpu.h"
 #include "intel_drv.h"
 #include "intel_uc.h"
+#include "intel_mocs.h"
 
 static struct drm_driver driver;
 
@@ -444,6 +445,11 @@ static int i915_getparam_ioctl(struct drm_device *dev, 
void *data,
case I915_PARAM_MMAP_GTT_COHERENT:
value = INTEL_INFO(dev_priv)->has_coherent_ggtt;
break;
+   case I915_PARAM_MOCS_TABLE_VERSION:
+   value = intel_mocs_table_version(dev_priv);
+   if (!value)
+   return -ENODEV;
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index dc34e83..fc1e98b 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -469,6 +469,18 @@ static i915_reg_t mocs_register(enum intel_engine_id 
engine_id, int index)
 }
 
 /**
+ * intel_mocs_table_version() - get version of mocs table implementation
+ * @i915: i915 device struct.
+ */
+int intel_mocs_table_version(struct drm_i915_private *i915)
+{
+   if (IS_ICELAKE(i915))
+   return 1;
+   else
+   return 0;
+}
+
+/**
  * intel_mocs_init_engine() - emit the mocs control table
  * @engine:The engine for whom to emit the registers.
  *
diff --git a/drivers/gpu/drm/i915/intel_mocs.h 
b/drivers/gpu/drm/i915/intel_mocs.h
index d89080d..dc1d64a 100644
--- a/drivers/gpu/drm/i915/intel_mocs.h
+++ b/drivers/gpu/drm/i915/intel_mocs.h
@@ -55,5 +55,6 @@
 int intel_rcs_context_init_mocs(struct i915_request *rq);
 void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv);
 void intel_mocs_init_engine(struct intel_engine_cs *engine);
+int intel_mocs_table_version(struct drm_i915_private *i915);
 
 #endif
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 298b2e1..16aafc4 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -559,6 +559,17 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_MMAP_GTT_COHERENT   52
 
+/*
+ * Query MOCS table version used during hardware initialization.
+ *
+ * The MOCS table for each platform is published as part of bspec. Entries in
+ * the table are supposed to never be modified, but new enties are added, 
making
+ * more indexes in the table valid. This parameter informs which version
+ * of the table was used to initialize the currently used graphics hardware,
+ * and therefore which MOCS indexes are useable.
+ */
+#define I915_PARAM_MOCS_TABLE_VERSION  53
+
 typedef struct drm_i915_getparam {
__s32 param;
/*
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3] drm/i915/icl: Define MOCS table for Icelake

2018-10-23 Thread Tomasz Lis
The table has been unified across OSes to minimize virtualization overhead.

The MOCS table is now published as part of bspec, and versioned. Entries
are supposed to never be modified, but new ones can be added. Adding
entries increases table version. The patch includes version 1 entries.

Meaning of each entry is now explained in bspec, and user mode clients
are expected to know what each entry means. The 3 entries used for previous
platforms are still compatible with their legacy definitions, but that is
not guaranteed to be true for future platforms.

v2: Fixed SCC values, improved commit comment (Daniele)
v3: Improved MOCS table comment (Daniele)

BSpec: 34007
BSpec: 560
Signed-off-by: Tomasz Lis 
Reviewed-by: Daniele Ceraolo Spurio 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Daniele Ceraolo Spurio 
Cc: Zhenyu Wang 
Cc: Zhi A Wang 
Cc: Anuj Phogat 
Cc: Adam Cetnerowski 
Cc: Piotr Rozenfeld 
---
 drivers/gpu/drm/i915/intel_mocs.c | 270 --
 1 file changed, 256 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 77e9871..b7f96cb 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
 #define LE_SCC(value)  ((value) << 8)
 #define LE_PFM(value)  ((value) << 11)
 #define LE_SCF(value)  ((value) << 14)
+#define LE_CoS(value)  ((value) << 15)
+#define LE_SSE(value)  ((value) << 17)
 
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)  ((value) << 0)
@@ -80,22 +82,259 @@ struct drm_i915_mocs_table {
  * LNCFCMOCS0 - LNCFCMOCS32 registers.
  *
  * These tables are intended to be kept reasonably consistent across
- * platforms. However some of the fields are not applicable to all of
- * them.
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, for Icelake and above, list of entries is published as part
+ * of bspec.
  *
  * Entries not part of the following tables are undefined as far as
- * userspace is concerned and shouldn't be relied upon.  For the time
- * being they will be implicitly initialized to the strictest caching
- * configuration (uncached) to guarantee forwards compatibility with
- * userspace programs written against more recent kernels providing
- * additional MOCS entries.
+ * userspace is concerned and shouldn't be relied upon.
  *
- * NOTE: These tables MUST start with being uncached and the length
- *   MUST be less than 63 as the last two registers are reserved
- *   by the hardware.  These tables are part of the kernel ABI and
- *   may only be updated incrementally by adding entries at the
- *   end.
+ * The last two entries are reserved by the hardware. For ICL+ they
+ * should be initialized according to bspec and never used, for older
+ * platforms they should never be written to.
+ *
+ * NOTE: These tables are part of bspec and defined as part of hardware
+ *   interface for ICL+. For older platforms, they are part of kernel
+ *   ABI. It is expected that existing entries will remain constant
+ *   and the tables will only be updated by adding new entries.
  */
+static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
+   [0] = {
+ /* Base - Uncached (Deprecated) */
+ .control_value = LE_CACHEABILITY(LE_UC) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+   },
+   [1] = {
+ /* Base - L3 + LeCC:PAT (Deprecated) */
+ .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+   },
+   [2] = {
+ /* Base - L3 + LLC */
+ .control_value = LE_CACHEABILITY(LE_WB) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+   },
+   [3] = {
+ /* Base - Uncached */
+ .control_value = LE_CACHEABILITY(LE_UC) |
+  LE_TGT_CACHE(LE_TC_LLC) |
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0),
+
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC)

[Intel-gfx] [PATCH v4 2/2] drm/i915/icl: Define MOCS table for Icelake

2018-10-26 Thread Tomasz Lis
The table has been unified across OSes to minimize virtualization overhead.

The MOCS table is now published as part of bspec, and versioned. Entries
are supposed to never be modified, but new ones can be added. Adding
entries increases table version. The patch includes version 1 entries.

Meaning of each entry is now explained in bspec, and user mode clients
are expected to know what each entry means. The 3 entries used for previous
platforms are still compatible with their legacy definitions, but that is
not guaranteed to be true for future platforms.

v2: Fixed SCC values, improved commit comment (Daniele)
v3: Improved MOCS table comment (Daniele)
v4: Moved new entries below gen9 ones. Put common entries into
definition to be used in multiple arrays. (Lucas)

BSpec: 34007
BSpec: 560
Signed-off-by: Tomasz Lis 
Reviewed-by: Daniele Ceraolo Spurio  (v3)
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Daniele Ceraolo Spurio 
Cc: Zhenyu Wang 
Cc: Zhi A Wang 
Cc: Anuj Phogat 
Cc: Adam Cetnerowski 
Cc: Piotr Rozenfeld 
Cc: Lucas De Marchi 
---
 drivers/gpu/drm/i915/intel_mocs.c | 249 +++---
 1 file changed, 235 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 76aed59..2a1e5f0 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
 #define LE_SCC(value)  ((value) << 8)
 #define LE_PFM(value)  ((value) << 11)
 #define LE_SCF(value)  ((value) << 14)
+#define LE_CoS(value)  ((value) << 15)
+#define LE_SSE(value)  ((value) << 17)
 
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)  ((value) << 0)
@@ -80,21 +82,21 @@ struct drm_i915_mocs_table {
  * LNCFCMOCS0 - LNCFCMOCS32 registers.
  *
  * These tables are intended to be kept reasonably consistent across
- * platforms. However some of the fields are not applicable to all of
- * them.
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, for Icelake and above, list of entries is published as part
+ * of bspec.
  *
  * Entries not part of the following tables are undefined as far as
- * userspace is concerned and shouldn't be relied upon.  For the time
- * being they will be implicitly initialized to the strictest caching
- * configuration (uncached) to guarantee forwards compatibility with
- * userspace programs written against more recent kernels providing
- * additional MOCS entries.
+ * userspace is concerned and shouldn't be relied upon.
  *
- * NOTE: These tables MUST start with being uncached and the length
- *   MUST be less than 63 as the last two registers are reserved
- *   by the hardware.  These tables are part of the kernel ABI and
- *   may only be updated incrementally by adding entries at the
- *   end.
+ * The last two entries are reserved by the hardware. For ICL+ they
+ * should be initialized according to bspec and never used, for older
+ * platforms they should never be written to.
+ *
+ * NOTE: These tables are part of bspec and defined as part of hardware
+ *   interface for ICL+. For older platforms, they are part of kernel
+ *   ABI. It is expected that existing entries will remain constant
+ *   and the tables will only be updated by adding new entries.
  */
 
 #define GEN9_MOCS_TABLE \
@@ -144,6 +146,222 @@ static const struct drm_i915_mocs_entry 
broxton_mocs_table[] = {
},
 };
 
+#define GEN11_MOCS_TABLE \
+   [0] = { \
+ /* Base - Uncached (Deprecated) */ \
+ .control_value = LE_CACHEABILITY(LE_UC) | \
+  LE_TGT_CACHE(LE_TC_LLC) | \
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | \
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0), \
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), \
+   }, \
+   [1] = { \
+ /* Base - L3 + LeCC:PAT (Deprecated) */ \
+ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | \
+  LE_TGT_CACHE(LE_TC_LLC) | \
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | \
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0), \
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), \
+   }, \
+   [2] = { \
+ /* Base - L3 + LLC */ \
+ .control_value = LE_CACHEABILITY(LE_WB) | \
+  LE_TGT_CACHE(LE_TC_LLC) | \
+  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | \
+  LE_PFM(0) | LE_SCF(0) | LE_CoS(0) | LE_SSE(0), \
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), \
+   }, \
+   [3] = { \
+ /* Base - Uncached */ \
+ .control_value = LE_CACHEABILITY(LE_U

[Intel-gfx] [PATCH v4 1/2] drm/i915/skl: Rework MOCS tables to keep common part in a define

2018-10-26 Thread Tomasz Lis
The MOCS tables are going to be very similar across platforms.

To reduce the amount of copied code, this patch rips the common part and
puts it into a definition valid for all gen9 platforms.

Signed-off-by: Tomasz Lis 
Suggested-by: Lucas De Marchi 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Daniele Ceraolo Spurio 
Cc: Lucas De Marchi 
---
 drivers/gpu/drm/i915/intel_mocs.c | 61 ++-
 1 file changed, 22 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
b/drivers/gpu/drm/i915/intel_mocs.c
index 77e9871..76aed59 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -96,26 +96,29 @@ struct drm_i915_mocs_table {
  *   may only be updated incrementally by adding entries at the
  *   end.
  */
-static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
-   [I915_MOCS_UNCACHED] = {
- /* 0x0009 */
- .control_value = LE_CACHEABILITY(LE_UC) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
 
- /* 0x0010 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
-   },
-   [I915_MOCS_PTE] = {
- /* 0x0038 */
- .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
- /* 0x0030 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+#define GEN9_MOCS_TABLE \
+   [I915_MOCS_UNCACHED] = { \
+ /* 0x0009 */ \
+ .control_value = LE_CACHEABILITY(LE_UC) | \
+  LE_TGT_CACHE(LE_TC_LLC_ELLC) | \
+  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | \
+  LE_PFM(0) | LE_SCF(0), \
+ /* 0x0010 */ \
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), \
+   }, \
+   [I915_MOCS_PTE] = { \
+ /* 0x0038 */ \
+ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | \
+  LE_TGT_CACHE(LE_TC_LLC_ELLC) | \
+  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | \
+  LE_PFM(0) | LE_SCF(0), \
+ /* 0x0030 */ \
+ .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), \
},
+
+static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
+   GEN9_MOCS_TABLE
[I915_MOCS_CACHED] = {
  /* 0x003b */
  .control_value = LE_CACHEABILITY(LE_WB) |
@@ -129,33 +132,13 @@ static const struct drm_i915_mocs_entry 
skylake_mocs_table[] = {
 
 /* NOTE: the LE_TGT_CACHE is not used on Broxton */
 static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
-   [I915_MOCS_UNCACHED] = {
- /* 0x0009 */
- .control_value = LE_CACHEABILITY(LE_UC) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
-
- /* 0x0010 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
-   },
-   [I915_MOCS_PTE] = {
- /* 0x0038 */
- .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
-  LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-  LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-  LE_PFM(0) | LE_SCF(0),
-
- /* 0x0030 */
- .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
-   },
+   GEN9_MOCS_TABLE
[I915_MOCS_CACHED] = {
  /* 0x0039 */
  .control_value = LE_CACHEABILITY(LE_UC) |
   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
   LE_PFM(0) | LE_SCF(0),
-
  /* 0x0030 */
  .l3cc_value =L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
},
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists.

2018-05-11 Thread Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-dle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed context state to not assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.

Bspec: 18922
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.h  |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   5 +-
 drivers/gpu/drm/i915/i915_pci.c  |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c | 115 ++-
 drivers/gpu/drm/i915/intel_lrc.h |   1 +
 6 files changed, 92 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 57fb3aa..6e9647b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2535,6 +2535,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+   ((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 33f8a4b..bdac129 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,7 +454,10 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-   return HAS_LOGICAL_RING_PREEMPTION(i915);
+   return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+  (!HAS_HW_PREEMPT_TO_IDLE(i915) ||
+   (HAS_HW_PREEMPT_TO_IDLE(i915) &&
+   !USES_GUC_SUBMISSION(i915)));
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info 
= {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
-   .has_logical_ring_elsq = 1
+   .has_logical_ring_elsq = 1, \
+   .has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+   func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 29dcf34..8fe6795 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE   (1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -526,31 +527,49 @@ static void port_assign(struct execlist_port *port, 
struct i915_request *rq)
 static void inject_preempt_context(struct intel_engine_cs *engine)
 {
struct intel_engine_execlists *execlists = &engine->execlists;
-   struct intel_context *ce =
-   to_intel_context(engine->i915->preempt_context, engine);
-   unsigned int n;
 
-   GEM_BUG_ON(execlists->preempt_complete_status !=
-  upper_32_bits(ce->lrc_desc));
-   GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
-   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBI

[Intel-gfx] [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists - v3 notes.

2018-05-11 Thread Tomasz Lis
The review of v2 changes touched issues which were addressed in a different way
than planned in that review:

1. Context status processing

While the review went towards finding common path to new preemption flag
combinations and existing cases, I decided to split the two ways, because
the !(status & GEN8_CTX_STATUS_COMPLETED_MASK) condition had to be skipped
from the preemption flow. While pre-gen11 preemption always executed the
preemptive context which guaranteed that flag set, nothing finishes execution
in the new to-real-idle preemption.

2. Testing of the idle-to-idle preemption case

It turns out the IGT test 'smoketest-render' from 'gem_exec_schedule' group
triggers the idle-to-idle case. The test submits a lot of very  short execs
with changing priorities. Its run triggers preemption many times. and due to
the short exec buffer, triggers idle-to-idle case as well, sooner or later.

Tomasz Lis (1):
  drm/i915/gen11: Preempt-to-idle support in execlists.

 drivers/gpu/drm/i915/i915_drv.h  |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   5 +-
 drivers/gpu/drm/i915/i915_pci.c  |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c | 115 ++-
 drivers/gpu/drm/i915/intel_lrc.h |   1 +
 6 files changed, 92 insertions(+), 35 deletions(-)

-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v4] drm/i915/gen11: Preempt-to-idle support in execlists.

2018-05-25 Thread Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-dle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed conyext state tonot assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Daniele Ceraolo Spurio 
Cc: Michal Winiarski 
Cc: Mika Kuoppala 
Bspec: 18922
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.h  |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
 drivers/gpu/drm/i915/i915_pci.c  |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c | 113 +--
 drivers/gpu/drm/i915/intel_lrc.h |   1 +
 6 files changed, 86 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 487922f..35eddf7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2534,6 +2534,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+   ((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 45393f6..341a5ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -455,7 +455,8 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-   return HAS_LOGICAL_RING_PREEMPTION(i915);
+   return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+  !HAS_HW_PREEMPT_TO_IDLE(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 97a91e6a..ee09926 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -593,7 +593,8 @@ static const struct intel_device_info intel_cannonlake_info 
= {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
-   .has_logical_ring_elsq = 1
+   .has_logical_ring_elsq = 1, \
+   .has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+   func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8a6058b..f95cb37 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE   (1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -522,31 +523,46 @@ static void port_assign(struct execlist_port *port, 
struct i915_request *rq)
 static void inject_preempt_context(struct intel_engine_cs *engine)
 {
struct intel_engine_execlists *execlists = &engine->execlists;
-   struct intel_context *ce =
-   to_intel_context(engine->i915->preempt_context, engine);
-   unsigned int n;
-
-   GEM_BUG_ON(execlists->preempt_complete_status !=
-  upper_32_bits(ce->lrc_desc));
-

[Intel-gfx] [RFC v1] Data port coherency control for UMDs.

2018-03-19 Thread Tomasz Lis
nce kern_master is done, kern_worker kicks-in and processes
the payload that kern_master produced. These two kernels work in a loop, one
after another. Since only kern_master requires coherency, kern_worker should
not be forced to pay for it. This means that we need to have the ability to
toggle coherency switch on or off per each GPU submission:
(ENABLE COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> (ENABLE
COHERENCY) kern_master -> (DISABLE COHERENCY)kern_worker -> ...

Tomasz Lis (1):
  drm/i915: Add Exec param to control data port coherency.

 drivers/gpu/drm/i915/i915_drv.c|  3 ++
 drivers/gpu/drm/i915/i915_gem_context.h|  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  5 +++
 drivers/gpu/drm/i915/intel_lrc.c   | 56 ++
 drivers/gpu/drm/i915/intel_lrc.h   |  3 ++
 include/uapi/drm/i915_drm.h| 12 ++-
 6 files changed, 79 insertions(+), 1 deletion(-)

--
2.7.4
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC v1] drm/i915: Add Exec param to control data port coherency.

2018-03-19 Thread Tomasz Lis
The patch adds a parameter to control the data port coherency functionality
on a per-exec call basis. When data port coherency flag value is different
than what it was in previous call for the context, a command to switch data
port coherency state is added before the buffer to be executed.

Bspec: 11419
---
 drivers/gpu/drm/i915/i915_drv.c|  3 ++
 drivers/gpu/drm/i915/i915_gem_context.h|  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 17 ++
 drivers/gpu/drm/i915/intel_lrc.c   | 53 ++
 drivers/gpu/drm/i915/intel_lrc.h   |  3 ++
 include/uapi/drm/i915_drm.h| 12 ++-
 6 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3df5193..fcb3547 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -436,6 +436,9 @@ static int i915_getparam_ioctl(struct drm_device *dev, void 
*data,
case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
value = 1000 * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz;
break;
+   case I915_PARAM_HAS_EXEC_DATA_PORT_COHERENCY:
+   value = (INTEL_GEN(dev_priv) >= 9);
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h 
b/drivers/gpu/drm/i915/i915_gem_context.h
index 7854262..00aa309 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -118,6 +118,7 @@ struct i915_gem_context {
 #define CONTEXT_BANNABLE   3
 #define CONTEXT_BANNED 4
 #define CONTEXT_FORCE_SINGLE_SUBMISSION5
+#define CONTEXT_DATA_PORT_COHERENT 6
 
/**
 * @hw_id: - unique identifier for the context
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8c170db..f848f14 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2245,6 +2245,18 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.batch_flags |= I915_DISPATCH_RS;
}
 
+   if (args->flags & I915_EXEC_DATA_PORT_COHERENT) {
+   if (INTEL_GEN(eb.i915) < 9) {
+   DRM_DEBUG("Data Port Coherency is only allowed for Gen9 
and above\n");
+   return -EINVAL;
+   }
+   if (eb.engine->class != RENDER_CLASS) {
+   DRM_DEBUG("Data Port Coherency is not available on 
%s\n",
+eb.engine->name);
+   return -EINVAL;
+   }
+   }
+
if (args->flags & I915_EXEC_FENCE_IN) {
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!in_fence)
@@ -2371,6 +2383,11 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_batch_unpin;
}
 
+   /* Emit the switch of data port coherency state if needed */
+   err = intel_lr_context_modify_data_port_coherency(eb.request,
+   (args->flags & I915_EXEC_DATA_PORT_COHERENT) != 0);
+   GEM_WARN_ON(err);
+
if (in_fence) {
err = i915_request_await_dma_fence(eb.request, in_fence);
if (err < 0)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 53f1c00..b847798 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -254,6 +254,59 @@ intel_lr_context_descriptor_update(struct i915_gem_context 
*ctx,
ce->lrc_desc = desc;
 }
 
+static int emit_set_data_port_coherency(struct i915_request *req, bool enable)
+{
+   u32 *cs;
+   i915_reg_t reg;
+
+   GEM_BUG_ON(req->engine->class != RENDER_CLASS);
+   GEM_BUG_ON(INTEL_GEN(req->i915) < 9);
+
+   cs = intel_ring_begin(req, 4);
+   if (IS_ERR(cs))
+   return PTR_ERR(cs);
+
+   if (INTEL_GEN(req->i915) >= 10)
+   reg = CNL_HDC_CHICKEN0;
+   else
+   reg = HDC_CHICKEN0;
+
+   *cs++ = MI_LOAD_REGISTER_IMM(1);
+   *cs++ = i915_mmio_reg_offset(reg);
+   /* Enabling coherency means disabling the bit which forces it off */
+   if (enable)
+   *cs++ = _MASKED_BIT_DISABLE(HDC_FORCE_NON_COHERENT);
+   else
+   *cs++ = _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT);
+   *cs++ = MI_NOOP;
+
+   intel_ring_advance(req, cs);
+
+   return 0;
+}
+
+int
+intel_lr_context_modify_data_port_coherency(struct i915_request *req,
+   bool enable)
+{
+   struct i915_gem_context *ctx = req->ctx;
+   int ret;
+
+   if (test_bit(CONTEXT_DATA_PORT_COHERENT, &ctx->flags) == enable)
+   return 0;
+
+   ret = emit_set_data_port_coherency(req, enable);
+
+   if (!ret) {
+

[Intel-gfx] [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists.

2018-03-27 Thread Tomasz Lis
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

Bspec: 18922
Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 ++
 drivers/gpu/drm/i915/i915_pci.c  |  3 ++-
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 drivers/gpu/drm/i915/intel_lrc.c | 45 +++-
 drivers/gpu/drm/i915/intel_lrc.h |  1 +
 5 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 800230b..c32580b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+   ((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info 
= {
GEN(11), \
.ddb_size = 2048, \
.has_csr = 0, \
-   .has_logical_ring_elsq = 1
+   .has_logical_ring_elsq = 1, \
+   .has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+   func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ba7f783..1a22de4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -153,6 +153,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE   (1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -183,7 +184,9 @@ static inline bool need_preempt(const struct 
intel_engine_cs *engine,
const struct i915_request *last,
int prio)
 {
-   return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+   return (engine->i915->preempt_context ||
+   HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
+prio > max(rq_prio(last), 0);
 }
 
 /**
@@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs 
*engine)
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+   struct intel_engine_execlists *execlists = &engine->execlists;
+
+   GEM_TRACE("%s\n", engine->name);
+
+   /*
+* hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+* HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+*/
+   GEM_BUG_ON(execlists->ctrl_reg != NULL);
+
+   /* trigger preemption to idle */
+   writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+
+   execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+   execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -594,7 +616,10 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
goto unlock;
 
if (need_preempt(engine, last, execlists->queue_priority)) {
-   inject_preempt_context(engine);
+   if (HAS_HW_PREEMPT_TO_IDLE(engine->i915))
+   gen11_preempt_to_idle(engine);
+   else
+  

Re: [Intel-gfx] [PATCH 3/3] drm/i915/glk: Convert a few more IS_BROXTON() to IS_GEN9_LP()

2016-12-19 Thread Tomasz Lis



W dniu 2016-12-16 o 16:42, Ander Conselvan de Oliveira pisze:

From: Michel Thierry 

Commit 89b3c3c7ee9d ("drm/i915/glk: Reuse broxton's cdclk code for GLK")
missed a few of occurences of IS_BROXTON() that should have been
coverted to IS_GEN9_LP().

Fixes: 89b3c3c7ee9d ("drm/i915/glk: Reuse broxton's cdclk code for GLK")
Cc: Ander Conselvan de Oliveira 
Cc: Rodrigo Vivi 
Cc: Daniel Vetter 
Cc: Jani Nikula 
Cc: intel-gfx@lists.freedesktop.org

Checked each change, everything OK.

Reviewed-by: Tomasz Lis 


Signed-off-by: Michel Thierry 
Signed-off-by: Tomasz Lis 
Signed-off-by: Ander Conselvan de Oliveira 

---
  drivers/gpu/drm/i915/i915_sysfs.c| 2 +-
  drivers/gpu/drm/i915/intel_device_info.c | 2 +-
  drivers/gpu/drm/i915/intel_dp.c  | 2 +-
  drivers/gpu/drm/i915/intel_guc_loader.c  | 4 ++--
  4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sysfs.c 
b/drivers/gpu/drm/i915/i915_sysfs.c
index 40c0ac7..376ac95 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -58,7 +58,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv,
  
  		if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)

units <<= 8;
-   } else if (IS_BROXTON(dev_priv)) {
+   } else if (IS_GEN9_LP(dev_priv)) {
units = 1;
div = 1200; /* 833.33ns */
}
diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
b/drivers/gpu/drm/i915/intel_device_info.c
index c46415b..6aeb1ed 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -192,7 +192,7 @@ static void gen9_sseu_info_init(struct drm_i915_private 
*dev_priv)
(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) &&
hweight8(sseu->slice_mask) > 1;
sseu->has_subslice_pg =
-   IS_BROXTON(dev_priv) && sseu_subslice_total(sseu) > 1;
+   IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1;
sseu->has_eu_pg = sseu->eu_per_subslice > 2;
  
  	if (IS_BROXTON(dev_priv)) {

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 45ebc96..ae08c19 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -3375,7 +3375,7 @@ intel_dp_set_signal_levels(struct intel_dp *intel_dp)
if (HAS_DDI(dev_priv)) {
signal_levels = ddi_signal_levels(intel_dp);
  
-		if (IS_BROXTON(dev_priv))

+   if (IS_GEN9_LP(dev_priv))
signal_levels = 0;
else
mask = DDI_BUF_EMP_MASK;
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c 
b/drivers/gpu/drm/i915/intel_guc_loader.c
index 21db697..8b74525 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -339,7 +339,7 @@ static u32 guc_wopcm_size(struct drm_i915_private *dev_priv)
u32 wopcm_size = GUC_WOPCM_TOP;
  
  	/* On BXT, the top of WOPCM is reserved for RC6 context */

-   if (IS_BROXTON(dev_priv))
+   if (IS_GEN9_LP(dev_priv))
wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED;
  
  	return wopcm_size;

@@ -388,7 +388,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
I915_WRITE(GEN6_GFXPAUSE, 0x30FFF);
  
-	if (IS_BROXTON(dev_priv))

+   if (IS_GEN9_LP(dev_priv))
I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE);
else
I915_WRITE(GEN9_GT_PM_CONFIG, GT_DOORBELL_ENABLE);


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2] drm/i915/glk: Convert a few more IS_BROXTON() to IS_GEN9_LP()

2017-01-10 Thread Tomasz Lis

Reviewed-by: Tomasz Lis 

W dniu 2017-01-09 o 18:11, Vivi, Rodrigo pisze:

Reviewed-by: Rodrigo Vivi 

On Mon, 2017-01-09 at 16:51 +0200, Ander Conselvan de Oliveira wrote:

From: Michel Thierry 

Commit cc3f90f0633c ("drm/i915/glk: Reuse broxton code for geminilake")
missed a few of occurences of IS_BROXTON() that should have been
coverted to IS_GEN9_LP().

v2: Cite the right commit. (Ander)

Fixes: cc3f90f0633c ("drm/i915/glk: Reuse broxton code for geminilake")
Cc: Rodrigo Vivi 
Cc: Ander Conselvan de Oliveira 
Cc: Daniel Vetter 
Cc: Jani Nikula 
Cc: intel-gfx@lists.freedesktop.org
Signed-off-by: Michel Thierry 
Signed-off-by: Tomasz Lis 
Signed-off-by: Ander Conselvan de Oliveira 

Reviewed-by: Tomasz Lis  (v1)
---
  drivers/gpu/drm/i915/i915_sysfs.c| 2 +-
  drivers/gpu/drm/i915/intel_device_info.c | 2 +-
  drivers/gpu/drm/i915/intel_dp.c  | 2 +-
  drivers/gpu/drm/i915/intel_guc_loader.c  | 4 ++--
  4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sysfs.c 
b/drivers/gpu/drm/i915/i915_sysfs.c
index 40c0ac7..376ac95 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -58,7 +58,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv,
  
  		if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)

units <<= 8;
-   } else if (IS_BROXTON(dev_priv)) {
+   } else if (IS_GEN9_LP(dev_priv)) {
units = 1;
div = 1200; /* 833.33ns */
}
diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
b/drivers/gpu/drm/i915/intel_device_info.c
index f642f6d..fcf8181 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -192,7 +192,7 @@ static void gen9_sseu_info_init(struct drm_i915_private 
*dev_priv)
(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) &&
hweight8(sseu->slice_mask) > 1;
sseu->has_subslice_pg =
-   IS_BROXTON(dev_priv) && sseu_subslice_total(sseu) > 1;
+   IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1;
sseu->has_eu_pg = sseu->eu_per_subslice > 2;
  
  	if (IS_BROXTON(dev_priv)) {

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index fb12896..8c18f72 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -3412,7 +3412,7 @@ intel_dp_set_signal_levels(struct intel_dp *intel_dp)
if (HAS_DDI(dev_priv)) {
signal_levels = ddi_signal_levels(intel_dp);
  
-		if (IS_BROXTON(dev_priv))

+   if (IS_GEN9_LP(dev_priv))
signal_levels = 0;
else
mask = DDI_BUF_EMP_MASK;
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c 
b/drivers/gpu/drm/i915/intel_guc_loader.c
index 35d5690..aa2b866 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -339,7 +339,7 @@ static u32 guc_wopcm_size(struct drm_i915_private *dev_priv)
u32 wopcm_size = GUC_WOPCM_TOP;
  
  	/* On BXT, the top of WOPCM is reserved for RC6 context */

-   if (IS_BROXTON(dev_priv))
+   if (IS_GEN9_LP(dev_priv))
wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED;
  
  	return wopcm_size;

@@ -388,7 +388,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
I915_WRITE(GEN6_GFXPAUSE, 0x30FFF);
  
-	if (IS_BROXTON(dev_priv))

+   if (IS_GEN9_LP(dev_priv))
I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE);
else
I915_WRITE(GEN9_GT_PM_CONFIG, GT_DOORBELL_ENABLE);


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC] drm/fb: Avoid infinite loop when no response from connector.

2017-02-17 Thread Tomasz Lis
This fixes an old patch so it doesn't cause infinite retries:
  drm/fb: add support for tiled monitor configurations.

The max count of iterations, 0xa10070f, was carefully selected based on the fact
that it looks cool.
---
 drivers/gpu/drm/drm_fb_helper.c| 4 +++-
 drivers/gpu/drm/i915/intel_fbdev.c | 5 -
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 0dd5da8..8e6c535 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -2011,7 +2011,9 @@ static bool drm_target_preferred(struct drm_fb_helper 
*fb_helper,
 
if ((conn_configured & mask) != mask) {
tile_pass++;
-   goto retry;
+   if (tile_pass < 0xa10070f)
+   goto retry;
+   DRM_ERROR("Max connector check retry count exceeded\n");
}
return true;
 }
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c 
b/drivers/gpu/drm/i915/intel_fbdev.c
index bc65ecf..2fd0f09 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -498,7 +498,10 @@ static bool intel_fb_initial_config(struct drm_fb_helper 
*fb_helper,
 
if (count > 0 && (conn_configured & mask) != mask) {
pass++;
-   goto retry;
+   if (pass < 0xa10070f)
+   goto retry;
+   DRM_ERROR("Max connector check retry count exceeded\n");
+   goto bail;
}
 
/*
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/fb: Proper support of boundary conditions in bitmasks.

2017-02-17 Thread Tomasz Lis
The recently introduced patch changed behavior of masks when
the bit number is negative. Instead of no bits set, the new way
makes all bits set. Problematic patch:
  drm/i915: Avoid BIT(max) - 1 and use GENMASK(max - 1, 0)

This behaviour was not considered when making changes, and boundary
value of count (=0) is now resulting in a mask with all bits on, since
the value is directly decreased and therefore negative. Checking if
all bits are set leads to infinite loop.

This patch introduces an additional check to avoid empty masks. It
reverts the control flow to the exact same way it worked before
the problematic patch.

Signed-off-by: Tomasz Lis 
---
 drivers/gpu/drm/i915/intel_fbdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_fbdev.c 
b/drivers/gpu/drm/i915/intel_fbdev.c
index e6f3eb2d..bc65ecf 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -496,7 +496,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper 
*fb_helper,
conn_configured |= BIT(i);
}
 
-   if ((conn_configured & mask) != mask) {
+   if (count > 0 && (conn_configured & mask) != mask) {
pass++;
goto retry;
}
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/fbdev: Stop repeating tile configuration on stagnation

2017-02-20 Thread Tomasz Lis
I tested this variant (reverted the change to "pass" variable before 
testing), and it fixes the issue with count=0 as well as possible 
infinite loop issue.


But something needs to be done with the "pass" var, one way or the other 
- commented below.



W dniu 2017-02-18 o 16:37, Chris Wilson pisze:

If we cease making progress in finding matching outputs for a tiled
configuration, stop looping over the remaining unconfigured outputs.

Fixes: b0ee9e7fa5b4 ("drm/fb: add support for tiled monitor configurations. 
(v2)")
Cc: Tomasz Lis 
Cc: Dave Airlie 
Cc: Daniel Vetter 
Cc: Jani Nikula 
Cc: Sean Paul 
Cc:  # v3.19+
---
  drivers/gpu/drm/i915/intel_fbdev.c | 7 +++
  1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_fbdev.c 
b/drivers/gpu/drm/i915/intel_fbdev.c
index 041322fef607..32d0af6b07a7 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -348,7 +348,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper 
*fb_helper,
bool *enabled, int width, int height)
  {
struct drm_i915_private *dev_priv = to_i915(fb_helper->dev);
-   unsigned long conn_configured, mask;
+   unsigned long conn_configured, conn_seq, mask;
unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
int i, j;
bool *save_enabled;
@@ -365,6 +365,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper 
*fb_helper,
mask = GENMASK(count - 1, 0);
conn_configured = 0;
  retry:
+   conn_seq = conn_configured;
for (i = 0; i < count; i++) {
struct drm_fb_helper_connector *fb_conn;
struct drm_connector *connector;
@@ -489,10 +490,8 @@ static bool intel_fb_initial_config(struct drm_fb_helper 
*fb_helper,
conn_configured |= BIT(i);
}
  
-	if ((conn_configured & mask) != mask) {

-   pass++;
This doesn't seem right; increasing the amount of passes should stay, or 
the use of "pass" variable should be completely replaced by conn_seq.

- Tomasz

+   if ((conn_configured & mask) != mask && conn_configured != conn_seq)
goto retry;
-   }
  
  	/*

 * If the BIOS didn't enable everything it could, fall back to have the


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/fbdev: Stop repeating tile configuration on stagnation

2017-02-24 Thread Tomasz Lis

Looks good to me.

Reviewed-by: Tomasz Lis 


W dniu 2017-02-20 o 16:46, Chris Wilson pisze:

On Mon, Feb 20, 2017 at 04:37:47PM +0100, Tomasz Lis wrote:

I tested this variant (reverted the change to "pass" variable before
testing), and it fixes the issue with count=0 as well as possible
infinite loop issue.

But something needs to be done with the "pass" var, one way or the
other - commented below.


W dniu 2017-02-18 o 16:37, Chris Wilson pisze:

If we cease making progress in finding matching outputs for a tiled
configuration, stop looping over the remaining unconfigured outputs.

Fixes: b0ee9e7fa5b4 ("drm/fb: add support for tiled monitor configurations. 
(v2)")
Cc: Tomasz Lis 
Cc: Dave Airlie 
Cc: Daniel Vetter 
Cc: Jani Nikula 
Cc: Sean Paul 
Cc:  # v3.19+
---
  drivers/gpu/drm/i915/intel_fbdev.c | 7 +++
  1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_fbdev.c 
b/drivers/gpu/drm/i915/intel_fbdev.c
index 041322fef607..32d0af6b07a7 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -348,7 +348,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper 
*fb_helper,
bool *enabled, int width, int height)
  {
struct drm_i915_private *dev_priv = to_i915(fb_helper->dev);
-   unsigned long conn_configured, mask;
+   unsigned long conn_configured, conn_seq, mask;
unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
int i, j;
bool *save_enabled;
@@ -365,6 +365,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper 
*fb_helper,
mask = GENMASK(count - 1, 0);
conn_configured = 0;
  retry:
+   conn_seq = conn_configured;
for (i = 0; i < count; i++) {
struct drm_fb_helper_connector *fb_conn;
struct drm_connector *connector;
@@ -489,10 +490,8 @@ static bool intel_fb_initial_config(struct drm_fb_helper 
*fb_helper,
conn_configured |= BIT(i);
}
-   if ((conn_configured & mask) != mask) {
-   pass++;

This doesn't seem right; increasing the amount of passes should
stay, or the use of "pass" variable should be completely replaced by
conn_seq.

Good catch.

diff --git a/drivers/gpu/drm/i915/intel_fbdev.c 
b/drivers/gpu/drm/i915/intel_fbdev.c
index 32d0af6b07a7..f7e9a4e69595 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -355,7 +355,6 @@ static bool intel_fb_initial_config(struct drm_fb_helper 
*fb_helper,
 bool fallback = true;
 int num_connectors_enabled = 0;
 int num_connectors_detected = 0;
-   int pass = 0;
  
 save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL);

 if (!save_enabled)
@@ -379,7 +378,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper 
*fb_helper,
 if (conn_configured & BIT(i))
 continue;
  
-   if (pass == 0 && !connector->has_tile)

+   if (conn_seq == 0 && !connector->has_tile)
 continue;
  
 if (connector->status == connector_status_connected)


Suggestions for a better name than conn_seq much appreciated.
-Chris



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx