Re: [RFC PATCH v3 2/2] drm/i915/gen7: Clear all EU/L3 residual contexts

2020-01-20 Thread Chris Wilson
Quoting Akeem G Abodunrin (2020-01-16 17:46:55)
> +static u32
> +gen7_fill_interface_descriptor(struct batch_chunk *state,
> +  const struct batch_vals *bv,
> +  const struct cb_kernel *kernel,
> +  unsigned int count)
> +{
> +   u32 *cs = batch_alloc_items(state, 32, 8 * count);
> +   u32 offset = batch_offset(state, cs);
> +
> +   *cs++ = gen7_fill_kernel_data(state, kernel->data, kernel->size);
> +   *cs++ = (1 << 7) | (1 << 13);
> +   *cs++ = 0;
> +   *cs++ = (gen7_fill_binding_table(state, bv) - state->offset) | 1;
> +   *cs++ = 0;
> +   *cs++ = 0;
> +   *cs++ = 0;
> +   *cs++ = 0;
> +   /* 1 - 63dummy idds */
> +   memset32(cs, 0x00, (count - 1) * 8);
> +   batch_advance(state, cs);

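cs is not at the end of the packet here: only the first 8 dwords were
written through cs, and the memset32() fills the remaining (count - 1) * 8
dwords without advancing it. The check needs [cs + (count - 1) * 8].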

> +
> +   return offset;
> +}

All others look ok.
-Chris
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[RFC PATCH v3 2/2] drm/i915/gen7: Clear all EU/L3 residual contexts

2020-01-16 Thread Akeem G Abodunrin
From: Prathap Kumar Valsan 

 NOTE: 
This series is in active development and is not intended to be merged to
mainline in its current form. The intent of the RFC is simply to outline
the strategy for the mitigation, as a focus for active discussion, and
to openly share progress. There has been only minimal attention paid to
performance thus far, as the focus is on robustness. It is not
anticipated that there will be any measurable performance impact in the
final version.
 END NOTE 

On gen7 and gen7.5 devices, there could be leftover data residuals in
EU/L3 from the retiring context. This patch introduces a workaround to clear
those residual contexts, by submitting a batch buffer with a dedicated HW
context to the GPU with ring allocation for each context switch.

v2: Addressed comments about unused code and code formatting, and included
additional debug code as suggested by Chris Wilson.
v3: Expand debug code for every batch_alloc_items() call...
Current patch series shows significant performance improvements, on par
with current drm-tips.

Signed-off-by: Mika Kuoppala 
Signed-off-by: Prathap Kumar Valsan 
Signed-off-by: Akeem G Abodunrin 
Cc: Chris Wilson 
Cc: Balestrieri Francesco 
Cc: Bloomfield Jon 
Cc: Dutt Sudeep 
---
 drivers/gpu/drm/i915/Makefile |   1 +
 drivers/gpu/drm/i915/gt/gen7_renderclear.c| 524 ++
 drivers/gpu/drm/i915/gt/gen7_renderclear.h|  15 +
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |  17 +-
 .../gpu/drm/i915/gt/intel_ring_submission.c   |   3 +-
 drivers/gpu/drm/i915/i915_utils.h |   5 +
 6 files changed, 561 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/gen7_renderclear.c
 create mode 100644 drivers/gpu/drm/i915/gt/gen7_renderclear.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 3c88d7d8c764..f96bae664a03 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -78,6 +78,7 @@ gt-y += \
gt/debugfs_gt.o \
gt/debugfs_gt_pm.o \
gt/gen6_ppgtt.o \
+   gt/gen7_renderclear.o \
gt/gen8_ppgtt.o \
gt/intel_breadcrumbs.o \
gt/intel_context.o \
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c 
b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
new file mode 100644
index 000000000000..5425c2149b30
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
@@ -0,0 +1,524 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gen7_renderclear.h"
+#include "i915_drv.h"
+#include "i915_utils.h"
+#include "intel_gpu_commands.h"
+
+#define MAX_URB_ENTRIES 64
+#define STATE_SIZE (4 * 1024)
+#define GT3_INLINE_DATA_DELAYS 0x1E00
+#define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS))
+
+/* Media CB Kernel for gen7 devices */
+static const u32 cb7_kernel[][4] = {
+   { 0x0001, 0x26020128, 0x0024, 0x },
+   { 0x0040, 0x20280c21, 0x0028, 0x0001 },
+   { 0x0110, 0x2c20, 0x002c, 0x },
+   { 0x00010220, 0x34001c00, 0x1400, 0x002c },
+   { 0x0061, 0x20600061, 0x, 0x },
+   { 0x0008, 0x20601c85, 0x0e00, 0x000c },
+   { 0x0005, 0x20601ca5, 0x0060, 0x0001 },
+   { 0x0008, 0x20641c85, 0x0e00, 0x000d },
+   { 0x0005, 0x20641ca5, 0x0064, 0x0003 },
+   { 0x0041, 0x207424a5, 0x0064, 0x0034 },
+   { 0x0040, 0x206014a5, 0x0060, 0x0074 },
+   { 0x0008, 0x20681c85, 0x0e00, 0x0008 },
+   { 0x0005, 0x20681ca5, 0x0068, 0x000f },
+   { 0x0041, 0x20701ca5, 0x0060, 0x0010 },
+   { 0x0040, 0x206814a5, 0x0068, 0x0070 },
+   { 0x0061, 0x20a00061, 0x, 0x },
+   { 0x0005, 0x206c1c85, 0x0e00, 0x0007 },
+   { 0x0041, 0x206c1ca5, 0x006c, 0x0004 },
+   { 0x0061, 0x20800021, 0x008d, 0x },
+   { 0x0001, 0x20800021, 0x006c, 0x },
+   { 0x0001, 0x20840021, 0x0068, 0x },
+   { 0x0001, 0x20880061, 0x, 0x0003 },
+   { 0x0005, 0x208c0d21, 0x0086, 0x },
+   { 0x05600032, 0x20a01fa1, 0x008d0080, 0x02190001 },
+   { 0x0040, 0x20a01ca5, 0x00a0, 0x0001 },
+   { 0x05600032, 0x20a01fa1, 0x008d0080, 0x040a8001 },
+   { 0x0240, 0x20281c21, 0x0028, 0x },
+   { 0x00010220, 0x34001c00, 0x1400, 0xfffc },
+   { 0x0001, 0x26020128, 0x0024, 0x },
+   { 0x0001, 0x22e4, 0x, 0x },
+   { 0x0001, 0x220801ec, 0x, 0x007f007f },
+   { 0x0061, 0x20400021, 0x008d, 0x },
+   { 0x0061, 0x2fe00021, 0x008d, 0x },
+   { 0x0021, 0x20400121, 0x00450020, 0x },
+   { 0x0001, 0x20480061, 0x, 0x000f000f },
+   { 0x0005, 0x204c0