Re: linux-next: build failure after merge of the drm-misc tree

2021-10-13 Thread Karol Herbst
On Thu, Oct 14, 2021 at 5:02 AM  wrote:
>
> Hi,
>
> I reviewed the code.
>
> It seems I forgot to delete the definition of the variable "inst"; I'm sorry
> for that. :(
>
> I'll submit another patch soon.
>

I already wrote the patch and pushed it:
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit?id=381ba6a6baf104b572379c6b2deab884555104d4
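
For reference, the fix amounts to dropping the now-unused local from
gp100_vmm_fault_cancel(); a minimal sketch only, the authoritative change is
the commit linked above:

--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
@@ gp100_vmm_fault_cancel() @@
-	u32 inst, aper;
+	u32 aper;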

>
> > Hi all,
> >
> > After merging the drm-misc tree, today's linux-next build (x86_64
> > allmodconfig) failed like this:
> >
> > drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c: In function 
> > 'gp100_vmm_fault_cancel':
> > drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c:491:6: error: unused 
> > variable 'inst' [-Werror=unused-variable]
> >   491 |  u32 inst, aper;
> >   |  ^~~~
> > cc1: all warnings being treated as errors
> >
> > Caused by commit
> >
> >   404046cf4805 ("drm/nouveau/mmu/gp100-: drop unneeded assignment in the if 
> > condition.")
> >
> > I have used the drm-misc tree from next-20211011 for today.
> >
> > --
> > Cheers,
> > Stephen Rothwell



Re: [PATCH 08/25] drm/i915/guc: Add multi-lrc context registration

2021-10-13 Thread Matthew Brost
On Wed, Oct 13, 2021 at 05:10:39PM -0700, John Harrison wrote:
> On 10/13/2021 13:42, Matthew Brost wrote:
> > Add multi-lrc context registration H2G. In addition a workqueue and
> > process descriptor are setup during multi-lrc context registration as
> > these data structures are needed for multi-lrc submission.
> > 
> > v2:
> >   (John Harrison)
> >- Move GuC specific fields into sub-struct
> >- Clean up WQ defines
> >- Add comment explaining math to derive WQ / PD address
> > v3:
> >   (John Harrison)
> >- Add PARENT_SCRATCH_SIZE define
> >- Update comment explaining multi-lrc register
> > 
> > Signed-off-by: Matthew Brost 
> > ---
> >   drivers/gpu/drm/i915/gt/intel_context_types.h |  12 ++
> >   drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +
> >   .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 -
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 116 +-
> >   5 files changed, 133 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
> > b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > index 76dfca57cb45..48decb5ee954 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > @@ -239,6 +239,18 @@ struct intel_context {
> > struct intel_context *parent;
> > /** @number_children: number of children if parent */
> > u8 number_children;
> > +   /** @guc: GuC specific members for parallel submission */
> > +   struct {
> > +   /** @wqi_head: head pointer in work queue */
> > +   u16 wqi_head;
> > +   /** @wqi_tail: tail pointer in work queue */
> > +   u16 wqi_tail;
> > +   /**
> > +* @parent_page: page in context state (ce->state) used
> > +* by parent for work queue, process descriptor
> > +*/
> > +   u8 parent_page;
> > +   } guc;
> > } parallel;
> >   #ifdef CONFIG_DRM_I915_SELFTEST
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> > b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 3ef9eaf8c50e..57339d5c1fc8 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -942,6 +942,11 @@ __lrc_alloc_state(struct intel_context *ce, struct 
> > intel_engine_cs *engine)
> > context_size += PAGE_SIZE;
> > }
> > +   if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
> > +   ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
> > +   context_size += PAGE_SIZE;
> This needs to be += PARENT_SCRATCH_SIZE.
> 

Which is PAGE_SIZE. I guess we should move PARENT_SCRATCH_SIZE to
intel_context.h (or *types.h) then. Will do.

Matt
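
A minimal sketch of that follow-up, assuming PARENT_SCRATCH_SIZE keeps its
current PAGE_SIZE value and simply moves to a header that intel_lrc.c can see:

/* intel_context_types.h (or intel_context.h) */
#define PARENT_SCRATCH_SIZE	PAGE_SIZE

/* __lrc_alloc_state() */
	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
		ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
		context_size += PARENT_SCRATCH_SIZE;
	}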

> John.
> 
> > +   }
> > +
> > obj = i915_gem_object_create_lmem(engine->i915, context_size,
> >   I915_BO_ALLOC_PM_VOLATILE);
> > if (IS_ERR(obj))
> > diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
> > b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> > index 8ff58aff..ba10bd374cee 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> > @@ -142,6 +142,7 @@ enum intel_guc_action {
> > INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
> > INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
> > INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
> > +   INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
> > INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
> > INTEL_GUC_ACTION_LIMIT
> >   };
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > index fa4be13c8854..0eeb2a9feeed 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > @@ -52,8 +52,6 @@
> >   #define GUC_DOORBELL_INVALID  256
> > -#define GUC_WQ_SIZE(PAGE_SIZE * 2)
> > -
> >   /* Work queue item header definitions */
> >   #define WQ_STATUS_ACTIVE  1
> >   #define WQ_STATUS_SUSPENDED   2
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 84b8e64b148f..58a6f494be8f 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -344,6 +344,47 @@ static inline struct i915_priolist *to_priolist(struct 
> > rb_node *rb)
> > return rb_entry(rb, struct i915_priolist, node);
> >   }
> > +/*
> > + * When using multi-lrc submission a scratch memory area is reserved in the
> > + * parent's context state for the process descriptor and work queue. 
> > Currently
> > + * the scratch area is sized to a page.

[PATCH v10] drm/bridge: add it6505 driver

2021-10-13 Thread allen
This adds support for the iTE IT6505.
This device can convert DPI signal to DP output.

From: Allen Chen 
Tested-by: Hsin-yi Wang 
Signed-off-by: Hermes Wu 
Signed-off-by: Allen Chen 
---
This patch depends on 
https://patchwork.kernel.org/project/linux-mediatek/patch/20210722062246.2512666-4-...@ravnborg.org/
---
 drivers/gpu/drm/bridge/Kconfig  |8 +
 drivers/gpu/drm/bridge/Makefile |1 +
 drivers/gpu/drm/bridge/ite-it6505.c | 3335 +++
 3 files changed, 3344 insertions(+)
 create mode 100644 drivers/gpu/drm/bridge/ite-it6505.c

diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index 431b6e12a81fe..e1afe171075f5 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -74,6 +74,14 @@ config DRM_DISPLAY_CONNECTOR
  on ARM-based platforms. Saying Y here when this driver is not needed
  will not cause any issue.
 
+config DRM_ITE_IT6505
+	tristate "ITE IT6505 DisplayPort bridge"
+	depends on OF
+	select DRM_KMS_HELPER
+	select EXTCON
+	help
+	  ITE IT6505 DisplayPort bridge chip driver.
+
 config DRM_LONTIUM_LT8912B
tristate "Lontium LT8912B DSI/HDMI bridge"
depends on OF
diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile
index f2c73683cfcb7..425844c304953 100644
--- a/drivers/gpu/drm/bridge/Makefile
+++ b/drivers/gpu/drm/bridge/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_DRM_CHIPONE_ICN6211) += chipone-icn6211.o
 obj-$(CONFIG_DRM_CHRONTEL_CH7033) += chrontel-ch7033.o
 obj-$(CONFIG_DRM_CROS_EC_ANX7688) += cros-ec-anx7688.o
 obj-$(CONFIG_DRM_DISPLAY_CONNECTOR) += display-connector.o
+obj-$(CONFIG_DRM_ITE_IT6505) += ite-it6505.o
 obj-$(CONFIG_DRM_LONTIUM_LT8912B) += lontium-lt8912b.o
 obj-$(CONFIG_DRM_LONTIUM_LT9611) += lontium-lt9611.o
 obj-$(CONFIG_DRM_LONTIUM_LT9611UXC) += lontium-lt9611uxc.o
diff --git a/drivers/gpu/drm/bridge/ite-it6505.c 
b/drivers/gpu/drm/bridge/ite-it6505.c
new file mode 100644
index 0..e35b28386563b
--- /dev/null
+++ b/drivers/gpu/drm/bridge/ite-it6505.c
@@ -0,0 +1,3335 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#define REG_IC_VER 0x04
+
+#define REG_RESET_CTRL 0x05
+#define VIDEO_RESET BIT(0)
+#define AUDIO_RESET BIT(1)
+#define ALL_LOGIC_RESET BIT(2)
+#define AUX_RESET BIT(3)
+#define HDCP_RESET BIT(4)
+
+#define INT_STATUS_01 0x06
+#define INT_MASK_01 0x09
+#define INT_HPD_CHANGE 0
+#define INT_RECEIVE_HPD_IRQ 1
+#define INT_SCDT_CHANGE 2
+#define INT_HDCP_FAIL 3
+#define INT_HDCP_DONE 4
+#define BIT_OFFSET(x) (((x) - INT_STATUS_01) * BITS_PER_BYTE)
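+/*
+ * BIT_OFFSET() maps one of the three consecutive interrupt status registers
+ * (0x06, 0x07, 0x08) to a flat bit base of 0, 8 or 16, so every interrupt
+ * source defined below gets a unique bit index across all three registers.
+ */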
+#define BIT_INT_HPD INT_HPD_CHANGE
+#define BIT_INT_HPD_IRQ INT_RECEIVE_HPD_IRQ
+#define BIT_INT_SCDT INT_SCDT_CHANGE
+#define BIT_INT_HDCP_FAIL INT_HDCP_FAIL
+#define BIT_INT_HDCP_DONE INT_HDCP_DONE
+
+#define INT_STATUS_02 0x07
+#define INT_MASK_02 0x0A
+#define INT_AUX_CMD_FAIL 0
+#define INT_HDCP_KSV_CHECK 1
+#define INT_AUDIO_FIFO_ERROR 2
+#define BIT_INT_AUX_CMD_FAIL (BIT_OFFSET(0x07) + INT_AUX_CMD_FAIL)
+#define BIT_INT_HDCP_KSV_CHECK (BIT_OFFSET(0x07) + INT_HDCP_KSV_CHECK)
+#define BIT_INT_AUDIO_FIFO_ERROR (BIT_OFFSET(0x07) + INT_AUDIO_FIFO_ERROR)
+
+#define INT_STATUS_03 0x08
+#define INT_MASK_03 0x0B
+#define INT_LINK_TRAIN_FAIL 4
+#define INT_VID_FIFO_ERROR 5
+#define INT_IO_LATCH_FIFO_OVERFLOW 7
+#define BIT_INT_LINK_TRAIN_FAIL (BIT_OFFSET(0x08) + INT_LINK_TRAIN_FAIL)
+#define BIT_INT_VID_FIFO_ERROR (BIT_OFFSET(0x08) + INT_VID_FIFO_ERROR)
+#define BIT_INT_IO_FIFO_OVERFLOW (BIT_OFFSET(0x08) + INT_IO_LATCH_FIFO_OVERFLOW)
+
+#define REG_SYSTEM_STS 0x0D
+#define INT_STS BIT(0)
+#define HPD_STS BIT(1)
+#define VIDEO_STB BIT(2)
+
+#define REG_LINK_TRAIN_STS 0x0E
+#define LINK_STATE_CR BIT(2)
+#define LINK_STATE_EQ BIT(3)
+#define LINK_STATE_NORP BIT(4)
+
+#define REG_BANK_SEL 0x0F
+#define REG_CLK_CTRL0 0x10
+#define M_PCLK_DELAY 0x03
+
+#define REG_AUX_OPT 0x11
+#define AUX_AUTO_RST BIT(0)
+#define AUX_FIX_FREQ BIT(3)
+
+#define REG_DATA_CTRL0 0x12
+#define VIDEO_LATCH_EDGE BIT(4)
+#define ENABLE_PCLK_COUNTER BIT(7)
+
+#define REG_PCLK_COUNTER_VALUE 0x13
+
+#define REG_501_FIFO_CTRL 0x15
+#define RST_501_FIFO BIT(1)
+
+#define REG_TRAIN_CTRL0 0x16
+#define FORCE_LBR BIT(0)
+#define LANE_COUNT_MASK 0x06
+#define LANE_SWAP BIT(3)
+#define SPREAD_AMP_5 BIT(4)
+#define FORCE_CR_DONE BIT(5)
+#define FORCE_EQ_DONE BIT(6)
+
+#define REG_TRAIN_CTRL1 0x17
+#define AUTO_TRAIN BIT(0)
+#define MANUAL_TRAIN BIT(1)
+#define FORCE_RETRAIN BIT(2)
+
+#define REG_AUX_CTRL 0x23
+#define CLR_EDID_FIFO BIT(0)
+#define AUX_USER_MODE BIT(1)
+#define AUX_NO_SEG

Re: [PATCH 0/6] drm/i915: Failsafe migration blits

2021-10-13 Thread Dave Airlie
On Fri, 8 Oct 2021 at 23:36, Thomas Hellström
 wrote:
>
> This patch series introduces failsafe migration blits.
> The reason for this seemingly strange concept is that if the initial
> clearing or readback of LMEM fails for some reason, and we then set up
> either GPU- or CPU ptes to the allocated LMEM, we can expose old
> contents from other clients.

Can we enumerate "for some reason" here?

This feels like "security" with no defined threat model. Maybe if the
cover letter contains more details on the threat model it would make
more sense.

Dave.


Re: [PATCH 16/25] drm/i915/guc: Connect UAPI to GuC multi-lrc interface

2021-10-13 Thread John Harrison

On 10/13/2021 13:42, Matthew Brost wrote:

Introduce 'set parallel submit' extension to connect UAPI to GuC
multi-lrc interface. Kernel doc in new uAPI should explain it all.

IGT: https://patchwork.freedesktop.org/patch/447008/?series=93071&rev=1
media UMD: https://github.com/intel/media-driver/pull/1252
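
A rough userspace sketch of how a UMD might fill this extension follows. The
field names mirror the ext-> accesses in the code below; the extension name,
the exact uAPI struct layout, the helper itself and the engine class chosen
are assumptions for illustration only.

#include <stdlib.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: 2 batches per execbuf (width), each with 2 possible
 * placements (num_siblings); engines[] is indexed as i * num_siblings + j. */
static struct i915_context_engines_parallel_submit *make_parallel_ext(void)
{
	const unsigned int width = 2, num_siblings = 2;
	struct i915_context_engines_parallel_submit *p;
	unsigned int i, j;

	p = calloc(1, sizeof(*p) + width * num_siblings * sizeof(p->engines[0]));
	if (!p)
		return NULL;

	p->base.name = I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT; /* assumed name */
	p->engine_index = 0;		/* slot in the context's engine map */
	p->width = width;		/* batches submitted per execbuf */
	p->num_siblings = num_siblings;	/* placements for each batch */

	for (i = 0; i < width; i++) {
		for (j = 0; j < num_siblings; j++) {
			p->engines[i * num_siblings + j].engine_class =
				I915_ENGINE_CLASS_VIDEO;	/* example class */
			p->engines[i * num_siblings + j].engine_instance = j;
		}
	}

	/* p->base is then chained into the context's engine-map extension list. */
	return p;
}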

v2:
  (Daniel Vetter)
   - Add IGT link and placeholder for media UMD link
v3:
  (Kernel test robot)
   - Fix warning in unpin engines call
  (John Harrison)
   - Reword a bunch of the kernel doc
v4:
  (John Harrison)
   - Add comment why perma-pin is done after setting gem context
   - Update some comments / docs for proto contexts

Cc: Tvrtko Ursulin 
Signed-off-by: Matthew Brost 
---
  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 228 +-
  .../gpu/drm/i915/gem/i915_gem_context_types.h |  16 +-
  drivers/gpu/drm/i915/gt/intel_context_types.h |   9 +-
  drivers/gpu/drm/i915/gt/intel_engine.h|  12 +-
  drivers/gpu/drm/i915/gt/intel_engine_cs.c |   6 +-
  .../drm/i915/gt/intel_execlists_submission.c  |   6 +-
  drivers/gpu/drm/i915/gt/selftest_execlists.c  |  12 +-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 114 -
  include/uapi/drm/i915_drm.h   | 131 ++
  9 files changed, 503 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index d225d3dd0b40..6f23aff6e642 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -556,9 +556,150 @@ set_proto_ctx_engines_bond(struct i915_user_extension 
__user *base, void *data)
return 0;
  }
  
+static int

+set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
+ void *data)
+{
+   struct i915_context_engines_parallel_submit __user *ext =
+   container_of_user(base, typeof(*ext), base);
+   const struct set_proto_ctx_engines *set = data;
+   struct drm_i915_private *i915 = set->i915;
+   u64 flags;
+   int err = 0, n, i, j;
+   u16 slot, width, num_siblings;
+   struct intel_engine_cs **siblings = NULL;
+   intel_engine_mask_t prev_mask;
+
+   /* Disabling for now */
+   return -ENODEV;
+
+   /* FIXME: This is NIY for execlists */
+   if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
+   return -ENODEV;
+
+   if (get_user(slot, &ext->engine_index))
+   return -EFAULT;
+
+   if (get_user(width, &ext->width))
+   return -EFAULT;
+
+   if (get_user(num_siblings, &ext->num_siblings))
+   return -EFAULT;
+
+   if (slot >= set->num_engines) {
+   drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
+   slot, set->num_engines);
+   return -EINVAL;
+   }
+
+   if (set->engines[slot].type != I915_GEM_ENGINE_TYPE_INVALID) {
+   drm_dbg(&i915->drm,
+   "Invalid placement[%d], already occupied\n", slot);
+   return -EINVAL;
+   }
+
+   if (get_user(flags, &ext->flags))
+   return -EFAULT;
+
+   if (flags) {
+   drm_dbg(&i915->drm, "Unknown flags 0x%02llx", flags);
+   return -EINVAL;
+   }
+
+   for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+   err = check_user_mbz(&ext->mbz64[n]);
+   if (err)
+   return err;
+   }
+
+   if (width < 2) {
+   drm_dbg(&i915->drm, "Width (%d) < 2\n", width);
+   return -EINVAL;
+   }
+
+   if (num_siblings < 1) {
+   drm_dbg(&i915->drm, "Number siblings (%d) < 1\n",
+   num_siblings);
+   return -EINVAL;
+   }
+
+   siblings = kmalloc_array(num_siblings * width,
+sizeof(*siblings),
+GFP_KERNEL);
+   if (!siblings)
+   return -ENOMEM;
+
+   /* Create contexts / engines */
+   for (i = 0; i < width; ++i) {
+   intel_engine_mask_t current_mask = 0;
+   struct i915_engine_class_instance prev_engine;
+
+   for (j = 0; j < num_siblings; ++j) {
+   struct i915_engine_class_instance ci;
+
+   n = i * num_siblings + j;
+   if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
+   err = -EFAULT;
+   goto out_err;
+   }
+
+   siblings[n] =
+   intel_engine_lookup_user(i915, ci.engine_class,
+ci.engine_instance);
+   if (!siblings[n]) {
+   drm_dbg(&i915->drm,
+   "Invalid sibling[%d]: { class:%d, inst:%d 
}\n",
+

Re: [PATCH 22/25] drm/i915: Make request conflict tracking understand parallel submits

2021-10-13 Thread John Harrison

On 10/13/2021 13:42, Matthew Brost wrote:

If an object in the excl or shared slot is a composite fence from a
parallel submit and the current request in the conflict tracking is from
the same parallel context there is no need to enforce ordering as the
ordering is already implicit. Make the request conflict tracking
understand this by comparing a parallel submit's parent context and
skipping conflict insertion if the values match.

v2:
  (John Harrison)
   - Reword commit message

Signed-off-by: Matthew Brost 

Reviewed-by: John Harrison 


---
  drivers/gpu/drm/i915/i915_request.c | 43 +++--
  1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 8bdf9f2f9b90..820a1f38b271 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1335,6 +1335,25 @@ i915_request_await_external(struct i915_request *rq, 
struct dma_fence *fence)
return err;
  }
  
+static inline bool is_parallel_rq(struct i915_request *rq)

+{
+   return intel_context_is_parallel(rq->context);
+}
+
+static inline struct intel_context *request_to_parent(struct i915_request *rq)
+{
+   return intel_context_to_parent(rq->context);
+}
+
+static bool is_same_parallel_context(struct i915_request *to,
+struct i915_request *from)
+{
+   if (is_parallel_rq(to))
+   return request_to_parent(to) == request_to_parent(from);
+
+   return false;
+}
+
  int
  i915_request_await_execution(struct i915_request *rq,
 struct dma_fence *fence)
@@ -1366,11 +1385,14 @@ i915_request_await_execution(struct i915_request *rq,
 * want to run our callback in all cases.
 */
  
-		if (dma_fence_is_i915(fence))

+   if (dma_fence_is_i915(fence)) {
+   if (is_same_parallel_context(rq, to_request(fence)))
+   continue;
ret = __i915_request_await_execution(rq,
 to_request(fence));
-   else
+   } else {
ret = i915_request_await_external(rq, fence);
+   }
if (ret < 0)
return ret;
} while (--nchild);
@@ -1471,10 +1493,13 @@ i915_request_await_dma_fence(struct i915_request *rq, 
struct dma_fence *fence)
 fence))
continue;
  
-		if (dma_fence_is_i915(fence))

+   if (dma_fence_is_i915(fence)) {
+   if (is_same_parallel_context(rq, to_request(fence)))
+   continue;
ret = i915_request_await_request(rq, to_request(fence));
-   else
+   } else {
ret = i915_request_await_external(rq, fence);
+   }
if (ret < 0)
return ret;
  
@@ -1525,16 +1550,6 @@ i915_request_await_object(struct i915_request *to,

return ret;
  }
  
-static inline bool is_parallel_rq(struct i915_request *rq)

-{
-   return intel_context_is_parallel(rq->context);
-}
-
-static inline struct intel_context *request_to_parent(struct i915_request *rq)
-{
-   return intel_context_to_parent(rq->context);
-}
-
  static struct i915_request *
  __i915_request_ensure_parallel_ordering(struct i915_request *rq,
struct intel_timeline *timeline)




Re: [PATCH 21/25] drm/i915/guc: Handle errors in multi-lrc requests

2021-10-13 Thread John Harrison

On 10/13/2021 13:42, Matthew Brost wrote:

If an error occurs in the front end when multi-lrc requests are getting
generated, we need to skip these in the backend but we still need to
emit the breadcrumb seqno. An issue arises because with multi-lrc
breadcrumbs there is a handshake between the parent and children to make
forward progress. If all the requests are not present, this handshake
doesn't work. To work around this, if a multi-lrc request has an error we
skip the handshake but still emit the breadcrumb seqno.

v2:
  (John Harrison)
   - Add comment explaining the skipping of the handshake logic
   - Fix typos in the commit message
v3:
  (John Harrison)
   - Fix up some comments about the math to NOP the ring

Signed-off-by: Matthew Brost 

Reviewed-by: John Harrison 


---
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 69 ++-
  1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index bfafe996e2d2..80d8ce68ff59 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -4076,8 +4076,8 @@ static int 
emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
  }
  
  static u32 *

-emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
-u32 *cs)
+__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+  u32 *cs)
  {
struct intel_context *ce = rq->context;
u8 i;
@@ -4105,6 +4105,45 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct 
i915_request *rq,
  get_children_go_addr(ce),
  0);
  
+	return cs;

+}
+
+/*
+ * If this is true, a submission of multi-lrc requests had an error and the
+ * requests need to be skipped. The front end (execbuf IOCTL) should've called
+ * i915_request_skip which squashes the BB but we still need to emit the fini
+ * breadcrumb seqno write. At this point we don't know how many of the
+ * requests in the multi-lrc submission were generated so we can't do the
+ * handshake between the parent and children (e.g. if 4 requests should be
+ * generated but the 2nd hit an error only 1 would be seen by the GuC backend).
+ * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
+ * has occurred on any of the requests in the submission / relationship.
+ */
+static inline bool skip_handshake(struct i915_request *rq)
+{
+   return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
+}
+
+static u32 *
+emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+u32 *cs)
+{
+   struct intel_context *ce = rq->context;
+
+   GEM_BUG_ON(!intel_context_is_parent(ce));
+
+   if (unlikely(skip_handshake(rq))) {
+   /*
+* NOP everything in 
__emit_fini_breadcrumb_parent_no_preempt_mid_batch,
+* the -6 comes from the length of the emits below.
+*/
+   memset(cs, 0, sizeof(u32) *
+  (ce->engine->emit_fini_breadcrumb_dw - 6));
+   cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+   } else {
+   cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
+   }
+
/* Emit fini breadcrumb */
cs = gen8_emit_ggtt_write(cs,
  rq->fence.seqno,
@@ -4121,7 +4160,8 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct 
i915_request *rq,
  }
  
  static u32 *

-emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, u32 
*cs)
+__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs)
  {
struct intel_context *ce = rq->context;
struct intel_context *parent = intel_context_to_parent(ce);
@@ -4148,6 +4188,29 @@ emit_fini_breadcrumb_child_no_preempt_mid_batch(struct 
i915_request *rq, u32 *cs
*cs++ = get_children_go_addr(parent);
*cs++ = 0;
  
+	return cs;

+}
+
+static u32 *
+emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+   u32 *cs)
+{
+   struct intel_context *ce = rq->context;
+
+   GEM_BUG_ON(!intel_context_is_child(ce));
+
+   if (unlikely(skip_handshake(rq))) {
+   /*
+* NOP everything in 
__emit_fini_breadcrumb_child_no_preempt_mid_batch,
+* the -6 comes from the length of the emits below.
+*/
+   memset(cs, 0, sizeof(u32) *
+  (ce->engine->emit_fini_breadcrumb_dw - 6));
+   cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+   } else {
+   cs = __emit_fini_breadcrumb_child_no_preempt_mid_batc

Re: [PATCH 20/25] drm/i915: Multi-BB execbuf

2021-10-13 Thread John Harrison

On 10/13/2021 13:42, Matthew Brost wrote:

Allow multiple batch buffers to be submitted in a single execbuf IOCTL
after a context has been configured with the 'set_parallel' extension.
The number of batches is implicit based on the context's configuration.

This is implemented with a series of loops. First a loop is used to find
all the batches, a loop to pin all the HW contexts, a loop to create all
the requests, a loop to submit (emit BB start, etc...) all the requests,
a loop to tie the requests to the VMAs they touch, and finally a loop to
commit the requests to the backend.

A composite fence is also created for the generated requests to return
to the user and to stick in dma resv slots.

No behavior of the existing IOCTL should change aside from when throttling
because the ring for a context is full. In this situation, i915 will now wait
while holding the object locks. This change was made because the code is much
simpler if it waits while holding the locks, and we believe there isn't a huge
benefit to dropping them. If this proves false, we can restructure the code to
drop the locks during the wait.

IGT: https://patchwork.freedesktop.org/patch/447008/?series=93071&rev=1
media UMD: https://github.com/intel/media-driver/pull/1252

v2:
  (Matthew Brost)
   - Return proper error value if i915_request_create fails
v3:
  (John Harrison)
   - Add comment explaining create / add order loops + locking
   - Update commit message explaining the difference in IOCTL behavior
   - Line wrap some comments
   - eb_add_request returns void
   - Return -EINVAL rather than triggering BUG_ON if cmd parser used
  (Checkpatch)
   - Check eb->batch_len[*current_batch]
v4:
  (CI)
   - Set batch len if passed in via execbuf args
   - Call __i915_request_skip after __i915_request_commit
  (Kernel test robot)
   - Initialize rq to NULL in eb_pin_timeline

Signed-off-by: Matthew Brost 
---
  .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 783 --
  drivers/gpu/drm/i915/gt/intel_context.h   |   8 +-
  drivers/gpu/drm/i915/gt/intel_context_types.h |  10 +
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   2 +
  drivers/gpu/drm/i915/i915_request.h   |   9 +
  drivers/gpu/drm/i915/i915_vma.c   |  21 +-
  drivers/gpu/drm/i915/i915_vma.h   |  13 +-
  7 files changed, 595 insertions(+), 251 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index c75afc8784e3..6509c9d8c298 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -246,17 +246,25 @@ struct i915_execbuffer {
struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
struct eb_vma *vma;
  
-	struct intel_engine_cs *engine; /** engine to queue the request to */

+   struct intel_gt *gt; /* gt for the execbuf */
struct intel_context *context; /* logical state for the request */
struct i915_gem_context *gem_context; /** caller's context */
  
-	struct i915_request *request; /** our request to build */

-   struct eb_vma *batch; /** identity of the batch obj/vma */
+   /** our requests to build */
+   struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
+   /** identity of the batch obj/vma */
+   struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1];
struct i915_vma *trampoline; /** trampoline used for chaining */
  
+	/** used for excl fence in dma_resv objects when > 1 BB submitted */

+   struct dma_fence *composite_fence;
+
/** actual size of execobj[] as we may extend it for the cmdparser */
unsigned int buffer_count;
  
+	/* number of batches in execbuf IOCTL */

+   unsigned int num_batches;
+
/** list of vma not yet bound during reservation phase */
struct list_head unbound;
  
@@ -283,7 +291,8 @@ struct i915_execbuffer {
  
  	u64 invalid_flags; /** Set of execobj.flags that are invalid */
  
-	u64 batch_len; /** Length of batch within object */

+   /** Length of batch within object */
+   u64 batch_len[MAX_ENGINE_INSTANCE + 1];
u32 batch_start_offset; /** Location within object of batch */
u32 batch_flags; /** Flags composed for emit_bb_start() */
struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch 
buffer */
@@ -301,14 +310,13 @@ struct i915_execbuffer {
  };
  
  static int eb_parse(struct i915_execbuffer *eb);

-static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
- bool throttle);
+static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
  static void eb_unpin_engine(struct i915_execbuffer *eb);
  
  static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)

  {
-   return intel_engine_requires_cmd_parser(eb->engine) ||
-   (intel_engine_using_cmd_parser(eb->engine) &&
+   return intel_engine_requires_cmd_parser(eb->context->engine)

Re: [PATCH 19/25] drm/i915/guc: Implement no mid batch preemption for multi-lrc

2021-10-13 Thread John Harrison

On 10/13/2021 13:42, Matthew Brost wrote:

For some users of multi-lrc, e.g. split frame, it isn't safe to preempt
mid BB. To safely enable preemption at the BB boundary, a handshake
between parent and child is needed, syncing the set of BBs at the
beginning and end of each batch. This is implemented via custom
emit_bb_start & emit_fini_breadcrumb functions and enabled by default if
a context is configured via the set parallel extension.

Lastly, this patch updates the process descriptor to the correct size as
the memory used in the handshake is directly after the process
descriptor.
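
Conceptually, the handshake is a join/go rendezvous through the parent's
scratch page. The CPU-side model below is purely illustrative (made-up names,
plain loads/stores); the real implementation emits the equivalent GPU commands
in the custom emit_bb_start / emit_fini_breadcrumb paths shown in the diff.

#include <linux/compiler.h>
#include <asm/processor.h>

/* Illustrative layout only; the real slots are cacheline-spaced in the scratch page. */
struct parent_scratch_sync {
	unsigned int go;	/* written by the parent, polled by the children */
	unsigned int join[8];	/* one slot per child */
};

/* Parent at a BB boundary: wait for every child to arrive, then release them. */
static void parent_sync_point(struct parent_scratch_sync *s, unsigned int seqno,
			      unsigned int n_children)
{
	unsigned int i;

	for (i = 0; i < n_children; i++)
		while (READ_ONCE(s->join[i]) != seqno)
			cpu_relax();
	WRITE_ONCE(s->go, seqno);
}

/* Child at a BB boundary: report in, then wait for the parent's go signal. */
static void child_sync_point(struct parent_scratch_sync *s, unsigned int seqno,
			     unsigned int child_index)
{
	WRITE_ONCE(s->join[child_index], seqno);
	while (READ_ONCE(s->go) != seqno)
		cpu_relax();
}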

v2:
  (John Harrison)
   - Fix a few comments' wording
   - Add structure for parent page layout
v3:
  (John Harrison)
   - A structure for sync semaphore
   - Use offsetof to calc address
   - Update commit message

Signed-off-by: Matthew Brost 
---
  drivers/gpu/drm/i915/gt/intel_context.c   |   2 +-
  drivers/gpu/drm/i915/gt/intel_context_types.h |   2 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 +-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 333 +-
  4 files changed, 326 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 6aab60584ee5..5634d14052bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -570,7 +570,7 @@ void intel_context_bind_parent_child(struct intel_context 
*parent,
GEM_BUG_ON(intel_context_is_child(child));
GEM_BUG_ON(intel_context_is_parent(child));
  
-	parent->parallel.number_children++;

+   parent->parallel.child_index = parent->parallel.number_children++;
list_add_tail(&child->parallel.child_link,
  &parent->parallel.child_list);
child->parallel.parent = parent;
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 1d880303a7e4..95a5b94b4ece 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -250,6 +250,8 @@ struct intel_context {
struct i915_request *last_rq;
/** @number_children: number of children if parent */
u8 number_children;
+   /** @child_index: index into child_list if child */
+   u8 child_index;
/** @guc: GuC specific members for parallel submission */
struct {
/** @wqi_head: head pointer in work queue */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index c14fc15dd3a8..2eba6b598e66 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -186,7 +186,7 @@ struct guc_process_desc {
u32 wq_status;
u32 engine_presence;
u32 priority;
-   u32 reserved[30];
+   u32 reserved[36];
  } __packed;
  
  #define CONTEXT_REGISTRATION_FLAG_KMD	BIT(0)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 09a3a9dd7ff6..ae08a196ba0a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -11,6 +11,7 @@
  #include "gt/intel_context.h"
  #include "gt/intel_engine_pm.h"
  #include "gt/intel_engine_heartbeat.h"
+#include "gt/intel_gpu_commands.h"
  #include "gt/intel_gt.h"
  #include "gt/intel_gt_irq.h"
  #include "gt/intel_gt_pm.h"
@@ -368,11 +369,16 @@ static inline struct i915_priolist *to_priolist(struct 
rb_node *rb)
  
  /*

   * When using multi-lrc submission a scratch memory area is reserved in the
- * parent's context state for the process descriptor and work queue. Currently
- * the scratch area is sized to a page.
+ * parent's context state for the process descriptor, work queue, and handhake

handhake -> handshake


+ * between the parent + children contexts to insert safe preemption points
+ * between each of BBs. Currently the scratch area is sized to a page.

of BBs -> of the BBs

With those fixed:
Reviewed-by: John Harrison 



   *
   * The layout of this scratch area is below:
   * 0  guc_process_desc
+ * + sizeof(struct guc_process_desc)   child go
+ * + CACHELINE_BYTES   child join[0]
+ * ...
+ * + CACHELINE_BYTES   child join[n - 1]
   * ...unused
   * PARENT_SCRATCH_SIZE / 2work queue start
   * ...work queue
@@ -381,7 +387,25 @@ static inline struct i915_priolist *to_priolist(struct 
rb_node *rb)
  #define PARENT_SCRATCH_SIZE   PAGE_SIZE
  #define WQ_SIZE   (PARENT_SCRATCH_SIZE / 2)
  #define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE)
-static u32 __get_process_desc_offset(struct intel_context *ce)
+
+struct sync

Re: [PATCH 14/25] drm/i915/guc: Implement multi-lrc reset

2021-10-13 Thread John Harrison

On 10/13/2021 13:42, Matthew Brost wrote:

Update context and full GPU reset to work with multi-lrc. The idea is that
the parent context tracks all the active requests in flight for itself and
its children. The parent context owns the reset, replaying / canceling
requests as needed.

v2:
  (John Harrison)
   - Simply loop in find active request
   - Add comments to find active request / reset loop
v3:
  (John Harrison)
   - s/its'/its/g
   - Fix comment when searching for active request
   - Reorder if state in __guc_reset_context

Signed-off-by: Matthew Brost 

Reviewed-by: John Harrison 


---
  drivers/gpu/drm/i915/gt/intel_context.c   | 15 +++-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 69 ++-
  2 files changed, 63 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 79f321c6c008..6aab60584ee5 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -529,20 +529,29 @@ struct i915_request *intel_context_create_request(struct 
intel_context *ce)
  
  struct i915_request *intel_context_find_active_request(struct intel_context *ce)

  {
+   struct intel_context *parent = intel_context_to_parent(ce);
struct i915_request *rq, *active = NULL;
unsigned long flags;
  
  	GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));
  
-	spin_lock_irqsave(&ce->guc_state.lock, flags);

-   list_for_each_entry_reverse(rq, &ce->guc_state.requests,
+   /*
+* We search the parent list to find an active request on the submitted
+* context. The parent list contains the requests for all the contexts
+* in the relationship so we have to do a compare of each request's
+* context.
+*/
+   spin_lock_irqsave(&parent->guc_state.lock, flags);
+   list_for_each_entry_reverse(rq, &parent->guc_state.requests,
sched.link) {
+   if (rq->context != ce)
+   continue;
if (i915_request_completed(rq))
break;
  
  		active = rq;

}
-   spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+   spin_unlock_irqrestore(&parent->guc_state.lock, flags);
  
  	return active;

  }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index f690b7c2b295..bc052d206861 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -683,6 +683,11 @@ static inline int rq_prio(const struct i915_request *rq)
return rq->sched.attr.priority;
  }
  
+static inline bool is_multi_lrc(struct intel_context *ce)

+{
+   return intel_context_is_parallel(ce);
+}
+
  static bool is_multi_lrc_rq(struct i915_request *rq)
  {
return intel_context_is_parallel(rq->context);
@@ -1218,10 +1223,15 @@ __unwind_incomplete_requests(struct intel_context *ce)
  
  static void __guc_reset_context(struct intel_context *ce, bool stalled)

  {
+   bool local_stalled;
struct i915_request *rq;
unsigned long flags;
u32 head;
+   int i, number_children = ce->parallel.number_children;
bool skip = false;
+   struct intel_context *parent = ce;
+
+   GEM_BUG_ON(intel_context_is_child(ce));
  
  	intel_context_get(ce);
  
@@ -1247,25 +1257,38 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)

if (unlikely(skip))
goto out_put;
  
-	rq = intel_context_find_active_request(ce);

-   if (!rq) {
-   head = ce->ring->tail;
-   stalled = false;
-   goto out_replay;
-   }
+   /*
+* For each context in the relationship find the hanging request
+* resetting each context / request as needed
+*/
+   for (i = 0; i < number_children + 1; ++i) {
+   if (!intel_context_is_pinned(ce))
+   goto next_context;
+
+   local_stalled = false;
+   rq = intel_context_find_active_request(ce);
+   if (!rq) {
+   head = ce->ring->tail;
+   goto out_replay;
+   }
  
-	if (!i915_request_started(rq))

-   stalled = false;
+   if (i915_request_started(rq))
+   local_stalled = true;
  
-	GEM_BUG_ON(i915_active_is_idle(&ce->active));

-   head = intel_ring_wrap(ce->ring, rq->head);
-   __i915_request_reset(rq, stalled);
+   GEM_BUG_ON(i915_active_is_idle(&ce->active));
+   head = intel_ring_wrap(ce->ring, rq->head);
  
+		__i915_request_reset(rq, local_stalled && stalled);

  out_replay:
-   guc_reset_state(ce, head, stalled);
-   __unwind_incomplete_requests(ce);
+   guc_reset_state(ce, head, local_stalled && stalled);
+next_context:
+   if (i != number_children)
+   

Re: [PATCH 12/25] drm/i915/guc: Implement multi-lrc submission

2021-10-13 Thread John Harrison

On 10/13/2021 13:42, Matthew Brost wrote:

Implement multi-lrc submission via a single workqueue entry and single
H2G. The workqueue entry contains an updated tail value for each
request, of all the contexts in the multi-lrc submission, and updates
these values simultaneously. As such, the tasklet and bypass path have
been updated to coalesce requests into a single submission.

v2:
  (John Harrison)
   - s/wqe/wqi
   - Use FIELD_PREP macros
   - Add GEM_BUG_ONs to ensure length fits within field
   - Add comment / white space to intel_guc_write_barrier
  (Kernel test robot)
   - Make need_tasklet a static function
v3:
  (Docs)
   - A comment for submission_stall_reason
v4:
  (Kernel test robot)
   - Initialize return value in bypass tasklet submit function
  (John Harrison)
   - Add comment near work queue defs
   - Add BUILD_BUG_ON to ensure WQ_SIZE is a power of 2
   - Update write_barrier comment to talk about work queue

Signed-off-by: Matthew Brost 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc.c|  29 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc.h|  11 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  24 +-
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  30 +-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 323 +++---
  drivers/gpu/drm/i915/i915_request.h   |   8 +
  6 files changed, 350 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 8f8182bf7c11..6e228343e8cb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -756,3 +756,32 @@ void intel_guc_load_status(struct intel_guc *guc, struct 
drm_printer *p)
}
}
  }
+
+void intel_guc_write_barrier(struct intel_guc *guc)
+{
+   struct intel_gt *gt = guc_to_gt(guc);
+
+   if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
+   /*
+* Ensure intel_uncore_write_fw can be used rather than
+* intel_uncore_write.
+*/
+   GEM_BUG_ON(guc->send_regs.fw_domains);
+
+   /*
+* This register is used by the i915 and GuC for MMIO based
+* communication. Once we are in this code CTBs are the only
+* method the i915 uses to communicate with the GuC so it is
+* safe to write to this register (a value of 0 is NOP for MMIO
+* communication). If we ever start mixing CTBs and MMIOs a new
+* register will have to be chosen. This function is also used
+* to enforce ordering of a work queue item write and an update
+* to the process descriptor. When a work queue is being used,
+* CTBs are also the only mechanism of communication.
+*/
+   intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0);
+   } else {
+   /* wmb() sufficient for a barrier if in smem */
+   wmb();
+   }
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 4ca197f400ba..31cf9fb48c7e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -46,6 +46,15 @@ struct intel_guc {
 * submitted until the stalled request is processed.
 */
struct i915_request *stalled_request;
+   /**
+* @submission_stall_reason: reason why submission is stalled
+*/
+   enum {
+   STALL_NONE,
+   STALL_REGISTER_CONTEXT,
+   STALL_MOVE_LRC_TAIL,
+   STALL_ADD_REQUEST,
+   } submission_stall_reason;
  
  	/* intel_guc_recv interrupt related state */

/** @irq_lock: protects GuC irq state */
@@ -367,4 +376,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc 
*guc);
  
  void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
  
+void intel_guc_write_barrier(struct intel_guc *guc);

+
  #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 0a3504bc0b61..a0cc34be7b56 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -383,28 +383,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct)
return ++ct->requests.last_fence;
  }
  
-static void write_barrier(struct intel_guc_ct *ct)

-{
-   struct intel_guc *guc = ct_to_guc(ct);
-   struct intel_gt *gt = guc_to_gt(guc);
-
-   if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
-   GEM_BUG_ON(guc->send_regs.fw_domains);
-   /*
-* This register is used by the i915 and GuC for MMIO based
-* communication. Once we are in this code CTBs are the only
-* method the i915 uses to communicate with the GuC so it is
-* safe to write to this register (a value of 0 is NOP for MMIO
-   

Re: [Freedreno] [PATCH v4 00/24] drm/bridge: Make panel and bridge probe order consistent

2021-10-13 Thread Rob Clark
On Wed, Oct 13, 2021 at 7:16 AM Maxime Ripard  wrote:
>
> Hi Caleb,
>
> On Thu, Sep 30, 2021 at 09:20:52PM +0100, Caleb Connolly wrote:
> > Hi,
> >
> > On 30/09/2021 20:49, Amit Pundir wrote:
> > > On Thu, 30 Sept 2021 at 04:50, Rob Clark  wrote:
> > > >
> > > > On Wed, Sep 29, 2021 at 2:51 PM John Stultz  
> > > > wrote:
> > > > >
> > > > > On Wed, Sep 29, 2021 at 2:32 PM John Stultz  
> > > > > wrote:
> > > > > > On Wed, Sep 29, 2021 at 2:27 PM John Stultz 
> > > > > >  wrote:
> > > > > > > On Fri, Sep 10, 2021 at 3:12 AM Maxime Ripard  
> > > > > > > wrote:
> > > > > > > > The best practice to avoid those issues is to register its 
> > > > > > > > functions only after
> > > > > > > > all its dependencies are live. We also shouldn't wait any 
> > > > > > > > longer than we should
> > > > > > > > to play nice with the other components that are waiting for us, 
> > > > > > > > so in our case
> > > > > > > > that would mean moving the DSI device registration to the 
> > > > > > > > bridge probe.
> > > > > > > >
> > > > > > > > I also had a look at all the DSI hosts, and it seems that 
> > > > > > > > exynos, kirin and msm
> > > > > > > > would be affected by this and wouldn't probe anymore after 
> > > > > > > > those changes.
> > > > > > > > Exynos and kirin seems to be simple enough for a mechanical 
> > > > > > > > change (that still
> > > > > > > > requires to be tested), but the changes in msm seemed to be far 
> > > > > > > > more important
> > > > > > > > and I wasn't confortable doing them.
> > > > > > >
> > > > > > >
> > > > > > > Hey Maxime,
> > > > > > >Sorry for taking so long to get to this, but now that plumbers 
> > > > > > > is
> > > > > > > over I've had a chance to check it out on kirin
> > > > > > >
> > > > > > > Rob Clark pointed me to his branch with some fixups here:
> > > > > > > 
> > > > > > > https://gitlab.freedesktop.org/robclark/msm/-/commits/for-mripard/bridge-rework
> > > > > > >
> > > > > > > But trying to boot hikey with that, I see the following loop 
> > > > > > > indefinitely:
> > > > > > > [4.632132] adv7511 2-0039: supply avdd not found, using dummy 
> > > > > > > regulator
> > > > > > > [4.638961] adv7511 2-0039: supply dvdd not found, using dummy 
> > > > > > > regulator
> > > > > > > [4.645741] adv7511 2-0039: supply pvdd not found, using dummy 
> > > > > > > regulator
> > > > > > > [4.652483] adv7511 2-0039: supply a2vdd not found, using 
> > > > > > > dummy regulator
> > > > > > > [4.659342] adv7511 2-0039: supply v3p3 not found, using dummy 
> > > > > > > regulator
> > > > > > > [4.666086] adv7511 2-0039: supply v1p2 not found, using dummy 
> > > > > > > regulator
> > > > > > > [4.681898] adv7511 2-0039: failed to find dsi host
> > > > > >
> > > > > > I just realized Rob's tree is missing the kirin patch. My apologies!
> > > > > > I'll retest and let you know.
> > > > >
> > > > > Ok, just retested including the kirin patch and unfortunately I'm
> > > > > still seeing the same thing.  :(
> > > > >
> > > > > Will dig a bit and let you know when I find more.
> > > >
> > > > Did you have a chance to test it on anything using drm/msm with DSI
> > > > panels?  That would at least confirm that I didn't miss anything in
> > > > the drm/msm patch to swap the dsi-host vs bridge ordering..
> > >
> > > Hi, smoke tested
> > > https://gitlab.freedesktop.org/robclark/msm/-/commits/for-mripard/bridge-rework
> > > on Pocophone F1 (sdm845 / A630) with v5.15-rc3. I see no obvious
> > > regressions in my limited testing so far including video (youtube)
> > > playback.
> > Tested on the OnePlus 6 too booting AOSP, works fine. This *fixes*
> > FBDEV_EMULATION (so we can get a working framebuffer console) which was
> > otherwise broken on 5.15.
> >
> > However it spits out some warnings during boot: 
> > https://p.calebs.dev/gucysowyna.yaml
>
> Thanks for testing. It looks like the runtime_pm ordering between the
> msm devices changed a bit with the conversion Rob did.
>
> Rob, do you know what could be going on?
>

Not entirely sure.. I didn't see that first splat, but maybe I was
missing some debug config? (The 2nd one is kind of "normal", I think
related to bootloader leaving the display on)

BR,
-R


Re: [Intel-gfx] [PATCH v5] drm/i915/gt: move remaining debugfs interfaces into gt

2021-10-13 Thread Andi Shyti
Hi Lucas,

On Wed, Oct 13, 2021 at 05:04:27PM -0700, Lucas De Marchi wrote:
> On Wed, Oct 13, 2021 at 12:17:38AM +0200, Andi Shyti wrote:
> > From: Andi Shyti 
> > 
> > The following interfaces:
> > 
> >  i915_wedged
> >  i915_forcewake_user
> > 
> > are dependent on gt values. Put them inside gt/ and drop the
> > "i915_" prefix name. This would be the new structure:
> > 
> >  dri/0/gt
> >  |
> >  +-- forcewake_user
> >  |
> >  \-- reset
> > 
> > For backwards compatibility with existing igt (and the slight
> > semantic difference between operating on the i915 abi entry
> > points and the deep gt info):
> > 
> >  dri/0
> >  |
> >  +-- i915_wedged
> >  |
> >  \-- i915_forcewake_user
> > 
> > remain at the top level.
> > 
> > Signed-off-by: Andi Shyti 
> > Cc: Tvrtko Ursulin 
> > Cc: Chris Wilson 
> > Reviewed-by: Lucas De Marchi 
> 
> do you want me to push this?

yes, please.

Thanks,
Andi


Re: [PATCH 08/25] drm/i915/guc: Add multi-lrc context registration

2021-10-13 Thread John Harrison

On 10/13/2021 13:42, Matthew Brost wrote:

Add multi-lrc context registration H2G. In addition a workqueue and
process descriptor are setup during multi-lrc context registration as
these data structures are needed for multi-lrc submission.

v2:
  (John Harrison)
   - Move GuC specific fields into sub-struct
   - Clean up WQ defines
   - Add comment explaining math to derive WQ / PD address
v3:
  (John Harrison)
   - Add PARENT_SCRATCH_SIZE define
   - Update comment explaining multi-lrc register

Signed-off-by: Matthew Brost 
---
  drivers/gpu/drm/i915/gt/intel_context_types.h |  12 ++
  drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +
  .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 -
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 116 +-
  5 files changed, 133 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 76dfca57cb45..48decb5ee954 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -239,6 +239,18 @@ struct intel_context {
struct intel_context *parent;
/** @number_children: number of children if parent */
u8 number_children;
+   /** @guc: GuC specific members for parallel submission */
+   struct {
+   /** @wqi_head: head pointer in work queue */
+   u16 wqi_head;
+   /** @wqi_tail: tail pointer in work queue */
+   u16 wqi_tail;
+   /**
+* @parent_page: page in context state (ce->state) used
+* by parent for work queue, process descriptor
+*/
+   u8 parent_page;
+   } guc;
} parallel;
  
  #ifdef CONFIG_DRM_I915_SELFTEST

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3ef9eaf8c50e..57339d5c1fc8 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -942,6 +942,11 @@ __lrc_alloc_state(struct intel_context *ce, struct 
intel_engine_cs *engine)
context_size += PAGE_SIZE;
}
  
+	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {

+   ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
+   context_size += PAGE_SIZE;

This needs to be += PARENT_SCRATCH_SIZE.

John.


+   }
+
obj = i915_gem_object_create_lmem(engine->i915, context_size,
  I915_BO_ALLOC_PM_VOLATILE);
if (IS_ERR(obj))
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 8ff58aff..ba10bd374cee 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -142,6 +142,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+   INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
INTEL_GUC_ACTION_LIMIT
  };
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index fa4be13c8854..0eeb2a9feeed 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -52,8 +52,6 @@
  
  #define GUC_DOORBELL_INVALID		256
  
-#define GUC_WQ_SIZE			(PAGE_SIZE * 2)

-
  /* Work queue item header definitions */
  #define WQ_STATUS_ACTIVE  1
  #define WQ_STATUS_SUSPENDED   2
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 84b8e64b148f..58a6f494be8f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -344,6 +344,47 @@ static inline struct i915_priolist *to_priolist(struct 
rb_node *rb)
return rb_entry(rb, struct i915_priolist, node);
  }
  
+/*

+ * When using multi-lrc submission a scratch memory area is reserved in the
+ * parent's context state for the process descriptor and work queue. Currently
+ * the scratch area is sized to a page.
+ *
+ * The layout of this scratch area is below:
+ * 0   guc_process_desc
+ * ... unused
+ * PARENT_SCRATCH_SIZE / 2 work queue start
+ * ... work queue
+ * PARENT_SCRATCH_SIZE - 1 work queue end
+ */
+#define PARENT_SCRATCH_SIZEPAGE_SIZE
+#define WQ_SIZE(PARENT_SCRATCH_SIZE / 2)
+#define WQ_OFFSET

Re: [PATCH 03/25] drm/i915/guc: Take engine PM when a context is pinned with GuC submission

2021-10-13 Thread John Harrison

On 10/13/2021 13:42, Matthew Brost wrote:

Taking a PM reference to prevent intel_gt_wait_for_idle from short
circuiting while any user context has scheduling enabled. Returning GT
idle when it is not can cause all sorts of issues throughout the stack.

v2:
  (Daniel Vetter)
   - Add might_lock annotations to pin / unpin function
v3:
  (CI)
   - Drop intel_engine_pm_might_put from unpin path as an async put is
 used
v4:
  (John Harrison)
   - Make intel_engine_pm_might_get/put work with GuC virtual engines
   - Update commit message
v5:
   - Update commit message again

Signed-off-by: Matthew Brost 

Reviewed-by: John Harrison 


---
  drivers/gpu/drm/i915/gt/intel_context.c   |  2 ++
  drivers/gpu/drm/i915/gt/intel_engine_pm.h | 32 +
  drivers/gpu/drm/i915/gt/intel_gt_pm.h | 10 ++
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 36 +--
  drivers/gpu/drm/i915/intel_wakeref.h  | 12 +++
  5 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index d008ef8623ce..f98c9f470ba1 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -240,6 +240,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
if (err)
goto err_post_unpin;
  
+	intel_engine_pm_might_get(ce->engine);

+
if (unlikely(intel_context_is_closed(ce))) {
err = -ENOENT;
goto err_unlock;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h 
b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 6fdeae668e6e..d68675925b79 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -6,9 +6,11 @@
  #ifndef INTEL_ENGINE_PM_H
  #define INTEL_ENGINE_PM_H
  
+#include "i915_drv.h"

  #include "i915_request.h"
  #include "intel_engine_types.h"
  #include "intel_wakeref.h"
+#include "intel_gt_pm.h"
  
  static inline bool

  intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
@@ -31,6 +33,21 @@ static inline bool intel_engine_pm_get_if_awake(struct 
intel_engine_cs *engine)
return intel_wakeref_get_if_active(&engine->wakeref);
  }
  
+static inline void intel_engine_pm_might_get(struct intel_engine_cs *engine)

+{
+   if (!intel_engine_is_virtual(engine)) {
+   intel_wakeref_might_get(&engine->wakeref);
+   } else {
+   struct intel_gt *gt = engine->gt;
+   struct intel_engine_cs *tengine;
+   intel_engine_mask_t tmp, mask = engine->mask;
+
+   for_each_engine_masked(tengine, gt, mask, tmp)
+   intel_wakeref_might_get(&tengine->wakeref);
+   }
+   intel_gt_pm_might_get(engine->gt);
+}
+
  static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
  {
intel_wakeref_put(&engine->wakeref);
@@ -52,6 +69,21 @@ static inline void intel_engine_pm_flush(struct 
intel_engine_cs *engine)
intel_wakeref_unlock_wait(&engine->wakeref);
  }
  
+static inline void intel_engine_pm_might_put(struct intel_engine_cs *engine)

+{
+   if (!intel_engine_is_virtual(engine)) {
+   intel_wakeref_might_put(&engine->wakeref);
+   } else {
+   struct intel_gt *gt = engine->gt;
+   struct intel_engine_cs *tengine;
+   intel_engine_mask_t tmp, mask = engine->mask;
+
+   for_each_engine_masked(tengine, gt, mask, tmp)
+   intel_wakeref_might_put(&tengine->wakeref);
+   }
+   intel_gt_pm_might_put(engine->gt);
+}
+
  static inline struct i915_request *
  intel_engine_create_kernel_request(struct intel_engine_cs *engine)
  {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h 
b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index 05de6c1af25b..bc898df7a48c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -31,6 +31,11 @@ static inline bool intel_gt_pm_get_if_awake(struct intel_gt 
*gt)
	return intel_wakeref_get_if_active(&gt->wakeref);
  }
  
+static inline void intel_gt_pm_might_get(struct intel_gt *gt)

+{
+   intel_wakeref_might_get(&gt->wakeref);
+}
+
  static inline void intel_gt_pm_put(struct intel_gt *gt)
  {
	intel_wakeref_put(&gt->wakeref);
@@ -41,6 +46,11 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt)
	intel_wakeref_put_async(&gt->wakeref);
  }
  
+static inline void intel_gt_pm_might_put(struct intel_gt *gt)

+{
+   intel_wakeref_might_put(&gt->wakeref);
+}
+
  #define with_intel_gt_pm(gt, tmp) \
for (tmp = 1, intel_gt_pm_get(gt); tmp; \
 intel_gt_pm_put(gt), tmp = 0)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index d2ce47b5541e..51d3963cebbf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -157

Re: [Intel-gfx] [PATCH v5] drm/i915/gt: move remaining debugfs interfaces into gt

2021-10-13 Thread Lucas De Marchi

On Wed, Oct 13, 2021 at 12:17:38AM +0200, Andi Shyti wrote:

From: Andi Shyti 

The following interfaces:

 i915_wedged
 i915_forcewake_user

are dependent on gt values. Put them inside gt/ and drop the
"i915_" prefix name. This would be the new structure:

 dri/0/gt
 |
 +-- forcewake_user
 |
 \-- reset

For backwards compatibility with existing igt (and the slight
semantic difference between operating on the i915 abi entry
points and the deep gt info):

 dri/0
 |
 +-- i915_wedged
 |
 \-- i915_forcewake_user

remain at the top level.

Signed-off-by: Andi Shyti 
Cc: Tvrtko Ursulin 
Cc: Chris Wilson 
Reviewed-by: Lucas De Marchi 


do you want me to push this?

Lucas De Marchi


Re: [PATCH 02/25] drm/i915/guc: Take GT PM ref when deregistering context

2021-10-13 Thread John Harrison

On 10/13/2021 13:42, Matthew Brost wrote:

Take a PM reference to prevent intel_gt_wait_for_idle from short
circuiting while a deregister context H2G is in flight. To do this we must
issue the deregister H2G from a worker, as the context can be destroyed from
an atomic context and taking a GT PM ref there blows up. Previously we took a
runtime PM ref from this atomic context, which worked but will stop working
once runtime PM autosuspend is enabled.

So this patch is twofold: stop intel_gt_wait_for_idle from short
circuiting and fix runtime PM autosuspend.
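
The general shape of the fix is roughly the following (an illustrative
sketch, not the patch itself; the submission_state fields are the ones
added by the diff below, while the use of system_unbound_wq and the
helpers guc_to_gt()/ce_to_guc()/deregister_destroyed_contexts() are
assumptions here):

static void destroyed_worker_func(struct work_struct *w)
{
	struct intel_guc *guc = container_of(w, struct intel_guc,
					     submission_state.destroyed_worker);
	struct intel_gt *gt = guc_to_gt(guc);
	int tmp;

	/* Worker context: sleeping is allowed, so a GT PM ref can be held. */
	with_intel_gt_pm(gt, tmp)
		deregister_destroyed_contexts(guc);
}

static void guc_context_destroy(struct intel_context *ce)
{
	struct intel_guc *guc = ce_to_guc(ce);
	unsigned long flags;

	/* May be called from atomic context: no sleeping, only queue work. */
	spin_lock_irqsave(&guc->submission_state.lock, flags);
	list_add_tail(&ce->destroyed_link,
		      &guc->submission_state.destroyed_contexts);
	spin_unlock_irqrestore(&guc->submission_state.lock, flags);

	queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
}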

v2:
  (John Harrison)
   - Split structure changes out in different patch
  (Tvrtko)
   - Don't drop lock in deregister_destroyed_contexts
v3:
  (John Harrison)
   - Flush destroyed contexts before destroying context reg pool

Signed-off-by: Matthew Brost 

Reviewed-by: John Harrison 


---
  drivers/gpu/drm/i915/gt/intel_context.c   |   2 +
  drivers/gpu/drm/i915/gt/intel_context_types.h |   7 +
  drivers/gpu/drm/i915/gt/intel_engine_pm.h |   5 +
  drivers/gpu/drm/i915/gt/intel_gt_pm.h |   4 +
  drivers/gpu/drm/i915/gt/uc/intel_guc.h|  11 ++
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 146 +++---
  6 files changed, 121 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 35babd02ddfe..d008ef8623ce 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -400,6 +400,8 @@ intel_context_init(struct intel_context *ce, struct 
intel_engine_cs *engine)
ce->guc_id.id = GUC_INVALID_LRC_ID;
INIT_LIST_HEAD(&ce->guc_id.link);
  
+	INIT_LIST_HEAD(&ce->destroyed_link);
+
/*
 * Initialize fence to be complete as this is expected to be complete
 * unless there is a pending schedule disable outstanding.
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e7e3984aab78..4613d027cbc3 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -213,6 +213,13 @@ struct intel_context {
struct list_head link;
} guc_id;
  
+	/**
+* @destroyed_link: link in guc->submission_state.destroyed_contexts, in
+* list when context is pending to be destroyed (deregistered with the
+* GuC), protected by guc->submission_state.lock
+*/
+   struct list_head destroyed_link;
+
  #ifdef CONFIG_DRM_I915_SELFTEST
/**
 * @drop_schedule_enable: Force drop of schedule enable G2H for selftest
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h 
b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 8520c595f5e1..6fdeae668e6e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -16,6 +16,11 @@ intel_engine_pm_is_awake(const struct intel_engine_cs 
*engine)
return intel_wakeref_is_active(&engine->wakeref);
  }
  
+static inline void __intel_engine_pm_get(struct intel_engine_cs *engine)
+{
+   __intel_wakeref_get(&engine->wakeref);
+}
+
  static inline void intel_engine_pm_get(struct intel_engine_cs *engine)
  {
intel_wakeref_get(&engine->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h 
b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index d0588d8aaa44..05de6c1af25b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -41,6 +41,10 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt)
intel_wakeref_put_async(>->wakeref);
  }
  
+#define with_intel_gt_pm(gt, tmp) \
+   for (tmp = 1, intel_gt_pm_get(gt); tmp; \
+intel_gt_pm_put(gt), tmp = 0)
+
  static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
  {
return intel_wakeref_wait_for_idle(>->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 82e248c2290c..74f071a0b6d5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -90,6 +90,17 @@ struct intel_guc {
 * refs
 */
struct list_head guc_id_list;
+   /**
+* @destroyed_contexts: list of contexts waiting to be destroyed
+* (deregistered with the GuC)
+*/
+   struct list_head destroyed_contexts;
+   /**
+* @destroyed_worker: worker to deregister contexts, need as we
+* need to take a GT PM reference and can't from destroy
+* function as it might be in an atomic context (no sleeping)
+*/
+   struct work_struct destroyed_worker;
} submission_state;
  
  	/**

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index b2646b088c7f..d2ce47b5541e 100644
--- a/drivers/gpu/drm/i915/gt/uc/int

Re: [PATCH 01/25] drm/i915/guc: Move GuC guc_id allocation under submission state sub-struct

2021-10-13 Thread John Harrison

On 10/13/2021 13:42, Matthew Brost wrote:

Move guc_id allocation under the submission state sub-struct, as a future
patch will reuse the spin lock as a global submission state lock. Moving
this into a sub-struct makes ownership of the fields / lock clear.

v2:
  (Docs)
   - Add comment for submission_state sub-structure
v3:
  (John Harrison)
   - Fixup a few comments

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
  drivers/gpu/drm/i915/gt/intel_context_types.h |  6 +--
  drivers/gpu/drm/i915/gt/uc/intel_guc.h| 28 +++
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 48 ++-
  3 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 12252c411159..e7e3984aab78 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -197,18 +197,18 @@ struct intel_context {
struct {
/**
 * @id: handle which is used to uniquely identify this context
-* with the GuC, protected by guc->contexts_lock
+* with the GuC, protected by guc->submission_state.lock
 */
u16 id;
/**
 * @ref: the number of references to the guc_id, when
 * transitioning in and out of zero protected by
-* guc->contexts_lock
+* guc->submission_state.lock
 */
atomic_t ref;
/**
 * @link: in guc->guc_id_list when the guc_id has no refs but is
-* still valid, protected by guc->contexts_lock
+* still valid, protected by guc->submission_state.lock
 */
struct list_head link;
} guc_id;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 5dd174babf7a..82e248c2290c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -71,16 +71,26 @@ struct intel_guc {
} interrupts;
  
  	/**
-* @contexts_lock: protects guc_ids, guc_id_list, ce->guc_id.id, and
-* ce->guc_id.ref when transitioning in and out of zero
+* @submission_state: sub-structure for submission state protected by
+* single lock
 */
-   spinlock_t contexts_lock;
-   /** @guc_ids: used to allocate unique ce->guc_id.id values */
-   struct ida guc_ids;
-   /**
-* @guc_id_list: list of intel_context with valid guc_ids but no refs
-*/
-   struct list_head guc_id_list;
+   struct {
+   /**
+* @lock: protects everything in submission_state,
+* ce->guc_id.id, and ce->guc_id.ref when transitioning in and
+* out of zero
+*/
+   spinlock_t lock;
+   /**
+* @guc_ids: used to allocate new guc_ids
+*/
+   struct ida guc_ids;
+   /**
+* @guc_id_list: list of intel_context with valid guc_ids but no
+* refs
+*/
+   struct list_head guc_id_list;
+   } submission_state;
  
  	/**
 * @submission_supported: tracks whether we support GuC submission on
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index ba0de35f6323..b2646b088c7f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -68,14 +68,14 @@
   * fence is used to stall all requests associated with this guc_id until the
   * corresponding G2H returns indicating the guc_id has been deregistered.
   *
- * guc_ids:
+ * submission_state.guc_ids:
   * Unique number associated with private GuC context data passed in during
   * context registration / submission / deregistration. 64k available. Simple 
ida
   * is used for allocation.
   *
   * Stealing guc_ids:
   * If no guc_ids are available they can be stolen from another context at
- * request creation time if that context is unpinned. If a guc_id can't be 
found
+ * request creation time if that context is unpinned. If a guc_id an't be found

Oops?

John.



   * we punt this problem to the user as we believe this is near impossible to 
hit
   * during normal use cases.
   *
@@ -89,7 +89,7 @@
   * sched_engine can be submitting at a time. Currently only one sched_engine 
is
   * used for all of GuC submission but that could change in the future.
   *
- * guc->contexts_lock
+ * guc->submission_state.lock
   * Protects guc_id allocation for the given GuC, i.e. only one context can be
   * doing guc_id allocation operations at a time for each GuC in the system.
   *
@@ -103,7 +103,7 @@
   *
   * Lock ordering rules:
   * sched_engine->lock -> ce->guc_state.lock
- * guc->contexts_l

[GIT PULL] mediatek drm fixes for 5.15

2021-10-13 Thread Chun-Kuang Hu
Hi, Dave & Daniel:

This includes:

1. Revert series "CMDQ refinement of Mediatek DRM driver"

Regards,
Chun-Kuang.

The following changes since commit 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f:

  Linux 5.15-rc1 (2021-09-12 16:28:37 -0700)

are available in the Git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/chunkuang.hu/linux.git 
tags/mediatek-drm-fixes-5.15

for you to fetch changes up to 4157a441ff068cc406513e7b8069efa19bba89d0:

  Revert "drm/mediatek: Use mailbox rx_callback instead of cmdq_task_cb" 
(2021-10-12 08:02:27 +0800)


Mediatek DRM Fixes for Linux 5.15

1. Revert series "CMDQ refinement of Mediatek DRM driver"


Chun-Kuang Hu (5):
  Revert "drm/mediatek: Clear pending flag when cmdq packet is done"
  Revert "drm/mediatek: Add cmdq_handle in mtk_crtc"
  Revert "drm/mediatek: Detect CMDQ execution timeout"
  Revert "drm/mediatek: Remove struct cmdq_client"
  Revert "drm/mediatek: Use mailbox rx_callback instead of cmdq_task_cb"

 drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 157 +---
 1 file changed, 24 insertions(+), 133 deletions(-)


[PATCH v3 6/7] drm/kmb: Enable ADV bridge after modeset

2021-10-13 Thread Anitha Chrisanthus
On KMB, ADV bridge must be programmed and powered on prior to
MIPI DSI HW initialization.

Fixes: 98521f4d4b4c ("drm/kmb: Mipi DSI part of the display driver")
Signed-off-by: Anitha Chrisanthus 
---
 drivers/gpu/drm/kmb/kmb_dsi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/kmb/kmb_dsi.c b/drivers/gpu/drm/kmb/kmb_dsi.c
index a0669b842ff5..7ab6b7b44cbc 100644
--- a/drivers/gpu/drm/kmb/kmb_dsi.c
+++ b/drivers/gpu/drm/kmb/kmb_dsi.c
@@ -1341,6 +1341,7 @@ static void connect_lcd_to_mipi(struct kmb_dsi *kmb_dsi)
return;
}
 
+   drm_bridge_chain_enable(adv_bridge);
/* DISABLE MIPI->CIF CONNECTION */
regmap_write(msscam, MSS_MIPI_CIF_CFG, 0);
 
-- 
2.25.1



[PATCH v3 7/7] drm/kmb: Enable support for framebuffer console

2021-10-13 Thread Anitha Chrisanthus
Enable support for fbcon (framebuffer console).
The user can initialize fbcon by loading kmb-drm with the parameter
console=1.

v2: added missing static clk_enable

Signed-off-by: Edmund Dea 
Signed-off-by: Anitha Chrisanthus 
---
 drivers/gpu/drm/kmb/kmb_drv.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c
index 961ac6fb5fcf..b4e66eac63b5 100644
--- a/drivers/gpu/drm/kmb/kmb_drv.c
+++ b/drivers/gpu/drm/kmb/kmb_drv.c
@@ -5,6 +5,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -15,6 +16,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -24,6 +26,12 @@
 #include "kmb_dsi.h"
 #include "kmb_regs.h"
 
+/* Module Parameters */
+static bool console;
+module_param(console, bool, 0400);
+MODULE_PARM_DESC(console,
+"Enable framebuffer console support (0=disable [default], 
1=on)");
+
 static int kmb_display_clk_enable(struct kmb_drm_private *kmb)
 {
int ret = 0;
@@ -559,6 +567,9 @@ static int kmb_probe(struct platform_device *pdev)
if (ret)
goto err_register;
 
+   if (console)
+   drm_fbdev_generic_setup(&kmb->drm, 32);
+
return 0;
 
  err_register:
-- 
2.25.1



[PATCH v3 5/7] drm/kmb: Corrected typo in handle_lcd_irq

2021-10-13 Thread Anitha Chrisanthus
Check for the overflow bit for layer3 in the irq handler.

Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display")
Signed-off-by: Anitha Chrisanthus 
---
 drivers/gpu/drm/kmb/kmb_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c
index 12ce669650cc..961ac6fb5fcf 100644
--- a/drivers/gpu/drm/kmb/kmb_drv.c
+++ b/drivers/gpu/drm/kmb/kmb_drv.c
@@ -380,7 +380,7 @@ static irqreturn_t handle_lcd_irq(struct drm_device *dev)
if (val & LAYER3_DMA_FIFO_UNDERFLOW)
drm_dbg(&kmb->drm,
"LAYER3:GL1 DMA UNDERFLOW val = 0x%lx", val);
-   if (val & LAYER3_DMA_FIFO_UNDERFLOW)
+   if (val & LAYER3_DMA_FIFO_OVERFLOW)
drm_dbg(&kmb->drm,
"LAYER3:GL1 DMA OVERFLOW val = 0x%lx", val);
}
-- 
2.25.1



[PATCH v3 4/7] drm/kmb: Disable change of plane parameters

2021-10-13 Thread Anitha Chrisanthus
From: Edmund Dea 

Due to HW limitations, KMB cannot change height, width, or
pixel format after initial plane configuration.

v2: removed memset disp_cfg as it is already zero.

Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display")
Signed-off-by: Edmund Dea 
Signed-off-by: Anitha Chrisanthus 
---
 drivers/gpu/drm/kmb/kmb_drv.h   |  1 +
 drivers/gpu/drm/kmb/kmb_plane.c | 43 -
 drivers/gpu/drm/kmb/kmb_plane.h |  6 +
 3 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/kmb/kmb_drv.h b/drivers/gpu/drm/kmb/kmb_drv.h
index d297218869e8..b3203f583a46 100644
--- a/drivers/gpu/drm/kmb/kmb_drv.h
+++ b/drivers/gpu/drm/kmb/kmb_drv.h
@@ -57,6 +57,7 @@ struct kmb_drm_private {
spinlock_t  irq_lock;
int irq_lcd;
int sys_clk_mhz;
+   struct disp_cfg init_disp_cfg[KMB_MAX_PLANES];
struct layer_status plane_status[KMB_MAX_PLANES];
int kmb_under_flow;
int kmb_flush_done;
diff --git a/drivers/gpu/drm/kmb/kmb_plane.c b/drivers/gpu/drm/kmb/kmb_plane.c
index 06b0c42c9e91..00404ba4126d 100644
--- a/drivers/gpu/drm/kmb/kmb_plane.c
+++ b/drivers/gpu/drm/kmb/kmb_plane.c
@@ -67,8 +67,21 @@ static const u32 kmb_formats_v[] = {
 
 static unsigned int check_pixel_format(struct drm_plane *plane, u32 format)
 {
+   struct kmb_drm_private *kmb;
+   struct kmb_plane *kmb_plane = to_kmb_plane(plane);
int i;
+   int plane_id = kmb_plane->id;
+   struct disp_cfg init_disp_cfg;
 
+   kmb = to_kmb(plane->dev);
+   init_disp_cfg = kmb->init_disp_cfg[plane_id];
+   /* Due to HW limitations, changing pixel format after initial
+* plane configuration is not supported.
+*/
+   if (init_disp_cfg.format && init_disp_cfg.format != format) {
+   drm_dbg(&kmb->drm, "Cannot change format after initial plane configuration");
+   return -EINVAL;
+   }
for (i = 0; i < plane->format_count; i++) {
if (plane->format_types[i] == format)
return 0;
@@ -81,11 +94,17 @@ static int kmb_plane_atomic_check(struct drm_plane *plane,
 {
struct drm_plane_state *new_plane_state = 
drm_atomic_get_new_plane_state(state,

 plane);
+   struct kmb_drm_private *kmb;
+   struct kmb_plane *kmb_plane = to_kmb_plane(plane);
+   int plane_id = kmb_plane->id;
+   struct disp_cfg init_disp_cfg;
struct drm_framebuffer *fb;
int ret;
struct drm_crtc_state *crtc_state;
bool can_position;
 
+   kmb = to_kmb(plane->dev);
+   init_disp_cfg = kmb->init_disp_cfg[plane_id];
fb = new_plane_state->fb;
if (!fb || !new_plane_state->crtc)
return 0;
@@ -99,6 +118,16 @@ static int kmb_plane_atomic_check(struct drm_plane *plane,
new_plane_state->crtc_w < KMB_FB_MIN_WIDTH ||
new_plane_state->crtc_h < KMB_FB_MIN_HEIGHT)
return -EINVAL;
+
+   /* Due to HW limitations, changing plane height or width after
+* initial plane configuration is not supported.
+*/
+   if ((init_disp_cfg.width && init_disp_cfg.height) &&
+   (init_disp_cfg.width != fb->width ||
+   init_disp_cfg.height != fb->height)) {
+   drm_dbg(&kmb->drm, "Cannot change plane height or width after initial configuration");
+   return -EINVAL;
+   }
can_position = (plane->type == DRM_PLANE_TYPE_OVERLAY);
crtc_state =
drm_atomic_get_existing_crtc_state(state,
@@ -335,6 +364,7 @@ static void kmb_plane_atomic_update(struct drm_plane *plane,
unsigned char plane_id;
int num_planes;
static dma_addr_t addr[MAX_SUB_PLANES];
+   struct disp_cfg *init_disp_cfg;
 
if (!plane || !new_plane_state || !old_plane_state)
return;
@@ -357,7 +387,8 @@ static void kmb_plane_atomic_update(struct drm_plane *plane,
}
spin_unlock_irq(&kmb->irq_lock);
 
-   src_w = (new_plane_state->src_w >> 16);
+   init_disp_cfg = &kmb->init_disp_cfg[plane_id];
+   src_w = new_plane_state->src_w >> 16;
src_h = new_plane_state->src_h >> 16;
crtc_x = new_plane_state->crtc_x;
crtc_y = new_plane_state->crtc_y;
@@ -500,6 +531,16 @@ static void kmb_plane_atomic_update(struct drm_plane 
*plane,
 
/* Enable DMA */
kmb_write_lcd(kmb, LCD_LAYERn_DMA_CFG(plane_id), dma_cfg);
+
+   /* Save initial display config */
+   if (!init_disp_cfg->width ||
+   !init_disp_cfg->height ||
+   !init_disp_cfg->format) {
+   init_disp_cfg->width = width;
+   init_disp_cfg->height = height;
+   init_disp_cfg->format 

[PATCH v3 3/7] drm/kmb: Remove clearing DPHY regs

2021-10-13 Thread Anitha Chrisanthus
From: Edmund Dea 

Don't clear the shared DPHY registers common to MIPI Rx and MIPI Tx during
DSI initialization, since this was causing a MIPI Rx reset. The rest of the
writes are bitwise, so they will not affect the MIPI Rx side.

Fixes: 98521f4d4b4c ("drm/kmb: Mipi DSI part of the display driver")
Signed-off-by: Anitha Chrisanthus 
Signed-off-by: Edmund Dea 
---
 drivers/gpu/drm/kmb/kmb_dsi.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/kmb/kmb_dsi.c b/drivers/gpu/drm/kmb/kmb_dsi.c
index 86e8e7943e89..a0669b842ff5 100644
--- a/drivers/gpu/drm/kmb/kmb_dsi.c
+++ b/drivers/gpu/drm/kmb/kmb_dsi.c
@@ -1393,11 +1393,6 @@ int kmb_dsi_mode_set(struct kmb_dsi *kmb_dsi, struct 
drm_display_mode *mode,
mipi_tx_init_cfg.lane_rate_mbps = data_rate;
}
 
-   kmb_write_mipi(kmb_dsi, DPHY_ENABLE, 0);
-   kmb_write_mipi(kmb_dsi, DPHY_INIT_CTRL0, 0);
-   kmb_write_mipi(kmb_dsi, DPHY_INIT_CTRL1, 0);
-   kmb_write_mipi(kmb_dsi, DPHY_INIT_CTRL2, 0);
-
/* Initialize mipi controller */
mipi_tx_init_cntrl(kmb_dsi, &mipi_tx_init_cfg);
 
-- 
2.25.1



[PATCH v3 2/7] drm/kmb: Limit supported mode to 1080p

2021-10-13 Thread Anitha Chrisanthus
KMB only supports a single resolution (1080p), so this commit checks for
1920x1080x60 or 1920x1080x59 in crtc_mode_valid.
Also, modes with vfp < 4 are not supported by the KMB display. This change
prunes display modes with vfp < 4.

v2: added vfp check

Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display")
Signed-off-by: Anitha Chrisanthus 
Signed-off-by: Edmund Dea 
---
 drivers/gpu/drm/kmb/kmb_crtc.c | 34 ++
 drivers/gpu/drm/kmb/kmb_drv.h  | 13 ++---
 2 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/kmb/kmb_crtc.c b/drivers/gpu/drm/kmb/kmb_crtc.c
index 44327bc629ca..08a45e813db7 100644
--- a/drivers/gpu/drm/kmb/kmb_crtc.c
+++ b/drivers/gpu/drm/kmb/kmb_crtc.c
@@ -185,11 +185,45 @@ static void kmb_crtc_atomic_flush(struct drm_crtc *crtc,
spin_unlock_irq(&crtc->dev->event_lock);
 }
 
+static enum drm_mode_status
+   kmb_crtc_mode_valid(struct drm_crtc *crtc,
+   const struct drm_display_mode *mode)
+{
+   int refresh;
+   struct drm_device *dev = crtc->dev;
+   int vfp = mode->vsync_start - mode->vdisplay;
+
+   if (mode->vdisplay < KMB_CRTC_MAX_HEIGHT) {
+   drm_dbg(dev, "height = %d less than %d",
+   mode->vdisplay, KMB_CRTC_MAX_HEIGHT);
+   return MODE_BAD_VVALUE;
+   }
+   if (mode->hdisplay < KMB_CRTC_MAX_WIDTH) {
+   drm_dbg(dev, "width = %d less than %d",
+   mode->hdisplay, KMB_CRTC_MAX_WIDTH);
+   return MODE_BAD_HVALUE;
+   }
+   refresh = drm_mode_vrefresh(mode);
+   if (refresh < KMB_MIN_VREFRESH || refresh > KMB_MAX_VREFRESH) {
+   drm_dbg(dev, "refresh = %d less than %d or greater than %d",
+   refresh, KMB_MIN_VREFRESH, KMB_MAX_VREFRESH);
+   return MODE_BAD;
+   }
+
+   if (vfp < KMB_CRTC_MIN_VFP) {
+   drm_dbg(dev, "vfp = %d less than %d", vfp, KMB_CRTC_MIN_VFP);
+   return MODE_BAD;
+   }
+
+   return MODE_OK;
+}
+
 static const struct drm_crtc_helper_funcs kmb_crtc_helper_funcs = {
.atomic_begin = kmb_crtc_atomic_begin,
.atomic_enable = kmb_crtc_atomic_enable,
.atomic_disable = kmb_crtc_atomic_disable,
.atomic_flush = kmb_crtc_atomic_flush,
+   .mode_valid = kmb_crtc_mode_valid,
 };
 
 int kmb_setup_crtc(struct drm_device *drm)
diff --git a/drivers/gpu/drm/kmb/kmb_drv.h b/drivers/gpu/drm/kmb/kmb_drv.h
index 69a62e2d03ff..d297218869e8 100644
--- a/drivers/gpu/drm/kmb/kmb_drv.h
+++ b/drivers/gpu/drm/kmb/kmb_drv.h
@@ -18,13 +18,20 @@
 
 #define DRIVER_DATE"20210223"
 #define DRIVER_MAJOR   1
-#define DRIVER_MINOR   1
-
+#define DRIVER_MINOR   2
+
+/* Platform definitions */
+#define KMB_CRTC_MIN_VFP   4
+#define KMB_CRTC_MAX_WIDTH 1920 /* max width in pixels */
+#define KMB_CRTC_MAX_HEIGHT1080 /* max height in pixels */
+#define KMB_CRTC_MIN_WIDTH 1920
+#define KMB_CRTC_MIN_HEIGHT1080
 #define KMB_FB_MAX_WIDTH   1920
 #define KMB_FB_MAX_HEIGHT  1080
 #define KMB_FB_MIN_WIDTH   1
 #define KMB_FB_MIN_HEIGHT  1
-
+#define KMB_MIN_VREFRESH   59/*vertical refresh in Hz */
+#define KMB_MAX_VREFRESH   60/*vertical refresh in Hz */
 #define KMB_LCD_DEFAULT_CLK2
 #define KMB_SYS_CLK_MHZ500
 
-- 
2.25.1



[PATCH v3 1/7] drm/kmb: Work around for higher system clock

2021-10-13 Thread Anitha Chrisanthus
Use a different value for the system clock offset in the
ppl/llp ratio calculations for clocks higher than 500 MHz.

Fixes: 98521f4d4b4c ("drm/kmb: Mipi DSI part of the display driver")
Signed-off-by: Anitha Chrisanthus 
---
 drivers/gpu/drm/kmb/kmb_dsi.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/kmb/kmb_dsi.c b/drivers/gpu/drm/kmb/kmb_dsi.c
index 1793cd31b117..86e8e7943e89 100644
--- a/drivers/gpu/drm/kmb/kmb_dsi.c
+++ b/drivers/gpu/drm/kmb/kmb_dsi.c
@@ -482,6 +482,10 @@ static u32 mipi_tx_fg_section_cfg(struct kmb_dsi *kmb_dsi,
return 0;
 }
 
+#define CLK_DIFF_LOW 50
+#define CLK_DIFF_HI 60
+#define SYSCLK_500  500
+
 static void mipi_tx_fg_cfg_regs(struct kmb_dsi *kmb_dsi, u8 frame_gen,
struct mipi_tx_frame_timing_cfg *fg_cfg)
 {
@@ -492,7 +496,12 @@ static void mipi_tx_fg_cfg_regs(struct kmb_dsi *kmb_dsi, 
u8 frame_gen,
/* 500 Mhz system clock minus 50 to account for the difference in
 * MIPI clock speed in RTL tests
 */
-   sysclk = kmb_dsi->sys_clk_mhz - 50;
+   if (kmb_dsi->sys_clk_mhz == SYSCLK_500) {
+   sysclk = kmb_dsi->sys_clk_mhz - CLK_DIFF_LOW;
+   } else {
+   /* 700 Mhz clk*/
+   sysclk = kmb_dsi->sys_clk_mhz - CLK_DIFF_HI;
+   }
 
/* PPL-Pixel Packing Layer, LLP-Low Level Protocol
 * Frame genartor timing parameters are clocked on the system clock,
-- 
2.25.1



Re: [RFC PATCH v2 2/2] RDMA/rxe: Add dma-buf support

2021-10-13 Thread Shunsuke Mie
2021年10月13日(水) 20:42 Daniel Vetter :
>
> On Fri, Oct 01, 2021 at 12:56:48PM +0900, Shunsuke Mie wrote:
> > 2021年9月30日(木) 23:41 Daniel Vetter :
> > >
> > > On Wed, Sep 29, 2021 at 01:19:05PM +0900, Shunsuke Mie wrote:
> > > > Implement a ib device operation ‘reg_user_mr_dmabuf’. Generate a
> > > > rxe_map from the memory space linked the passed dma-buf.
> > > >
> > > > Signed-off-by: Shunsuke Mie 
> > > > ---
> > > >  drivers/infiniband/sw/rxe/rxe_loc.h   |   2 +
> > > >  drivers/infiniband/sw/rxe/rxe_mr.c| 118 ++
> > > >  drivers/infiniband/sw/rxe/rxe_verbs.c |  34 
> > > >  drivers/infiniband/sw/rxe/rxe_verbs.h |   2 +
> > > >  4 files changed, 156 insertions(+)
> > > >
> > > > diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h 
> > > > b/drivers/infiniband/sw/rxe/rxe_loc.h
> > > > index 1ca43b859d80..8bc19ea1a376 100644
> > > > --- a/drivers/infiniband/sw/rxe/rxe_loc.h
> > > > +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
> > > > @@ -75,6 +75,8 @@ u8 rxe_get_next_key(u32 last_key);
> > > >  void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr);
> > > >  int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 
> > > > iova,
> > > >int access, struct rxe_mr *mr);
> > > > +int rxe_mr_dmabuf_init_user(struct rxe_pd *pd, int fd, u64 start, u64 
> > > > length,
> > > > + u64 iova, int access, struct rxe_mr *mr);
> > > >  int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr 
> > > > *mr);
> > > >  int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
> > > >   enum rxe_mr_copy_dir dir);
> > > > diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c 
> > > > b/drivers/infiniband/sw/rxe/rxe_mr.c
> > > > index 53271df10e47..af6ef671c3a5 100644
> > > > --- a/drivers/infiniband/sw/rxe/rxe_mr.c
> > > > +++ b/drivers/infiniband/sw/rxe/rxe_mr.c
> > > > @@ -4,6 +4,7 @@
> > > >   * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
> > > >   */
> > > >
> > > > +#include 
> > > >  #include "rxe.h"
> > > >  #include "rxe_loc.h"
> > > >
> > > > @@ -245,6 +246,120 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 
> > > > start, u64 length, u64 iova,
> > > >   return err;
> > > >  }
> > > >
> > > > +static int rxe_map_dmabuf_mr(struct rxe_mr *mr,
> > > > +  struct ib_umem_dmabuf *umem_dmabuf)
> > > > +{
> > > > + struct rxe_map_set *set;
> > > > + struct rxe_phys_buf *buf = NULL;
> > > > + struct rxe_map **map;
> > > > + void *vaddr, *vaddr_end;
> > > > + int num_buf = 0;
> > > > + int err;
> > > > + size_t remain;
> > > > +
> > > > + mr->dmabuf_map = kzalloc(sizeof &mr->dmabuf_map, GFP_KERNEL);
> > >
> > > dmabuf_maps are just tagged pointers (and we could shrink them to actually
> > > just a tagged pointer if anyone cares about the overhead of the separate
> > > bool), allocating them seperately is overkill.
> >
> > I agree with you. However, I think it is needed in order to unmap with
> > dma_buf_vunmap(). If there is another, simpler way to unmap it, it is not
> > needed, I think. What do you think about it?
>
> dma_buf_vunmap does not kfree the dma_buf_map argument, so that's no
> reason to allocate it separately. Or I'm confused.
I had misunderstood. Yes, it is not necessary to allocate an object.
Actually, some implementations don't alloc/free the argument,
e.g. gpu/drm/drm_gem_cma_helper.c.
I'll fix it.
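
For illustration, the suggestion boils down to something like this (a
minimal sketch against the 5.15 dma-buf-map API, not the actual rxe patch;
the function name example_vmap() is made up):

#include <linux/dma-buf.h>
#include <linux/dma-buf-map.h>

static int example_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
{
	int err;

	/* No separate allocation needed: dma_buf_vmap() just fills in *map. */
	err = dma_buf_vmap(dmabuf, map);
	if (err)
		return err;

	/* A pure software driver cannot handle __iomem mappings. */
	if (map->is_iomem) {
		dma_buf_vunmap(dmabuf, map);
		return -EOPNOTSUPP;
	}

	/* map->vaddr is a regular kernel pointer from here on. */
	return 0;
}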

> Also apologies, I'm way behind on mails.
No problem. Thank you for your answer.

Thanks,
Shunsuke

> -Daniel
>
> >
> > > > + if (!mr->dmabuf_map) {
> > > > + err = -ENOMEM;
> > > > + goto err_out;
> > > > + }
> > > > +
> > > > + err = dma_buf_vmap(umem_dmabuf->dmabuf, mr->dmabuf_map);
> > > > + if (err)
> > > > + goto err_free_dmabuf_map;
> > > > +
> > > > + set = mr->cur_map_set;
> > > > + set->page_shift = PAGE_SHIFT;
> > > > + set->page_mask = PAGE_SIZE - 1;
> > > > +
> > > > + map = set->map;
> > > > + buf = map[0]->buf;
> > > > +
> > > > + vaddr = mr->dmabuf_map->vaddr;
> > >
> > > dma_buf_map can be an __iomem too, you shouldn't dig around in this, but
> > > use the dma-buf-map.h helpers instead. On x86 (and I think also on most
> > > arm) it doesn't matter, but it's kinda not very nice in a pure software
> > > driver.
> > >
> > > If anything is missing in dma-buf-map.h wrappers just add more.
> > >
> > > Or alternatively you need to fail the import if you can't handle __iomem.
> > >
> > > Aside from these I think the dma-buf side here for cpu access looks
> > > reasonable now.
> > > -Daniel
> > I'll see the dma-buf-map.h and consider the error handling that you 
> > suggested.
> > I appreciate your support.
> >
> > Thanks a lot,
> > Shunsuke.
> >
> > > > + vaddr_end = vaddr + umem_dmabuf->dmabuf->size;
> > > > + remain = umem_dmabuf->dmabuf->size;
> > > > +
> > > > + for (; remain; vaddr += PAGE_SIZE) {
> > > > + if (num

Re: [PATCH] drm: Update MST First Link Slot Information Based on Encoding Format

2021-10-13 Thread Lyude Paul
Some comments below (also, sorry again for the mixup on the last review!)

On Tue, 2021-10-12 at 17:58 -0400, Bhawanpreet Lakha wrote:
> 8b/10b encoding format requires reserving the first slot for
> recording metadata. Real data transmission starts from the second slot,
> with a total of 63 slots available.
> 
> In 128b/132b encoding format, metadata is transmitted separately
> in an LLCP packet before the MTP. Real data transmission starts from
> the first slot, with a total of 64 slots available.
> 
> v2:
> * Remove get_mst_link_encoding_cap
> * Move total/start slots to mst_state, and copy it to mst_mgr in
> atomic_check
> 
> Signed-off-by: Fangzhi Zuo 
> Signed-off-by: Bhawanpreet Lakha 
> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 28 +++
>  drivers/gpu/drm/drm_dp_mst_topology.c | 35 +++
>  include/drm/drm_dp_mst_helper.h   | 13 +++
>  3 files changed, 69 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 5020f2d36fe1..4ad50eb0091a 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -10612,6 +10612,8 @@ static int amdgpu_dm_atomic_check(struct drm_device
> *dev,
>  #if defined(CONFIG_DRM_AMD_DC_DCN)
> struct dsc_mst_fairness_vars vars[MAX_PIPES];
>  #endif
> +   struct drm_dp_mst_topology_state *mst_state;
> +   struct drm_dp_mst_topology_mgr *mgr;
>  
> trace_amdgpu_dm_atomic_check_begin(state);
>  
> @@ -10819,6 +10821,32 @@ static int amdgpu_dm_atomic_check(struct drm_device
> *dev,
> lock_and_validation_needed = true;
> }
>  
> +#if defined(CONFIG_DRM_AMD_DC_DCN)
> +   for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) {
> +   struct amdgpu_dm_connector *aconnector;
> +   struct drm_connector *connector;
> +   struct drm_connector_list_iter iter;
> +   u8 link_coding_cap;
> +
> +   if (!mgr->mst_state )
> +   continue;

extraneous space

> +
> +   drm_connector_list_iter_begin(dev, &iter);
> +   drm_for_each_connector_iter(connector, &iter) {
> +   int id = connector->index;
> +
> +   if (id == mst_state->mgr->conn_base_id) {
> +   aconnector =
> to_amdgpu_dm_connector(connector);
> +   link_coding_cap =
> dc_link_dp_mst_decide_link_encoding_format(aconnector->dc_link);
> +   drm_dp_mst_update_coding_cap(mst_state,
> link_coding_cap);
> +
> +   break;
> +   }
> +   }
> +   drm_connector_list_iter_end(&iter);
> +
> +   }
> +#endif
> /**
>  * Streams and planes are reset when there are changes that affect
>  * bandwidth. Anything that affects bandwidth needs to go through
> diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c
> b/drivers/gpu/drm/drm_dp_mst_topology.c
> index ad0795afc21c..fb5c47c4cb2e 100644
> --- a/drivers/gpu/drm/drm_dp_mst_topology.c
> +++ b/drivers/gpu/drm/drm_dp_mst_topology.c
> @@ -3368,7 +3368,7 @@ int drm_dp_update_payload_part1(struct
> drm_dp_mst_topology_mgr *mgr)
> struct drm_dp_payload req_payload;
> struct drm_dp_mst_port *port;
> int i, j;
> -   int cur_slots = 1;
> +   int cur_slots = mgr->start_slot;
> bool skip;
>  
> mutex_lock(&mgr->payload_lock);
> @@ -4321,7 +4321,7 @@ int drm_dp_find_vcpi_slots(struct
> drm_dp_mst_topology_mgr *mgr,
> num_slots = DIV_ROUND_UP(pbn, mgr->pbn_div);
>  
> /* max. time slots - one slot for MTP header */
> -   if (num_slots > 63)
> +   if (num_slots > mgr->total_avail_slots)
> return -ENOSPC;

For reasons I will explain a little further in this email, we might want to
drop this…

> return num_slots;
>  }
> @@ -4333,7 +4333,7 @@ static int drm_dp_init_vcpi(struct
> drm_dp_mst_topology_mgr *mgr,
> int ret;
>  
> /* max. time slots - one slot for MTP header */
> -   if (slots > 63)
> +   if (slots > mgr->total_avail_slots)

…and this

> return -ENOSPC;
>  
> vcpi->pbn = pbn;
> @@ -4507,6 +4507,18 @@ int drm_dp_atomic_release_vcpi_slots(struct
> drm_atomic_state *state,
>  }
>  EXPORT_SYMBOL(drm_dp_atomic_release_vcpi_slots);
>  
> +void drm_dp_mst_update_coding_cap(struct drm_dp_mst_topology_state
> *mst_state, uint8_t link_coding_cap)

Need some kdocs here
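
For example, something roughly along these lines (a sketch of the shape
only, wording up to the author):

/**
 * drm_dp_mst_update_coding_cap() - update slot bookkeeping for the channel coding
 * @mst_state: MST topology state to update
 * @link_coding_cap: DP_CAP_ANSI_8B10B or DP_CAP_ANSI_128B132B
 *
 * On 128b/132b links the metadata is carried in a separate LLCP packet, so
 * all 64 time slots are available for payload and allocation starts at slot
 * 0. 8b/10b links keep the default of 63 slots starting at slot 1.
 */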

> +{
> +   if (link_coding_cap == DP_CAP_ANSI_128B132B) {
> +   mst_state->total_avail_slots = 64;
> +   mst_state->start_slot = 0;
> +   }
> +
> +   DRM_DEBUG_KMS("%s coding format on mgr 0x%p\n",
> +   (link_coding_cap == DP_CAP_ANSI_128B132B) ?
> "128b/132b":

Re: [PATCH 10/10] drm/i915: Add privacy-screen support (v3)

2021-10-13 Thread Ville Syrjälä
On Tue, Oct 05, 2021 at 10:23:22PM +0200, Hans de Goede wrote:
> Add support for eDP panels with a built-in privacy screen using the
> new drm_privacy_screen class.
> 
> Changes in v3:
> - Move drm_privacy_screen_get() call to intel_ddi_init_dp_connector()
> 
> Changes in v2:
> - Call drm_connector_update_privacy_screen() from
>   intel_enable_ddi_dp() / intel_ddi_update_pipe_dp() instead of adding a
>   for_each_new_connector_in_state() loop to intel_atomic_commit_tail()
> - Move the probe-deferral check to the intel_modeset_probe_defer() helper
> 
> Signed-off-by: Hans de Goede 
> ---
>  drivers/gpu/drm/i915/display/intel_atomic.c  |  1 +
>  drivers/gpu/drm/i915/display/intel_ddi.c | 16 
>  drivers/gpu/drm/i915/display/intel_display.c | 10 ++
>  3 files changed, 27 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c 
> b/drivers/gpu/drm/i915/display/intel_atomic.c
> index b4e7ac51aa31..a62550711e98 100644
> --- a/drivers/gpu/drm/i915/display/intel_atomic.c
> +++ b/drivers/gpu/drm/i915/display/intel_atomic.c
> @@ -139,6 +139,7 @@ int intel_digital_connector_atomic_check(struct 
> drm_connector *conn,
>   new_conn_state->base.picture_aspect_ratio != 
> old_conn_state->base.picture_aspect_ratio ||
>   new_conn_state->base.content_type != 
> old_conn_state->base.content_type ||
>   new_conn_state->base.scaling_mode != 
> old_conn_state->base.scaling_mode ||
> + new_conn_state->base.privacy_screen_sw_state != 
> old_conn_state->base.privacy_screen_sw_state ||
>   !drm_connector_atomic_hdr_metadata_equal(old_state, new_state))
>   crtc_state->mode_changed = true;
>  
> diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c 
> b/drivers/gpu/drm/i915/display/intel_ddi.c
> index 0d4cf7fa8720..272714e07cc6 100644
> --- a/drivers/gpu/drm/i915/display/intel_ddi.c
> +++ b/drivers/gpu/drm/i915/display/intel_ddi.c
> @@ -25,6 +25,7 @@
>   *
>   */
>  
> +#include 
>  #include 
>  
>  #include "i915_drv.h"
> @@ -2946,6 +2947,7 @@ static void intel_enable_ddi_dp(struct 
> intel_atomic_state *state,
>   if (port == PORT_A && DISPLAY_VER(dev_priv) < 9)
>   intel_dp_stop_link_train(intel_dp, crtc_state);
>  
> + drm_connector_update_privacy_screen(conn_state);
>   intel_edp_backlight_on(crtc_state, conn_state);
>  
>   if (!dig_port->lspcon.active || dig_port->dp.has_hdmi_sink)
> @@ -3161,6 +3163,7 @@ static void intel_ddi_update_pipe_dp(struct 
> intel_atomic_state *state,
>   intel_drrs_update(intel_dp, crtc_state);
>  
>   intel_backlight_update(state, encoder, crtc_state, conn_state);
> + drm_connector_update_privacy_screen(conn_state);
>  }
>  
>  void intel_ddi_update_pipe(struct intel_atomic_state *state,
> @@ -3979,6 +3982,19 @@ intel_ddi_init_dp_connector(struct intel_digital_port 
> *dig_port)
>   return NULL;
>   }
>  
> + if (dig_port->base.type == INTEL_OUTPUT_EDP) {

A connector type check would be a bit more consistent with what this is
about, I think. But there is a 1:1 correspondence with the encoder type
for eDP, so it's not a particularly important point.

Reviewed-by: Ville Syrjälä 

> + struct drm_device *dev = dig_port->base.base.dev;
> + struct drm_privacy_screen *privacy_screen;
> +
> + privacy_screen = drm_privacy_screen_get(dev->dev, NULL);
> + if (!IS_ERR(privacy_screen)) {
> + 		drm_connector_attach_privacy_screen_provider(&connector->base,
> + 							     privacy_screen);
> + } else if (PTR_ERR(privacy_screen) != -ENODEV) {
> + drm_warn(dev, "Error getting privacy-screen\n");
> + }
> + }
> +
>   return connector;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
> b/drivers/gpu/drm/i915/display/intel_display.c
> index 86dbe366a907..84715a779d9d 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -42,6 +42,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  
> @@ -12769,6 +12770,8 @@ void intel_modeset_driver_remove_nogem(struct 
> drm_i915_private *i915)
>  
>  bool intel_modeset_probe_defer(struct pci_dev *pdev)
>  {
> + struct drm_privacy_screen *privacy_screen;
> +
>   /*
>* apple-gmux is needed on dual GPU MacBook Pro
>* to probe the panel if we're the inactive GPU.
> @@ -12776,6 +12779,13 @@ bool intel_modeset_probe_defer(struct pci_dev *pdev)
>   if (vga_switcheroo_client_probe_defer(pdev))
>   return true;
>  
> + /* If the LCD panel has a privacy-screen, wait for it */
> + privacy_screen = drm_privacy_screen_get(&pdev->dev, NULL);
> + if (IS_ERR(privacy_screen) && PTR_ERR(privacy_screen) == -EPROBE_DEFER)
> + return true;
> +
> + drm_privacy_screen_put(privacy_screen);
> +
> 

Re: [PATCH] drm: Update MST First Link Slot Information Based on Encoding Format

2021-10-13 Thread kernel test robot
Hi Bhawanpreet,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on drm-tip/drm-tip linus/master v5.15-rc5 next-20211013]
[cannot apply to drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next 
airlied/drm-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Bhawanpreet-Lakha/drm-Update-MST-First-Link-Slot-Information-Based-on-Encoding-Format/20211013-060001
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-randconfig-a005-20211013 (attached as .config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 
b6a8c695542b2987eb9a203d5663a0740cb4725f)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/5604bf980dcbfdd7650b7e1d5d4a2fd9f18cd866
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Bhawanpreet-Lakha/drm-Update-MST-First-Link-Slot-Information-Based-on-Encoding-Format/20211013-060001
git checkout 5604bf980dcbfdd7650b7e1d5d4a2fd9f18cd866
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 
ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   In file included from 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:35:
   In file included from 
drivers/gpu/drm/amd/amdgpu/../display/dmub/dmub_srv.h:67:
   drivers/gpu/drm/amd/amdgpu/../display/dmub/inc/dmub_cmd.h:2819:12: warning: 
variable 'temp' set but not used [-Wunused-but-set-variable]
   uint64_t temp;
^
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:633:6: warning: 
no previous prototype for function 'dmub_aux_setconfig_callback' 
[-Wmissing-prototypes]
   void dmub_aux_setconfig_callback(struct amdgpu_device *adev, struct 
dmub_notification *notify)
^
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:633:1: note: 
declare 'static' if the function is not intended to be used outside of this 
translation unit
   void dmub_aux_setconfig_callback(struct amdgpu_device *adev, struct 
dmub_notification *notify)
   ^
   static 
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:649:6: warning: 
no previous prototype for function 'dmub_hpd_callback' [-Wmissing-prototypes]
   void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification 
*notify)
^
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:649:1: note: 
declare 'static' if the function is not intended to be used outside of this 
translation unit
   void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification 
*notify)
   ^
   static 
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:706:6: warning: 
no previous prototype for function 'register_dmub_notify_callback' 
[-Wmissing-prototypes]
   bool register_dmub_notify_callback(struct amdgpu_device *adev, enum 
dmub_notification_type type,
^
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:706:1: note: 
declare 'static' if the function is not intended to be used outside of this 
translation unit
   bool register_dmub_notify_callback(struct amdgpu_device *adev, enum 
dmub_notification_type type,
   ^
   static 
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:7174:12: 
warning: variable 'clock' set but not used [-Wunused-but-set-variable]
   int i, j, clock;
 ^
>> drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:10912:23: error: 
>> implicit declaration of function 
>> 'dc_link_dp_mst_decide_link_encoding_format' 
>> [-Werror,-Wimplicit-function-declaration]
   link_coding_cap = 
dc_link_dp_mst_decide_link_encoding_format(aconnector->dc_link);
 ^
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:11394:5: 
warning: no previous prototype for function 
'amdgpu_dm_set_dmub_async_sync_status' [-Wmissing-prototypes]
   int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, struct dc_context 
*ctx,
   ^
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:11394:1: note: 
declare 'static' if the function is not intended to be used outside of this 
translation unit
   int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, struct dc_context 
*c

Re: [PATCH 2/2] drm/i915/dp: use new link training delay helpers

2021-10-13 Thread Ville Syrjälä
On Tue, Oct 12, 2021 at 05:43:21PM +0300, Jani Nikula wrote:
> Use the new link training delay helpers, fixing the delays for
> 128b/132b.
> 
> For existing 8b/10b functionality, this will cause additional 1-byte
> DPCD reads for LTTPR delays instead of using the cached values. It's
> just too complicated to combine generic helpers with local caching in a
> sensible way.
> 
> Cc: Ville Syrjälä 
> Signed-off-by: Jani Nikula 

I was just pondering if the extra DPCD reads might cause some grief for
some compliance test stuff. But I guess if that happens we could just
read them all at the very start of link training, or something.

Reviewed-by: Ville Syrjälä 

> ---
>  .../drm/i915/display/intel_dp_link_training.c | 38 +++
>  1 file changed, 13 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c 
> b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> index 85676c953e0a..a72f2dc93718 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> @@ -683,15 +683,6 @@ intel_dp_prepare_link_train(struct intel_dp *intel_dp,
>   return true;
>  }
>  
> -static void intel_dp_link_training_clock_recovery_delay(struct intel_dp 
> *intel_dp,
> - enum drm_dp_phy dp_phy)
> -{
> - if (dp_phy == DP_PHY_DPRX)
> - drm_dp_link_train_clock_recovery_delay(&intel_dp->aux, 
> intel_dp->dpcd);
> - else
> - drm_dp_lttpr_link_train_clock_recovery_delay();
> -}
> -
>  static bool intel_dp_adjust_request_changed(const struct intel_crtc_state 
> *crtc_state,
>   const u8 
> old_link_status[DP_LINK_STATUS_SIZE],
>   const u8 
> new_link_status[DP_LINK_STATUS_SIZE])
> @@ -750,6 +741,11 @@ intel_dp_link_training_clock_recovery(struct intel_dp 
> *intel_dp,
>   u8 link_status[DP_LINK_STATUS_SIZE];
>   bool max_vswing_reached = false;
>   char phy_name[10];
> + int delay_us;
> +
> + delay_us = drm_dp_read_clock_recovery_delay(&intel_dp->aux,
> + intel_dp->dpcd, dp_phy,
> + 
> intel_dp_is_uhbr(crtc_state));
>  
>   intel_dp_phy_name(dp_phy, phy_name, sizeof(phy_name));
>  
> @@ -777,7 +773,7 @@ intel_dp_link_training_clock_recovery(struct intel_dp 
> *intel_dp,
>  
>   voltage_tries = 1;
>   for (cr_tries = 0; cr_tries < max_cr_tries; ++cr_tries) {
> - intel_dp_link_training_clock_recovery_delay(intel_dp, dp_phy);
> + usleep_range(delay_us, 2 * delay_us);
>  
>   if (drm_dp_dpcd_read_phy_link_status(&intel_dp->aux, dp_phy,
>link_status) < 0) {
> @@ -895,19 +891,6 @@ static u32 intel_dp_training_pattern(struct intel_dp 
> *intel_dp,
>   return DP_TRAINING_PATTERN_2;
>  }
>  
> -static void
> -intel_dp_link_training_channel_equalization_delay(struct intel_dp *intel_dp,
> -   enum drm_dp_phy dp_phy)
> -{
> - if (dp_phy == DP_PHY_DPRX) {
> - drm_dp_link_train_channel_eq_delay(&intel_dp->aux, 
> intel_dp->dpcd);
> - } else {
> - const u8 *phy_caps = intel_dp_lttpr_phy_caps(intel_dp, dp_phy);
> -
> - drm_dp_lttpr_link_train_channel_eq_delay(&intel_dp->aux, 
> phy_caps);
> - }
> -}
> -
>  /*
>   * Perform the link training channel equalization phase on the given DP PHY
>   * using one of training pattern 2, 3 or 4 depending on the source and
> @@ -925,6 +908,11 @@ intel_dp_link_training_channel_equalization(struct 
> intel_dp *intel_dp,
>   u8 link_status[DP_LINK_STATUS_SIZE];
>   bool channel_eq = false;
>   char phy_name[10];
> + int delay_us;
> +
> + delay_us = drm_dp_read_channel_eq_delay(&intel_dp->aux,
> + intel_dp->dpcd, dp_phy,
> + intel_dp_is_uhbr(crtc_state));
>  
>   intel_dp_phy_name(dp_phy, phy_name, sizeof(phy_name));
>  
> @@ -944,8 +932,8 @@ intel_dp_link_training_channel_equalization(struct 
> intel_dp *intel_dp,
>   }
>  
>   for (tries = 0; tries < 5; tries++) {
> - intel_dp_link_training_channel_equalization_delay(intel_dp,
> -   dp_phy);
> + usleep_range(delay_us, 2 * delay_us);
> +
>   if (drm_dp_dpcd_read_phy_link_status(&intel_dp->aux, dp_phy,
>link_status) < 0) {
>   drm_err(&i915->drm,
> -- 
> 2.30.2

-- 
Ville Syrjälä
Intel


Re: [PATCH 1/2] drm/dp: add helpers to read link training delays

2021-10-13 Thread Ville Syrjälä
On Tue, Oct 12, 2021 at 05:43:20PM +0300, Jani Nikula wrote:
> The link training delays are different and/or available in different
> DPCD offsets depending on:
> 
> - Clock recovery vs. channel equalization
> - DPRX vs. LTTPR
> - 128b/132b vs. 8b/10b
> - DPCD 1.4+ vs. earlier
> 
> Add helpers to get the correct delays in us, reading DPCD if
> necessary. This is more straightforward than trying to retrofit the
> existing helpers to take 128b/132b into account.
> 
> Having to pass in the DPCD receiver cap field seems unavoidable, because
> reading it involves checking the revision and reading extended receiver
> cap. So unfortunately the interface is mixed cached and read as needed.
> 
> Cc: Ville Syrjälä 
> Signed-off-by: Jani Nikula 
> ---
>  drivers/gpu/drm/drm_dp_helper.c | 132 
>  include/drm/drm_dp_helper.h |  21 -
>  2 files changed, 151 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
> index 4d0d1e8e51fa..04ebef7f5aa7 100644
> --- a/drivers/gpu/drm/drm_dp_helper.c
> +++ b/drivers/gpu/drm/drm_dp_helper.c
> @@ -154,6 +154,138 @@ u8 drm_dp_get_adjust_request_post_cursor(const u8 
> link_status[DP_LINK_STATUS_SIZ
>  }
>  EXPORT_SYMBOL(drm_dp_get_adjust_request_post_cursor);
>  
> +static int __8b10b_clock_recovery_delay_us(const struct drm_dp_aux *aux, u8 
> rd_interval)
> +{
> + if (rd_interval > 4)
> + drm_dbg_kms(aux->drm_dev, "%s: invalid AUX interval 0x%02x (max 
> 4)\n",
> + aux->name, rd_interval);
> +
> + if (rd_interval == 0)
> + return 100;
> +
> + return rd_interval * 4 * USEC_PER_MSEC;
> +}
> +
> +static int __8b10b_channel_eq_delay_us(const struct drm_dp_aux *aux, u8 
> rd_interval)
> +{
> + if (rd_interval > 4)
> + drm_dbg_kms(aux->drm_dev, "%s: invalid AUX interval 0x%02x (max 
> 4)\n",
> + aux->name, rd_interval);
> +
> + if (rd_interval == 0)
> + return 400;
> +
> + return rd_interval * 4 * USEC_PER_MSEC;
> +}

Is there a reason you're not reusing these in the existing sleepy
functions? Maybe just passing in the dpcd receiver cap all the way 
would also be nicer since then these functions would do all the work,
instead of splitting it partially between these and the caller.
Also with the 1.4+ case handled elsewhere there won't be debug
spew for illegal values (not sure we care too much though).

> +
> +static int __128b132b_channel_eq_delay_us(const struct drm_dp_aux *aux, u8 
> rd_interval)
> +{
> + switch (rd_interval) {
> + default:
> + drm_dbg_kms(aux->drm_dev, "%s: invalid AUX interval 0x%02x\n",
> + aux->name, rd_interval);
> + fallthrough;
> + case DP_128B132B_TRAINING_AUX_RD_INTERVAL_400_US:
> + return 400;
> + case DP_128B132B_TRAINING_AUX_RD_INTERVAL_4_MS:
> + return 4000;
> + case DP_128B132B_TRAINING_AUX_RD_INTERVAL_8_MS:
> + return 8000;
> + case DP_128B132B_TRAINING_AUX_RD_INTERVAL_12_MS:
> + return 12000;
> + case DP_128B132B_TRAINING_AUX_RD_INTERVAL_16_MS:
> + return 16000;
> + case DP_128B132B_TRAINING_AUX_RD_INTERVAL_32_MS:
> + return 32000;
> + case DP_128B132B_TRAINING_AUX_RD_INTERVAL_64_MS:
> + return 64000;
> + }
> +}

The spec does claim that only 00-06 are legal also for the CR delay.
So here too we lose the debug spew if we don't have the CR version
of this.

> +
> +/*
> + * The link training delays are different for:
> + *
> + *  - Clock recovery vs. channel equalization
> + *  - DPRX vs. LTTPR
> + *  - 128b/132b vs. 8b/10b
> + *  - DPCD rev 1.3 vs. later
> + *
> + * Get the correct delay in us, reading DPCD if necessary.
> + */
> +static int __read_delay(struct drm_dp_aux *aux, const u8 
> dpcd[DP_RECEIVER_CAP_SIZE],
> + enum drm_dp_phy dp_phy, bool uhbr, bool cr)
> +{
> + int (*parse)(const struct drm_dp_aux *aux, u8 rd_interval);
> + unsigned int offset;
> + u8 rd_interval, mask;
> + int delay_us;
> +
> + if (dp_phy == DP_PHY_DPRX) {
> + if (uhbr) {
> + if (cr)
> + return 100;
> +
> + offset = DP_128B132B_TRAINING_AUX_RD_INTERVAL;
> + mask = DP_128B132B_TRAINING_AUX_RD_INTERVAL_MASK;
> + parse = __128b132b_channel_eq_delay_us;
> + } else {
> + if (cr && dpcd[DP_DPCD_REV] >= DP_DPCD_REV_14)
> + return 100;
> +
> + offset = DP_TRAINING_AUX_RD_INTERVAL;
> + mask = DP_TRAINING_AUX_RD_MASK;
> + if (cr)
> + parse = __8b10b_clock_recovery_delay_us;
> + else
> + parse = __8b10b_channel_eq_delay_us;
> + }
> + } e

Re: [PATCH 1/4] drm: Introduce drm_modeset_lock_ctx_retry()

2021-10-13 Thread Fernando Ramos
On 21/10/13 03:06PM, Ville Syrjälä wrote:
> > And yes C is dangerous, but also C is verbose. I think one lesson from igt
> > is that too many magic block constructs are bad, it's just not how C
> > works. Definitely not in the kernel, where "oops I got it wrong because it
> > was too clever" is bad.
> > 
> > > > Yes the macro we have is also not nice, but at least it's a screaming
> > > > macro since it's all uppercase, so options are all a bit sucky. Which
> > > > leads me to think we have a bit a https://xkcd.com/927/ situation going
> > > > on.
> > > > 
> > > > I think minimally we should have one way to do this.
> > > 
> > > Well, there is no one way atm. All you can do is hand roll all the
> > > boilerplate (and likely get it slightly wrong) if you don't want
> > > lock_all.
> > > 
> > > The current macros only help with lock_all, and IMO the hidden gotos
> > > are even uglier than a hidden for loop. Fernando already hit a case
> > > where he couldn't use the macros twice due to conflicting goto
> > > labels. With this for loop thing I think it would have just worked(tm).
> > 
> > I'm totally ok with repainting the shed, I just don't want some 80s
> > multicolor flash show.
> 
> You have a better idea in mind?

Sorry, I completely forgot this discussion was going on and I just published V4
of my patch set here:

https://lore.kernel.org/dri-devel/20211013204846.90026-1-green...@u92.eu/

Please feel free to let me know (ideally, as a reply to the corresponding i915
patch from that set) if you would rather I not modify the i915 files for now.

Thanks.
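
For reference, the hand-rolled drm_modeset_acquire_ctx boilerplate being
discussed in this thread looks roughly like this for a single lock (a
generic sketch, not code from the series; example_touch_crtc() is a
made-up name):

#include <drm/drm_crtc.h>
#include <drm/drm_modeset_lock.h>

static int example_touch_crtc(struct drm_crtc *crtc)
{
	struct drm_modeset_acquire_ctx ctx;
	int ret;

	drm_modeset_acquire_init(&ctx, 0);
retry:
	ret = drm_modeset_lock(&crtc->mutex, &ctx);
	if (ret == -EDEADLK) {
		/* Another thread won the ww ordering: back off and retry. */
		drm_modeset_backoff(&ctx);
		goto retry;
	}
	if (ret)
		goto out;

	/* ... touch the state protected by crtc->mutex ... */

out:
	drm_modeset_drop_locks(&ctx);
	drm_modeset_acquire_fini(&ctx);
	return ret;
}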


[PATCH 11/25] drm/i915/guc: Implement parallel context pin / unpin functions

2021-10-13 Thread Matthew Brost
Parallel contexts are perma-pinned by the upper layers, which makes the
backend implementation rather simple. The parent pins the guc_id and
children increment the parent's pin count on pin to ensure all the
contexts are unpinned before we disable scheduling with the GuC or
deregister the context.

v2:
 (Daniel Vetter)
  - Perma-pin parallel contexts

Signed-off-by: Matthew Brost 
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 70 +++
 1 file changed, 70 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index c4d7a5c3b558..9fc40e3c1794 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -2585,6 +2585,76 @@ static const struct intel_context_ops 
virtual_guc_context_ops = {
.get_sibling = guc_virtual_get_sibling,
 };
 
+/* Future patches will use this function */
+__maybe_unused
+static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
+{
+   struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
+   struct intel_guc *guc = ce_to_guc(ce);
+   int ret;
+
+   GEM_BUG_ON(!intel_context_is_parent(ce));
+   GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+   ret = pin_guc_id(guc, ce);
+   if (unlikely(ret < 0))
+   return ret;
+
+   return __guc_context_pin(ce, engine, vaddr);
+}
+
+/* Future patches will use this function */
+__maybe_unused
+static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
+{
+   struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
+
+   GEM_BUG_ON(!intel_context_is_child(ce));
+   GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+   __intel_context_pin(ce->parallel.parent);
+   return __guc_context_pin(ce, engine, vaddr);
+}
+
+/* Future patches will use this function */
+__maybe_unused
+static void guc_parent_context_unpin(struct intel_context *ce)
+{
+   struct intel_guc *guc = ce_to_guc(ce);
+
+   GEM_BUG_ON(context_enabled(ce));
+   GEM_BUG_ON(intel_context_is_barrier(ce));
+   GEM_BUG_ON(!intel_context_is_parent(ce));
+   GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+   unpin_guc_id(guc, ce);
+   lrc_unpin(ce);
+}
+
+/* Future patches will use this function */
+__maybe_unused
+static void guc_child_context_unpin(struct intel_context *ce)
+{
+   GEM_BUG_ON(context_enabled(ce));
+   GEM_BUG_ON(intel_context_is_barrier(ce));
+   GEM_BUG_ON(!intel_context_is_child(ce));
+   GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+   lrc_unpin(ce);
+}
+
+/* Future patches will use this function */
+__maybe_unused
+static void guc_child_context_post_unpin(struct intel_context *ce)
+{
+   GEM_BUG_ON(!intel_context_is_child(ce));
+   GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
+   GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
+
+   lrc_post_unpin(ce);
+   intel_context_unpin(ce->parallel.parent);
+}
+
 static bool
 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
 {
-- 
2.32.0



[PATCH 13/25] drm/i915/guc: Insert submit fences between requests in parent-child relationship

2021-10-13 Thread Matthew Brost
For contexts in a parent-child relationship to function correctly, the GuC
must receive their requests in the order they were submitted. To ensure this,
insert a submit fence between the current request and the last request
submitted for requests / contexts in a parent-child relationship. This is
conceptually similar to a single timeline.

Signed-off-by: Matthew Brost 
Cc: John Harrison 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_context.h   |   5 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |   6 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   5 +-
 drivers/gpu/drm/i915/i915_request.c   | 120 ++
 4 files changed, 108 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index b63c10a144af..1bc705f98e2a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -75,6 +75,11 @@ intel_context_to_parent(struct intel_context *ce)
}
 }
 
+static inline bool intel_context_is_parallel(struct intel_context *ce)
+{
+   return intel_context_is_child(ce) || intel_context_is_parent(ce);
+}
+
 void intel_context_bind_parent_child(struct intel_context *parent,
 struct intel_context *child);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 48decb5ee954..8309d1141d0a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -237,6 +237,12 @@ struct intel_context {
};
/** @parent: pointer to parent if child */
struct intel_context *parent;
+   /**
+* @last_rq: last request submitted on a parallel context, used
+* to insert submit fences between requests in the parallel
+* context
+*/
+   struct i915_request *last_rq;
/** @number_children: number of children if parent */
u8 number_children;
/** @guc: GuC specific members for parallel submission */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 77591e764195..f690b7c2b295 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -685,8 +685,7 @@ static inline int rq_prio(const struct i915_request *rq)
 
 static bool is_multi_lrc_rq(struct i915_request *rq)
 {
-   return intel_context_is_child(rq->context) ||
-   intel_context_is_parent(rq->context);
+   return intel_context_is_parallel(rq->context);
 }
 
 static bool can_merge_rq(struct i915_request *rq,
@@ -2874,6 +2873,8 @@ static void guc_parent_context_unpin(struct intel_context 
*ce)
GEM_BUG_ON(!intel_context_is_parent(ce));
GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
 
+   if (ce->parallel.last_rq)
+   i915_request_put(ce->parallel.last_rq);
unpin_guc_id(guc, ce);
lrc_unpin(ce);
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index c0d27072c28d..8bdf9f2f9b90 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1525,36 +1525,62 @@ i915_request_await_object(struct i915_request *to,
return ret;
 }
 
+static inline bool is_parallel_rq(struct i915_request *rq)
+{
+   return intel_context_is_parallel(rq->context);
+}
+
+static inline struct intel_context *request_to_parent(struct i915_request *rq)
+{
+   return intel_context_to_parent(rq->context);
+}
+
 static struct i915_request *
-__i915_request_add_to_timeline(struct i915_request *rq)
+__i915_request_ensure_parallel_ordering(struct i915_request *rq,
+   struct intel_timeline *timeline)
 {
-   struct intel_timeline *timeline = i915_request_timeline(rq);
struct i915_request *prev;
 
-   /*
-* Dependency tracking and request ordering along the timeline
-* is special cased so that we can eliminate redundant ordering
-* operations while building the request (we know that the timeline
-* itself is ordered, and here we guarantee it).
-*
-* As we know we will need to emit tracking along the timeline,
-* we embed the hooks into our request struct -- at the cost of
-* having to have specialised no-allocation interfaces (which will
-* be beneficial elsewhere).
-*
-* A second benefit to open-coding i915_request_await_request is
-* that we can apply a slight variant of the rules specialised
-* for timelines that jump between engines (such as virtual engines).
-* If we consider the case of virtual engine, we must emit a dma-fence
-* to prevent scheduling of the second request until the first is
-* compl

[PATCH 05/25] drm/i915: Add logical engine mapping

2021-10-13 Thread Matthew Brost
Add logical engine mapping. This is required for split-frame, as
workloads need to be placed on engines in a logically contiguous manner.
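
As a worked example (illustrative only, not part of the patch): if a class
has four possible instances and instance 1 is fused off, physical instances
0, 2 and 3 map to logical instances 0, 1 and 2. A standalone sketch of that
remapping:

#include <stdio.h>

int main(void)
{
	const unsigned int present_mask = 0xd;	/* instances 0, 2, 3 present */
	unsigned int instance, logical = 0;

	for (instance = 0; instance < 4; instance++) {
		if (!(present_mask & (1u << instance)))
			continue;
		printf("physical %u -> logical %u\n", instance, logical++);
	}

	return 0;
}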

v2:
 (Daniel Vetter)
  - Add kernel doc for new fields
v3:
 (Tvrtko)
  - Update comment for new logical_mask field
v4:
 (John Harrison)
  - Update comment for new logical_mask field

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 60 ---
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  7 +++
 .../drm/i915/gt/intel_execlists_submission.c  |  1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c|  2 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 21 +--
 5 files changed, 62 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 2ae57e4656a3..2eb798ad068b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, 
u16 iir)
GEM_DEBUG_WARN_ON(iir);
 }
 
-static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
+static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
+ u8 logical_instance)
 {
const struct engine_info *info = &intel_engines[id];
struct drm_i915_private *i915 = gt->i915;
@@ -335,6 +336,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum 
intel_engine_id id)
 
engine->class = info->class;
engine->instance = info->instance;
+   engine->logical_mask = BIT(logical_instance);
__sprint_engine_name(engine);
 
engine->props.heartbeat_interval_ms =
@@ -588,6 +590,37 @@ static intel_engine_mask_t init_engine_mask(struct 
intel_gt *gt)
return info->engine_mask;
 }
 
+static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
+u8 class, const u8 *map, u8 num_instances)
+{
+   int i, j;
+   u8 current_logical_id = 0;
+
+   for (j = 0; j < num_instances; ++j) {
+   for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
+   if (!HAS_ENGINE(gt, i) ||
+   intel_engines[i].class != class)
+   continue;
+
+   if (intel_engines[i].instance == map[j]) {
+   logical_ids[intel_engines[i].instance] =
+   current_logical_id++;
+   break;
+   }
+   }
+   }
+}
+
+static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
+{
+   int i;
+   u8 map[MAX_ENGINE_INSTANCE + 1];
+
+   for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
+   map[i] = i;
+   populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
+}
+
 /**
  * intel_engines_init_mmio() - allocate and prepare the Engine Command 
Streamers
  * @gt: pointer to struct intel_gt
@@ -599,7 +632,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
const unsigned int engine_mask = init_engine_mask(gt);
unsigned int mask = 0;
-   unsigned int i;
+   unsigned int i, class;
+   u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
int err;
 
drm_WARN_ON(&i915->drm, engine_mask == 0);
@@ -609,15 +643,23 @@ int intel_engines_init_mmio(struct intel_gt *gt)
if (i915_inject_probe_failure(i915))
return -ENODEV;
 
-   for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
-   if (!HAS_ENGINE(gt, i))
-   continue;
+   for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
+   setup_logical_ids(gt, logical_ids, class);
 
-   err = intel_engine_setup(gt, i);
-   if (err)
-   goto cleanup;
+   for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
+   u8 instance = intel_engines[i].instance;
+
+   if (intel_engines[i].class != class ||
+   !HAS_ENGINE(gt, i))
+   continue;
 
-   mask |= BIT(i);
+   err = intel_engine_setup(gt, i,
+logical_ids[instance]);
+   if (err)
+   goto cleanup;
+
+   mask |= BIT(i);
+   }
}
 
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9167ce52487c..e0f773585c29 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -269,6 +269,13 @@ struct intel_engine_cs {
unsigned int guc_id;
 
intel_engine_mask_t mask;
+   /**
+* @logical_mask: logical mask of engine,

[PATCH 25/25] drm/i915/execlists: Weak parallel submission support for execlists

2021-10-13 Thread Matthew Brost
A weak implementation of parallel submission (multi-bb execbuf IOCTL) for
execlists. This does as little as possible to support the interface for
execlists - basically just passing submit fences between each generated
request; virtual engines are not allowed. This is on par with what is
there for the existing (hopefully soon deprecated) bonding interface.

We perma-pin these execlists contexts to align with GuC implementation.

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 10 ++--
 drivers/gpu/drm/i915/gt/intel_context.c   |  4 +-
 .../drm/i915/gt/intel_execlists_submission.c  | 56 ++-
 drivers/gpu/drm/i915/gt/intel_lrc.c   |  2 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  2 -
 5 files changed, 64 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 66c7f3c0d08b..c1436f21b271 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct 
i915_user_extension __user *base,
struct intel_engine_cs **siblings = NULL;
intel_engine_mask_t prev_mask;
 
-   /* FIXME: This is NIY for execlists */
-   if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
-   return -ENODEV;
-
if (get_user(slot, &ext->engine_index))
return -EFAULT;
 
@@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct 
i915_user_extension __user *base,
if (get_user(num_siblings, &ext->num_siblings))
return -EFAULT;
 
+   if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) {
+   drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC 
mode\n",
+   num_siblings);
+   return -EINVAL;
+   }
+
if (slot >= set->num_engines) {
drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
slot, set->num_engines);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 5634d14052bc..1bec92e1d8e6 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context 
*ce)
 
__i915_active_acquire(&ce->active);
 
-   if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine))
+   if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) ||
+   intel_context_is_parallel(ce))
return 0;
 
/* Preallocate tracking nodes */
@@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context 
*parent,
 * Callers responsibility to validate that this function is used
 * correctly but we use GEM_BUG_ON here ensure that they do.
 */
-   GEM_BUG_ON(!intel_engine_uses_guc(parent->engine));
GEM_BUG_ON(intel_context_is_pinned(parent));
GEM_BUG_ON(intel_context_is_child(parent));
GEM_BUG_ON(intel_context_is_pinned(child));
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index bedb80057046..8cd986bdf26c 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs 
*engine)
 
 static bool ctx_single_port_submission(const struct intel_context *ce)
 {
-   return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
-   intel_context_force_single_submission(ce));
+   return intel_context_force_single_submission(ce);
 }
 
 static bool can_merge_ctx(const struct intel_context *prev,
@@ -2598,6 +2597,58 @@ static void execlists_context_cancel_request(struct 
intel_context *ce,
  current->comm);
 }
 
+static struct intel_context *
+execlists_create_parallel(struct intel_engine_cs **engines,
+ unsigned int num_siblings,
+ unsigned int width)
+{
+   struct intel_engine_cs **siblings = NULL;
+   struct intel_context *parent = NULL, *ce, *err;
+   int i, j;
+
+   GEM_BUG_ON(num_siblings != 1);
+
+   siblings = kmalloc_array(num_siblings,
+sizeof(*siblings),
+GFP_KERNEL);
+   if (!siblings)
+   return ERR_PTR(-ENOMEM);
+
+   for (i = 0; i < width; ++i) {
+   for (j = 0; j < num_siblings; ++j)
+   siblings[j] = engines[i * num_siblings + j];
+
+   ce = intel_context_create(siblings[0]);
+   if (!ce) {
+   err = ERR_PTR(-ENOMEM);
+   goto unwind;
+   }
+
+   if (i == 0)
+   parent = ce;
+

[PATCH 10/25] drm/i915/guc: Assign contexts in parent-child relationship consecutive guc_ids

2021-10-13 Thread Matthew Brost
Assign contexts in parent-child relationship consecutive guc_ids. This
is accomplished by partitioning the guc_id space between ones that need to
be consecutive (1/16 of the available guc_ids) and ones that do not (15/16
of the available guc_ids). The consecutive search is implemented via the bitmap
API.

This is a precursor to the full GuC multi-lrc implementation but aligns
to how the GuC multi-lrc interface is defined - guc_ids must be consecutive
when using the GuC multi-lrc interface.
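
For illustration only (not part of the patch), a standalone sketch of the
partitioning arithmetic and of why a parent with N children consumes a
power-of-two region of the bitmap (mirroring order_base_2(number_children + 1)
in the code below):

#include <stdio.h>

/* Smallest order such that (1 << order) >= n, as region allocators use */
static unsigned int order_of(unsigned int n)
{
	unsigned int order = 0;

	while ((1u << order) < n)
		order++;

	return order;
}

int main(void)
{
	const unsigned int max_guc_ids = 64 * 1024;	/* 64k guc_ids total */
	const unsigned int multi_lrc_ids = max_guc_ids / 16;
	unsigned int children;

	printf("guc_ids reserved for multi-lrc: %u\n", multi_lrc_ids);

	for (children = 1; children <= 7; children++) {
		unsigned int order = order_of(children + 1); /* parent + children */

		printf("%u children -> region of %u consecutive ids\n",
		       children, 1u << order);
	}

	return 0;
}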

v2:
 (Daniel Vetter)
  - Explicitly state why we assign consecutive guc_ids
v3:
 (John Harrison)
  - Bring back in spin lock

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   6 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 104 ++
 2 files changed, 86 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 74f071a0b6d5..4ca197f400ba 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -82,9 +82,13 @@ struct intel_guc {
 */
spinlock_t lock;
/**
-* @guc_ids: used to allocate new guc_ids
+* @guc_ids: used to allocate new guc_ids, single-lrc
 */
struct ida guc_ids;
+   /**
+* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
+*/
+   unsigned long *guc_ids_bitmap;
/**
 * @guc_id_list: list of intel_context with valid guc_ids but no
 * refs
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index abf867f4f659..c4d7a5c3b558 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -128,6 +128,16 @@ guc_create_virtual(struct intel_engine_cs **siblings, 
unsigned int count);
 
 #define GUC_REQUEST_SIZE 64 /* bytes */
 
+/*
+ * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
+ * per the GuC submission interface. A different allocation algorithm is used
+ * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
+ * partition the guc_id space. We believe the number of multi-lrc contexts in
+ * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
+ * multi-lrc.
+ */
+#define NUMBER_MULTI_LRC_GUC_ID(GUC_MAX_LRC_DESCRIPTORS / 16)
+
 /*
  * Below is a set of functions which control the GuC scheduling state which
  * require a lock.
@@ -1208,6 +1218,11 @@ int intel_guc_submission_init(struct intel_guc *guc)
INIT_WORK(&guc->submission_state.destroyed_worker,
  destroyed_worker_func);
 
+   guc->submission_state.guc_ids_bitmap =
+   bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL);
+   if (!guc->submission_state.guc_ids_bitmap)
+   return -ENOMEM;
+
return 0;
 }
 
@@ -1219,6 +1234,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
guc_flush_destroyed_contexts(guc);
guc_lrc_desc_pool_destroy(guc);
i915_sched_engine_put(guc->sched_engine);
+   bitmap_free(guc->submission_state.guc_ids_bitmap);
 }
 
 static inline void queue_request(struct i915_sched_engine *sched_engine,
@@ -1270,18 +1286,43 @@ static void guc_submit_request(struct i915_request *rq)
spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
 
-static int new_guc_id(struct intel_guc *guc)
+static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 {
-   return ida_simple_get(&guc->submission_state.guc_ids, 0,
- GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL |
- __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+   int ret;
+
+   GEM_BUG_ON(intel_context_is_child(ce));
+
+   if (intel_context_is_parent(ce))
+   ret = 
bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
+ NUMBER_MULTI_LRC_GUC_ID,
+ 
order_base_2(ce->parallel.number_children
+  + 1));
+   else
+   ret = ida_simple_get(&guc->submission_state.guc_ids,
+NUMBER_MULTI_LRC_GUC_ID,
+GUC_MAX_LRC_DESCRIPTORS,
+GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+__GFP_NOWARN);
+   if (unlikely(ret < 0))
+   return ret;
+
+   ce->guc_id.id = ret;
+   return 0;
 }
 
 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
 {
+   GEM_BUG_ON(intel_context_is_child(ce));
+
if (!context_guc_id_invalid(ce)) {
-   ida_simple_remove(&guc->submission_state.guc_ids,
-  

[PATCH 01/25] drm/i915/guc: Move GuC guc_id allocation under submission state sub-struct

2021-10-13 Thread Matthew Brost
Move guc_id allocation under the submission state sub-struct as a future
patch will reuse the spin lock as a global submission state lock. Moving
this into a sub-struct makes ownership of the fields / lock clear.

v2:
 (Docs)
  - Add comment for submission_state sub-structure
v3:
 (John Harrison)
  - Fixup a few comments

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |  6 +--
 drivers/gpu/drm/i915/gt/uc/intel_guc.h| 28 +++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 48 ++-
 3 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 12252c411159..e7e3984aab78 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -197,18 +197,18 @@ struct intel_context {
struct {
/**
 * @id: handle which is used to uniquely identify this context
-* with the GuC, protected by guc->contexts_lock
+* with the GuC, protected by guc->submission_state.lock
 */
u16 id;
/**
 * @ref: the number of references to the guc_id, when
 * transitioning in and out of zero protected by
-* guc->contexts_lock
+* guc->submission_state.lock
 */
atomic_t ref;
/**
 * @link: in guc->guc_id_list when the guc_id has no refs but is
-* still valid, protected by guc->contexts_lock
+* still valid, protected by guc->submission_state.lock
 */
struct list_head link;
} guc_id;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 5dd174babf7a..82e248c2290c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -71,16 +71,26 @@ struct intel_guc {
} interrupts;
 
/**
-* @contexts_lock: protects guc_ids, guc_id_list, ce->guc_id.id, and
-* ce->guc_id.ref when transitioning in and out of zero
+* @submission_state: sub-structure for submission state protected by
+* single lock
 */
-   spinlock_t contexts_lock;
-   /** @guc_ids: used to allocate unique ce->guc_id.id values */
-   struct ida guc_ids;
-   /**
-* @guc_id_list: list of intel_context with valid guc_ids but no refs
-*/
-   struct list_head guc_id_list;
+   struct {
+   /**
+* @lock: protects everything in submission_state,
+* ce->guc_id.id, and ce->guc_id.ref when transitioning in and
+* out of zero
+*/
+   spinlock_t lock;
+   /**
+* @guc_ids: used to allocate new guc_ids
+*/
+   struct ida guc_ids;
+   /**
+* @guc_id_list: list of intel_context with valid guc_ids but no
+* refs
+*/
+   struct list_head guc_id_list;
+   } submission_state;
 
/**
 * @submission_supported: tracks whether we support GuC submission on
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index ba0de35f6323..b2646b088c7f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -68,14 +68,14 @@
  * fence is used to stall all requests associated with this guc_id until the
  * corresponding G2H returns indicating the guc_id has been deregistered.
  *
- * guc_ids:
+ * submission_state.guc_ids:
  * Unique number associated with private GuC context data passed in during
  * context registration / submission / deregistration. 64k available. Simple 
ida
  * is used for allocation.
  *
  * Stealing guc_ids:
  * If no guc_ids are available they can be stolen from another context at
- * request creation time if that context is unpinned. If a guc_id can't be 
found
+ * request creation time if that context is unpinned. If a guc_id an't be found
  * we punt this problem to the user as we believe this is near impossible to 
hit
  * during normal use cases.
  *
@@ -89,7 +89,7 @@
  * sched_engine can be submitting at a time. Currently only one sched_engine is
  * used for all of GuC submission but that could change in the future.
  *
- * guc->contexts_lock
+ * guc->submission_state.lock
  * Protects guc_id allocation for the given GuC, i.e. only one context can be
  * doing guc_id allocation operations at a time for each GuC in the system.
  *
@@ -103,7 +103,7 @@
  *
  * Lock ordering rules:
  * sched_engine->lock -> ce->guc_state.lock
- * guc->contexts_lock -> ce->guc_state.lock
+ * guc->submission_state.lock -> ce->guc_state.lock
  *
  

[PATCH 15/25] drm/i915/guc: Update debugfs for GuC multi-lrc

2021-10-13 Thread Matthew Brost
Display the workqueue status in debugfs for GuC contexts that are in
a parent-child relationship.

v2:
 (John Harrison)
  - Output number children in debugfs

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 52 ++-
 1 file changed, 38 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index bc052d206861..013f36ef98cc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -3708,6 +3708,25 @@ static inline void guc_log_context_priority(struct 
drm_printer *p,
drm_printf(p, "\n");
 }
 
+static inline void guc_log_context(struct drm_printer *p,
+  struct intel_context *ce)
+{
+   drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
+   drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
+   drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
+  ce->ring->head,
+  ce->lrc_reg_state[CTX_RING_HEAD]);
+   drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
+  ce->ring->tail,
+  ce->lrc_reg_state[CTX_RING_TAIL]);
+   drm_printf(p, "\t\tContext Pin Count: %u\n",
+  atomic_read(&ce->pin_count));
+   drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
+  atomic_read(&ce->guc_id.ref));
+   drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
+  ce->guc_state.sched_state);
+}
+
 void intel_guc_submission_print_context_info(struct intel_guc *guc,
 struct drm_printer *p)
 {
@@ -3717,22 +3736,27 @@ void intel_guc_submission_print_context_info(struct 
intel_guc *guc,
 
xa_lock_irqsave(&guc->context_lookup, flags);
xa_for_each(&guc->context_lookup, index, ce) {
-   drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
-   drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
-   drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
-  ce->ring->head,
-  ce->lrc_reg_state[CTX_RING_HEAD]);
-   drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
-  ce->ring->tail,
-  ce->lrc_reg_state[CTX_RING_TAIL]);
-   drm_printf(p, "\t\tContext Pin Count: %u\n",
-  atomic_read(&ce->pin_count));
-   drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
-  atomic_read(&ce->guc_id.ref));
-   drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
-  ce->guc_state.sched_state);
+   GEM_BUG_ON(intel_context_is_child(ce));
 
+   guc_log_context(p, ce);
guc_log_context_priority(p, ce);
+
+   if (intel_context_is_parent(ce)) {
+   struct guc_process_desc *desc = __get_process_desc(ce);
+   struct intel_context *child;
+
+   drm_printf(p, "\t\tNumber children: %u\n",
+  ce->parallel.number_children);
+   drm_printf(p, "\t\tWQI Head: %u\n",
+  READ_ONCE(desc->head));
+   drm_printf(p, "\t\tWQI Tail: %u\n",
+  READ_ONCE(desc->tail));
+   drm_printf(p, "\t\tWQI Status: %u\n\n",
+  READ_ONCE(desc->wq_status));
+
+   for_each_child(ce, child)
+   guc_log_context(p, child);
+   }
}
xa_unlock_irqrestore(&guc->context_lookup, flags);
 }
-- 
2.32.0



[PATCH 20/25] drm/i915: Multi-BB execbuf

2021-10-13 Thread Matthew Brost
Allow multiple batch buffers to be submitted in a single execbuf IOCTL
after a context has been configured with the 'set_parallel' extension.
The number of batches is implicit based on the context's configuration.

This is implemented with a series of loops. First a loop is used to find
all the batches, a loop to pin all the HW contexts, a loop to create all
the requests, a loop to submit (emit BB start, etc...) all the requests,
a loop to tie the requests to the VMAs they touch, and finally a loop to
commit the requests to the backend.

A composite fence is also created for the generated requests to return
to the user and to stick in dma resv slots.
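
For illustration only (not part of this patch), a hedged sketch of how the
per-batch request fences could be wrapped into one composite fence; the real
eb_composite_fence_create() differs in details such as fence ownership and
the fence context used:

static struct dma_fence *
sketch_composite_fence(struct i915_request **requests, unsigned int num_batches)
{
	struct dma_fence **fences;
	struct dma_fence_array *array;
	unsigned int i;

	fences = kmalloc_array(num_batches, sizeof(*fences), GFP_KERNEL);
	if (!fences)
		return ERR_PTR(-ENOMEM);

	/* One fence per batch; the array signals once all of them signal */
	for (i = 0; i < num_batches; i++)
		fences[i] = dma_fence_get(&requests[i]->fence);

	array = dma_fence_array_create(num_batches, fences,
				       dma_fence_context_alloc(1), 0, false);
	if (!array) {
		for (i = 0; i < num_batches; i++)
			dma_fence_put(fences[i]);
		kfree(fences);
		return ERR_PTR(-ENOMEM);
	}

	return &array->base;
}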

No behavior from the existing IOCTL should be changed aside from when
throttling because the ring for a context is full. In this situation,
i915 will now wait while holding the object locks. This change was made
because the code is much simpler if it waits while holding the locks, and
we believe there isn't a huge benefit to dropping them. If this
proves false we can restructure the code to drop the locks during the
wait.

IGT: https://patchwork.freedesktop.org/patch/447008/?series=93071&rev=1
media UMD: https://github.com/intel/media-driver/pull/1252

v2:
 (Matthew Brost)
  - Return proper error value if i915_request_create fails
v3:
 (John Harrison)
  - Add comment explaining create / add order loops + locking
  - Update commit message explaining different in IOCTL behavior
  - Line wrap some comments
  - eb_add_request returns void
  - Return -EINVAL rather triggering BUG_ON if cmd parser used
 (Checkpatch)
  - Check eb->batch_len[*current_batch]
v4:
 (CI)
  - Set batch len if passed if via execbuf args
  - Call __i915_request_skip after __i915_request_commit
 (Kernel test robot)
  - Initialize rq to NULL in eb_pin_timeline

Signed-off-by: Matthew Brost 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 783 --
 drivers/gpu/drm/i915/gt/intel_context.h   |   8 +-
 drivers/gpu/drm/i915/gt/intel_context_types.h |  10 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   2 +
 drivers/gpu/drm/i915/i915_request.h   |   9 +
 drivers/gpu/drm/i915/i915_vma.c   |  21 +-
 drivers/gpu/drm/i915/i915_vma.h   |  13 +-
 7 files changed, 595 insertions(+), 251 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index c75afc8784e3..6509c9d8c298 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -246,17 +246,25 @@ struct i915_execbuffer {
struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
struct eb_vma *vma;
 
-   struct intel_engine_cs *engine; /** engine to queue the request to */
+   struct intel_gt *gt; /* gt for the execbuf */
struct intel_context *context; /* logical state for the request */
struct i915_gem_context *gem_context; /** caller's context */
 
-   struct i915_request *request; /** our request to build */
-   struct eb_vma *batch; /** identity of the batch obj/vma */
+   /** our requests to build */
+   struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
+   /** identity of the batch obj/vma */
+   struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1];
struct i915_vma *trampoline; /** trampoline used for chaining */
 
+   /** used for excl fence in dma_resv objects when > 1 BB submitted */
+   struct dma_fence *composite_fence;
+
/** actual size of execobj[] as we may extend it for the cmdparser */
unsigned int buffer_count;
 
+   /* number of batches in execbuf IOCTL */
+   unsigned int num_batches;
+
/** list of vma not yet bound during reservation phase */
struct list_head unbound;
 
@@ -283,7 +291,8 @@ struct i915_execbuffer {
 
u64 invalid_flags; /** Set of execobj.flags that are invalid */
 
-   u64 batch_len; /** Length of batch within object */
+   /** Length of batch within object */
+   u64 batch_len[MAX_ENGINE_INSTANCE + 1];
u32 batch_start_offset; /** Location within object of batch */
u32 batch_flags; /** Flags composed for emit_bb_start() */
struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch 
buffer */
@@ -301,14 +310,13 @@ struct i915_execbuffer {
 };
 
 static int eb_parse(struct i915_execbuffer *eb);
-static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
- bool throttle);
+static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
 static void eb_unpin_engine(struct i915_execbuffer *eb);
 
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
-   return intel_engine_requires_cmd_parser(eb->engine) ||
-   (intel_engine_using_cmd_parser(eb->engine) &&
+   return intel_engine_requires_cmd_parser(eb->context->engine) ||
+   (intel_engine_using_cmd_parser(eb

[PATCH 09/25] drm/i915/guc: Ensure GuC schedule operations do not operate on child contexts

2021-10-13 Thread Matthew Brost
With GuC parent-child contexts the parent context controls the scheduling;
ensure only the parent performs the scheduling operations.

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 58a6f494be8f..abf867f4f659 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -324,6 +324,12 @@ static inline void decr_context_committed_requests(struct 
intel_context *ce)
GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
 }
 
+static struct intel_context *
+request_to_scheduling_context(struct i915_request *rq)
+{
+   return intel_context_to_parent(rq->context);
+}
+
 static inline bool context_guc_id_invalid(struct intel_context *ce)
 {
return ce->guc_id.id == GUC_INVALID_LRC_ID;
@@ -1712,6 +1718,7 @@ static void __guc_context_sched_disable(struct intel_guc 
*guc,
 
GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID);
 
+   GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_sched_disable(ce);
 
guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
@@ -1937,6 +1944,8 @@ static void guc_context_sched_disable(struct 
intel_context *ce)
intel_wakeref_t wakeref;
u16 guc_id;
 
+   GEM_BUG_ON(intel_context_is_child(ce));
+
spin_lock_irqsave(&ce->guc_state.lock, flags);
 
/*
@@ -2305,6 +2314,8 @@ static void guc_signal_context_fence(struct intel_context 
*ce)
 {
unsigned long flags;
 
+   GEM_BUG_ON(intel_context_is_child(ce));
+
spin_lock_irqsave(&ce->guc_state.lock, flags);
clr_context_wait_for_deregister_to_register(ce);
__guc_signal_context_fence(ce);
@@ -2335,7 +2346,7 @@ static void guc_context_init(struct intel_context *ce)
 
 static int guc_request_alloc(struct i915_request *rq)
 {
-   struct intel_context *ce = rq->context;
+   struct intel_context *ce = request_to_scheduling_context(rq);
struct intel_guc *guc = ce_to_guc(ce);
unsigned long flags;
int ret;
-- 
2.32.0



[PATCH 19/25] drm/i915/guc: Implement no mid batch preemption for multi-lrc

2021-10-13 Thread Matthew Brost
For some users of multi-lrc, e.g. split frame, it isn't safe to preempt
mid BB. To safely enable preemption at the BB boundary, a handshake
between parent and child is needed, syncing the set of BBs at the
beginning and end of each batch. This is implemented via custom
emit_bb_start & emit_fini_breadcrumb functions and enabled by default if
a context is configured via the set parallel extension.

Lastly, this patch updates the process descriptor to the correct size as
the memory used in the handshake is directly after the process
descriptor.
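
For illustration only (not part of this patch), the offsets implied by the
layout comment in the diff below; the helper names here are purely
illustrative and the in-tree code derives the addresses via offsetof() on
the parent scratch structure:

/* the child-go semaphore sits directly after the process descriptor */
static u32 sketch_child_go_offset(u32 parent_scratch_offset)
{
	return parent_scratch_offset + sizeof(struct guc_process_desc);
}

/* each child-join semaphore occupies its own cacheline after that */
static u32 sketch_child_join_offset(u32 parent_scratch_offset, u8 child_index)
{
	return parent_scratch_offset + sizeof(struct guc_process_desc) +
	       (child_index + 1) * CACHELINE_BYTES;
}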

v2:
 (John Harrison)
  - Fix a few comments' wording
  - Add structure for parent page layout
v3:
 (John Harrison)
  - A structure for sync semaphore
  - Use offsetof to calc address
  - Update commit message

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_context.c   |   2 +-
 drivers/gpu/drm/i915/gt/intel_context_types.h |   2 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 333 +-
 4 files changed, 326 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 6aab60584ee5..5634d14052bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -570,7 +570,7 @@ void intel_context_bind_parent_child(struct intel_context 
*parent,
GEM_BUG_ON(intel_context_is_child(child));
GEM_BUG_ON(intel_context_is_parent(child));
 
-   parent->parallel.number_children++;
+   parent->parallel.child_index = parent->parallel.number_children++;
list_add_tail(&child->parallel.child_link,
  &parent->parallel.child_list);
child->parallel.parent = parent;
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 1d880303a7e4..95a5b94b4ece 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -250,6 +250,8 @@ struct intel_context {
struct i915_request *last_rq;
/** @number_children: number of children if parent */
u8 number_children;
+   /** @child_index: index into child_list if child */
+   u8 child_index;
/** @guc: GuC specific members for parallel submission */
struct {
/** @wqi_head: head pointer in work queue */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index c14fc15dd3a8..2eba6b598e66 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -186,7 +186,7 @@ struct guc_process_desc {
u32 wq_status;
u32 engine_presence;
u32 priority;
-   u32 reserved[30];
+   u32 reserved[36];
 } __packed;
 
 #define CONTEXT_REGISTRATION_FLAG_KMD  BIT(0)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 09a3a9dd7ff6..ae08a196ba0a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -11,6 +11,7 @@
 #include "gt/intel_context.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_heartbeat.h"
+#include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_irq.h"
 #include "gt/intel_gt_pm.h"
@@ -368,11 +369,16 @@ static inline struct i915_priolist *to_priolist(struct 
rb_node *rb)
 
 /*
  * When using multi-lrc submission a scratch memory area is reserved in the
- * parent's context state for the process descriptor and work queue. Currently
- * the scratch area is sized to a page.
+ * parent's context state for the process descriptor, work queue, and handshake
+ * between the parent + children contexts to insert safe preemption points
+ * between each of BBs. Currently the scratch area is sized to a page.
  *
  * The layout of this scratch area is below:
  * 0   guc_process_desc
+ * + sizeof(struct guc_process_desc)   child go
+ * + CACHELINE_BYTES   child join[0]
+ * ...
+ * + CACHELINE_BYTES   child join[n - 1]
  * ... unused
  * PARENT_SCRATCH_SIZE / 2 work queue start
  * ... work queue
@@ -381,7 +387,25 @@ static inline struct i915_priolist *to_priolist(struct 
rb_node *rb)
 #define PARENT_SCRATCH_SIZEPAGE_SIZE
 #define WQ_SIZE(PARENT_SCRATCH_SIZE / 2)
 #define WQ_OFFSET  (PARENT_SCRATCH_SIZE - WQ_SIZE)
-static u32 __get_process_desc_offset(struct intel_context *ce)
+
+struct sync_semaphore {
+   u32 semaphore;
+   u8 unused[CACHELINE_BYTES - sizeof(u32)];
+};
+
+struct parent_scratch {
+   struct guc_process_desc pdesc;
+
+   struct sync_se

[PATCH 16/25] drm/i915/guc: Connect UAPI to GuC multi-lrc interface

2021-10-13 Thread Matthew Brost
Introduce the 'set parallel submit' extension to connect the UAPI to the GuC
multi-lrc interface. The kernel doc in the new uAPI should explain it all.
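
For illustration only (not part of this patch), a rough userspace sketch of
populating the extension for a 2-wide parallel context with a single
placement per slot. The field names below match the get_user()/copy_from_user()
accesses in this patch; the extension name define and the exact struct layout
live in the i915_drm.h hunk and are assumptions here:

	struct {
		struct i915_context_engines_parallel_submit p;
		struct i915_engine_class_instance engines[2];
	} ext = {};

	ext.p.base.name = I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT; /* assumed define */
	ext.p.engine_index = 0;   /* slot in the context's engine map */
	ext.p.width = 2;          /* number of BBs per execbuf */
	ext.p.num_siblings = 1;   /* placements per BB */
	/* engines[] is laid out as engines[i * num_siblings + j] */
	ext.engines[0].engine_class = I915_ENGINE_CLASS_VIDEO;
	ext.engines[0].engine_instance = 0;
	ext.engines[1].engine_class = I915_ENGINE_CLASS_VIDEO;
	ext.engines[1].engine_instance = 1;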

IGT: https://patchwork.freedesktop.org/patch/447008/?series=93071&rev=1
media UMD: https://github.com/intel/media-driver/pull/1252

v2:
 (Daniel Vetter)
  - Add IGT link and placeholder for media UMD link
v3:
 (Kernel test robot)
  - Fix warning in unpin engines call
 (John Harrison)
  - Reword a bunch of the kernel doc
v4:
 (John Harrison)
  - Add comment why perma-pin is done after setting gem context
  - Update some comments / docs for proto contexts

Cc: Tvrtko Ursulin 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 228 +-
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  16 +-
 drivers/gpu/drm/i915/gt/intel_context_types.h |   9 +-
 drivers/gpu/drm/i915/gt/intel_engine.h|  12 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |   6 +-
 .../drm/i915/gt/intel_execlists_submission.c  |   6 +-
 drivers/gpu/drm/i915/gt/selftest_execlists.c  |  12 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 114 -
 include/uapi/drm/i915_drm.h   | 131 ++
 9 files changed, 503 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index d225d3dd0b40..6f23aff6e642 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -556,9 +556,150 @@ set_proto_ctx_engines_bond(struct i915_user_extension 
__user *base, void *data)
return 0;
 }
 
+static int
+set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
+ void *data)
+{
+   struct i915_context_engines_parallel_submit __user *ext =
+   container_of_user(base, typeof(*ext), base);
+   const struct set_proto_ctx_engines *set = data;
+   struct drm_i915_private *i915 = set->i915;
+   u64 flags;
+   int err = 0, n, i, j;
+   u16 slot, width, num_siblings;
+   struct intel_engine_cs **siblings = NULL;
+   intel_engine_mask_t prev_mask;
+
+   /* Disabling for now */
+   return -ENODEV;
+
+   /* FIXME: This is NIY for execlists */
+   if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
+   return -ENODEV;
+
+   if (get_user(slot, &ext->engine_index))
+   return -EFAULT;
+
+   if (get_user(width, &ext->width))
+   return -EFAULT;
+
+   if (get_user(num_siblings, &ext->num_siblings))
+   return -EFAULT;
+
+   if (slot >= set->num_engines) {
+   drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
+   slot, set->num_engines);
+   return -EINVAL;
+   }
+
+   if (set->engines[slot].type != I915_GEM_ENGINE_TYPE_INVALID) {
+   drm_dbg(&i915->drm,
+   "Invalid placement[%d], already occupied\n", slot);
+   return -EINVAL;
+   }
+
+   if (get_user(flags, &ext->flags))
+   return -EFAULT;
+
+   if (flags) {
+   drm_dbg(&i915->drm, "Unknown flags 0x%02llx", flags);
+   return -EINVAL;
+   }
+
+   for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+   err = check_user_mbz(&ext->mbz64[n]);
+   if (err)
+   return err;
+   }
+
+   if (width < 2) {
+   drm_dbg(&i915->drm, "Width (%d) < 2\n", width);
+   return -EINVAL;
+   }
+
+   if (num_siblings < 1) {
+   drm_dbg(&i915->drm, "Number siblings (%d) < 1\n",
+   num_siblings);
+   return -EINVAL;
+   }
+
+   siblings = kmalloc_array(num_siblings * width,
+sizeof(*siblings),
+GFP_KERNEL);
+   if (!siblings)
+   return -ENOMEM;
+
+   /* Create contexts / engines */
+   for (i = 0; i < width; ++i) {
+   intel_engine_mask_t current_mask = 0;
+   struct i915_engine_class_instance prev_engine;
+
+   for (j = 0; j < num_siblings; ++j) {
+   struct i915_engine_class_instance ci;
+
+   n = i * num_siblings + j;
+   if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
+   err = -EFAULT;
+   goto out_err;
+   }
+
+   siblings[n] =
+   intel_engine_lookup_user(i915, ci.engine_class,
+ci.engine_instance);
+   if (!siblings[n]) {
+   drm_dbg(&i915->drm,
+   "Invalid sibling[%d]: { class:%d, 
inst:%d }\n",
+   n, ci.engine_class, ci.engine_instance);
+ 

[PATCH 08/25] drm/i915/guc: Add multi-lrc context registration

2021-10-13 Thread Matthew Brost
Add multi-lrc context registration H2G. In addition a workqueue and
process descriptor are setup during multi-lrc context registration as
these data structures are needed for multi-lrc submission.
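
For illustration only (not part of this patch), the address math used to
reach the process descriptor and work queue; the helper names here are
illustrative, the real ones are in the diff below:

/* the scratch page is appended to the context state */
static u32 sketch_parent_scratch_offset(struct intel_context *ce)
{
	return ce->parallel.guc.parent_page * PAGE_SIZE;
}

/* the work queue occupies the second half of that page */
static u32 sketch_wq_offset(struct intel_context *ce)
{
	return sketch_parent_scratch_offset(ce) + WQ_OFFSET;
}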

v2:
 (John Harrison)
  - Move GuC specific fields into sub-struct
  - Clean up WQ defines
  - Add comment explaining math to derive WQ / PD address
v3:
 (John Harrison)
  - Add PARENT_SCRATCH_SIZE define
  - Update comment explaining multi-lrc register

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |  12 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 -
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 116 +-
 5 files changed, 133 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 76dfca57cb45..48decb5ee954 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -239,6 +239,18 @@ struct intel_context {
struct intel_context *parent;
/** @number_children: number of children if parent */
u8 number_children;
+   /** @guc: GuC specific members for parallel submission */
+   struct {
+   /** @wqi_head: head pointer in work queue */
+   u16 wqi_head;
+   /** @wqi_tail: tail pointer in work queue */
+   u16 wqi_tail;
+   /**
+* @parent_page: page in context state (ce->state) used
+* by parent for work queue, process descriptor
+*/
+   u8 parent_page;
+   } guc;
} parallel;
 
 #ifdef CONFIG_DRM_I915_SELFTEST
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3ef9eaf8c50e..57339d5c1fc8 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -942,6 +942,11 @@ __lrc_alloc_state(struct intel_context *ce, struct 
intel_engine_cs *engine)
context_size += PAGE_SIZE;
}
 
+   if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
+   ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
+   context_size += PAGE_SIZE;
+   }
+
obj = i915_gem_object_create_lmem(engine->i915, context_size,
  I915_BO_ALLOC_PM_VOLATILE);
if (IS_ERR(obj))
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 8ff58aff..ba10bd374cee 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -142,6 +142,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+   INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
INTEL_GUC_ACTION_LIMIT
 };
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index fa4be13c8854..0eeb2a9feeed 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -52,8 +52,6 @@
 
 #define GUC_DOORBELL_INVALID   256
 
-#define GUC_WQ_SIZE(PAGE_SIZE * 2)
-
 /* Work queue item header definitions */
 #define WQ_STATUS_ACTIVE   1
 #define WQ_STATUS_SUSPENDED2
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 84b8e64b148f..58a6f494be8f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -344,6 +344,47 @@ static inline struct i915_priolist *to_priolist(struct 
rb_node *rb)
return rb_entry(rb, struct i915_priolist, node);
 }
 
+/*
+ * When using multi-lrc submission a scratch memory area is reserved in the
+ * parent's context state for the process descriptor and work queue. Currently
+ * the scratch area is sized to a page.
+ *
+ * The layout of this scratch area is below:
+ * 0   guc_process_desc
+ * ... unused
+ * PARENT_SCRATCH_SIZE / 2 work queue start
+ * ... work queue
+ * PARENT_SCRATCH_SIZE - 1 work queue end
+ */
+#define PARENT_SCRATCH_SIZEPAGE_SIZE
+#define WQ_SIZE(PARENT_SCRATCH_SIZE / 2)
+#define WQ_OFFSET  (PARENT_SCRATCH_SIZE - WQ_SIZE)
+static u32 __get_process_desc_offset(struct

[PATCH 21/25] drm/i915/guc: Handle errors in multi-lrc requests

2021-10-13 Thread Matthew Brost
If an error occurs in the front end when multi-lrc requests are getting
generated we need to skip these in the backend but we still need to
emit the breadcrumbs seqno. An issue arises because with multi-lrc
breadcrumbs there is a handshake between the parent and children to make
forward progress. If all the requests are not present this handshake
doesn't work. To work around this, if a multi-lrc request has an error we
skip the handshake but still emit the breadcrumbs seqno.

v2:
 (John Harrison)
  - Add comment explaining the skipping of the handshake logic
  - Fix typos in the commit message
v3:
 (John Harrison)
  - Fix up some comments about the math to NOP the ring

Signed-off-by: Matthew Brost 
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 69 ++-
 1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index bfafe996e2d2..80d8ce68ff59 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -4076,8 +4076,8 @@ static int 
emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
 }
 
 static u32 *
-emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
-u32 *cs)
+__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+  u32 *cs)
 {
struct intel_context *ce = rq->context;
u8 i;
@@ -4105,6 +4105,45 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct 
i915_request *rq,
  get_children_go_addr(ce),
  0);
 
+   return cs;
+}
+
+/*
+ * If this true, a submission of multi-lrc requests had an error and the
+ * requests need to be skipped. The front end (execbuf IOCTL) should've called
+ * i915_request_skip which squashes the BB but we still need to emit the fini
+ * breadcrumbs seqno write. At this point we don't know how many of the
+ * requests in the multi-lrc submission were generated so we can't do the
+ * handshake between the parent and children (e.g. if 4 requests should be
+ * generated but 2nd hit an error only 1 would be seen by the GuC backend).
+ * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
+ * has occurred on any of the requests in submission / relationship.
+ */
+static inline bool skip_handshake(struct i915_request *rq)
+{
+   return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
+}
+
+static u32 *
+emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+u32 *cs)
+{
+   struct intel_context *ce = rq->context;
+
+   GEM_BUG_ON(!intel_context_is_parent(ce));
+
+   if (unlikely(skip_handshake(rq))) {
+   /*
+* NOP everything in 
__emit_fini_breadcrumb_parent_no_preempt_mid_batch,
+* the -6 comes from the length of the emits below.
+*/
+   memset(cs, 0, sizeof(u32) *
+  (ce->engine->emit_fini_breadcrumb_dw - 6));
+   cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+   } else {
+   cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
+   }
+
/* Emit fini breadcrumb */
cs = gen8_emit_ggtt_write(cs,
  rq->fence.seqno,
@@ -4121,7 +4160,8 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct 
i915_request *rq,
 }
 
 static u32 *
-emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, u32 
*cs)
+__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs)
 {
struct intel_context *ce = rq->context;
struct intel_context *parent = intel_context_to_parent(ce);
@@ -4148,6 +4188,29 @@ emit_fini_breadcrumb_child_no_preempt_mid_batch(struct 
i915_request *rq, u32 *cs
*cs++ = get_children_go_addr(parent);
*cs++ = 0;
 
+   return cs;
+}
+
+static u32 *
+emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+   u32 *cs)
+{
+   struct intel_context *ce = rq->context;
+
+   GEM_BUG_ON(!intel_context_is_child(ce));
+
+   if (unlikely(skip_handshake(rq))) {
+   /*
+* NOP everything in 
__emit_fini_breadcrumb_child_no_preempt_mid_batch,
+* the -6 comes from the length of the emits below.
+*/
+   memset(cs, 0, sizeof(u32) *
+  (ce->engine->emit_fini_breadcrumb_dw - 6));
+   cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+   } else {
+   cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
+   }
+
/* Emit fini breadcrumb */
cs = gen8_emit_ggt

[PATCH 06/25] drm/i915: Expose logical engine instance to user

2021-10-13 Thread Matthew Brost
Expose logical engine instance to user via query engine info IOCTL. This
is required for split-frame workloads as these need to be placed on
engines in a logically contiguous order. The logical mapping can change
based on fusing. Rather than requiring the user to have knowledge of the
fusing, we simply expose the logical mapping with the existing query engine
info IOCTL.
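
For illustration only (not part of this patch), a rough userspace sketch of
consuming the new field after a DRM_I915_QUERY_ENGINE_INFO query; the query
plumbing itself is omitted and assumed to have filled 'info':

#include <stdio.h>
#include <drm/i915_drm.h>

static void print_logical_instances(const struct drm_i915_query_engine_info *info)
{
	unsigned int i;

	for (i = 0; i < info->num_engines; i++) {
		const struct drm_i915_engine_info *e = &info->engines[i];

		/* the flag advertises that logical_instance is valid */
		if (e->flags & I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE)
			printf("class %u instance %u -> logical %u\n",
			       e->engine.engine_class,
			       e->engine.engine_instance,
			       (unsigned int)e->logical_instance);
	}
}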

IGT: https://patchwork.freedesktop.org/patch/445637/?series=92854&rev=1
media UMD: https://github.com/intel/media-driver/pull/1252

v2:
 (Daniel Vetter)
  - Add IGT link, placeholder for media UMD

Cc: Tvrtko Ursulin 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/i915_query.c | 2 ++
 include/uapi/drm/i915_drm.h   | 8 +++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_query.c 
b/drivers/gpu/drm/i915/i915_query.c
index 5e2b909827f4..51b368be0fc4 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -124,7 +124,9 @@ query_engine_info(struct drm_i915_private *i915,
for_each_uabi_engine(engine, i915) {
info.engine.engine_class = engine->uabi_class;
info.engine.engine_instance = engine->uabi_instance;
+   info.flags = I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE;
info.capabilities = engine->uabi_capabilities;
+   info.logical_instance = ilog2(engine->logical_mask);
 
if (copy_to_user(info_ptr, &info, sizeof(info)))
return -EFAULT;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index aa2a7eccfb94..0179f92e0916 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -2775,14 +2775,20 @@ struct drm_i915_engine_info {
 
/** @flags: Engine flags. */
__u64 flags;
+#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE  (1 << 0)
 
/** @capabilities: Capabilities of this engine. */
__u64 capabilities;
 #define I915_VIDEO_CLASS_CAPABILITY_HEVC   (1 << 0)
 #define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC(1 << 1)
 
+   /** @logical_instance: Logical instance of engine */
+   __u16 logical_instance;
+
/** @rsvd1: Reserved fields. */
-   __u64 rsvd1[4];
+   __u16 rsvd1[3];
+   /** @rsvd2: Reserved fields. */
+   __u64 rsvd2[3];
 };
 
 /**
-- 
2.32.0



[PATCH 23/25] drm/i915: Update I915_GEM_BUSY IOCTL to understand composite fences

2021-10-13 Thread Matthew Brost
Parallel submission creates composite fences (dma_fence_array) for excl /
shared slots in objects. The I915_GEM_BUSY IOCTL checks these slots to
determine the busyness of the object. Prior to this patch it only checked if
the fence in the slot was an i915_request. Update the check to understand
composite fences and correctly report the busyness.

v2:
 (Tvrtko)
  - Remove duplicate BUILD_BUG_ON

Reviewed-by: Daniele Ceraolo Spurio 
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gem/i915_gem_busy.c  | 57 +++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  5 +-
 drivers/gpu/drm/i915/i915_request.h   |  6 ++
 3 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c 
b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 6234e17259c1..7358bebef15c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -4,6 +4,8 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
+#include 
+
 #include "gt/intel_engine.h"
 
 #include "i915_gem_ioctls.h"
@@ -36,7 +38,7 @@ static __always_inline u32 __busy_write_id(u16 id)
 }
 
 static __always_inline unsigned int
-__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
+__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id))
 {
const struct i915_request *rq;
 
@@ -46,29 +48,60 @@ __busy_set_if_active(const struct dma_fence *fence, u32 
(*flag)(u16 id))
 * to eventually flush us, but to minimise latency just ask the
 * hardware.
 *
-* Note we only report on the status of native fences.
+* Note we only report on the status of native fences and we currently
+* have two native fences:
+*
+* 1. A composite fence (dma_fence_array) constructed of i915 requests
+* created during a parallel submission. In this case we deconstruct the
+* composite fence into individual i915 requests and check the status of
+* each request.
+*
+* 2. A single i915 request.
 */
-   if (!dma_fence_is_i915(fence))
+   if (dma_fence_is_array(fence)) {
+   struct dma_fence_array *array = to_dma_fence_array(fence);
+   struct dma_fence **child = array->fences;
+   unsigned int nchild = array->num_fences;
+
+   do {
+   struct dma_fence *current_fence = *child++;
+
+   /* Not an i915 fence, can't be busy per above */
+   if (!dma_fence_is_i915(current_fence) ||
+   !test_bit(I915_FENCE_FLAG_COMPOSITE,
+ &current_fence->flags)) {
+   return 0;
+   }
+
+   rq = to_request(current_fence);
+   if (!i915_request_completed(rq))
+   return flag(rq->engine->uabi_class);
+   } while (--nchild);
+
+   /* All requests in array complete, not busy */
return 0;
+   } else {
+   if (!dma_fence_is_i915(fence))
+   return 0;
 
-   /* opencode to_request() in order to avoid const warnings */
-   rq = container_of(fence, const struct i915_request, fence);
-   if (i915_request_completed(rq))
-   return 0;
+   rq = to_request(fence);
+   if (i915_request_completed(rq))
+   return 0;
 
-   /* Beware type-expansion follies! */
-   BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
-   return flag(rq->engine->uabi_class);
+   /* Beware type-expansion follies! */
+   BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
+   return flag(rq->engine->uabi_class);
+   }
 }
 
 static __always_inline unsigned int
-busy_check_reader(const struct dma_fence *fence)
+busy_check_reader(struct dma_fence *fence)
 {
return __busy_set_if_active(fence, __busy_read_flag);
 }
 
 static __always_inline unsigned int
-busy_check_writer(const struct dma_fence *fence)
+busy_check_writer(struct dma_fence *fence)
 {
if (!fence)
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 6509c9d8c298..dea344f9ef99 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -3008,8 +3008,11 @@ eb_composite_fence_create(struct i915_execbuffer *eb, 
int out_fence_fd)
if (!fences)
return ERR_PTR(-ENOMEM);
 
-   for_each_batch_create_order(eb, i)
+   for_each_batch_create_order(eb, i) {
fences[i] = &eb->requests[i]->fence;
+   __set_bit(I915_FENCE_FLAG_COMPOSITE,
+ &eb->requests[i]->fence.flags);
+   }
 
fence_array = dma_fence_array_create(eb->num_batches,
   

[PATCH 07/25] drm/i915/guc: Introduce context parent-child relationship

2021-10-13 Thread Matthew Brost
Introduce context parent-child relationship. Once this relationship is
created all pinning / unpinning operations are directed to the parent
context. The parent context is responsible for pinning all of its
children and itself.

This is a precursor to the full GuC multi-lrc implementation but aligns
to how the GuC multi-lrc interface is defined - a single H2G is used to
register / deregister all of the contexts simultaneously.

Subsequent patches in the series will implement the pinning / unpinning
operations for parent / child contexts.
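
For illustration only (not part of this patch), a minimal sketch of how a
later patch in this series builds the relationship: one context per slot,
with slots 1..width-1 bound as children of slot 0 (error handling elided,
helper name illustrative):

static struct intel_context *
sketch_create_parallel(struct intel_engine_cs **engines, unsigned int width)
{
	struct intel_context *parent = NULL, *ce;
	unsigned int i;

	for (i = 0; i < width; ++i) {
		ce = intel_context_create(engines[i]);

		if (i == 0)
			parent = ce;
		else
			intel_context_bind_parent_child(parent, ce);
	}

	return parent;
}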

v2:
 (Daniel Vetter)
  - Add kernel doc, add wrapper to access parent to ensure safety
v3:
 (John Harrison)
  - Fix comment explaining GEM_BUG_ON in to_parent()
  - Make variable names generic (non-GuC specific)
v4:
 (John Harrison)
  - s/its'/its/g

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_context.c   | 29 +
 drivers/gpu/drm/i915/gt/intel_context.h   | 41 +++
 drivers/gpu/drm/i915/gt/intel_context_types.h | 21 ++
 3 files changed, 91 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index f98c9f470ba1..79f321c6c008 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -404,6 +404,8 @@ intel_context_init(struct intel_context *ce, struct 
intel_engine_cs *engine)
 
INIT_LIST_HEAD(&ce->destroyed_link);
 
+   INIT_LIST_HEAD(&ce->parallel.child_list);
+
/*
 * Initialize fence to be complete as this is expected to be complete
 * unless there is a pending schedule disable outstanding.
@@ -418,10 +420,17 @@ intel_context_init(struct intel_context *ce, struct 
intel_engine_cs *engine)
 
 void intel_context_fini(struct intel_context *ce)
 {
+   struct intel_context *child, *next;
+
if (ce->timeline)
intel_timeline_put(ce->timeline);
i915_vm_put(ce->vm);
 
+   /* Need to put the creation ref for the children */
+   if (intel_context_is_parent(ce))
+   for_each_child_safe(ce, child, next)
+   intel_context_put(child);
+
mutex_destroy(&ce->pin_mutex);
i915_active_fini(&ce->active);
i915_sw_fence_fini(&ce->guc_state.blocked);
@@ -538,6 +547,26 @@ struct i915_request 
*intel_context_find_active_request(struct intel_context *ce)
return active;
 }
 
+void intel_context_bind_parent_child(struct intel_context *parent,
+struct intel_context *child)
+{
+   /*
+* Callers responsibility to validate that this function is used
+* correctly but we use GEM_BUG_ON here ensure that they do.
+*/
+   GEM_BUG_ON(!intel_engine_uses_guc(parent->engine));
+   GEM_BUG_ON(intel_context_is_pinned(parent));
+   GEM_BUG_ON(intel_context_is_child(parent));
+   GEM_BUG_ON(intel_context_is_pinned(child));
+   GEM_BUG_ON(intel_context_is_child(child));
+   GEM_BUG_ON(intel_context_is_parent(child));
+
+   parent->parallel.number_children++;
+   list_add_tail(&child->parallel.child_link,
+ &parent->parallel.child_list);
+   child->parallel.parent = parent;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_context.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index c41098950746..b63c10a144af 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -44,6 +44,47 @@ void intel_context_free(struct intel_context *ce);
 int intel_context_reconfigure_sseu(struct intel_context *ce,
   const struct intel_sseu sseu);
 
+static inline bool intel_context_is_child(struct intel_context *ce)
+{
+   return !!ce->parallel.parent;
+}
+
+static inline bool intel_context_is_parent(struct intel_context *ce)
+{
+   return !!ce->parallel.number_children;
+}
+
+static inline bool intel_context_is_pinned(struct intel_context *ce);
+
+static inline struct intel_context *
+intel_context_to_parent(struct intel_context *ce)
+{
+   if (intel_context_is_child(ce)) {
+   /*
+* The parent holds ref count to the child so it is always safe
+* for the parent to access the child, but the child has a
+* pointer to the parent without a ref. To ensure this is safe
+* the child should only access the parent pointer while the
+* parent is pinned.
+*/
+   GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
+
+   return ce->parallel.parent;
+   } else {
+   return ce;
+   }
+}
+
+void intel_context_bind_parent_child(struct intel_context *parent,
+struct intel_context *child);
+
+#define for_each_child(parent, ce)\
+   list_for_each_entry(ce, &(paren

[PATCH 22/25] drm/i915: Make request conflict tracking understand parallel submits

2021-10-13 Thread Matthew Brost
If an object in the excl or shared slot is a composite fence from a
parallel submit, and the current request in the conflict tracking is from
the same parallel context, there is no need to enforce ordering as the
ordering is already implicit. Make the request conflict tracking
understand this by comparing the parallel submits' parent contexts and
skipping conflict insertion if they match.

v2:
 (John Harrison)
  - Reword commit message

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/i915_request.c | 43 +++--
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 8bdf9f2f9b90..820a1f38b271 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1335,6 +1335,25 @@ i915_request_await_external(struct i915_request *rq, 
struct dma_fence *fence)
return err;
 }
 
+static inline bool is_parallel_rq(struct i915_request *rq)
+{
+   return intel_context_is_parallel(rq->context);
+}
+
+static inline struct intel_context *request_to_parent(struct i915_request *rq)
+{
+   return intel_context_to_parent(rq->context);
+}
+
+static bool is_same_parallel_context(struct i915_request *to,
+struct i915_request *from)
+{
+   if (is_parallel_rq(to))
+   return request_to_parent(to) == request_to_parent(from);
+
+   return false;
+}
+
 int
 i915_request_await_execution(struct i915_request *rq,
 struct dma_fence *fence)
@@ -1366,11 +1385,14 @@ i915_request_await_execution(struct i915_request *rq,
 * want to run our callback in all cases.
 */
 
-   if (dma_fence_is_i915(fence))
+   if (dma_fence_is_i915(fence)) {
+   if (is_same_parallel_context(rq, to_request(fence)))
+   continue;
ret = __i915_request_await_execution(rq,
 to_request(fence));
-   else
+   } else {
ret = i915_request_await_external(rq, fence);
+   }
if (ret < 0)
return ret;
} while (--nchild);
@@ -1471,10 +1493,13 @@ i915_request_await_dma_fence(struct i915_request *rq, 
struct dma_fence *fence)
 fence))
continue;
 
-   if (dma_fence_is_i915(fence))
+   if (dma_fence_is_i915(fence)) {
+   if (is_same_parallel_context(rq, to_request(fence)))
+   continue;
ret = i915_request_await_request(rq, to_request(fence));
-   else
+   } else {
ret = i915_request_await_external(rq, fence);
+   }
if (ret < 0)
return ret;
 
@@ -1525,16 +1550,6 @@ i915_request_await_object(struct i915_request *to,
return ret;
 }
 
-static inline bool is_parallel_rq(struct i915_request *rq)
-{
-   return intel_context_is_parallel(rq->context);
-}
-
-static inline struct intel_context *request_to_parent(struct i915_request *rq)
-{
-   return intel_context_to_parent(rq->context);
-}
-
 static struct i915_request *
 __i915_request_ensure_parallel_ordering(struct i915_request *rq,
struct intel_timeline *timeline)
-- 
2.32.0



[PATCH 04/25] drm/i915/guc: Don't call switch_to_kernel_context with GuC submission

2021-10-13 Thread Matthew Brost
Calling switch_to_kernel_context isn't needed because an engine PM
reference is held while any user context is pinned; once we no longer hold
a PM reference, it is guaranteed that scheduling is disabled for all user
contexts. By not calling switch_to_kernel_context we save issuing a
request to the engine.

v2:
 (Daniel Vetter)
  - Add FIXME comment about pushing switch_to_kernel_context to backend
v3:
 (John Harrison)
  - Update commit message
  - Fix wording in comment

Signed-off-by: Matthew Brost 
Reviewed-by: Daniel Vetter 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_engine_pm.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c 
b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index dacd62773735..a1334b48dde7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -162,6 +162,19 @@ static bool switch_to_kernel_context(struct 
intel_engine_cs *engine)
unsigned long flags;
bool result = true;
 
+   /*
+* This is execlist specific behaviour intended to ensure the GPU is
+* idle by switching to a known 'safe' context. With GuC submission, the
+* same idle guarantee is achieved by other means (disabling
+* scheduling). Further, switching to a 'safe' context has no effect
+* with GuC submission as the scheduler can just switch back again.
+*
+* FIXME: Move this backend scheduler specific behaviour into the
+* scheduler backend.
+*/
+   if (intel_engine_uses_guc(engine))
+   return true;
+
/* GPU is pointing to the void, as good as in the kernel context. */
if (intel_gt_is_wedged(engine->gt))
return true;
-- 
2.32.0



[PATCH 24/25] drm/i915: Enable multi-bb execbuf

2021-10-13 Thread Matthew Brost
Enable multi-bb execbuf by enabling the set_parallel extension.

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 6f23aff6e642..66c7f3c0d08b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -570,9 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct 
i915_user_extension __user *base,
struct intel_engine_cs **siblings = NULL;
intel_engine_mask_t prev_mask;
 
-   /* Disabling for now */
-   return -ENODEV;
-
/* FIXME: This is NIY for execlists */
if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
return -ENODEV;
-- 
2.32.0



[PATCH 00/25] Parallel submission aka multi-bb execbuf

2021-10-13 Thread Matthew Brost
As discussed in [1] we are introducing a new parallel submission uAPI
for the i915 which allows more than 1 BB to be submitted in an execbuf
IOCTL. This is the implementation for both GuC and execlists.

In addition to the selftests in the series, an IGT is available,
implemented in the first 4 patches of [2].

The execbuf IOCTL changes have been done in a single large patch (#21)
as all the changes flow together and I believe a single patch will be
better if someone has to look up this change in the future. It can be
split into a series of smaller patches if desired.

This code is available in a public repo [3] for UMD teams to test their
code against.

v2: Drop complicated state machine to block in kernel if no guc_ids
available, perma-pin parallel contexts, rework the execbuf IOCTL to be a
series of loops inside the IOCTL rather than 1 large one on the outside,
address Daniel Vetter's comments
v3: Address John Harrison's comments, add a couple of patches which fix
bugs found internally
v4: Address John Harrison's latest round of comments

Signed-off-by: Matthew Brost 

[1] https://patchwork.freedesktop.org/series/92028/
[2] https://patchwork.freedesktop.org/series/93071/
[3] 
https://gitlab.freedesktop.org/mbrost/mbrost-drm-intel/-/tree/drm-intel-parallel

Matthew Brost (25):
  drm/i915/guc: Move GuC guc_id allocation under submission state
sub-struct
  drm/i915/guc: Take GT PM ref when deregistering context
  drm/i915/guc: Take engine PM when a context is pinned with GuC
submission
  drm/i915/guc: Don't call switch_to_kernel_context with GuC submission
  drm/i915: Add logical engine mapping
  drm/i915: Expose logical engine instance to user
  drm/i915/guc: Introduce context parent-child relationship
  drm/i915/guc: Add multi-lrc context registration
  drm/i915/guc: Ensure GuC schedule operations do not operate on child
contexts
  drm/i915/guc: Assign contexts in parent-child relationship consecutive
guc_ids
  drm/i915/guc: Implement parallel context pin / unpin functions
  drm/i915/guc: Implement multi-lrc submission
  drm/i915/guc: Insert submit fences between requests in parent-child
relationship
  drm/i915/guc: Implement multi-lrc reset
  drm/i915/guc: Update debugfs for GuC multi-lrc
  drm/i915/guc: Connect UAPI to GuC multi-lrc interface
  drm/i915/doc: Update parallel submit doc to point to i915_drm.h
  drm/i915/guc: Add basic GuC multi-lrc selftest
  drm/i915/guc: Implement no mid batch preemption for multi-lrc
  drm/i915: Multi-BB execbuf
  drm/i915/guc: Handle errors in multi-lrc requests
  drm/i915: Make request conflict tracking understand parallel submits
  drm/i915: Update I915_GEM_BUSY IOCTL to understand composite fences
  drm/i915: Enable multi-bb execbuf
  drm/i915/execlists: Weak parallel submission support for execlists

 Documentation/gpu/rfc/i915_parallel_execbuf.h |  122 --
 Documentation/gpu/rfc/i915_scheduler.rst  |4 +-
 drivers/gpu/drm/i915/gem/i915_gem_busy.c  |   57 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  227 ++-
 .../gpu/drm/i915/gem/i915_gem_context_types.h |   16 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  786 ++---
 drivers/gpu/drm/i915/gt/intel_context.c   |   50 +-
 drivers/gpu/drm/i915/gt/intel_context.h   |   54 +-
 drivers/gpu/drm/i915/gt/intel_context_types.h |   73 +-
 drivers/gpu/drm/i915/gt/intel_engine.h|   12 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |   66 +-
 drivers/gpu/drm/i915/gt/intel_engine_pm.c |   13 +
 drivers/gpu/drm/i915/gt/intel_engine_pm.h |   37 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |7 +
 .../drm/i915/gt/intel_execlists_submission.c  |   63 +-
 drivers/gpu/drm/i915/gt/intel_gt_pm.h |   14 +
 drivers/gpu/drm/i915/gt/intel_lrc.c   |7 +
 drivers/gpu/drm/i915/gt/selftest_execlists.c  |   12 +-
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|   29 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   54 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c|2 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   24 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   34 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 1452 ++---
 .../drm/i915/gt/uc/selftest_guc_multi_lrc.c   |  179 ++
 drivers/gpu/drm/i915/i915_query.c |2 +
 drivers/gpu/drm/i915/i915_request.c   |  143 +-
 drivers/gpu/drm/i915/i915_request.h   |   23 +
 drivers/gpu/drm/i915/i915_vma.c   |   21 +-
 drivers/gpu/drm/i915/i915_vma.h   |   13 +-
 drivers/gpu/drm/i915/intel_wakeref.h  |   12 +
 .../drm/i915/selftests/i915_live_selftests.h  |1 +
 include/uapi/drm/i915_drm.h   |  139 +-
 34 files changed, 3056 insertions(+), 693 deletions(-)
 delete mode 100644 Documentation/gpu/rfc/i915_parallel_execbuf.h
 create mode 100644 drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c

-- 
2.32.0



[PATCH 14/25] drm/i915/guc: Implement multi-lrc reset

2021-10-13 Thread Matthew Brost
Update context and full GPU reset to work with multi-lrc. The idea is the
parent context tracks all the active requests in flight for itself and its
children. The parent context owns the reset, replaying / canceling
requests as needed.

v2:
 (John Harrison)
  - Simplify loop in find active request
  - Add comments to find active request / reset loop
v3:
 (John Harrison)
  - s/its'/its/g
  - Fix comment when searching for active request
  - Reorder if state in __guc_reset_context

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_context.c   | 15 +++-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 69 ++-
 2 files changed, 63 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 79f321c6c008..6aab60584ee5 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -529,20 +529,29 @@ struct i915_request *intel_context_create_request(struct 
intel_context *ce)
 
 struct i915_request *intel_context_find_active_request(struct intel_context 
*ce)
 {
+   struct intel_context *parent = intel_context_to_parent(ce);
struct i915_request *rq, *active = NULL;
unsigned long flags;
 
GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));
 
-   spin_lock_irqsave(&ce->guc_state.lock, flags);
-   list_for_each_entry_reverse(rq, &ce->guc_state.requests,
+   /*
+* We search the parent list to find an active request on the submitted
+* context. The parent list contains the requests for all the contexts
+* in the relationship so we have to do a compare of each request's
+* context.
+*/
+   spin_lock_irqsave(&parent->guc_state.lock, flags);
+   list_for_each_entry_reverse(rq, &parent->guc_state.requests,
sched.link) {
+   if (rq->context != ce)
+   continue;
if (i915_request_completed(rq))
break;
 
active = rq;
}
-   spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+   spin_unlock_irqrestore(&parent->guc_state.lock, flags);
 
return active;
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index f690b7c2b295..bc052d206861 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -683,6 +683,11 @@ static inline int rq_prio(const struct i915_request *rq)
return rq->sched.attr.priority;
 }
 
+static inline bool is_multi_lrc(struct intel_context *ce)
+{
+   return intel_context_is_parallel(ce);
+}
+
 static bool is_multi_lrc_rq(struct i915_request *rq)
 {
return intel_context_is_parallel(rq->context);
@@ -1218,10 +1223,15 @@ __unwind_incomplete_requests(struct intel_context *ce)
 
 static void __guc_reset_context(struct intel_context *ce, bool stalled)
 {
+   bool local_stalled;
struct i915_request *rq;
unsigned long flags;
u32 head;
+   int i, number_children = ce->parallel.number_children;
bool skip = false;
+   struct intel_context *parent = ce;
+
+   GEM_BUG_ON(intel_context_is_child(ce));
 
intel_context_get(ce);
 
@@ -1247,25 +1257,38 @@ static void __guc_reset_context(struct intel_context 
*ce, bool stalled)
if (unlikely(skip))
goto out_put;
 
-   rq = intel_context_find_active_request(ce);
-   if (!rq) {
-   head = ce->ring->tail;
-   stalled = false;
-   goto out_replay;
-   }
+   /*
+* For each context in the relationship find the hanging request
+* resetting each context / request as needed
+*/
+   for (i = 0; i < number_children + 1; ++i) {
+   if (!intel_context_is_pinned(ce))
+   goto next_context;
+
+   local_stalled = false;
+   rq = intel_context_find_active_request(ce);
+   if (!rq) {
+   head = ce->ring->tail;
+   goto out_replay;
+   }
 
-   if (!i915_request_started(rq))
-   stalled = false;
+   if (i915_request_started(rq))
+   local_stalled = true;
 
-   GEM_BUG_ON(i915_active_is_idle(&ce->active));
-   head = intel_ring_wrap(ce->ring, rq->head);
-   __i915_request_reset(rq, stalled);
+   GEM_BUG_ON(i915_active_is_idle(&ce->active));
+   head = intel_ring_wrap(ce->ring, rq->head);
 
+   __i915_request_reset(rq, local_stalled && stalled);
 out_replay:
-   guc_reset_state(ce, head, stalled);
-   __unwind_incomplete_requests(ce);
+   guc_reset_state(ce, head, local_stalled && stalled);
+next_context:
+   if (i != number_children)
+   ce = list_next_entry(ce, parallel.ch

[PATCH 18/25] drm/i915/guc: Add basic GuC multi-lrc selftest

2021-10-13 Thread Matthew Brost
Add very basic (single submission) multi-lrc selftest.

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   1 +
 .../drm/i915/gt/uc/selftest_guc_multi_lrc.c   | 179 ++
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 3 files changed, 181 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index ecb938bb99fb..09a3a9dd7ff6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -3960,4 +3960,5 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct 
intel_engine_cs *ve)
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_guc.c"
+#include "selftest_guc_multi_lrc.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c 
b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
new file mode 100644
index ..50953c8e8b53
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "selftests/igt_spinner.h"
+#include "selftests/igt_reset.h"
+#include "selftests/intel_scheduler_helpers.h"
+#include "gt/intel_engine_heartbeat.h"
+#include "gem/selftests/mock_context.h"
+
+static void logical_sort(struct intel_engine_cs **engines, int num_engines)
+{
+   struct intel_engine_cs *sorted[MAX_ENGINE_INSTANCE + 1];
+   int i, j;
+
+   for (i = 0; i < num_engines; ++i)
+   for (j = 0; j < MAX_ENGINE_INSTANCE + 1; ++j) {
+   if (engines[j]->logical_mask & BIT(i)) {
+   sorted[i] = engines[j];
+   break;
+   }
+   }
+
+   memcpy(*engines, *sorted,
+  sizeof(struct intel_engine_cs *) * num_engines);
+}
+
+static struct intel_context *
+multi_lrc_create_parent(struct intel_gt *gt, u8 class,
+   unsigned long flags)
+{
+   struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+   int i = 0;
+
+   for_each_engine(engine, gt, id) {
+   if (engine->class != class)
+   continue;
+
+   siblings[i++] = engine;
+   }
+
+   if (i <= 1)
+   return ERR_PTR(0);
+
+   logical_sort(siblings, i);
+
+   return intel_engine_create_parallel(siblings, 1, i);
+}
+
+static void multi_lrc_context_unpin(struct intel_context *ce)
+{
+   struct intel_context *child;
+
+   GEM_BUG_ON(!intel_context_is_parent(ce));
+
+   for_each_child(ce, child)
+   intel_context_unpin(child);
+   intel_context_unpin(ce);
+}
+
+static void multi_lrc_context_put(struct intel_context *ce)
+{
+   GEM_BUG_ON(!intel_context_is_parent(ce));
+
+   /*
+* Only the parent gets the creation ref put in the uAPI, the parent
+* itself is responsible for creation ref put on the children.
+*/
+   intel_context_put(ce);
+}
+
+static struct i915_request *
+multi_lrc_nop_request(struct intel_context *ce)
+{
+   struct intel_context *child;
+   struct i915_request *rq, *child_rq;
+   int i = 0;
+
+   GEM_BUG_ON(!intel_context_is_parent(ce));
+
+   rq = intel_context_create_request(ce);
+   if (IS_ERR(rq))
+   return rq;
+
+   i915_request_get(rq);
+   i915_request_add(rq);
+
+   for_each_child(ce, child) {
+   child_rq = intel_context_create_request(child);
+   if (IS_ERR(child_rq))
+   goto child_error;
+
+   if (++i == ce->parallel.number_children)
+   set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL,
+   &child_rq->fence.flags);
+   i915_request_add(child_rq);
+   }
+
+   return rq;
+
+child_error:
+   i915_request_put(rq);
+
+   return ERR_PTR(-ENOMEM);
+}
+
+static int __intel_guc_multi_lrc_basic(struct intel_gt *gt, unsigned int class)
+{
+   struct intel_context *parent;
+   struct i915_request *rq;
+   int ret;
+
+   parent = multi_lrc_create_parent(gt, class, 0);
+   if (IS_ERR(parent)) {
+   pr_err("Failed creating contexts: %ld", PTR_ERR(parent));
+   return PTR_ERR(parent);
+   } else if (!parent) {
+   pr_debug("Not enough engines in class: %d", class);
+   return 0;
+   }
+
+   rq = multi_lrc_nop_request(parent);
+   if (IS_ERR(rq)) {
+   ret = PTR_ERR(rq);
+   pr_err("Failed creating requests: %d", ret);
+   goto out;
+   }
+
+   ret = intel_selftest_wait_for_rq(rq);
+   if (ret)
+   pr_err("Failed waiting on request: %d

[PATCH 12/25] drm/i915/guc: Implement multi-lrc submission

2021-10-13 Thread Matthew Brost
Implement multi-lrc submission via a single workqueue entry and single
H2G. The workqueue entry contains an updated tail value for each
request, of all the contexts in the multi-lrc submission, and updates
these values simultaneously. As such, the tasklet and bypass path have
been updated to coalesce requests into a single submission.

v2:
 (John Harrison)
  - s/wqe/wqi
  - Use FIELD_PREP macros
  - Add GEM_BUG_ONs ensures length fits within field
  - Add comment / white space to intel_guc_write_barrier
 (Kernel test robot)
  - Make need_tasklet a static function
v3:
 (Docs)
  - A comment for submission_stall_reason
v4:
 (Kernel test robot)
  - Initialize return value in bypass tasklet submit function
 (John Harrison)
  - Add comment near work queue defs
  - Add BUILD_BUG_ON to ensure WQ_SIZE is a power of 2
  - Update write_barrier comment to talk about work queue

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|  29 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|  11 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  24 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  30 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 323 +++---
 drivers/gpu/drm/i915/i915_request.h   |   8 +
 6 files changed, 350 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 8f8182bf7c11..6e228343e8cb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -756,3 +756,32 @@ void intel_guc_load_status(struct intel_guc *guc, struct 
drm_printer *p)
}
}
 }
+
+void intel_guc_write_barrier(struct intel_guc *guc)
+{
+   struct intel_gt *gt = guc_to_gt(guc);
+
+   if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
+   /*
+* Ensure intel_uncore_write_fw can be used rather than
+* intel_uncore_write.
+*/
+   GEM_BUG_ON(guc->send_regs.fw_domains);
+
+   /*
+* This register is used by the i915 and GuC for MMIO based
+* communication. Once we are in this code CTBs are the only
+* method the i915 uses to communicate with the GuC so it is
+* safe to write to this register (a value of 0 is NOP for MMIO
+* communication). If we ever start mixing CTBs and MMIOs a new
+* register will have to be chosen. This function is also used
+* to enforce ordering of a work queue item write and an update
+* to the process descriptor. When a work queue is being used,
+* CTBs are also the only mechanism of communication.
+*/
+   intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0);
+   } else {
+   /* wmb() sufficient for a barrier if in smem */
+   wmb();
+   }
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 4ca197f400ba..31cf9fb48c7e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -46,6 +46,15 @@ struct intel_guc {
 * submitted until the stalled request is processed.
 */
struct i915_request *stalled_request;
+   /**
+* @submission_stall_reason: reason why submission is stalled
+*/
+   enum {
+   STALL_NONE,
+   STALL_REGISTER_CONTEXT,
+   STALL_MOVE_LRC_TAIL,
+   STALL_ADD_REQUEST,
+   } submission_stall_reason;
 
/* intel_guc_recv interrupt related state */
/** @irq_lock: protects GuC irq state */
@@ -367,4 +376,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc 
*guc);
 
 void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
 
+void intel_guc_write_barrier(struct intel_guc *guc);
+
 #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 0a3504bc0b61..a0cc34be7b56 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -383,28 +383,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct)
return ++ct->requests.last_fence;
 }
 
-static void write_barrier(struct intel_guc_ct *ct)
-{
-   struct intel_guc *guc = ct_to_guc(ct);
-   struct intel_gt *gt = guc_to_gt(guc);
-
-   if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
-   GEM_BUG_ON(guc->send_regs.fw_domains);
-   /*
-* This register is used by the i915 and GuC for MMIO based
-* communication. Once we are in this code CTBs are the only
-* method the i915 uses to communicate with the GuC so it is
-* safe to write to this register (a value of 0 is NOP for MMIO
-* communication). If we ever start mixing CTBs and MMIOs a new
-   

[PATCH 03/25] drm/i915/guc: Take engine PM when a context is pinned with GuC submission

2021-10-13 Thread Matthew Brost
Take a PM reference to prevent intel_gt_wait_for_idle from short
circuiting while any user context has scheduling enabled. Returning GT
idle when it is not can cause all sorts of issues throughout the stack.

v2:
 (Daniel Vetter)
  - Add might_lock annotations to pin / unpin function
v3:
 (CI)
  - Drop intel_engine_pm_might_put from unpin path as an async put is
used
v4:
 (John Harrison)
  - Make intel_engine_pm_might_get/put work with GuC virtual engines
  - Update commit message
v5:
  - Update commit message again

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_context.c   |  2 ++
 drivers/gpu/drm/i915/gt/intel_engine_pm.h | 32 +
 drivers/gpu/drm/i915/gt/intel_gt_pm.h | 10 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 36 +--
 drivers/gpu/drm/i915/intel_wakeref.h  | 12 +++
 5 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index d008ef8623ce..f98c9f470ba1 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -240,6 +240,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
if (err)
goto err_post_unpin;
 
+   intel_engine_pm_might_get(ce->engine);
+
if (unlikely(intel_context_is_closed(ce))) {
err = -ENOENT;
goto err_unlock;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h 
b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 6fdeae668e6e..d68675925b79 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -6,9 +6,11 @@
 #ifndef INTEL_ENGINE_PM_H
 #define INTEL_ENGINE_PM_H
 
+#include "i915_drv.h"
 #include "i915_request.h"
 #include "intel_engine_types.h"
 #include "intel_wakeref.h"
+#include "intel_gt_pm.h"
 
 static inline bool
 intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
@@ -31,6 +33,21 @@ static inline bool intel_engine_pm_get_if_awake(struct 
intel_engine_cs *engine)
return intel_wakeref_get_if_active(&engine->wakeref);
 }
 
+static inline void intel_engine_pm_might_get(struct intel_engine_cs *engine)
+{
+   if (!intel_engine_is_virtual(engine)) {
+   intel_wakeref_might_get(&engine->wakeref);
+   } else {
+   struct intel_gt *gt = engine->gt;
+   struct intel_engine_cs *tengine;
+   intel_engine_mask_t tmp, mask = engine->mask;
+
+   for_each_engine_masked(tengine, gt, mask, tmp)
+   intel_wakeref_might_get(&tengine->wakeref);
+   }
+   intel_gt_pm_might_get(engine->gt);
+}
+
 static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
 {
intel_wakeref_put(&engine->wakeref);
@@ -52,6 +69,21 @@ static inline void intel_engine_pm_flush(struct 
intel_engine_cs *engine)
intel_wakeref_unlock_wait(&engine->wakeref);
 }
 
+static inline void intel_engine_pm_might_put(struct intel_engine_cs *engine)
+{
+   if (!intel_engine_is_virtual(engine)) {
+   intel_wakeref_might_put(&engine->wakeref);
+   } else {
+   struct intel_gt *gt = engine->gt;
+   struct intel_engine_cs *tengine;
+   intel_engine_mask_t tmp, mask = engine->mask;
+
+   for_each_engine_masked(tengine, gt, mask, tmp)
+   intel_wakeref_might_put(&tengine->wakeref);
+   }
+   intel_gt_pm_might_put(engine->gt);
+}
+
 static inline struct i915_request *
 intel_engine_create_kernel_request(struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h 
b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index 05de6c1af25b..bc898df7a48c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -31,6 +31,11 @@ static inline bool intel_gt_pm_get_if_awake(struct intel_gt 
*gt)
return intel_wakeref_get_if_active(>->wakeref);
 }
 
+static inline void intel_gt_pm_might_get(struct intel_gt *gt)
+{
+   intel_wakeref_might_get(>->wakeref);
+}
+
 static inline void intel_gt_pm_put(struct intel_gt *gt)
 {
intel_wakeref_put(>->wakeref);
@@ -41,6 +46,11 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt)
intel_wakeref_put_async(>->wakeref);
 }
 
+static inline void intel_gt_pm_might_put(struct intel_gt *gt)
+{
+   intel_wakeref_might_put(>->wakeref);
+}
+
 #define with_intel_gt_pm(gt, tmp) \
for (tmp = 1, intel_gt_pm_get(gt); tmp; \
 intel_gt_pm_put(gt), tmp = 0)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index d2ce47b5541e..51d3963cebbf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1571,7 +1571,12 @@ static int guc_context_pre_pin(struct intel_context *ce,
 
 static int guc_context_pin(struct intel_c

[PATCH 02/25] drm/i915/guc: Take GT PM ref when deregistering context

2021-10-13 Thread Matthew Brost
Take a PM reference to prevent intel_gt_wait_for_idle from short
circuiting while a deregister context H2G is in flight. To do this we must
issue the deregister H2G from a worker, as a context can be destroyed from
an atomic context and taking a GT PM reference there blows up. Previously
we took a runtime PM reference from this atomic context, which worked but
will stop working once runtime PM autosuspend is enabled.

So this patch is twofold: stop intel_gt_wait_for_idle from short
circuiting and fix runtime PM autosuspend.

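For context, a hedged sketch of the deferred-destroy pattern described
above. The example_* function names are hypothetical; the submission_state
fields, with_intel_gt_pm() and deregister_destroyed_contexts() are taken
from this patch:

	/* Illustrative only: queue the context for destruction; may be
	 * called from atomic context, so no GT PM reference is taken here.
	 */
	static void example_defer_destroy(struct intel_guc *guc,
					  struct intel_context *ce)
	{
		unsigned long flags;

		spin_lock_irqsave(&guc->submission_state.lock, flags);
		list_add_tail(&ce->destroyed_link,
			      &guc->submission_state.destroyed_contexts);
		spin_unlock_irqrestore(&guc->submission_state.lock, flags);

		queue_work(system_unbound_wq,
			   &guc->submission_state.destroyed_worker);
	}

	/* Illustrative only: the worker can sleep, so it can hold a GT PM
	 * reference while the deregister H2Gs are in flight.
	 */
	static void example_destroyed_worker(struct work_struct *w)
	{
		struct intel_guc *guc =
			container_of(w, struct intel_guc,
				     submission_state.destroyed_worker);
		int tmp;

		with_intel_gt_pm(guc_to_gt(guc), tmp)
			deregister_destroyed_contexts(guc);
	}
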
v2:
 (John Harrison)
  - Split structure changes out in different patch
 (Tvrtko)
  - Don't drop lock in deregister_destroyed_contexts
v3:
 (John Harrison)
  - Flush destroyed contexts before destroying context reg pool

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_context.c   |   2 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |   7 +
 drivers/gpu/drm/i915/gt/intel_engine_pm.h |   5 +
 drivers/gpu/drm/i915/gt/intel_gt_pm.h |   4 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|  11 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 146 +++---
 6 files changed, 121 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 35babd02ddfe..d008ef8623ce 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -400,6 +400,8 @@ intel_context_init(struct intel_context *ce, struct 
intel_engine_cs *engine)
ce->guc_id.id = GUC_INVALID_LRC_ID;
INIT_LIST_HEAD(&ce->guc_id.link);
 
+   INIT_LIST_HEAD(&ce->destroyed_link);
+
/*
 * Initialize fence to be complete as this is expected to be complete
 * unless there is a pending schedule disable outstanding.
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e7e3984aab78..4613d027cbc3 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -213,6 +213,13 @@ struct intel_context {
struct list_head link;
} guc_id;
 
+   /**
+* @destroyed_link: link in guc->submission_state.destroyed_contexts, in
+* list when context is pending to be destroyed (deregistered with the
+* GuC), protected by guc->submission_state.lock
+*/
+   struct list_head destroyed_link;
+
 #ifdef CONFIG_DRM_I915_SELFTEST
/**
 * @drop_schedule_enable: Force drop of schedule enable G2H for selftest
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h 
b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 8520c595f5e1..6fdeae668e6e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -16,6 +16,11 @@ intel_engine_pm_is_awake(const struct intel_engine_cs 
*engine)
return intel_wakeref_is_active(&engine->wakeref);
 }
 
+static inline void __intel_engine_pm_get(struct intel_engine_cs *engine)
+{
+   __intel_wakeref_get(&engine->wakeref);
+}
+
 static inline void intel_engine_pm_get(struct intel_engine_cs *engine)
 {
intel_wakeref_get(&engine->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h 
b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index d0588d8aaa44..05de6c1af25b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -41,6 +41,10 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt)
intel_wakeref_put_async(>->wakeref);
 }
 
+#define with_intel_gt_pm(gt, tmp) \
+   for (tmp = 1, intel_gt_pm_get(gt); tmp; \
+intel_gt_pm_put(gt), tmp = 0)
+
 static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
 {
return intel_wakeref_wait_for_idle(>->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 82e248c2290c..74f071a0b6d5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -90,6 +90,17 @@ struct intel_guc {
 * refs
 */
struct list_head guc_id_list;
+   /**
+* @destroyed_contexts: list of contexts waiting to be destroyed
+* (deregistered with the GuC)
+*/
+   struct list_head destroyed_contexts;
+   /**
+* @destroyed_worker: worker to deregister contexts, need as we
+* need to take a GT PM reference and can't from destroy
+* function as it might be in an atomic context (no sleeping)
+*/
+   struct work_struct destroyed_worker;
} submission_state;
 
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index b2646b088c7f..d2ce47b5541e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -90,8 +90

[PATCH 17/25] drm/i915/doc: Update parallel submit doc to point to i915_drm.h

2021-10-13 Thread Matthew Brost
Update parallel submit doc to point to i915_drm.h

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 Documentation/gpu/rfc/i915_parallel_execbuf.h | 122 --
 Documentation/gpu/rfc/i915_scheduler.rst  |   4 +-
 2 files changed, 2 insertions(+), 124 deletions(-)
 delete mode 100644 Documentation/gpu/rfc/i915_parallel_execbuf.h

diff --git a/Documentation/gpu/rfc/i915_parallel_execbuf.h 
b/Documentation/gpu/rfc/i915_parallel_execbuf.h
deleted file mode 100644
index 8cbe2c4e0172..
--- a/Documentation/gpu/rfc/i915_parallel_execbuf.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2021 Intel Corporation
- */
-
-#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see 
i915_context_engines_parallel_submit */
-
-/**
- * struct drm_i915_context_engines_parallel_submit - Configure engine for
- * parallel submission.
- *
- * Setup a slot in the context engine map to allow multiple BBs to be submitted
- * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the 
GPU
- * in parallel. Multiple hardware contexts are created internally in the i915
- * run these BBs. Once a slot is configured for N BBs only N BBs can be
- * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user
- * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how
- * many BBs there are based on the slot's configuration. The N BBs are the last
- * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
- *
- * The default placement behavior is to create implicit bonds between each
- * context if each context maps to more than 1 physical engine (e.g. context is
- * a virtual engine). Also we only allow contexts of same engine class and 
these
- * contexts must be in logically contiguous order. Examples of the placement
- * behavior described below. Lastly, the default is to not allow BBs to
- * preempted mid BB rather insert coordinated preemption on all hardware
- * contexts between each set of BBs. Flags may be added in the future to change
- * both of these default behaviors.
- *
- * Returns -EINVAL if hardware context placement configuration is invalid or if
- * the placement configuration isn't supported on the platform / submission
- * interface.
- * Returns -ENODEV if extension isn't supported on the platform / submission
- * interface.
- *
- * .. code-block:: none
- *
- * Example 1 pseudo code:
- * CS[X] = generic engine of same class, logical instance X
- * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
- * set_engines(INVALID)
- * set_parallel(engine_index=0, width=2, num_siblings=1,
- *  engines=CS[0],CS[1])
- *
- * Results in the following valid placement:
- * CS[0], CS[1]
- *
- * Example 2 pseudo code:
- * CS[X] = generic engine of same class, logical instance X
- * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
- * set_engines(INVALID)
- * set_parallel(engine_index=0, width=2, num_siblings=2,
- *  engines=CS[0],CS[2],CS[1],CS[3])
- *
- * Results in the following valid placements:
- * CS[0], CS[1]
- * CS[2], CS[3]
- *
- * This can also be thought of as 2 virtual engines described by 2-D array
- * in the engines the field with bonds placed between each index of the
- * virtual engines. e.g. CS[0] is bonded to CS[1], CS[2] is bonded to
- * CS[3].
- * VE[0] = CS[0], CS[2]
- * VE[1] = CS[1], CS[3]
- *
- * Example 3 pseudo code:
- * CS[X] = generic engine of same class, logical instance X
- * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
- * set_engines(INVALID)
- * set_parallel(engine_index=0, width=2, num_siblings=2,
- *  engines=CS[0],CS[1],CS[1],CS[3])
- *
- * Results in the following valid and invalid placements:
- * CS[0], CS[1]
- * CS[1], CS[3] - Not logical contiguous, return -EINVAL
- */
-struct drm_i915_context_engines_parallel_submit {
-   /**
-* @base: base user extension.
-*/
-   struct i915_user_extension base;
-
-   /**
-* @engine_index: slot for parallel engine
-*/
-   __u16 engine_index;
-
-   /**
-* @width: number of contexts per parallel engine
-*/
-   __u16 width;
-
-   /**
-* @num_siblings: number of siblings per context
-*/
-   __u16 num_siblings;
-
-   /**
-* @mbz16: reserved for future use; must be zero
-*/
-   __u16 mbz16;
-
-   /**
-* @flags: all undefined flags must be zero, currently not defined flags
-*/
-   __u64 flags;
-
-   /**
-* @mbz64: reserved for future use; must be zero
-*/
-   __u64 mbz64[3];
-
-   /**
-* @engines: 2-d array of engine instances to configure parallel engine
-*
-* length = width (i) * num_siblings (j)
-* index = j + i * n

[PATCH v4 19/20] drm: cleanup: remove drm_modeset_(un)lock_all()

2021-10-13 Thread Fernando Ramos
Functions drm_modeset_lock_all() and drm_modeset_unlock_all() are no
longer used anywhere and can be removed.

Signed-off-by: Fernando Ramos 
---
 drivers/gpu/drm/drm_modeset_lock.c | 94 +-
 include/drm/drm_modeset_lock.h |  2 -
 2 files changed, 3 insertions(+), 93 deletions(-)

diff --git a/drivers/gpu/drm/drm_modeset_lock.c 
b/drivers/gpu/drm/drm_modeset_lock.c
index 4d32b61fa1fd..b2b84ca2b738 100644
--- a/drivers/gpu/drm/drm_modeset_lock.c
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@ -117,93 +117,6 @@ static void __stack_depot_print(depot_stack_handle_t 
stack_depot)
 }
 #endif /* CONFIG_DRM_DEBUG_MODESET_LOCK */
 
-/**
- * drm_modeset_lock_all - take all modeset locks
- * @dev: DRM device
- *
- * This function takes all modeset locks, suitable where a more fine-grained
- * scheme isn't (yet) implemented. Locks must be dropped by calling the
- * drm_modeset_unlock_all() function.
- *
- * This function is deprecated. It allocates a lock acquisition context and
- * stores it in &drm_device.mode_config. This facilitate conversion of
- * existing code because it removes the need to manually deal with the
- * acquisition context, but it is also brittle because the context is global
- * and care must be taken not to nest calls. New code should use the
- * drm_modeset_lock_all_ctx() function and pass in the context explicitly.
- */
-void drm_modeset_lock_all(struct drm_device *dev)
-{
-   struct drm_mode_config *config = &dev->mode_config;
-   struct drm_modeset_acquire_ctx *ctx;
-   int ret;
-
-   ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL);
-   if (WARN_ON(!ctx))
-   return;
-
-   mutex_lock(&config->mutex);
-
-   drm_modeset_acquire_init(ctx, 0);
-
-retry:
-   ret = drm_modeset_lock_all_ctx(dev, ctx);
-   if (ret < 0) {
-   if (ret == -EDEADLK) {
-   drm_modeset_backoff(ctx);
-   goto retry;
-   }
-
-   drm_modeset_acquire_fini(ctx);
-   kfree(ctx);
-   return;
-   }
-   ww_acquire_done(&ctx->ww_ctx);
-
-   WARN_ON(config->acquire_ctx);
-
-   /*
-* We hold the locks now, so it is safe to stash the acquisition
-* context for drm_modeset_unlock_all().
-*/
-   config->acquire_ctx = ctx;
-
-   drm_warn_on_modeset_not_all_locked(dev);
-}
-EXPORT_SYMBOL(drm_modeset_lock_all);
-
-/**
- * drm_modeset_unlock_all - drop all modeset locks
- * @dev: DRM device
- *
- * This function drops all modeset locks taken by a previous call to the
- * drm_modeset_lock_all() function.
- *
- * This function is deprecated. It uses the lock acquisition context stored
- * in &drm_device.mode_config. This facilitates conversion of existing
- * code because it removes the need to manually deal with the acquisition
- * context, but it is also brittle because the context is global and care must
- * be taken not to nest calls. New code should pass the acquisition context
- * directly to the drm_modeset_drop_locks() function.
- */
-void drm_modeset_unlock_all(struct drm_device *dev)
-{
-   struct drm_mode_config *config = &dev->mode_config;
-   struct drm_modeset_acquire_ctx *ctx = config->acquire_ctx;
-
-   if (WARN_ON(!ctx))
-   return;
-
-   config->acquire_ctx = NULL;
-   drm_modeset_drop_locks(ctx);
-   drm_modeset_acquire_fini(ctx);
-
-   kfree(ctx);
-
-   mutex_unlock(&dev->mode_config.mutex);
-}
-EXPORT_SYMBOL(drm_modeset_unlock_all);
-
 /**
  * drm_warn_on_modeset_not_all_locked - check that all modeset locks are locked
  * @dev: device
@@ -425,10 +338,9 @@ EXPORT_SYMBOL(drm_modeset_unlock);
  * This function takes all modeset locks, suitable where a more fine-grained
  * scheme isn't (yet) implemented.
  *
- * Unlike drm_modeset_lock_all(), it doesn't take the &drm_mode_config.mutex
- * since that lock isn't required for modeset state changes. Callers which
- * need to grab that lock too need to do so outside of the acquire context
- * @ctx.
+ * It doesn't take the &drm_mode_config.mutex since that lock isn't required 
for
+ * modeset state changes. Callers which need to grab that lock too need to do 
so
+ * outside of the acquire context @ctx.
  *
  * Locks acquired with this function should be released by calling the
  * drm_modeset_drop_locks() function on @ctx.
diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h
index b84693fbd2b5..96b853530120 100644
--- a/include/drm/drm_modeset_lock.h
+++ b/include/drm/drm_modeset_lock.h
@@ -140,8 +140,6 @@ struct drm_device;
 struct drm_crtc;
 struct drm_plane;
 
-void drm_modeset_lock_all(struct drm_device *dev);
-void drm_modeset_unlock_all(struct drm_device *dev);
 void drm_warn_on_modeset_not_all_locked(struct drm_device *dev);
 
 int drm_modeset_lock_all_ctx(struct drm_device *dev,
-- 
2.33.0



[PATCH v4 20/20] drm: cleanup: remove acquire_ctx from drm_mode_config

2021-10-13 Thread Fernando Ramos
The previous patch removed drm_modeset_{lock,unlock}_all, which were the
only users of this field inside the drm_mode_config structure.

Signed-off-by: Fernando Ramos 
---
 include/drm/drm_mode_config.h | 10 --
 1 file changed, 10 deletions(-)

diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index 48b7de80daf5..b214b07157f2 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -383,16 +383,6 @@ struct drm_mode_config {
 */
struct drm_modeset_lock connection_mutex;
 
-   /**
-* @acquire_ctx:
-*
-* Global implicit acquire context used by atomic drivers for legacy
-* IOCTLs. Deprecated, since implicit locking contexts make it
-* impossible to use driver-private &struct drm_modeset_lock. Users of
-* this must hold @mutex.
-*/
-   struct drm_modeset_acquire_ctx *acquire_ctx;
-
/**
 * @idr_mutex:
 *
-- 
2.33.0



[PATCH v4 18/20] drm/amd: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN() [part 3]

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

NOTE:

While this change is similar to the one done two commits ago, it
contains an important extra nuance that I'm going to explain next.

The only difference between the old drm_modeset_{lock,unlock}_all()
functions and the new DRM_MODESET_LOCK_ALL_{BEGIN,END}() macros is that
the former use a global context stored in dev->mode_config.acquire_ctx
while the latter depend on a user provided one (typically in the stack).

This means that as long as no one accesses the global
dev->mode_config.acquire_ctx context in the block that runs between
lock/BEGIN and unlock/END, the code should be equivalent before and
after my changes.

It turns out that, while not obvious at first sight, the call to
dm_restore_drm_connector_state() done between drm_modeset_lock_all() and
drm_modeset_unlock_all() ends up using that global context structure
stored in dev.

To fix this we need to update some function prototypes to accept the
new stack-allocated variable as an argument.

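To make the resulting pattern concrete, a hedged before/after sketch (dev,
connector and ctx are illustrative locals; the macro usage mirrors the
hunks below).

Before, with the global implicit context:

	drm_modeset_lock_all(dev);
	dm_restore_drm_connector_state(dev, connector);
	drm_modeset_unlock_all(dev);

After, with an explicit context on the stack:

	struct drm_modeset_acquire_ctx ctx;
	int ret;

	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
	dm_restore_drm_connector_state(dev, connector, &ctx);
	DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
	/* ret carries any acquire/backoff error and should be checked */
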
Signed-off-by: Fernando Ramos 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 27 ---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  3 ++-
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 13 ++---
 3 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 10ed1f8ad514..7a3c5def9fb9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -81,6 +81,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
 #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
@@ -2906,6 +2907,8 @@ static void handle_hpd_irq_helper(struct 
amdgpu_dm_connector *aconnector)
struct amdgpu_device *adev = drm_to_adev(dev);
struct dm_connector_state *dm_con_state = 
to_dm_connector_state(connector->state);
struct dm_crtc_state *dm_crtc_state = NULL;
+   struct drm_modeset_acquire_ctx ctx;
+   int ret;
 
if (adev->dm.disable_hpd_irq)
return;
@@ -2947,9 +2950,9 @@ static void handle_hpd_irq_helper(struct 
amdgpu_dm_connector *aconnector)
goto out;
}
 
-   drm_modeset_lock_all(dev);
-   dm_restore_drm_connector_state(dev, connector);
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
+   dm_restore_drm_connector_state(dev, connector, &ctx);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
 
if (aconnector->base.force == DRM_FORCE_UNSPECIFIED)
drm_kms_helper_hotplug_event(dev);
@@ -3070,6 +3073,7 @@ static void handle_hpd_rx_irq(void *param)
struct drm_connector *connector = &aconnector->base;
struct drm_device *dev = connector->dev;
struct dc_link *dc_link = aconnector->dc_link;
+   struct drm_modeset_acquire_ctx ctx;
bool is_mst_root_connector = aconnector->mst_mgr.mst_state;
bool result = false;
enum dc_connection_type new_connection_type = dc_connection_none;
@@ -3079,6 +3083,7 @@ static void handle_hpd_rx_irq(void *param)
bool has_left_work = false;
int idx = aconnector->base.index;
struct hpd_rx_irq_offload_work_queue *offload_wq = 
&adev->dm.hpd_rx_offload_wq[idx];
+   int ret;
 
memset(&hpd_irq_data, 0, sizeof(hpd_irq_data));
 
@@ -3153,9 +3158,9 @@ static void handle_hpd_rx_irq(void *param)
goto finish;
}
 
-   drm_modeset_lock_all(dev);
-   dm_restore_drm_connector_state(dev, connector);
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
+   dm_restore_drm_connector_state(dev, connector, &ctx);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
 
drm_kms_helper_hotplug_event(dev);
}
@@ -9703,7 +9708,8 @@ static void amdgpu_dm_atomic_commit_tail(struct 
drm_atomic_state *state)
 }
 
 
-static int dm_force_atomic_commit(struct drm_connector *connector)
+static int dm_force_atomic_commit(struct drm_connector *connector,
+ struct drm_modeset_acquire_ctx *ctx)
 {
int ret = 0;
struct drm_device *ddev = connector->dev;
@@ -9717,7 +9723,7 @@ static int dm_force_atomic_commit(struct drm_connector 
*connector)
if (!state)
return -ENOMEM;
 
-   state->acquire_ctx = ddev->mode_config.acquire_ctx;
+   state->acquire_ctx = ctx;
 
/* Construct an atomic state to restore previous display setting */
 
@@ -9764,7 +9770,8 @@ static int dm_force_atomic_commit(struct drm_connector 
*connector)
  * same port and when running without usermode desktop manager supprot
  */
 void dm_restore_drm_connector_state(struct drm_device *dev,
-  

[PATCH v4 17/20] drm/amd: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN() [part 2]

2021-10-13 Thread Fernando Ramos
Refactor places using drm_modeset_{lock,unlock}_all() so that they only
appear once per function.

This is needed so that in the next commit I can replace those functions
by the new macros (which use labels that can only appear once per
function).

Signed-off-by: Fernando Ramos 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 43 ---
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 18 +++-
 2 files changed, 26 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f35561b5a465..10ed1f8ad514 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2935,13 +2935,6 @@ static void handle_hpd_irq_helper(struct 
amdgpu_dm_connector *aconnector)
if (aconnector->base.force && new_connection_type == 
dc_connection_none) {
emulated_link_detect(aconnector->dc_link);
 
-   drm_modeset_lock_all(dev);
-   dm_restore_drm_connector_state(dev, connector);
-   drm_modeset_unlock_all(dev);
-
-   if (aconnector->base.force == DRM_FORCE_UNSPECIFIED)
-   drm_kms_helper_hotplug_event(dev);
-
} else if (dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD)) {
if (new_connection_type == dc_connection_none &&
aconnector->dc_link->type == dc_connection_none &&
@@ -2950,13 +2943,18 @@ static void handle_hpd_irq_helper(struct 
amdgpu_dm_connector *aconnector)
 
amdgpu_dm_update_connector_after_detect(aconnector);
 
-   drm_modeset_lock_all(dev);
-   dm_restore_drm_connector_state(dev, connector);
-   drm_modeset_unlock_all(dev);
-
-   if (aconnector->base.force == DRM_FORCE_UNSPECIFIED)
-   drm_kms_helper_hotplug_event(dev);
+   } else {
+   goto out;
}
+
+   drm_modeset_lock_all(dev);
+   dm_restore_drm_connector_state(dev, connector);
+   drm_modeset_unlock_all(dev);
+
+   if (aconnector->base.force == DRM_FORCE_UNSPECIFIED)
+   drm_kms_helper_hotplug_event(dev);
+
+out:
mutex_unlock(&aconnector->hpd_lock);
 
 }
@@ -3144,12 +3142,6 @@ static void handle_hpd_rx_irq(void *param)
 
amdgpu_dm_update_connector_after_detect(aconnector);
 
-
-   drm_modeset_lock_all(dev);
-   dm_restore_drm_connector_state(dev, connector);
-   drm_modeset_unlock_all(dev);
-
-   drm_kms_helper_hotplug_event(dev);
} else if (dc_link_detect(dc_link, DETECT_REASON_HPDRX)) {
 
if (aconnector->fake_enable)
@@ -3157,14 +3149,17 @@ static void handle_hpd_rx_irq(void *param)
 
amdgpu_dm_update_connector_after_detect(aconnector);
 
+   } else {
+   goto finish;
+   }
 
-   drm_modeset_lock_all(dev);
-   dm_restore_drm_connector_state(dev, connector);
-   drm_modeset_unlock_all(dev);
+   drm_modeset_lock_all(dev);
+   dm_restore_drm_connector_state(dev, connector);
+   drm_modeset_unlock_all(dev);
 
-   drm_kms_helper_hotplug_event(dev);
-   }
+   drm_kms_helper_hotplug_event(dev);
}
+finish:
 #ifdef CONFIG_DRM_AMD_DC_HDCP
if (hpd_irq_data.bytes.device_service_irq.bits.CP_IRQ) {
if (adev->dm.hdcp_workqueue)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 814f67d86a3c..7751038d5788 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -1237,12 +1237,6 @@ static ssize_t trigger_hotplug(struct file *f, const 
char __user *buf,
goto unlock;
 
amdgpu_dm_update_connector_after_detect(aconnector);
-
-   drm_modeset_lock_all(dev);
-   dm_restore_drm_connector_state(dev, connector);
-   drm_modeset_unlock_all(dev);
-
-   drm_kms_helper_hotplug_event(dev);
} else if (param[0] == 0) {
if (!aconnector->dc_link)
goto unlock;
@@ -1260,13 +1254,15 @@ static ssize_t trigger_hotplug(struct file *f, const 
char __user *buf,
 
amdgpu_dm_update_connector_after_detect(aconnector);
 
-   drm_modeset_lock_all(dev);
-   dm_restore_drm_connector_state(dev, connector);
-   drm_modeset_unlock_all(dev);
-
-   drm_kms_helper_hotplug_event(dev);
+   } else {
+   goto unlock;
}
 
+   drm_modeset_lock_all(dev);
+   dm_restore_drm_connector_state(dev, connector);

[PATCH v4 16/20] drm/amd: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 21 ++---
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index dc50c05f23fc..0ea7bdbc8482 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static void amdgpu_display_flip_callback(struct dma_fence *f,
 struct dma_fence_cb *cb)
@@ -1574,16 +1575,21 @@ int amdgpu_display_suspend_helper(struct amdgpu_device 
*adev)
struct drm_crtc *crtc;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
-   int r;
+   struct drm_modeset_acquire_ctx ctx;
+   int r, ret;
 
/* turn off display hw */
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter)
drm_helper_connector_dpms(connector,
  DRM_MODE_DPMS_OFF);
drm_connector_list_iter_end(&iter);
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
+
+   if (ret)
+   return ret;
+
/* unpin the front buffers and cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
@@ -1621,7 +1627,8 @@ int amdgpu_display_resume_helper(struct amdgpu_device 
*adev)
struct drm_connector *connector;
struct drm_connector_list_iter iter;
struct drm_crtc *crtc;
-   int r;
+   struct drm_modeset_acquire_ctx ctx;
+   int r, ret;
 
/* pin cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -1643,7 +1650,7 @@ int amdgpu_display_resume_helper(struct amdgpu_device 
*adev)
drm_helper_resume_force_mode(dev);
 
/* turn on display hw */
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
 
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter)
@@ -1651,8 +1658,8 @@ int amdgpu_display_resume_helper(struct amdgpu_device 
*adev)
  DRM_MODE_DPMS_ON);
drm_connector_list_iter_end(&iter);
 
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
 
-   return 0;
+   return ret;
 }
 
-- 
2.33.0



[PATCH v4 15/20] drm/gma500: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
---
 drivers/gpu/drm/gma500/psb_device.c | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/gma500/psb_device.c 
b/drivers/gpu/drm/gma500/psb_device.c
index 3030f18ba022..021a7238508f 100644
--- a/drivers/gpu/drm/gma500/psb_device.c
+++ b/drivers/gpu/drm/gma500/psb_device.c
@@ -8,6 +8,7 @@
 #include 
 
 #include 
+#include 
 
 #include "gma_device.h"
 #include "intel_bios.h"
@@ -169,8 +170,10 @@ static int psb_save_display_registers(struct drm_device 
*dev)
 {
struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
struct drm_crtc *crtc;
+   struct drm_modeset_acquire_ctx ctx;
struct gma_connector *connector;
struct psb_state *regs = &dev_priv->regs.psb;
+   int ret;
 
/* Display arbitration control + watermarks */
regs->saveDSPARB = PSB_RVDC32(DSPARB);
@@ -183,7 +186,7 @@ static int psb_save_display_registers(struct drm_device 
*dev)
regs->saveCHICKENBIT = PSB_RVDC32(DSPCHICKENBIT);
 
/* Save crtc and output state */
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
if (drm_helper_crtc_in_use(crtc))
dev_priv->ops->save_crtc(crtc);
@@ -193,8 +196,9 @@ static int psb_save_display_registers(struct drm_device 
*dev)
if (connector->save)
connector->save(&connector->base);
 
-   drm_modeset_unlock_all(dev);
-   return 0;
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
+
+   return ret;
 }
 
 /**
@@ -207,8 +211,10 @@ static int psb_restore_display_registers(struct drm_device 
*dev)
 {
struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
struct drm_crtc *crtc;
+   struct drm_modeset_acquire_ctx ctx;
struct gma_connector *connector;
struct psb_state *regs = &dev_priv->regs.psb;
+   int ret;
 
/* Display arbitration + watermarks */
PSB_WVDC32(regs->saveDSPARB, DSPARB);
@@ -223,7 +229,7 @@ static int psb_restore_display_registers(struct drm_device 
*dev)
/*make sure VGA plane is off. it initializes to on after reset!*/
PSB_WVDC32(0x8000, VGACNTRL);
 
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
if (drm_helper_crtc_in_use(crtc))
dev_priv->ops->restore_crtc(crtc);
@@ -232,8 +238,8 @@ static int psb_restore_display_registers(struct drm_device 
*dev)
if (connector->restore)
connector->restore(&connector->base);
 
-   drm_modeset_unlock_all(dev);
-   return 0;
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
+   return ret;
 }
 
 static int psb_power_down(struct drm_device *dev)
-- 
2.33.0



[PATCH v4 14/20] drm/i915: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN() [part 3]

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

NOTE:

While the previous two commits were a simple "search and replace", this
time I had to do a bit of refactoring, as only one
DRM_MODESET_LOCK_ALL_BEGIN()/END() pair is allowed within the same
function (see the sketch below).
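For context, the reason is that each BEGIN/END pair expands to a small
retry loop built around goto labels. Roughly, as a simplified paraphrase
of the macros in include/drm/drm_modeset_lock.h (not the literal
definitions):

    #define DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, flags, ret)        \
            drm_modeset_acquire_init(&ctx, flags);                  \
    modeset_lock_retry:                                             \
            ret = drm_modeset_lock_all_ctx(dev, &ctx);              \
            if (ret)                                                \
                    goto modeset_lock_fail;

    #define DRM_MODESET_LOCK_ALL_END(dev, ctx, ret)                 \
    modeset_lock_fail:                                              \
            if (ret == -EDEADLK) {                                  \
                    ret = drm_modeset_backoff(&ctx);                \
                    if (!ret)                                       \
                            goto modeset_lock_retry;                \
            }                                                       \
            drm_modeset_drop_locks(&ctx);                           \
            drm_modeset_acquire_fini(&ctx);

Using the pair twice in one function would therefore define the same
labels twice, which does not compile.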

Signed-off-by: Fernando Ramos 
---
 drivers/gpu/drm/i915/display/intel_overlay.c | 40 ++--
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c 
b/drivers/gpu/drm/i915/display/intel_overlay.c
index c0ee135e5499..c623738c59c8 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -1105,6 +1105,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, 
void *data,
struct drm_crtc *drmmode_crtc;
struct intel_crtc *crtc;
struct drm_i915_gem_object *new_bo;
+   struct drm_modeset_acquire_ctx ctx;
int ret;
 
overlay = dev_priv->overlay;
@@ -1113,24 +1114,24 @@ int intel_overlay_put_image_ioctl(struct drm_device 
*dev, void *data,
return -ENODEV;
}
 
-   if (!(params->flags & I915_OVERLAY_ENABLE)) {
-   drm_modeset_lock_all(dev);
-   ret = intel_overlay_switch_off(overlay);
-   drm_modeset_unlock_all(dev);
+   if (params->flags & I915_OVERLAY_ENABLE) {
 
-   return ret;
-   }
+   drmmode_crtc = drm_crtc_find(dev, file_priv, params->crtc_id);
+   if (!drmmode_crtc)
+   return -ENOENT;
+   crtc = to_intel_crtc(drmmode_crtc);
 
-   drmmode_crtc = drm_crtc_find(dev, file_priv, params->crtc_id);
-   if (!drmmode_crtc)
-   return -ENOENT;
-   crtc = to_intel_crtc(drmmode_crtc);
+   new_bo = i915_gem_object_lookup(file_priv, params->bo_handle);
+   if (!new_bo)
+   return -ENOENT;
+   }
 
-   new_bo = i915_gem_object_lookup(file_priv, params->bo_handle);
-   if (!new_bo)
-   return -ENOENT;
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
 
-   drm_modeset_lock_all(dev);
+   if (!(params->flags & I915_OVERLAY_ENABLE)) {
+   ret = intel_overlay_switch_off(overlay);
+   goto out_unlock;
+   }
 
if (i915_gem_object_is_tiled(new_bo)) {
drm_dbg_kms(&dev_priv->drm,
@@ -1195,14 +1196,11 @@ int intel_overlay_put_image_ioctl(struct drm_device 
*dev, void *data,
if (ret != 0)
goto out_unlock;
 
-   drm_modeset_unlock_all(dev);
-   i915_gem_object_put(new_bo);
-
-   return 0;
-
 out_unlock:
-   drm_modeset_unlock_all(dev);
-   i915_gem_object_put(new_bo);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
+
+   if (params->flags & I915_OVERLAY_ENABLE)
+   i915_gem_object_put(new_bo);
 
return ret;
 }
-- 
2.33.0



[PATCH v4 12/20] drm/i915: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
---
 drivers/gpu/drm/i915/display/intel_audio.c| 16 ---
 .../drm/i915/display/intel_display_debugfs.c  | 46 ---
 drivers/gpu/drm/i915/display/intel_overlay.c  |  6 ++-
 drivers/gpu/drm/i915/display/intel_pipe_crc.c |  7 ++-
 drivers/gpu/drm/i915/i915_drv.c   | 13 --
 5 files changed, 58 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_audio.c 
b/drivers/gpu/drm/i915/display/intel_audio.c
index 03e8c05a74f6..37699f13b21f 100644
--- a/drivers/gpu/drm/i915/display/intel_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_audio.c
@@ -26,6 +26,7 @@
 
 #include 
 #include 
+#include 
 
 #include "i915_drv.h"
 #include "intel_atomic.h"
@@ -1225,7 +1226,8 @@ static int i915_audio_component_bind(struct device 
*i915_kdev,
 {
struct i915_audio_component *acomp = data;
struct drm_i915_private *dev_priv = kdev_to_i915(i915_kdev);
-   int i;
+   struct drm_modeset_acquire_ctx ctx;
+   int i, ret;
 
if (drm_WARN_ON(&dev_priv->drm, acomp->base.ops || acomp->base.dev))
return -EEXIST;
@@ -1235,16 +1237,16 @@ static int i915_audio_component_bind(struct device 
*i915_kdev,
 DL_FLAG_STATELESS)))
return -ENOMEM;
 
-   drm_modeset_lock_all(&dev_priv->drm);
+   DRM_MODESET_LOCK_ALL_BEGIN((&dev_priv->drm), ctx, 0, ret);
acomp->base.ops = &i915_audio_component_ops;
acomp->base.dev = i915_kdev;
BUILD_BUG_ON(MAX_PORTS != I915_MAX_PORTS);
for (i = 0; i < ARRAY_SIZE(acomp->aud_sample_rate); i++)
acomp->aud_sample_rate[i] = 0;
dev_priv->audio_component = acomp;
-   drm_modeset_unlock_all(&dev_priv->drm);
+   DRM_MODESET_LOCK_ALL_END((&dev_priv->drm), ctx, ret);
 
-   return 0;
+   return ret;
 }
 
 static void i915_audio_component_unbind(struct device *i915_kdev,
@@ -1252,12 +1254,14 @@ static void i915_audio_component_unbind(struct device 
*i915_kdev,
 {
struct i915_audio_component *acomp = data;
struct drm_i915_private *dev_priv = kdev_to_i915(i915_kdev);
+   struct drm_modeset_acquire_ctx ctx;
+   int ret;
 
-   drm_modeset_lock_all(&dev_priv->drm);
+   DRM_MODESET_LOCK_ALL_BEGIN((&dev_priv->drm), ctx, 0, ret);
acomp->base.ops = NULL;
acomp->base.dev = NULL;
dev_priv->audio_component = NULL;
-   drm_modeset_unlock_all(&dev_priv->drm);
+   DRM_MODESET_LOCK_ALL_END((&dev_priv->drm), ctx, ret);
 
device_link_remove(hda_kdev, i915_kdev);
 
diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c 
b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
index bc5113589f0a..3205ceb0ab70 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
@@ -5,6 +5,7 @@
 
 #include 
 #include 
+#include 
 
 #include "i915_debugfs.h"
 #include "intel_de.h"
@@ -1059,11 +1060,13 @@ static int i915_display_info(struct seq_file *m, void 
*unused)
struct intel_crtc *crtc;
struct drm_connector *connector;
struct drm_connector_list_iter conn_iter;
+   struct drm_modeset_acquire_ctx ctx;
intel_wakeref_t wakeref;
+   int ret;
 
wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
 
seq_printf(m, "CRTC info\n");
seq_printf(m, "-\n");
@@ -1078,20 +1081,21 @@ static int i915_display_info(struct seq_file *m, void 
*unused)
intel_connector_info(m, connector);
drm_connector_list_iter_end(&conn_iter);
 
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
 
intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
-   return 0;
+   return ret;
 }
 
 static int i915_shared_dplls_info(struct seq_file *m, void *unused)
 {
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
-   int i;
+   struct drm_modeset_acquire_ctx ctx;
+   int i, ret;
 
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
 
seq_printf(m, "PLL refclks: non-SSC: %d kHz, SSC: %d kHz\n",
   dev_priv->dpll.ref_clks.nssc,
@@ -1134,9 +1138,9 @@ static int i915_shared_dplls_info(struct seq_file *m, 
void *unused)
seq_printf(m, " mg_pll_tdc_coldst_bias: 0x%08x\n",
   pll->state.hw_state.mg_pll_tdc_coldst_bias);
}
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
 
-   return 0;
+   return ret;
 }
 
 static int i915_ipc_status_show(struct seq_file *m, void *data)
@@ -119

[PATCH v4 13/20] drm/i915: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN() [part 2]

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

NOTE:

I separated this change from the rest of the i915 modifications to
point out something special, explained next.

The only difference between the old drm_modeset_{lock,unlock}_all()
functions and the new DRM_MODESET_LOCK_ALL_{BEGIN,END}() macros is that
the former use a global context stored in dev->mode_config.acquire_ctx
while the latter depend on a user provided one (typically in the stack).

This means that as long as no one accesses the global
dev->mode_config.acquire_ctx context in the block that runs between
lock/BEGIN and unlock/END, the code should be equivalent before and
after my changes.

The only place where I had to take special action to preserve this
condition was here, where I needed to modify the old call to
intel_modeset_setup_hw_state() to use the new stack-allocated context
structure instead of the global one.
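
In other words (a sketch, not code from the patch), something like

    drm_modeset_lock(&crtc->mutex, dev->mode_config.acquire_ctx);

inside the BEGIN/END block would no longer be equivalent after the
conversion, because the global pointer is not populated by the macros;
such code has to take the stack-allocated &ctx instead, which is exactly
what the intel_modeset_setup_hw_state() call below is switched to.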

Signed-off-by: Fernando Ramos 
---
 drivers/gpu/drm/i915/display/intel_display.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index c7d9a58f9f98..75b45c01c573 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -10992,6 +10992,7 @@ int intel_modeset_init_noirq(struct drm_i915_private 
*i915)
 int intel_modeset_init_nogem(struct drm_i915_private *i915)
 {
struct drm_device *dev = &i915->drm;
+   struct drm_modeset_acquire_ctx ctx;
enum pipe pipe;
struct intel_crtc *crtc;
int ret;
@@ -11043,10 +11044,10 @@ int intel_modeset_init_nogem(struct drm_i915_private 
*i915)
intel_vga_disable(i915);
intel_setup_outputs(i915);
 
-   drm_modeset_lock_all(dev);
-   intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
+   intel_modeset_setup_hw_state(dev, &ctx);
intel_acpi_assign_connector_fwnodes(i915);
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
 
for_each_intel_crtc(dev, crtc) {
if (!to_intel_crtc_state(crtc->base.state)->uapi.active)
-- 
2.33.0



[PATCH v4 11/20] drm/msm: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index 768012243b44..b89687074890 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "dpu_kms.h"
 #include "dpu_hw_lm.h"
@@ -1172,14 +1173,15 @@ static int _dpu_debugfs_status_show(struct seq_file *s, 
void *data)
struct drm_display_mode *mode;
struct drm_framebuffer *fb;
struct drm_plane_state *state;
+   struct drm_modeset_acquire_ctx ctx;
struct dpu_crtc_state *cstate;
 
-   int i, out_width;
+   int i, out_width, ret;
 
dpu_crtc = s->private;
crtc = &dpu_crtc->base;
 
-   drm_modeset_lock_all(crtc->dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(crtc->dev, ctx, 0, ret);
cstate = to_dpu_crtc_state(crtc->state);
 
mode = &crtc->state->adjusted_mode;
@@ -1263,9 +1265,9 @@ static int _dpu_debugfs_status_show(struct seq_file *s, 
void *data)
dpu_crtc->vblank_cb_time = ktime_set(0, 0);
}
 
-   drm_modeset_unlock_all(crtc->dev);
+   DRM_MODESET_LOCK_ALL_END(crtc->dev, ctx, ret);
 
-   return 0;
+   return ret;
 }
 
 DEFINE_SHOW_ATTRIBUTE(_dpu_debugfs_status);
-- 
2.33.0



[PATCH v4 09/20] drm/omapdrm: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
Reviewed-by: Sean Paul 
---
 drivers/gpu/drm/omapdrm/omap_fb.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c 
b/drivers/gpu/drm/omapdrm/omap_fb.c
index 190afc564914..fa7636c13c19 100644
--- a/drivers/gpu/drm/omapdrm/omap_fb.c
+++ b/drivers/gpu/drm/omapdrm/omap_fb.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "omap_dmm_tiler.h"
 #include "omap_drv.h"
@@ -62,15 +63,17 @@ static int omap_framebuffer_dirty(struct drm_framebuffer 
*fb,
  unsigned num_clips)
 {
struct drm_crtc *crtc;
+   struct drm_modeset_acquire_ctx ctx;
+   int ret;
 
-   drm_modeset_lock_all(fb->dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(fb->dev, ctx, 0, ret);
 
drm_for_each_crtc(crtc, fb->dev)
omap_crtc_flush(crtc);
 
-   drm_modeset_unlock_all(fb->dev);
+   DRM_MODESET_LOCK_ALL_END(fb->dev, ctx, ret);
 
-   return 0;
+   return ret;
 }
 
 static const struct drm_framebuffer_funcs omap_framebuffer_funcs = {
-- 
2.33.0



[PATCH v4 10/20] drm/nouveau: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
Reviewed-by: Sean Paul 
---
 drivers/gpu/drm/nouveau/dispnv50/disp.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c 
b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index d7b9f7f8c9e3..86e18a844953 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -42,6 +42,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -667,16 +668,18 @@ nv50_audio_component_bind(struct device *kdev, struct 
device *hda_kdev,
struct drm_device *drm_dev = dev_get_drvdata(kdev);
struct nouveau_drm *drm = nouveau_drm(drm_dev);
struct drm_audio_component *acomp = data;
+   struct drm_modeset_acquire_ctx ctx;
+   int ret;
 
if (WARN_ON(!device_link_add(hda_kdev, kdev, DL_FLAG_STATELESS)))
return -ENOMEM;
 
-   drm_modeset_lock_all(drm_dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(drm_dev, ctx, 0, ret);
acomp->ops = &nv50_audio_component_ops;
acomp->dev = kdev;
drm->audio.component = acomp;
-   drm_modeset_unlock_all(drm_dev);
-   return 0;
+   DRM_MODESET_LOCK_ALL_END(drm_dev, ctx, ret);
+   return ret;
 }
 
 static void
@@ -686,12 +689,14 @@ nv50_audio_component_unbind(struct device *kdev, struct 
device *hda_kdev,
struct drm_device *drm_dev = dev_get_drvdata(kdev);
struct nouveau_drm *drm = nouveau_drm(drm_dev);
struct drm_audio_component *acomp = data;
+   struct drm_modeset_acquire_ctx ctx;
+   int ret;
 
-   drm_modeset_lock_all(drm_dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(drm_dev, ctx, 0, ret);
drm->audio.component = NULL;
acomp->ops = NULL;
acomp->dev = NULL;
-   drm_modeset_unlock_all(drm_dev);
+   DRM_MODESET_LOCK_ALL_END(drm_dev, ctx, ret);
 }
 
 static const struct component_ops nv50_audio_component_bind_ops = {
-- 
2.33.0



[PATCH v4 07/20] drm/shmobile: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
Reviewed-by: Sean Paul 
---
 drivers/gpu/drm/shmobile/shmob_drm_drv.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.c 
b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
index 80078a9fd7f6..1877feff2e6b 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_drv.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
@@ -156,10 +156,12 @@ static int shmob_drm_pm_suspend(struct device *dev)
 static int shmob_drm_pm_resume(struct device *dev)
 {
struct shmob_drm_device *sdev = dev_get_drvdata(dev);
+   struct drm_modeset_acquire_ctx ctx;
+   int ret;
 
-   drm_modeset_lock_all(sdev->ddev);
+   DRM_MODESET_LOCK_ALL_BEGIN(sdev->ddev, ctx, 0, ret);
shmob_drm_crtc_resume(&sdev->crtc);
-   drm_modeset_unlock_all(sdev->ddev);
+   DRM_MODESET_LOCK_ALL_END(sdev->ddev, ctx, ret);
 
drm_kms_helper_poll_enable(sdev->ddev);
return 0;
-- 
2.33.0



[PATCH v4 08/20] drm/radeon: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
---
 drivers/gpu/drm/radeon/radeon_device.c | 21 +++--
 drivers/gpu/drm/radeon/radeon_dp_mst.c | 10 ++
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index 4f0fbf667431..7e31e5ce7f61 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1559,7 +1560,8 @@ int radeon_suspend_kms(struct drm_device *dev, bool 
suspend,
struct pci_dev *pdev;
struct drm_crtc *crtc;
struct drm_connector *connector;
-   int i, r;
+   struct drm_modeset_acquire_ctx ctx;
+   int i, r, ret;
 
if (dev == NULL || dev->dev_private == NULL) {
return -ENODEV;
@@ -1573,12 +1575,15 @@ int radeon_suspend_kms(struct drm_device *dev, bool 
suspend,
 
drm_kms_helper_poll_disable(dev);
 
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
/* turn off display hw */
list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
}
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
+
+   if (ret)
+   return ret;
 
/* unpin the front buffers and cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -1663,7 +1668,8 @@ int radeon_resume_kms(struct drm_device *dev, bool 
resume, bool fbcon)
struct radeon_device *rdev = dev->dev_private;
struct pci_dev *pdev = to_pci_dev(dev->dev);
struct drm_crtc *crtc;
-   int r;
+   struct drm_modeset_acquire_ctx ctx;
+   int r, ret;
 
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
@@ -1741,11 +1747,14 @@ int radeon_resume_kms(struct drm_device *dev, bool 
resume, bool fbcon)
if (fbcon) {
drm_helper_resume_force_mode(dev);
/* turn on display hw */
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
list_for_each_entry(connector, 
&dev->mode_config.connector_list, head) {
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
}
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
+
+   if (ret)
+   return ret;
}
 
drm_kms_helper_poll_enable(dev);
diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c 
b/drivers/gpu/drm/radeon/radeon_dp_mst.c
index ec867fa880a4..3f83ee75b100 100644
--- a/drivers/gpu/drm/radeon/radeon_dp_mst.c
+++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c
@@ -4,6 +4,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "atom.h"
 #include "ni_reg.h"
@@ -737,11 +738,12 @@ static int radeon_debugfs_mst_info_show(struct seq_file 
*m, void *unused)
struct radeon_device *rdev = (struct radeon_device *)m->private;
struct drm_device *dev = rdev->ddev;
struct drm_connector *connector;
+   struct drm_modeset_acquire_ctx ctx;
struct radeon_connector *radeon_connector;
struct radeon_connector_atom_dig *dig_connector;
-   int i;
+   int i, ret;
 
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort)
continue;
@@ -759,8 +761,8 @@ static int radeon_debugfs_mst_info_show(struct seq_file *m, 
void *unused)
   radeon_connector->cur_stream_attribs[i].fe,
   
radeon_connector->cur_stream_attribs[i].slots);
}
-   drm_modeset_unlock_all(dev);
-   return 0;
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
+   return ret;
 }
 
 DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_mst_info);
-- 
2.33.0



[PATCH v4 06/20] drm/tegra: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
Reviewed-by: Sean Paul 
Reported-by: kernel test robot 
---
 drivers/gpu/drm/tegra/dsi.c  |  6 --
 drivers/gpu/drm/tegra/hdmi.c |  6 --
 drivers/gpu/drm/tegra/sor.c  | 11 +++
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index f46d377f0c30..28050c188c1c 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "dc.h"
 #include "drm.h"
@@ -202,10 +203,11 @@ static int tegra_dsi_show_regs(struct seq_file *s, void 
*data)
struct tegra_dsi *dsi = node->info_ent->data;
struct drm_crtc *crtc = dsi->output.encoder.crtc;
struct drm_device *drm = node->minor->dev;
+   struct drm_modeset_acquire_ctx ctx;
unsigned int i;
int err = 0;
 
-   drm_modeset_lock_all(drm);
+   DRM_MODESET_LOCK_ALL_BEGIN(drm, ctx, 0, err);
 
if (!crtc || !crtc->state->active) {
err = -EBUSY;
@@ -220,7 +222,7 @@ static int tegra_dsi_show_regs(struct seq_file *s, void 
*data)
}
 
 unlock:
-   drm_modeset_unlock_all(drm);
+   DRM_MODESET_LOCK_ALL_END(drm, ctx, err);
return err;
 }
 
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index e5d2a4026028..a62de7f92414 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "hda.h"
 #include "hdmi.h"
@@ -1031,10 +1032,11 @@ static int tegra_hdmi_show_regs(struct seq_file *s, 
void *data)
struct tegra_hdmi *hdmi = node->info_ent->data;
struct drm_crtc *crtc = hdmi->output.encoder.crtc;
struct drm_device *drm = node->minor->dev;
+   struct drm_modeset_acquire_ctx ctx;
unsigned int i;
int err = 0;
 
-   drm_modeset_lock_all(drm);
+   DRM_MODESET_LOCK_ALL_BEGIN(drm, ctx, 0, err);
 
if (!crtc || !crtc->state->active) {
err = -EBUSY;
@@ -1049,7 +1051,7 @@ static int tegra_hdmi_show_regs(struct seq_file *s, void 
*data)
}
 
 unlock:
-   drm_modeset_unlock_all(drm);
+   DRM_MODESET_LOCK_ALL_END(drm, ctx, err);
return err;
 }
 
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 0ea320c1092b..3d1c8b3d1358 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "dc.h"
 #include "dp.h"
@@ -1490,10 +1491,11 @@ static int tegra_sor_show_crc(struct seq_file *s, void 
*data)
struct tegra_sor *sor = node->info_ent->data;
struct drm_crtc *crtc = sor->output.encoder.crtc;
struct drm_device *drm = node->minor->dev;
+   struct drm_modeset_acquire_ctx ctx;
int err = 0;
u32 value;
 
-   drm_modeset_lock_all(drm);
+   DRM_MODESET_LOCK_ALL_BEGIN(drm, ctx, 0, err);
 
if (!crtc || !crtc->state->active) {
err = -EBUSY;
@@ -1522,7 +1524,7 @@ static int tegra_sor_show_crc(struct seq_file *s, void 
*data)
seq_printf(s, "%08x\n", value);
 
 unlock:
-   drm_modeset_unlock_all(drm);
+   DRM_MODESET_LOCK_ALL_END(drm, ctx, err);
return err;
 }
 
@@ -1652,10 +1654,11 @@ static int tegra_sor_show_regs(struct seq_file *s, void 
*data)
struct tegra_sor *sor = node->info_ent->data;
struct drm_crtc *crtc = sor->output.encoder.crtc;
struct drm_device *drm = node->minor->dev;
+   struct drm_modeset_acquire_ctx ctx;
unsigned int i;
int err = 0;
 
-   drm_modeset_lock_all(drm);
+   DRM_MODESET_LOCK_ALL_BEGIN(drm, ctx, 0, err);
 
if (!crtc || !crtc->state->active) {
err = -EBUSY;
@@ -1670,7 +1673,7 @@ static int tegra_sor_show_regs(struct seq_file *s, void 
*data)
}
 
 unlock:
-   drm_modeset_unlock_all(drm);
+   DRM_MODESET_LOCK_ALL_END(drm, ctx, err);
return err;
 }
 
-- 
2.33.0



[PATCH v4 05/20] drm/vmwgfx: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
Reviewed-by: Sean Paul 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 11 +++
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c   | 12 
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 28af34ab6ed6..7df35c6f1458 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -28,6 +28,7 @@
 #include "vmwgfx_drv.h"
 #include "vmwgfx_devcaps.h"
 #include 
+#include 
 #include "vmwgfx_kms.h"
 
 int vmw_getparam_ioctl(struct drm_device *dev, void *data,
@@ -172,6 +173,7 @@ int vmw_present_ioctl(struct drm_device *dev, void *data,
struct drm_vmw_rect __user *clips_ptr;
struct drm_vmw_rect *clips = NULL;
struct drm_framebuffer *fb;
+   struct drm_modeset_acquire_ctx ctx;
struct vmw_framebuffer *vfb;
struct vmw_resource *res;
uint32_t num_clips;
@@ -203,7 +205,7 @@ int vmw_present_ioctl(struct drm_device *dev, void *data,
goto out_no_copy;
}
 
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
 
fb = drm_framebuffer_lookup(dev, file_priv, arg->fb_id);
if (!fb) {
@@ -231,7 +233,7 @@ int vmw_present_ioctl(struct drm_device *dev, void *data,
 out_no_surface:
drm_framebuffer_put(fb);
 out_no_fb:
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
 out_no_copy:
kfree(clips);
 out_clips:
@@ -250,6 +252,7 @@ int vmw_present_readback_ioctl(struct drm_device *dev, void 
*data,
struct drm_vmw_rect __user *clips_ptr;
struct drm_vmw_rect *clips = NULL;
struct drm_framebuffer *fb;
+   struct drm_modeset_acquire_ctx ctx;
struct vmw_framebuffer *vfb;
uint32_t num_clips;
int ret;
@@ -280,7 +283,7 @@ int vmw_present_readback_ioctl(struct drm_device *dev, void 
*data,
goto out_no_copy;
}
 
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
 
fb = drm_framebuffer_lookup(dev, file_priv, arg->fb_id);
if (!fb) {
@@ -303,7 +306,7 @@ int vmw_present_readback_ioctl(struct drm_device *dev, void 
*data,
 out_no_ttm_lock:
drm_framebuffer_put(fb);
 out_no_fb:
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
 out_no_copy:
kfree(clips);
 out_clips:
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 74fa41909213..268095cb8c84 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "vmwgfx_kms.h"
 
@@ -243,15 +244,17 @@ void vmw_kms_legacy_hotspot_clear(struct vmw_private 
*dev_priv)
struct drm_device *dev = &dev_priv->drm;
struct vmw_display_unit *du;
struct drm_crtc *crtc;
+   struct drm_modeset_acquire_ctx ctx;
+   int ret;
 
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
drm_for_each_crtc(crtc, dev) {
du = vmw_crtc_to_du(crtc);
 
du->hotspot_x = 0;
du->hotspot_y = 0;
}
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
 }
 
 void vmw_kms_cursor_post_execbuf(struct vmw_private *dev_priv)
@@ -1012,9 +1015,10 @@ static int vmw_framebuffer_bo_dirty(struct 
drm_framebuffer *framebuffer,
struct vmw_framebuffer_bo *vfbd =
vmw_framebuffer_to_vfbd(framebuffer);
struct drm_clip_rect norect;
+   struct drm_modeset_acquire_ctx ctx;
int ret, increment = 1;
 
-   drm_modeset_lock_all(&dev_priv->drm);
+   DRM_MODESET_LOCK_ALL_BEGIN((&dev_priv->drm), ctx, 0, ret);
 
if (!num_clips) {
num_clips = 1;
@@ -1040,7 +1044,7 @@ static int vmw_framebuffer_bo_dirty(struct 
drm_framebuffer *framebuffer,
 
vmw_cmd_flush(dev_priv, false);
 
-   drm_modeset_unlock_all(&dev_priv->drm);
+   DRM_MODESET_LOCK_ALL_END((&dev_priv->drm), ctx, ret);
 
return ret;
 }
-- 
2.33.0



[PATCH v4 04/20] drm: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace driver calls to
drm_modeset_lock_all() with DRM_MODESET_LOCK_ALL_BEGIN() and
DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
Reviewed-by: Sean Paul 
---
 drivers/gpu/drm/drm_client_modeset.c |  5 +++--
 drivers/gpu/drm/drm_crtc_helper.c| 18 --
 drivers/gpu/drm/drm_fb_helper.c  | 10 ++
 drivers/gpu/drm/drm_framebuffer.c|  6 --
 4 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/drm_client_modeset.c 
b/drivers/gpu/drm/drm_client_modeset.c
index 5f5184f071ed..43f772543d2a 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -1062,9 +1062,10 @@ static int drm_client_modeset_commit_legacy(struct 
drm_client_dev *client)
struct drm_device *dev = client->dev;
struct drm_mode_set *mode_set;
struct drm_plane *plane;
+   struct drm_modeset_acquire_ctx ctx;
int ret = 0;
 
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
drm_for_each_plane(plane, dev) {
if (plane->type != DRM_PLANE_TYPE_PRIMARY)
drm_plane_force_disable(plane);
@@ -1093,7 +1094,7 @@ static int drm_client_modeset_commit_legacy(struct 
drm_client_dev *client)
goto out;
}
 out:
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
 
return ret;
 }
diff --git a/drivers/gpu/drm/drm_crtc_helper.c 
b/drivers/gpu/drm/drm_crtc_helper.c
index bff917531f33..f3ce073dff79 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -218,11 +218,14 @@ static void __drm_helper_disable_unused_functions(struct 
drm_device *dev)
  */
 void drm_helper_disable_unused_functions(struct drm_device *dev)
 {
+   struct drm_modeset_acquire_ctx ctx;
+   int ret;
+
WARN_ON(drm_drv_uses_atomic_modeset(dev));
 
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
__drm_helper_disable_unused_functions(dev);
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
 }
 EXPORT_SYMBOL(drm_helper_disable_unused_functions);
 
@@ -942,12 +945,14 @@ void drm_helper_resume_force_mode(struct drm_device *dev)
struct drm_crtc *crtc;
struct drm_encoder *encoder;
const struct drm_crtc_helper_funcs *crtc_funcs;
+   struct drm_modeset_acquire_ctx ctx;
int encoder_dpms;
bool ret;
+   int err;
 
WARN_ON(drm_drv_uses_atomic_modeset(dev));
 
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
drm_for_each_crtc(crtc, dev) {
 
if (!crtc->enabled)
@@ -982,7 +987,7 @@ void drm_helper_resume_force_mode(struct drm_device *dev)
 
/* disable the unused connectors while restoring the modesetting */
__drm_helper_disable_unused_functions(dev);
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, err);
 }
 EXPORT_SYMBOL(drm_helper_resume_force_mode);
 
@@ -1002,9 +1007,10 @@ EXPORT_SYMBOL(drm_helper_resume_force_mode);
 int drm_helper_force_disable_all(struct drm_device *dev)
 {
struct drm_crtc *crtc;
+   struct drm_modeset_acquire_ctx ctx;
int ret = 0;
 
-   drm_modeset_lock_all(dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
drm_for_each_crtc(crtc, dev)
if (crtc->enabled) {
struct drm_mode_set set = {
@@ -1016,7 +1022,7 @@ int drm_helper_force_disable_all(struct drm_device *dev)
goto out;
}
 out:
-   drm_modeset_unlock_all(dev);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
return ret;
 }
 EXPORT_SYMBOL(drm_helper_force_disable_all);
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 8e7a124d6c5a..3b5661cf6c2b 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -940,10 +940,11 @@ static int setcmap_legacy(struct fb_cmap *cmap, struct 
fb_info *info)
struct drm_fb_helper *fb_helper = info->par;
struct drm_mode_set *modeset;
struct drm_crtc *crtc;
+   struct drm_modeset_acquire_ctx ctx;
u16 *r, *g, *b;
int ret = 0;
 
-   drm_modeset_lock_all(fb_helper->dev);
+   DRM_MODESET_LOCK_ALL_BEGIN(fb_helper->dev, ctx, 0, ret);
drm_client_for_each_modeset(modeset, &fb_helper->client) {
crtc = modeset->crtc;
if (!crtc->funcs->gamma_set || !crtc->gamma_size) {
@@ -970,7 +971,7 @@ static int setcmap_legacy(struct fb_cmap *cmap, struct 
fb_info *info)
goto out;
}
 out:
-   drm_modeset_unlock_all(fb_helper->dev);
+   DRM_MODESET_LOCK_ALL_END(fb_helper->dev, ctx, ret);
 
return ret;
 }
@@ -1441,10 +1442,11 @@ static int pan_display_legacy(struc

[PATCH v4 03/20] drm/msm: cleanup: drm_modeset_lock_all_ctx() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace the boilerplate code
surrounding drm_modeset_lock_all_ctx() with DRM_MODESET_LOCK_ALL_BEGIN()
and DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
Reviewed-by: Sean Paul 
Reported-by: kernel test robot 
---
 drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c 
b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
index cabe15190ec1..abda52f09b09 100644
--- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
+++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
@@ -5,6 +5,8 @@
 
 #define pr_fmt(fmt)"[drm:%s:%d] " fmt, __func__, __LINE__
 
+#include 
+
 #include "msm_disp_snapshot.h"
 
 static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem 
*base_addr)
@@ -99,20 +101,18 @@ static void msm_disp_capture_atomic_state(struct 
msm_disp_state *disp_state)
 {
struct drm_device *ddev;
struct drm_modeset_acquire_ctx ctx;
+   int ret;
 
disp_state->timestamp = ktime_get();
 
ddev = disp_state->drm_dev;
 
-   drm_modeset_acquire_init(&ctx, 0);
-
-   while (drm_modeset_lock_all_ctx(ddev, &ctx) != 0)
-   drm_modeset_backoff(&ctx);
+   DRM_MODESET_LOCK_ALL_BEGIN(ddev, ctx, 0, ret);
 
disp_state->atomic_state = drm_atomic_helper_duplicate_state(ddev,
&ctx);
-   drm_modeset_drop_locks(&ctx);
-   drm_modeset_acquire_fini(&ctx);
+
+   DRM_MODESET_LOCK_ALL_END(ddev, ctx, ret);
 }
 
 void msm_disp_snapshot_capture_state(struct msm_disp_state *disp_state)
-- 
2.33.0



[PATCH v4 02/20] drm/i915: cleanup: drm_modeset_lock_all_ctx() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace the boilerplate code
surrounding drm_modeset_lock_all_ctx() with DRM_MODESET_LOCK_ALL_BEGIN()
and DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
Reviewed-by: Sean Paul 
---
 drivers/gpu/drm/i915/display/intel_display.c | 18 +-
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 9cf987ee143d..c7d9a58f9f98 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "display/intel_audio.h"
 #include "display/intel_crt.h"
@@ -11923,22 +11924,13 @@ void intel_display_resume(struct drm_device *dev)
if (state)
state->acquire_ctx = &ctx;
 
-   drm_modeset_acquire_init(&ctx, 0);
-
-   while (1) {
-   ret = drm_modeset_lock_all_ctx(dev, &ctx);
-   if (ret != -EDEADLK)
-   break;
-
-   drm_modeset_backoff(&ctx);
-   }
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
 
-   if (!ret)
-   ret = __intel_display_resume(dev, state, &ctx);
+   ret = __intel_display_resume(dev, state, &ctx);
 
intel_enable_ipc(dev_priv);
-   drm_modeset_drop_locks(&ctx);
-   drm_modeset_acquire_fini(&ctx);
+
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
 
if (ret)
drm_err(&dev_priv->drm,
-- 
2.33.0



[PATCH v4 01/20] drm: cleanup: drm_modeset_lock_all_ctx() --> DRM_MODESET_LOCK_ALL_BEGIN()

2021-10-13 Thread Fernando Ramos
As requested in Documentation/gpu/todo.rst, replace the boilerplate code
surrounding drm_modeset_lock_all_ctx() with DRM_MODESET_LOCK_ALL_BEGIN()
and DRM_MODESET_LOCK_ALL_END()

Signed-off-by: Fernando Ramos 
---
 drivers/gpu/drm/drm_client_modeset.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/drm_client_modeset.c 
b/drivers/gpu/drm/drm_client_modeset.c
index ced09c7c06f9..5f5184f071ed 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -574,6 +574,7 @@ static bool drm_client_firmware_config(struct 
drm_client_dev *client,
int num_connectors_detected = 0;
int num_tiled_conns = 0;
struct drm_modeset_acquire_ctx ctx;
+   int err;
 
if (!drm_drv_uses_atomic_modeset(dev))
return false;
@@ -585,10 +586,7 @@ static bool drm_client_firmware_config(struct 
drm_client_dev *client,
if (!save_enabled)
return false;
 
-   drm_modeset_acquire_init(&ctx, 0);
-
-   while (drm_modeset_lock_all_ctx(dev, &ctx) != 0)
-   drm_modeset_backoff(&ctx);
+   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
 
memcpy(save_enabled, enabled, count);
mask = GENMASK(count - 1, 0);
@@ -743,8 +741,7 @@ static bool drm_client_firmware_config(struct 
drm_client_dev *client,
ret = false;
}
 
-   drm_modeset_drop_locks(&ctx);
-   drm_modeset_acquire_fini(&ctx);
+   DRM_MODESET_LOCK_ALL_END(dev, ctx, err);
 
kfree(save_enabled);
return ret;
-- 
2.33.0



[PATCH v4 00/20] drm: cleanup: Use DRM_MODESET_LOCK_ALL_* helpers

2021-10-13 Thread Fernando Ramos
Hi all,

One of the things in the DRM TODO list ("Documentation/gpu/todo.rst") was to
"use DRM_MODESET_LOCAL_ALL_* helpers instead of boilerplate". That's what this
patch series is about.

You will find two types of changes here:

  - Replacing "drm_modeset_lock_all_ctx()" (and surrounding boilerplate) with
"DRM_MODESET_LOCK_ALL_BEGIN()/END()" in the remaining places (as it has
already been done in previous commits such as b7ea04d2)

  - Replacing "drm_modeset_lock_all()" with "DRM_MODESET_LOCK_ALL_BEGIN()/END()"
in the remaining places (as it has already been done in previous commits
such as 57037094)
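
For reference, the shape of the conversion is the same everywhere; a
minimal sketch (the function and its body are made up for illustration,
only the locking pattern matters):

    #include <drm/drm_device.h>
    #include <drm/drm_modeset_lock.h>

    static int example_touch_modeset_state(struct drm_device *dev)
    {
            struct drm_modeset_acquire_ctx ctx;
            int ret;

            /* replaces drm_modeset_lock_all(dev); handles -EDEADLK backoff */
            DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);

            /* ... operate on dev->mode_config under the locks ... */

            /* replaces drm_modeset_unlock_all(dev); drops locks, finishes ctx */
            DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);

            return ret;
    }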

Most of the changes are straightforward, except for a few cases in the "amd"
and "i915" drivers where some extra dancing was needed to overcome the
limitation that the DRM_MODESET_LOCK_ALL_BEGIN()/END() macros can only be used
once inside the same function (the reason being that the macro expansion
includes *labels*, and you cannot have two labels with the same name inside one
function)

Notice that, even after this patch series, some places remain where
"drm_modeset_lock_all()" and "drm_modeset_lock_all_ctx()" are still present,
all inside drm core (which makes sense), except for two (in "amd" and "i915")
which cannot be replaced due to the way they are being used.

Changes in v2:
  - Fix commit message typo
  - Use the value returned by DRM_MODESET_LOCK_ALL_END when possible
  - Split drm/i915 patch into two simpler ones
  - Remove drm_modeset_(un)lock_all()
  - Fix build problems in non-x86 platforms

Changes in v3:
  - Fix in drm/i915 driver to make sure global context is no longer used
  - Fix in drm/amdgpu driver to make sure global context is no longer used
  - Split amdgpu driver to make it easier to understand
  - Remove acquire_ctx from drm_mode_config 
  - Rebase on top of drm-tip
  - WARNING: There is some discussion going on regarding whether the new macros
should be used (or not) in the i915 driver, as a different set of functions
has been proposed in the past (see here:
https://lore.kernel.org/dri-devel/yvrizxceipbug...@intel.com/).
In that case I will need to create a v4 where i915 files are left unchanged.
Let me know your thoughts regarding this.

Changes in v4:
  - Fix missing "Signed-off-by" in one commit
  - No extra comments received in one week
  - Rebase on top of drm-tip

Fernando Ramos (20):
  drm: cleanup: drm_modeset_lock_all_ctx() -->
DRM_MODESET_LOCK_ALL_BEGIN()
  drm/i915: cleanup: drm_modeset_lock_all_ctx() -->
DRM_MODESET_LOCK_ALL_BEGIN()
  drm/msm: cleanup: drm_modeset_lock_all_ctx() -->
DRM_MODESET_LOCK_ALL_BEGIN()
  drm: cleanup: drm_modeset_lock_all() --> DRM_MODESET_LOCK_ALL_BEGIN()
  drm/vmwgfx: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN()
  drm/tegra: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN()
  drm/shmobile: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN()
  drm/radeon: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN()
  drm/omapdrm: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN()
  drm/nouveau: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN()
  drm/msm: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN()
  drm/i915: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN()
  drm/i915: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN() [part 2]
  drm/i915: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN() [part 3]
  drm/gma500: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN()
  drm/amd: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN()
  drm/amd: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN() [part 2]
  drm/amd: cleanup: drm_modeset_lock_all() -->
DRM_MODESET_LOCK_ALL_BEGIN() [part 3]
  drm: cleanup: remove drm_modeset_(un)lock_all()
  drm: cleanup: remove acquire_ctx from drm_mode_config

 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   | 21 +++--
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 58 ++--
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  3 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 25 ++---
 drivers/gpu/drm/drm_client_modeset.c  | 14 ++-
 drivers/gpu/drm/drm_crtc_helper.c | 18 ++--
 drivers/gpu/drm/drm_fb_helper.c   | 10 +-
 drivers/gpu/drm/drm_framebuffer.c |  6 +-
 drivers/gpu/drm/drm_modeset_lock.c| 94 +--
 drivers/gpu/drm/gma500/psb_device.c   | 18 ++--
 drivers/gpu/drm/i915/display/intel_audio.c| 16 ++--
 drivers/gpu/drm/i915/display/intel_display.c  | 25 ++---
 .../drm/i915/display/intel_display_debugfs.c  | 46 +
 drivers/gpu/drm/i915/display/intel_overlay.c  | 46 -
 drivers/gpu/drm/i915/display/intel_pipe_crc.c |  7 +-
 drivers/gpu/drm/i915/i915_drv.c   | 13 ++-
 drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c  | 10 +-

Re: [PATCH] drm/msm/dpu: Add CRC support for DPU

2021-10-13 Thread Jessica Zhang

On 10/11/2021 7:01 PM, Dmitry Baryshkov wrote:

On 12/10/2021 02:41, Jessica Zhang wrote:

Add CRC support to DPU, which this driver does not currently support.
Only CRC for the CRTC is supported for now; support will be extended to
other blocks later on.

Tested on Qualcomm RB3 (debian, sdm845)

Signed-off-by: Jessica Zhang 
---
  drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c    | 169 +++-
  drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h    |  20 ++-
  drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c   |  46 +-
  drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h   |  13 +-
  drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h |   9 +-
  5 files changed, 251 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c

index 768012243b44..6ebf989c4e67 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -1,6 +1,6 @@
  // SPDX-License-Identifier: GPL-2.0-only
  /*
- * Copyright (c) 2014-2018 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2014-2021 The Linux Foundation. All rights reserved.
   * Copyright (C) 2013 Red Hat
   * Author: Rob Clark 
   */
@@ -70,6 +70,121 @@ static struct drm_encoder 
*get_encoder_from_crtc(struct drm_crtc *crtc)

  return NULL;
  }
  +static enum dpu_crtc_crc_source dpu_crtc_parse_crc_source(const 
char *src_name)

+{
+    if (!src_name || !strcmp(src_name, "none"))
+    return DPU_CRTC_CRC_SOURCE_NONE;


Newlines after || please. This would improve readability.

Noted.



+    if (!strcmp(src_name, "auto") || !strcmp(src_name, "lm"))
+    return DPU_CRTC_CRC_SOURCE_LAYER_MIXER;
+
+    return DPU_CRTC_CRC_SOURCE_INVALID;
+}
+
+static bool dpu_crtc_is_valid_crc_source(enum dpu_crtc_crc_source 
source)

+{
+    return (source > DPU_CRTC_CRC_SOURCE_NONE &&
+    source < DPU_CRTC_CRC_SOURCE_MAX);
+}
+
+int dpu_crtc_verify_crc_source(struct drm_crtc *crtc, const char 
*src_name, size_t *values_cnt)

+{
+    enum dpu_crtc_crc_source source = 
dpu_crtc_parse_crc_source(src_name);

+    struct dpu_crtc_state *crtc_state = to_dpu_crtc_state(crtc->state);
+
+    if (source < 0) {


Just use dpu_crtc_is_valid_crc_source() here.


dpu_crtc_is_valid_crc_source() is not exactly the same as checking if 
the source *name* is valid, as "none" is a valid source name (e.g. would 
pass the `source < 0` check), but 
dpu_crtc_is_valid_crc_source(DPU_CRTC_CRC_SOURCE_NONE) would return 
false as DPU_CRTC_CRC_SOURCE_NONE represents when the CRC source is set 
to nothing. The general purpose of dpu_crtc_is_valid_crc_source() is to 
check that the source specified is able to return a CRC value, so a 
source set to "none" would return false, even though "none" is a 
technically valid source name.


Seems like the root issue is that the name 
"dpu_crtc_is_valid_crc_source" is misleading and it would be better to 
rename the helper method to something clearer. Or replace the 
dpu_crtc_is_valid_crc_source() checks with a check against 
DPU_CRTC_CRC_SOURCE_NONE instead.
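
For illustration, the first option could be as simple as renaming the
helper shown earlier in the patch (a sketch; the name is only a
suggestion):

    /* true when the selected source actually produces a CRC */
    static bool dpu_crtc_source_produces_crc(enum dpu_crtc_crc_source source)
    {
            return source > DPU_CRTC_CRC_SOURCE_NONE &&
                   source < DPU_CRTC_CRC_SOURCE_MAX;
    }

while the second option would drop the helper and compare against
DPU_CRTC_CRC_SOURCE_NONE (and _INVALID) directly at the call sites.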




+    DRM_DEBUG_DRIVER("Invalid source %s for CRTC%d\n", src_name, 
crtc->index);

+    return -EINVAL;
+    }
+
+    if (source == DPU_CRTC_CRC_SOURCE_LAYER_MIXER)
+    *values_cnt = crtc_state->num_mixers;
+
+    return 0;
+}
+
+int dpu_crtc_set_crc_source(struct drm_crtc *crtc, const char 
*src_name)

+{
+    enum dpu_crtc_crc_source source = 
dpu_crtc_parse_crc_source(src_name);

+    enum dpu_crtc_crc_source current_source;
+    struct drm_crtc_commit *commit;
+    struct dpu_crtc_state *crtc_state;
+    struct drm_device *drm_dev = crtc->dev;
+    struct dpu_crtc *dpu_crtc = to_dpu_crtc(crtc);
+    struct dpu_crtc_mixer *m;
+
+    bool was_enabled;
+    bool enable = false;
+    int i, ret = 0;
+
+    if (source < 0) {
+    DRM_DEBUG_DRIVER("Invalid CRC source %s for CRTC%d\n", 
src_name, crtc->index);

+    return -EINVAL;
+    }
+
+    ret = drm_modeset_lock(&crtc->mutex, NULL);
+
+    if (ret)
+    return ret;
+
+    /* Wait for any pending commits to finish */
+    spin_lock(&crtc->commit_lock);
+    commit = list_first_entry_or_null(&crtc->commit_list, struct 
drm_crtc_commit, commit_entry);

+
+    if (commit)
+    drm_crtc_commit_get(commit);
+    spin_unlock(&crtc->commit_lock);
+
+    if (commit) {
+    ret = 
wait_for_completion_interruptible_timeout(&commit->hw_done, 10 * HZ);

+
+    if (ret)
+    goto cleanup;
+    }


The AMD driver waits for the commit to finish because its commit tail 
can modify CRC-related registers. It's unique; no other drivers seem to 
do this kind of wait. Why do we need to do it? And if we really need it, 
I'd prefer to have this code in some kind of DRM helper function.


Makes sense. I wanted to include it to be safe, but as far as I know 
nothing that happens during a commit will affect reading the CRC for 
this driver. I've also tested without the wait for commit and it doesn't 
seem to affect the CRC read, so I'll remove it.

[airlied:drm-intel-display-refactor 12/19] drivers/gpu/drm/i915/display/intel_dump_verify.c:19:1: sparse: sparse: symbol 'pipe_config_mismatch' was not declared. Should it be static?

2021-10-13 Thread kernel test robot
tree:   git://people.freedesktop.org/~airlied/linux.git 
drm-intel-display-refactor
head:   cb45bcc9cf97016e5d4edb7a4196f0847437460e
commit: 678661f2ff1ba755fc652011d3edb2977165f508 [12/19] drm/i915/display: move 
display dump/verify code to a separate file
config: i386-randconfig-s002-20211013 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.4-dirty
git remote add airlied git://people.freedesktop.org/~airlied/linux.git
git fetch --no-tags airlied drm-intel-display-refactor
git checkout 678661f2ff1ba755fc652011d3edb2977165f508
# save the attached .config to linux build tree
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir 
ARCH=i386 SHELL=/bin/bash drivers/gpu/drm/i915/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 


sparse warnings: (new ones prefixed by >>)
>> drivers/gpu/drm/i915/display/intel_dump_verify.c:19:1: sparse: sparse: 
>> symbol 'pipe_config_mismatch' was not declared. Should it be static?

Please review and possibly fold the followup patch.

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org




[RFC PATCH airlied] drm/i915/display: pipe_config_mismatch can be static

2021-10-13 Thread kernel test robot
drivers/gpu/drm/i915/display/intel_dump_verify.c:19:1: warning: symbol 
'pipe_config_mismatch' was not declared. Should it be static?

Fixes: 678661f2ff1b ("drm/i915/display: move display dump/verify code to a 
separate file")
Reported-by: kernel test robot 
Signed-off-by: kernel test robot 
---
 intel_dump_verify.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dump_verify.c 
b/drivers/gpu/drm/i915/display/intel_dump_verify.c
index dab85805a569d..ad4448f7b0291 100644
--- a/drivers/gpu/drm/i915/display/intel_dump_verify.c
+++ b/drivers/gpu/drm/i915/display/intel_dump_verify.c
@@ -15,7 +15,7 @@
 #include "intel_snps_phy.h"
 #include "intel_vrr.h"
 
-void __printf(4, 5)
+static void __printf(4, 5)
 pipe_config_mismatch(bool fastset, const struct intel_crtc *crtc,
 const char *name, const char *format, ...)
 {


Re: [PATCH v2 01/34] component: Introduce struct aggregate_device

2021-10-13 Thread Stephen Boyd
Quoting Daniel Vetter (2021-10-13 05:22:23)
> On Wed, Oct 06, 2021 at 12:37:46PM -0700, Stephen Boyd wrote:
> > Replace 'struct master' with 'struct aggregate_device' and then rename
> > 'master' to 'adev' everywhere in the code. While we're here, put a
> > struct device inside the aggregate device so that we can register it
> > with a bus_type in the next patch.
> >
> > The diff is large but that's because this is mostly a rename, where
> > sometimes 'master' is replaced with 'adev' and other times it is
> > replaced with 'parent' to indicate that the struct device that was being
> > used is actually the parent of the aggregate device and driver.
> >
> > Cc: Daniel Vetter 
> > Cc: "Rafael J. Wysocki" 
> > Cc: Rob Clark 
> > Cc: Russell King 
> > Cc: Saravana Kannan 
> > Signed-off-by: Stephen Boyd 
>
> This adds device model stuff, please cc Greg KH and ask him to review
> this. Maybe also an ack from Rafael would be good whether this makes
> sense.

I will explicitly Cc Greg on these component.c patches.

>
> Once we have that I think we can then go&collect acks/review for all the
> driver changes and get this sorted. Thanks a lot for pushing this forward.

I'll resend the series today. This one has some compilation problems
that I've fixed now.


Re: [PATCH] drm: Update MST First Link Slot Information Based on Encoding Format

2021-10-13 Thread Lyude Paul
On Wed, 2021-10-13 at 15:33 -0400, Bhawanpreet Lakha wrote:
> 
> > I wonder if we could split this to separate drm dp helper and amd driver
> > patches?

Whoops! I thought it was strange that I would say this but it seems there was
a misunderstanding on my part: when the original patch series was submitted I
was only CC'd on the first patch and I guess I must not have noticed the 1/2
in the subject line, so I thought Jerry had submitted just a single patch for
the helper. Looking back in my email history though that definitely wasn't
correct, and the original patch structure was what we wanted to go with.

Sorry for the confusion on my part!

> > 
> I believe that was the original structure, but Lyude recommended putting 
> them into the same patch to show how it is being used.
> > > /**
> > >  * Streams and planes are reset when there are changes that
> > > affect
> > >  * bandwidth. Anything that affects bandwidth needs to go
> > > through
> > > diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c
> > > b/drivers/gpu/drm/drm_dp_mst_topology.c
> > > index ad0795afc21c..fb5c47c4cb2e 100644
> > > --- a/drivers/gpu/drm/drm_dp_mst_topology.c
> > > +++ b/drivers/gpu/drm/drm_dp_mst_topology.c
> > > @@ -3368,7 +3368,7 @@ int drm_dp_update_payload_part1(struct
> > > drm_dp_mst_topology_mgr *mgr)
> > > struct drm_dp_payload req_payload;
> > > struct drm_dp_mst_port *port;
> > > int i, j;
> > > -   int cur_slots = 1;
> > > +   int cur_slots = mgr->start_slot;
> > > bool skip;
> > >   
> > > mutex_lock(&mgr->payload_lock);
> > > @@ -4321,7 +4321,7 @@ int drm_dp_find_vcpi_slots(struct
> > > drm_dp_mst_topology_mgr *mgr,
> > > num_slots = DIV_ROUND_UP(pbn, mgr->pbn_div);
> > >   
> > > /* max. time slots - one slot for MTP header */
> > > -   if (num_slots > 63)
> > > +   if (num_slots > mgr->total_avail_slots)
> > > return -ENOSPC;
> > > return num_slots;
> > >   }
> > > @@ -4333,7 +4333,7 @@ static int drm_dp_init_vcpi(struct
> > > drm_dp_mst_topology_mgr *mgr,
> > > int ret;
> > >   
> > > /* max. time slots - one slot for MTP header */
> > > -   if (slots > 63)
> > > +   if (slots > mgr->total_avail_slots)
> > > return -ENOSPC;
> > >   
> > > vcpi->pbn = pbn;
> > > @@ -4507,6 +4507,18 @@ int drm_dp_atomic_release_vcpi_slots(struct
> > > drm_atomic_state *state,
> > >   }
> > >   EXPORT_SYMBOL(drm_dp_atomic_release_vcpi_slots);
> > >   
> > > +void drm_dp_mst_update_coding_cap(struct drm_dp_mst_topology_state
> > > *mst_state, uint8_t link_coding_cap)
> > > +{
> > > +   if (link_coding_cap == DP_CAP_ANSI_128B132B) {
> > > +   mst_state->total_avail_slots = 64;
> > > +   mst_state->start_slot = 0;
> > > +   }
> > The values never change AFAICT, should we store the channel encoding
> > instead, and use that information to initialize the values?
> > 
> > (Alternatively, why aren't the 8b/10b values initialized here if
> > 128b/132b are?)
> I agree, 8b/10b is the default, but in the case where we switch from 
> 128b/132b -> 8b/10b we should be updating them here as well.
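
For illustration, the switch-back case mentioned above might look roughly
like this (a sketch; the 8b/10b values are inferred from the existing
hard-coded 63-slot limit and the slot reserved for the MTP header):

    if (link_coding_cap == DP_CAP_ANSI_128B132B) {
            mst_state->total_avail_slots = 64;
            mst_state->start_slot = 0;
    } else {
            /* 8b/10b: slot 0 carries the MTP header */
            mst_state->total_avail_slots = 63;
            mst_state->start_slot = 1;
    }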
> > > +
> > > +   DRM_DEBUG_KMS("%s coding format on mgr 0x%p\n",
> > > +   (link_coding_cap == DP_CAP_ANSI_128B132B) ?
> > > "128b/132b":"8b/10b", mst_state->mgr);
> > > +}
> > > +EXPORT_SYMBOL(drm_dp_mst_update_coding_cap);
> > > +
> > >   /**
> > >    * drm_dp_mst_allocate_vcpi() - Allocate a virtual channel
> > >    * @mgr: manager for this port
> > > @@ -4538,8 +4550,8 @@ bool drm_dp_mst_allocate_vcpi(struct
> > > drm_dp_mst_topology_mgr *mgr,
> > >   
> > > ret = drm_dp_init_vcpi(mgr, &port->vcpi, pbn, slots);
> > > if (ret) {
> > > -   drm_dbg_kms(mgr->dev, "failed to init vcpi slots=%d
> > > max=63 ret=%d\n",
> > > -   DIV_ROUND_UP(pbn, mgr->pbn_div), ret);
> > > +   drm_dbg_kms(mgr->dev, "failed to init vcpi slots=%d
> > > max=%d ret=%d\n",
> > > +   DIV_ROUND_UP(pbn, mgr->pbn_div), mgr-
> > > >total_avail_slots, ret);
> > > drm_dp_mst_topology_put_port(port);
> > > goto out;
> > > }
> > > @@ -5226,7 +5238,7 @@ drm_dp_mst_atomic_check_vcpi_alloc_limit(struct
> > > drm_dp_mst_topology_mgr *mgr,
> > >  struct
> > > drm_dp_mst_topology_state *mst_state)
> > >   {
> > > struct drm_dp_vcpi_allocation *vcpi;
> > > -   int avail_slots = 63, payload_count = 0;
> > > +   int avail_slots = mgr->total_avail_slots, payload_count = 0;
> > >   
> > > list_for_each_entry(vcpi, &mst_state->vcpis, next) {
> > > /* Releasing VCPI is always OK-even if the port is gone
> > > */
> > > @@ -5255,7 +5267,7 @@ drm_dp_mst_atomic_check_vcpi_alloc_limit(struct
> > > drm_dp_mst_topology_mgr *mgr,
> > > }
> > > }
> > > drm_dbg_a

Re: [PATCH 23/26] drm/i915: Make request conflict tracking understand parallel submits

2021-10-13 Thread John Harrison

On 10/12/2021 17:32, Matthew Brost wrote:

On Tue, Oct 12, 2021 at 03:08:05PM -0700, John Harrison wrote:

On 10/4/2021 15:06, Matthew Brost wrote:

If an object in the excl or shared slot is a composite fence from a
parallel submit and the current request in the conflict tracking is from
the same parallel context there is no need to enforce ordering as the
ordering already implicit. Make the request conflict tracking understand

ordering already -> ordering is already


Yep.


this by comparing the parents parallel fence values and skipping the

parents -> parent's


Yep.


conflict insertion if the values match.

Presumably, this is to cope with the fact that the parallel submit fences do
not look like regular submission fences. And hence the existing code that
says 'new fence belongs to same context as old fence, so safe to ignore'
does not work with parallel submission. However, this change does not appear

Yes. The check for 'if (fence->context == rq->fence.context)' doesn't
work with parallel submission, as each rq->fence.context corresponds to a
timeline. With parallel submission each intel_context in the parallel
submit has its own timeline (seqno), so the compare fails for different
intel_contexts within the same parallel submit. This is the reason for
the additional compare on the parallel submits' parents: if they have the
same parent it is the same parallel submission and there is no need to
enforce additional ordering.
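
To make that concrete, here is a rough sketch of how the two skips end up
sitting next to each other in i915_request_await_execution() (helper names
are taken from the diff below; the snippet is illustrative, not the final
code):

	/* same timeline: ordering already guaranteed */
	if (fence->context == rq->fence.context)
		continue;
	/* same parallel submit: siblings share a parent, ordering is implicit */
	if (is_same_parallel_context(rq, to_request(fence)))
		continue;
	ret = __i915_request_await_execution(rq, to_request(fence));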


to be adding parallel submit support to an existing 'same context' check. It
seems to be a brand new check that does not exist for single submission.
What makes parallel submit different? If we aren't skipping same context
fences for single submits, why do we need it for parallel? Conversely, if we
need it for parallel then why don't we need it for single?


I'm confused by what you are asking here. The existing same context
check is fine for parallel submits - it will just return true when we
compare requests with the same intel_context, and the new additional check
is only true for parallel submissions with the same parent.


And if the single submission version is simply somewhere else in the code,
why do the parallel version here instead of at the same place?


Again I'm confused by what you are asking. We might just need to sync on
a quick call.

That's okay. I think I had partly confused myself ;).

I was just meaning that the parallel compliant version of the 'ctxtA == 
ctxtB -> skip' test should be coded adjacent to the single submission 
version of the same test. I had somehow completely missed that the 
single submission version is indeed the line above in 
i915_request_await_execution(). So the two are indeed very definitely 
next to each other.


It's all good :).

John.




Matt
  

John.


Signed-off-by: Matthew Brost 
---
   drivers/gpu/drm/i915/i915_request.c | 43 +++--
   1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index e9bfa32f9270..cf89624020ad 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1325,6 +1325,25 @@ i915_request_await_external(struct i915_request *rq, 
struct dma_fence *fence)
return err;
   }
+static inline bool is_parallel_rq(struct i915_request *rq)
+{
+   return intel_context_is_parallel(rq->context);
+}
+
+static inline struct intel_context *request_to_parent(struct i915_request *rq)
+{
+   return intel_context_to_parent(rq->context);
+}
+
+static bool is_same_parallel_context(struct i915_request *to,
+struct i915_request *from)
+{
+   if (is_parallel_rq(to))

Should this not say '&& is_parallel_rq(from)'?


+   return request_to_parent(to) == request_to_parent(from);
+
+   return false;
+}
+
   int
   i915_request_await_execution(struct i915_request *rq,
 struct dma_fence *fence)
@@ -1356,11 +1375,14 @@ i915_request_await_execution(struct i915_request *rq,
 * want to run our callback in all cases.
 */
-   if (dma_fence_is_i915(fence))
+   if (dma_fence_is_i915(fence)) {
+   if (is_same_parallel_context(rq, to_request(fence)))
+   continue;
ret = __i915_request_await_execution(rq,
 to_request(fence));
-   else
+   } else {
ret = i915_request_await_external(rq, fence);
+   }
if (ret < 0)
return ret;
} while (--nchild);
@@ -1461,10 +1483,13 @@ i915_request_await_dma_fence(struct i915_request *rq, 
struct dma_fence *fence)
 fence))
continue;
-   if (dma_fence_is_i915(fence))
+   if (dma_fence_is_i915(fence)) {
+  

Re: [PATCH] drm: Update MST First Link Slot Information Based on Encoding Format

2021-10-13 Thread Bhawanpreet Lakha



On 2021-10-13 12:09 p.m., Jani Nikula wrote:

On Tue, 12 Oct 2021, Bhawanpreet Lakha  wrote:

The 8b/10b encoding format requires reserving the first slot for
recording metadata. Real data transmission starts from the second slot,
with a total of 63 slots available.

In the 128b/132b encoding format, metadata is transmitted separately
in an LLCP packet before the MTP. Real data transmission starts from
the first slot, with a total of 64 slots available.

v2:
* Remove get_mst_link_encoding_cap
* Move total/start slots to mst_state, and copy it to mst_mgr in
atomic_check

Signed-off-by: Fangzhi Zuo 
Signed-off-by: Bhawanpreet Lakha 
---
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 28 +++
  drivers/gpu/drm/drm_dp_mst_topology.c | 35 +++
  include/drm/drm_dp_mst_helper.h   | 13 +++
  3 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 5020f2d36fe1..4ad50eb0091a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10612,6 +10612,8 @@ static int amdgpu_dm_atomic_check(struct drm_device 
*dev,
  #if defined(CONFIG_DRM_AMD_DC_DCN)
struct dsc_mst_fairness_vars vars[MAX_PIPES];
  #endif
+   struct drm_dp_mst_topology_state *mst_state;
+   struct drm_dp_mst_topology_mgr *mgr;
  
  	trace_amdgpu_dm_atomic_check_begin(state);
  
@@ -10819,6 +10821,32 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,

lock_and_validation_needed = true;
}
  
+#if defined(CONFIG_DRM_AMD_DC_DCN)

+   for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) {
+   struct amdgpu_dm_connector *aconnector;
+   struct drm_connector *connector;
+   struct drm_connector_list_iter iter;
+   u8 link_coding_cap;
+
+   if (!mgr->mst_state )
+   continue;
+
+   drm_connector_list_iter_begin(dev, &iter);
+   drm_for_each_connector_iter(connector, &iter) {
+   int id = connector->index;
+
+   if (id == mst_state->mgr->conn_base_id) {
+   aconnector = to_amdgpu_dm_connector(connector);
+   link_coding_cap = 
dc_link_dp_mst_decide_link_encoding_format(aconnector->dc_link);
+   drm_dp_mst_update_coding_cap(mst_state, 
link_coding_cap);
+
+   break;
+   }
+   }
+   drm_connector_list_iter_end(&iter);
+
+   }
+#endif

I wonder if we could split this to separate drm dp helper and amd driver
patches?

I believe that was the original structure, but Lyude recommended putting
them into the same patch to see how it is being used

/**
 * Streams and planes are reset when there are changes that affect
 * bandwidth. Anything that affects bandwidth needs to go through
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
b/drivers/gpu/drm/drm_dp_mst_topology.c
index ad0795afc21c..fb5c47c4cb2e 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -3368,7 +3368,7 @@ int drm_dp_update_payload_part1(struct 
drm_dp_mst_topology_mgr *mgr)
struct drm_dp_payload req_payload;
struct drm_dp_mst_port *port;
int i, j;
-   int cur_slots = 1;
+   int cur_slots = mgr->start_slot;
bool skip;
  
  	mutex_lock(&mgr->payload_lock);

@@ -4321,7 +4321,7 @@ int drm_dp_find_vcpi_slots(struct drm_dp_mst_topology_mgr 
*mgr,
num_slots = DIV_ROUND_UP(pbn, mgr->pbn_div);
  
  	/* max. time slots - one slot for MTP header */

-   if (num_slots > 63)
+   if (num_slots > mgr->total_avail_slots)
return -ENOSPC;
return num_slots;
  }
@@ -4333,7 +4333,7 @@ static int drm_dp_init_vcpi(struct 
drm_dp_mst_topology_mgr *mgr,
int ret;
  
  	/* max. time slots - one slot for MTP header */

-   if (slots > 63)
+   if (slots > mgr->total_avail_slots)
return -ENOSPC;
  
  	vcpi->pbn = pbn;

@@ -4507,6 +4507,18 @@ int drm_dp_atomic_release_vcpi_slots(struct 
drm_atomic_state *state,
  }
  EXPORT_SYMBOL(drm_dp_atomic_release_vcpi_slots);
  
+void drm_dp_mst_update_coding_cap(struct drm_dp_mst_topology_state *mst_state, uint8_t link_coding_cap)

+{
+   if (link_coding_cap == DP_CAP_ANSI_128B132B) {
+   mst_state->total_avail_slots = 64;
+   mst_state->start_slot = 0;
+   }

The values never change AFAICT, should we store the channel encoding
instead, and use that information to initialize the values?

(Alternatively, why aren't the 8b/10b values initialized here if
128b/132b are?)
I agree, 8b/10b is the default, but in the case where we switch from
128b/132b -> 8b/10b we should be updating them here as well.
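
Something along these lines would cover switching back as well (just a
sketch based on the slot counts described in the commit message, not the
final patch):

	void drm_dp_mst_update_coding_cap(struct drm_dp_mst_topology_state *mst_state,
					  uint8_t link_coding_cap)
	{
		if (link_coding_cap == DP_CAP_ANSI_128B132B) {
			/* 128b/132b: no MTP header slot, all 64 slots usable */
			mst_state->total_avail_slots = 64;
			mst_state->start_slot = 0;
		} else {
			/* 8b/10b (default): slot 0 carries the MTP header */
			mst_state->total_avail_slots = 63;
			mst_state->start_slot = 1;
		}
	}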

+
+   

Re: [PATCH] drm/i915: Use dma_resv_iter for waiting in i915_gem_object_wait_reservation.

2021-10-13 Thread kernel test robot
Hi Maarten,

I love your patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on drm-tip/drm-tip drm-exynos/exynos-drm-next 
tegra-drm/drm/tegra/for-next v5.15-rc5 next-20211013]
[cannot apply to airlied/drm-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Maarten-Lankhorst/drm-i915-Use-dma_resv_iter-for-waiting-in-i915_gem_object_wait_reservation/20211013-184219
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-a003-20211013 (attached as .config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 
b6a8c695542b2987eb9a203d5663a0740cb4725f)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/647f0c4c47ffea53967daf523e8b935707e7a586
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Maarten-Lankhorst/drm-i915-Use-dma_resv_iter-for-waiting-in-i915_gem_object_wait_reservation/20211013-184219
git checkout 647f0c4c47ffea53967daf523e8b935707e7a586
# save the attached .config to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 
O=build_dir ARCH=i386 SHELL=/bin/bash drivers/gpu/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

>> make[4]: *** No rule to make target 'drivers/gpu/drm/i915/dma_resv_utils.o', 
>> needed by 'drivers/gpu/drm/i915/i915.o'.
   make[4]: *** [scripts/Makefile.build:277: 
drivers/gpu/drm/i915/gem/i915_gem_shrinker.o] Error 1
   make[4]: Target '__build' not remade because of errors.
--
>> drivers/gpu/drm/i915/gem/i915_gem_shrinker.c:18:10: fatal error: 
>> 'dma_resv_utils.h' file not found
   #include "dma_resv_utils.h"
^~
   1 error generated.


vim +18 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c

09137e94543761 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c Chris Wilson  
2020-07-08  17  
6d393ef5ff5cac drivers/gpu/drm/i915/gem/i915_gem_shrinker.c Chris Wilson  
2020-12-23 @18  #include "dma_resv_utils.h"
be6a0376950475 drivers/gpu/drm/i915/i915_gem_shrinker.c Daniel Vetter 
2015-03-18  19  #include "i915_trace.h"
be6a0376950475 drivers/gpu/drm/i915/i915_gem_shrinker.c Daniel Vetter 
2015-03-18  20  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


Re: [PATCH 23/26] drm/i915: Make request conflict tracking understand parallel submits

2021-10-13 Thread John Harrison

On 10/13/2021 10:51, Matthew Brost wrote:

On Tue, Oct 12, 2021 at 03:08:05PM -0700, John Harrison wrote:

On 10/4/2021 15:06, Matthew Brost wrote:

If an object in the excl or shared slot is a composite fence from a
parallel submit and the current request in the conflict tracking is from
the same parallel context there is no need to enforce ordering as the
ordering already implicit. Make the request conflict tracking understand

ordering already -> ordering is already


this by comparing the parents parallel fence values and skipping the

parents -> parent's


conflict insertion if the values match.

Presumably, this is to cope with the fact that the parallel submit fences do
not look like regular submission fences. And hence the existing code that
says 'new fence belongs to same context as old fence, so safe to ignore'
does not work with parallel submission. However, this change does not appear
to be adding parallel submit support to an existing 'same context' check. It
seems to be a brand new check that does not exist for single submission.
What makes parallel submit different? If we aren't skipping same context
fences for single submits, why do we need it for parallel? Conversely, if we
need it for parallel then why don't we need it for single?

And if the single submission version is simply somewhere else in the code,
why do the parallel version here instead of at the same place?

John.


Signed-off-by: Matthew Brost 
---
   drivers/gpu/drm/i915/i915_request.c | 43 +++--
   1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index e9bfa32f9270..cf89624020ad 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1325,6 +1325,25 @@ i915_request_await_external(struct i915_request *rq, 
struct dma_fence *fence)
return err;
   }
+static inline bool is_parallel_rq(struct i915_request *rq)
+{
+   return intel_context_is_parallel(rq->context);
+}
+
+static inline struct intel_context *request_to_parent(struct i915_request *rq)
+{
+   return intel_context_to_parent(rq->context);
+}
+
+static bool is_same_parallel_context(struct i915_request *to,
+struct i915_request *from)
+{
+   if (is_parallel_rq(to))

Should this not say '&& is_parallel_rq(from)'?


Missed this one. That isn't necessary: if 'from' is not a parallel
submit, the following compare of parents will always return false. I
could add it if you insist, as either way works.

Matt
It was more a question of whether req_to_parent() works fine 
irrespective of whether the rq is a parent, child or single?


John.




+   return request_to_parent(to) == request_to_parent(from);
+
+   return false;
+}
+
   int
   i915_request_await_execution(struct i915_request *rq,
 struct dma_fence *fence)
@@ -1356,11 +1375,14 @@ i915_request_await_execution(struct i915_request *rq,
 * want to run our callback in all cases.
 */
-   if (dma_fence_is_i915(fence))
+   if (dma_fence_is_i915(fence)) {
+   if (is_same_parallel_context(rq, to_request(fence)))
+   continue;
ret = __i915_request_await_execution(rq,
 to_request(fence));
-   else
+   } else {
ret = i915_request_await_external(rq, fence);
+   }
if (ret < 0)
return ret;
} while (--nchild);
@@ -1461,10 +1483,13 @@ i915_request_await_dma_fence(struct i915_request *rq, 
struct dma_fence *fence)
 fence))
continue;
-   if (dma_fence_is_i915(fence))
+   if (dma_fence_is_i915(fence)) {
+   if (is_same_parallel_context(rq, to_request(fence)))
+   continue;
ret = i915_request_await_request(rq, to_request(fence));
-   else
+   } else {
ret = i915_request_await_external(rq, fence);
+   }
if (ret < 0)
return ret;
@@ -1539,16 +1564,6 @@ i915_request_await_object(struct i915_request *to,
return ret;
   }
-static inline bool is_parallel_rq(struct i915_request *rq)
-{
-   return intel_context_is_parallel(rq->context);
-}
-
-static inline struct intel_context *request_to_parent(struct i915_request *rq)
-{
-   return intel_context_to_parent(rq->context);
-}
-
   static struct i915_request *
   __i915_request_ensure_parallel_ordering(struct i915_request *rq,
struct intel_timeline *timeline)




Re: [PATCH 10/26] drm/i915/guc: Assign contexts in parent-child relationship consecutive guc_ids

2021-10-13 Thread John Harrison

On 10/13/2021 11:03, Matthew Brost wrote:

On Fri, Oct 08, 2021 at 09:40:43AM -0700, John Harrison wrote:

On 10/7/2021 18:21, Matthew Brost wrote:

On Thu, Oct 07, 2021 at 03:03:04PM -0700, John Harrison wrote:

On 10/4/2021 15:06, Matthew Brost wrote:

Assign contexts in parent-child relationship consecutive guc_ids. This
is accomplished by partitioning guc_id space between ones that need to
be consecutive (1/16 available guc_ids) and ones that do not (15/16 of
available guc_ids). The consecutive search is implemented via the bitmap
API.

This is a precursor to the full GuC multi-lrc implementation but aligns
to how the GuC multi-lrc interface is defined - guc_ids must be consecutive
when using the GuC multi-lrc interface.

v2:
(Daniel Vetter)
 - Explicitly state why we assign consecutive guc_ids
v3:
(John Harrison)
 - Bring back in spin lock

Signed-off-by: Matthew Brost 
---
drivers/gpu/drm/i915/gt/uc/intel_guc.h|   6 +-
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 104 ++
2 files changed, 86 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 25a598e2b6e8..a9f4ec972bfb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -76,9 +76,13 @@ struct intel_guc {
 */
spinlock_t lock;
/**
-* @guc_ids: used to allocate new guc_ids
+* @guc_ids: used to allocate new guc_ids, single-lrc
 */
struct ida guc_ids;
+   /**
+* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
+*/
+   unsigned long *guc_ids_bitmap;
/**
 * @guc_id_list: list of intel_context with valid guc_ids but no
 * refs
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 1f2809187513..79e7732e83b2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -128,6 +128,16 @@ guc_create_virtual(struct intel_engine_cs **siblings, 
unsigned int count);
#define GUC_REQUEST_SIZE 64 /* bytes */
+/*
+ * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
+ * per the GuC submission interface. A different allocation algorithm is used
+ * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
+ * partition the guc_id space. We believe the number of multi-lrc contexts in
+ * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
+ * multi-lrc.
+ */
+#define NUMBER_MULTI_LRC_GUC_ID(GUC_MAX_LRC_DESCRIPTORS / 16)
+
/*
 * Below is a set of functions which control the GuC scheduling state which
 * require a lock.
@@ -1206,6 +1216,11 @@ int intel_guc_submission_init(struct intel_guc *guc)
INIT_WORK(&guc->submission_state.destroyed_worker,
  destroyed_worker_func);
+   guc->submission_state.guc_ids_bitmap =
+   bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL);
+   if (!guc->submission_state.guc_ids_bitmap)
+   return -ENOMEM;
+
return 0;
}
@@ -1217,6 +1232,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
guc_lrc_desc_pool_destroy(guc);
guc_flush_destroyed_contexts(guc);
i915_sched_engine_put(guc->sched_engine);
+   bitmap_free(guc->submission_state.guc_ids_bitmap);
}
static inline void queue_request(struct i915_sched_engine *sched_engine,
@@ -1268,18 +1284,43 @@ static void guc_submit_request(struct i915_request *rq)
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
-static int new_guc_id(struct intel_guc *guc)
+static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
-   return ida_simple_get(&guc->submission_state.guc_ids, 0,
- GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL |
- __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+   int ret;
+
+   GEM_BUG_ON(intel_context_is_child(ce));
+
+   if (intel_context_is_parent(ce))
+   ret = 
bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
+ NUMBER_MULTI_LRC_GUC_ID,
+ 
order_base_2(ce->parallel.number_children
+  + 1));
+   else
+   ret = ida_simple_get(&guc->submission_state.guc_ids,
+NUMBER_MULTI_LRC_GUC_ID,
+GUC_MAX_LRC_DESCRIPTORS,
+GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+__GFP_NOWARN);
+   if (unlikely(ret < 0))
+   return ret;
+
+   ce->guc_id.id = ret;
+   return 0;
 

Re: [PATCH] drm: Update MST First Link Slot Information Based on Encoding Format

2021-10-13 Thread kernel test robot
Hi Bhawanpreet,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on drm-tip/drm-tip linus/master v5.15-rc5 next-20211013]
[cannot apply to drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next 
airlied/drm-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Bhawanpreet-Lakha/drm-Update-MST-First-Link-Slot-Information-Based-on-Encoding-Format/20211013-060001
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-a003-20211012 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
# 
https://github.com/0day-ci/linux/commit/5604bf980dcbfdd7650b7e1d5d4a2fd9f18cd866
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Bhawanpreet-Lakha/drm-Update-MST-First-Link-Slot-Information-Based-on-Encoding-Format/20211013-060001
git checkout 5604bf980dcbfdd7650b7e1d5d4a2fd9f18cd866
# save the attached .config to linux build tree
make W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   In file included from 
drivers/gpu/drm/amd/amdgpu/../display/dmub/dmub_srv.h:67,
from 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:35:
   drivers/gpu/drm/amd/amdgpu/../display/dmub/inc/dmub_cmd.h: In function 
'dmub_rb_flush_pending':
   drivers/gpu/drm/amd/amdgpu/../display/dmub/inc/dmub_cmd.h:2819:12: warning: 
variable 'temp' set but not used [-Wunused-but-set-variable]
2819 |   uint64_t temp;
 |^~~~
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c: At top level:
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:633:6: warning: 
no previous prototype for 'dmub_aux_setconfig_callback' [-Wmissing-prototypes]
 633 | void dmub_aux_setconfig_callback(struct amdgpu_device *adev, struct 
dmub_notification *notify)
 |  ^~~
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:649:6: warning: 
no previous prototype for 'dmub_hpd_callback' [-Wmissing-prototypes]
 649 | void dmub_hpd_callback(struct amdgpu_device *adev, struct 
dmub_notification *notify)
 |  ^
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:706:6: warning: 
no previous prototype for 'register_dmub_notify_callback' [-Wmissing-prototypes]
 706 | bool register_dmub_notify_callback(struct amdgpu_device *adev, enum 
dmub_notification_type type,
 |  ^
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c: In function 
'dm_update_mst_vcpi_slots_for_dsc':
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:7174:12: 
warning: variable 'clock' set but not used [-Wunused-but-set-variable]
7174 |  int i, j, clock;
 |^
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c: In function 
'amdgpu_dm_atomic_check':
>> drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:10912:23: error: 
>> implicit declaration of function 
>> 'dc_link_dp_mst_decide_link_encoding_format' 
>> [-Werror=implicit-function-declaration]
   10912 | link_coding_cap = 
dc_link_dp_mst_decide_link_encoding_format(aconnector->dc_link);
 |   ^~
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c: At top level:
   drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:11394:5: 
warning: no previous prototype for 'amdgpu_dm_set_dmub_async_sync_status' 
[-Wmissing-prototypes]
   11394 | int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, struct 
dc_context *ctx,
 | ^~~~
   cc1: some warnings being treated as errors


vim +/dc_link_dp_mst_decide_link_encoding_format +10912 
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c

 10643  
 10644  /**
 10645   * amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM.
 10646   * @dev: The DRM device
 10647   * @state: The atomic state to commit
 10648   *
 10649   * Validate that the given atomic state is programmable by DC into 
hardware.
 10650   * This involves constructing a &struct dc_state reflecting the new 
hardware
 10651   * state we wish to commit, then querying DC to see if it is 
programmable. It's
 10652   * important not to modify the existing DC state. Otherwise, 
atomic_check
 10653   * may unexpectedly commit hardware changes.
 10654   *
 10655   * Whe

Re: [PATCH v2 1/4] dri: do not check for NULL debugfs dentry

2021-10-13 Thread Das, Nirmoy

Ah there are three typos :/

s/deference/dereference for this one and for the 2nd patch as well.


Regards,

Nirmoy

On 10/13/2021 8:35 PM, Nirmoy Das wrote:

Debugfs APIs return an encoded error on failure instead of NULL,
and for drm primary/minor debugfs directories we save the
returned value in the dentry pointer and pass it on to drm
drivers to further create debugfs files/directories. Error
conditions are handled by the debugfs APIs, so there is no need
to check for NULL, as saved dentry pointers will either contain a
valid pointer or an error code.

Also document this for future reference.

CC: Maarten Lankhorst 
CC: Maxime Ripard 
CC: Thomas Zimmermann 
CC: David Airlie 
CC: Daniel Vetter 

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/drm_debugfs.c |  9 -
  drivers/gpu/drm/drm_drv.c |  1 +
  include/drm/drm_file.h| 28 
  3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index b0a826489488..0073854a4383 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -272,9 +272,6 @@ static void drm_debugfs_remove_all_files(struct drm_minor 
*minor)

  void drm_debugfs_cleanup(struct drm_minor *minor)
  {
-   if (!minor->debugfs_root)
-   return;
-
drm_debugfs_remove_all_files(minor);

debugfs_remove_recursive(minor->debugfs_root);
@@ -419,9 +416,6 @@ void drm_debugfs_connector_add(struct drm_connector 
*connector)
struct drm_minor *minor = connector->dev->primary;
struct dentry *root;

-   if (!minor->debugfs_root)
-   return;
-
root = debugfs_create_dir(connector->name, minor->debugfs_root);
connector->debugfs_entry = root;

@@ -440,9 +434,6 @@ void drm_debugfs_connector_add(struct drm_connector 
*connector)

  void drm_debugfs_connector_remove(struct drm_connector *connector)
  {
-   if (!connector->debugfs_entry)
-   return;
-
debugfs_remove_recursive(connector->debugfs_entry);

connector->debugfs_entry = NULL;
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 7a5097467ba5..918f302d9c43 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -64,6 +64,7 @@ static struct idr drm_minors_idr;
   */
  static bool drm_core_init_complete;

+/* Do not deference this pointer as it will contain ERR_PTR on error. */
  static struct dentry *drm_debugfs_root;

  DEFINE_STATIC_SRCU(drm_unplug_srcu);
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index a3acb7ac3550..3a30fc4d8905 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -69,15 +69,35 @@ enum drm_minor_type {
   */
  struct drm_minor {
/* private: */
-   int index;  /* Minor device number */
-   int type;   /* Control or render */
-   struct device *kdev;/* Linux device */
+   /** @index: minor device number. */
+   int index;
+
+   /** @type: minor device type: primary, control, render. */
+   int type;
+
+   /** @kdev: Linux device pointer. */
+   struct device *kdev;
+
+   /** @dev: drm device pointer. */
struct drm_device *dev;

+
+   /** @debugfs_root:
+*
+* Dentry for /sys/kernel/debug/dri/@index debugfs dir. Do not
+* deference this pointer as it will contain ERR_PTR on error.
+*/
struct dentry *debugfs_root;

+   /** @debugfs_list:
+*
+* A list to keep track of debugfs dentries created using
+* drm_debugfs_create_files() by drm drivers.
+*/
struct list_head debugfs_list;
-   struct mutex debugfs_lock; /* Protects debugfs_list. */
+
+   /** @debugfs_lock: Protects debugfs_list. */
+   struct mutex debugfs_lock;
  };

  /**
--
2.32.0



Re: [Intel-gfx] [PATCH] drm/i915/uapi: Add comment clarifying purpose of I915_TILING_* values

2021-10-13 Thread Yokoyama, Caz
Looks good to me.
Reviewed-by: Caz Yokoyama 
-caz

On Tue, 2021-10-12 at 15:12 -0700, Matt Roper wrote:
> The I915_TILING_* values in our uapi header are intended solely for
> use
> with the old get_tiling/set_tiling ioctls that operate on hardware
> de-tiling fences; all other uapi communication about tiling types is
> done via framebuffer modifiers rather than with these old values.
> 
> On newer Intel platforms detiling fences no longer exist so the old
> get_tiling/set_tiling ioctls are no longer usable and will always
> return
> -EOPNOTSUPP.  This means there's no reason to add new tiling types
> (such
> as the Tile4 format introduced by Xe_HP) to the uapi header
> here.  Any
> kernel-internal code that needs to represent tiling format should
> either
> rely on framebuffer modifiers (as the display code does) or use some
> kind of non-uapi enum (as the GEM blt selftest now does).
> 
> References: 
> https://patchwork.freedesktop.org/patch/456656/?series=95308
> Cc: Ville Syrjälä 
> Signed-off-by: Matt Roper 
> ---
>  include/uapi/drm/i915_drm.h | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/include/uapi/drm/i915_drm.h
> b/include/uapi/drm/i915_drm.h
> index aa2a7eccfb94..9b8e61163c39 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1522,6 +1522,12 @@ struct drm_i915_gem_caching {
>  #define I915_TILING_NONE 0
>  #define I915_TILING_X1
>  #define I915_TILING_Y2
> +/*
> + * Do not add new tiling types here.  The I915_TILING_* values are
> for
> + * de-tiling fence registers that no longer exist on modern
> platforms.  Although
> + * the hardware may support new types of tiling in general (e.g.,
> Tile4), we
> + * do not need to add them to the uapi that is specific to now-
> defunct ioctls.
> + */
>  #define I915_TILING_LAST I915_TILING_Y
>  
>  #define I915_BIT_6_SWIZZLE_NONE  0


[PATCH v2 4/4] vgaswitcheroo: do not check for NULL debugfs dentry

2021-10-13 Thread Nirmoy Das
Debugfs APIs return an encoded error on failure, so use
debugfs_lookup() instead of checking for NULL.

CC: Lukas Wunner 
CC: David Airlie 
CC: Daniel Vetter 
CC: Maarten Lankhorst 
CC: Maxime Ripard 
CC: Thomas Zimmermann 

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/vga/vga_switcheroo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 365e6ddbe90f..07ab8d85e899 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -914,7 +914,7 @@ static void vga_switcheroo_debugfs_fini(struct vgasr_priv 
*priv)
 static void vga_switcheroo_debugfs_init(struct vgasr_priv *priv)
 {
/* already initialised */
-   if (priv->debugfs_root)
+   if (priv->debugfs_root && !IS_ERR(priv->debugfs_root))
return;

priv->debugfs_root = debugfs_create_dir("vgaswitcheroo", NULL);
--
2.32.0



[PATCH 3/4] drm/i915/gt: do not check for NULL debugfs dentry

2021-10-13 Thread Nirmoy Das
Do not check for a NULL value, as drm.primary->debugfs_root
will either contain a valid pointer or an encoded error
instead of NULL.

CC: Jani Nikula 
CC: Joonas Lahtinen 
CC: Rodrigo Vivi 
CC: David Airlie 
CC: Daniel Vetter 

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/i915/gt/debugfs_gt.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c 
b/drivers/gpu/drm/i915/gt/debugfs_gt.c
index 591eb60785db..95ca1b3ad320 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt.c
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c
@@ -16,9 +16,6 @@ void debugfs_gt_register(struct intel_gt *gt)
 {
struct dentry *root;
 
-   if (!gt->i915->drm.primary->debugfs_root)
-   return;
-
root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root);
if (IS_ERR(root))
return;
-- 
2.32.0



[PATCH 2/4] drm/ttm: do not set NULL to debugfs dentry

2021-10-13 Thread Nirmoy Das
For a debugfs directory, it is recommended to save the result
and pass it on to the next debugfs API for creating debugfs
files/directories. Error conditions are handled by the debugfs APIs.

CC: Christian Koenig 
CC: Huang Rui 
CC: David Airlie 
CC: Daniel Vetter 

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/ttm/ttm_device.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index be24bb6cefd0..2c66f06198e9 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -44,6 +44,7 @@ static unsigned ttm_glob_use_count;
 struct ttm_global ttm_glob;
 EXPORT_SYMBOL(ttm_glob);
 
+/* Do not deference this pointer as it will contain ERR_PTR on error. */
 struct dentry *ttm_debugfs_root;
 
 static void ttm_global_release(void)
@@ -77,9 +78,6 @@ static int ttm_global_init(void)
si_meminfo(&si);
 
ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
-   if (IS_ERR(ttm_debugfs_root)) {
-   ttm_debugfs_root = NULL;
-   }
 
/* Limit the number of pages in the pool to about 50% of the total
 * system memory.
@@ -108,8 +106,7 @@ static int ttm_global_init(void)
debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root,
&glob->bo_count);
 out:
-   if (ret && ttm_debugfs_root)
-   debugfs_remove(ttm_debugfs_root);
+   debugfs_remove(ttm_debugfs_root);
if (ret)
--ttm_glob_use_count;
mutex_unlock(&ttm_global_mutex);
-- 
2.32.0



[PATCH v2 1/4] dri: do not check for NULL debugfs dentry

2021-10-13 Thread Nirmoy Das
Debugfs APIs return an encoded error on failure instead of NULL,
and for drm primary/minor debugfs directories we save the
returned value in the dentry pointer and pass it on to drm
drivers to further create debugfs files/directories. Error
conditions are handled by the debugfs APIs, so there is no need
to check for NULL, as saved dentry pointers will either contain a
valid pointer or an error code.

Also document this for future reference.

CC: Maarten Lankhorst 
CC: Maxime Ripard 
CC: Thomas Zimmermann 
CC: David Airlie 
CC: Daniel Vetter 

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/drm_debugfs.c |  9 -
 drivers/gpu/drm/drm_drv.c |  1 +
 include/drm/drm_file.h| 28 
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index b0a826489488..0073854a4383 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -272,9 +272,6 @@ static void drm_debugfs_remove_all_files(struct drm_minor 
*minor)

 void drm_debugfs_cleanup(struct drm_minor *minor)
 {
-   if (!minor->debugfs_root)
-   return;
-
drm_debugfs_remove_all_files(minor);

debugfs_remove_recursive(minor->debugfs_root);
@@ -419,9 +416,6 @@ void drm_debugfs_connector_add(struct drm_connector 
*connector)
struct drm_minor *minor = connector->dev->primary;
struct dentry *root;

-   if (!minor->debugfs_root)
-   return;
-
root = debugfs_create_dir(connector->name, minor->debugfs_root);
connector->debugfs_entry = root;

@@ -440,9 +434,6 @@ void drm_debugfs_connector_add(struct drm_connector 
*connector)

 void drm_debugfs_connector_remove(struct drm_connector *connector)
 {
-   if (!connector->debugfs_entry)
-   return;
-
debugfs_remove_recursive(connector->debugfs_entry);

connector->debugfs_entry = NULL;
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 7a5097467ba5..918f302d9c43 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -64,6 +64,7 @@ static struct idr drm_minors_idr;
  */
 static bool drm_core_init_complete;

+/* Do not deference this pointer as it will contain ERR_PTR on error. */
 static struct dentry *drm_debugfs_root;

 DEFINE_STATIC_SRCU(drm_unplug_srcu);
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index a3acb7ac3550..3a30fc4d8905 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -69,15 +69,35 @@ enum drm_minor_type {
  */
 struct drm_minor {
/* private: */
-   int index;  /* Minor device number */
-   int type;   /* Control or render */
-   struct device *kdev;/* Linux device */
+   /** @index: minor device number. */
+   int index;
+
+   /** @type: minor device type: primary, control, render. */
+   int type;
+
+   /** @kdev: Linux device pointer. */
+   struct device *kdev;
+
+   /** @dev: drm device pointer. */
struct drm_device *dev;

+
+   /** @debugfs_root:
+*
+* Dentry for /sys/kernel/debug/dri/@index debugfs dir. Do not
+* deference this pointer as it will contain ERR_PTR on error.
+*/
struct dentry *debugfs_root;

+   /** @debugfs_list:
+*
+* A list to keep track of debugfs dentries created using
+* drm_debugfs_create_files() by drm drivers.
+*/
struct list_head debugfs_list;
-   struct mutex debugfs_lock; /* Protects debugfs_list. */
+
+   /** @debugfs_lock: Protects debugfs_list. */
+   struct mutex debugfs_lock;
 };

 /**
--
2.32.0



Re: [PATCH 12/26] drm/i915/guc: Implement multi-lrc submission

2021-10-13 Thread Matthew Brost
On Fri, Oct 08, 2021 at 10:20:24AM -0700, John Harrison wrote:
> On 10/4/2021 15:06, Matthew Brost wrote:
> > Implement multi-lrc submission via a single workqueue entry and single
> > H2G. The workqueue entry contains an updated tail value for each
> > request, of all the contexts in the multi-lrc submission, and updates
> > these values simultaneously. As such, the tasklet and bypass path have
> > been updated to coalesce requests into a single submission.
> > 
> > v2:
> >   (John Harrison)
> >- s/wqe/wqi
> >- Use FIELD_PREP macros
> >- Add GEM_BUG_ONs ensures length fits within field
> >- Add comment / white space to intel_guc_write_barrier
> >   (Kernel test robot)
> >- Make need_tasklet a static function
> > 
> > Signed-off-by: Matthew Brost 
> > ---
> >   drivers/gpu/drm/i915/gt/uc/intel_guc.c|  26 ++
> >   drivers/gpu/drm/i915/gt/uc/intel_guc.h|   8 +
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  24 +-
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  23 +-
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 319 --
> >   drivers/gpu/drm/i915/i915_request.h   |   8 +
> >   6 files changed, 335 insertions(+), 73 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> > index 8f8182bf7c11..7191e8439290 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> > @@ -756,3 +756,29 @@ void intel_guc_load_status(struct intel_guc *guc, 
> > struct drm_printer *p)
> > }
> > }
> >   }
> > +
> > +void intel_guc_write_barrier(struct intel_guc *guc)
> > +{
> > +   struct intel_gt *gt = guc_to_gt(guc);
> > +
> > +   if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
> > +   /*
> > +* Ensure intel_uncore_write_fw can be used rather than
> > +* intel_uncore_write.
> > +*/
> > +   GEM_BUG_ON(guc->send_regs.fw_domains);
> > +
> > +   /*
> > +* This register is used by the i915 and GuC for MMIO based
> > +* communication. Once we are in this code CTBs are the only
> > +* method the i915 uses to communicate with the GuC so it is
> > +* safe to write to this register (a value of 0 is NOP for MMIO
> > +* communication). If we ever start mixing CTBs and MMIOs a new
> > +* register will have to be chosen.
> > +*/
> Hmm, missed it before but this comment is very CTB centric and the barrier
> function is now being used for parallel submission work queues. Seems like
> an extra comment should be added to cover that case. Just something simple
> noting that WQ usage is also guaranteed to be post-CTB switch over.
> 

Sure.

> > +   intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0);
> > +   } else {
> > +   /* wmb() sufficient for a barrier if in smem */
> > +   wmb();
> > +   }
> > +}
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > index a9f4ec972bfb..147f39cc0f2f 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > @@ -46,6 +46,12 @@ struct intel_guc {
> >  * submitted until the stalled request is processed.
> >  */
> > struct i915_request *stalled_request;
> > +   enum {
> > +   STALL_NONE,
> > +   STALL_REGISTER_CONTEXT,
> > +   STALL_MOVE_LRC_TAIL,
> > +   STALL_ADD_REQUEST,
> > +   } submission_stall_reason;
> > /* intel_guc_recv interrupt related state */
> > /** @irq_lock: protects GuC irq state */
> > @@ -361,4 +367,6 @@ void intel_guc_submission_cancel_requests(struct 
> > intel_guc *guc);
> >   void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
> > +void intel_guc_write_barrier(struct intel_guc *guc);
> > +
> >   #endif
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > index 20c710a74498..10d1878d2826 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > @@ -377,28 +377,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct)
> > return ++ct->requests.last_fence;
> >   }
> > -static void write_barrier(struct intel_guc_ct *ct)
> > -{
> > -   struct intel_guc *guc = ct_to_guc(ct);
> > -   struct intel_gt *gt = guc_to_gt(guc);
> > -
> > -   if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
> > -   GEM_BUG_ON(guc->send_regs.fw_domains);
> > -   /*
> > -* This register is used by the i915 and GuC for MMIO based
> > -* communication. Once we are in this code CTBs are the only
> > -* method the i915 uses to communicate with the GuC so it is
> > -* safe to write to this register (a value of 0 is NOP for MMIO
> > -* communication). If we ever start mixing CTBs and MMIOs a

Re: [PATCH 1/2] drm: Add Gamma and Degamma LUT sizes props to drm_crtc to validate.

2021-10-13 Thread Mark Yacoub
On Fri, Oct 1, 2021 at 4:34 PM Sean Paul  wrote:
>
> On Wed, Sep 29, 2021 at 03:39:25PM -0400, Mark Yacoub wrote:
> > From: Mark Yacoub 
> >
> > [Why]
> > 1. drm_atomic_helper_check doesn't check for the LUT sizes of either Gamma
> > or Degamma props in the new CRTC state, allowing any invalid size to
> > be passed on.
> > 2. Each driver has its own LUT size, which could also be different for
> > legacy users.
> >
> > [How]
> > 1. Create |degamma_lut_size| and |gamma_lut_size| to save the LUT sizes
> > assigned by the driver when it's initializing its color and CTM
> > management.
> > 2. Create drm_atomic_helper_check_crtc which is called by
> > drm_atomic_helper_check to check the LUT sizes saved in drm_crtc that
> > they match the sizes in the new CRTC state.
> >
>
> Did you consider extending drm_color_lut_check() with the size checks?
Renamed it to be specific to channels. It's HW specific, so I thought
of keeping it as a separate check if the driver chooses to use it.
Removed the LUT size check that Intel uses, though.
>
> > Fixes: igt@kms_color@pipe-A-invalid-gamma-lut-sizes on MTK
> > Tested on Zork(amdgpu) and Jacuzzi(mediatek)
> >
> > Signed-off-by: Mark Yacoub
>
> nit: missing a space between name and email
>
>
> > ---
> >  drivers/gpu/drm/drm_atomic_helper.c | 56 +
> >  drivers/gpu/drm/drm_color_mgmt.c|  2 ++
> >  include/drm/drm_atomic_helper.h |  1 +
> >  include/drm/drm_crtc.h  | 11 ++
> >  4 files changed, 70 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/drm_atomic_helper.c 
> > b/drivers/gpu/drm/drm_atomic_helper.c
> > index 2c0c6ec928200..265b9747250d1 100644
> > --- a/drivers/gpu/drm/drm_atomic_helper.c
> > +++ b/drivers/gpu/drm/drm_atomic_helper.c
> > @@ -930,6 +930,58 @@ drm_atomic_helper_check_planes(struct drm_device *dev,
> >  }
> >  EXPORT_SYMBOL(drm_atomic_helper_check_planes);
> >
> > +/**
> > + * drm_atomic_helper_check_planes - validate state object for CRTC changes
>
> Ctrl+c/Ctrl+v error here
>
> > + * @state: the driver state object
> > + *
> > + * Check the CRTC state object such as the Gamma/Degamma LUT sizes if the 
> > new
>
> Are there missing words between "object" and "such"?
>
Not really. I was thinking of how to reword it without being too
verbose and nothing sounded good.
I mean I'm checking the object, such as the LUT, which is part of this object.
> > + * state holds them.
> > + *
> > + * RETURNS:
> > + * Zero for success or -errno
> > + */
> > +int drm_atomic_helper_check_crtc(struct drm_atomic_state *state)
>
> drm_atomic_helper_check_crtcs to be consistent with
> drm_atomic_helper_check_planes
>
> > +{
> > + struct drm_crtc *crtc;
> > + struct drm_crtc_state *new_crtc_state;
> > + int i;
> > +
> > + for_each_new_crtc_in_state (state, crtc, new_crtc_state, i) {
>
> no space before (
>
> > + if (new_crtc_state->gamma_lut) {
>
> Perhaps gate these with a check of state->color_mgmt_changed first?
Done. I did it for each check so you can easily expand in the future
and squeeze in more things around those checks as it loops over the CRTC
states.
>
> > + uint64_t supported_lut_size = crtc->gamma_lut_size;
> > + uint32_t supported_legacy_lut_size = crtc->gamma_size;
> > + uint32_t new_state_lut_size =
> > + drm_color_lut_size(new_crtc_state->gamma_lut);
>
> nit: new_state_lut_size and supported_lut_size can be pulled out to top level 
> scope
> to avoid re-instantiation on each iteration
>
CRTC is an iterator, so it changes within the loop.
> > +
> > + if (new_state_lut_size != supported_lut_size &&
> > + new_state_lut_size != supported_legacy_lut_size) {
>
> According to the docbook, "If drivers support multiple LUT sizes then they
> should publish the largest size, and sub-sample smaller sized LUTs". So
> should this check be > instead of != ?
>
So IGT tests see it differently: they check for a very specific size,
rather than a range. So if the legacy size is 256 and the regular size is
1024, 1000 isn't a valid size.
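
In other words, roughly this exact-match check rather than a range check
(illustrative only, mirroring the helper in the patch above):

	/* Accept only the driver-published size or the legacy size. */
	if (new_state_lut_size != supported_lut_size &&
	    new_state_lut_size != supported_legacy_lut_size)
		return -EINVAL; /* e.g. 1000 entries is rejected even though 1000 < 1024 */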
> > + DRM_DEBUG_DRIVER(
>
> drm_dbg_state() is probably more appropriate
>
> > + "Invalid Gamma LUT size. Should be %u 
> > (or %u for legacy) but got %u.\n",
> > + supported_lut_size,
> > + supported_legacy_lut_size,
> > + new_state_lut_size);
> > + return -EINVAL;
> > + }
> > + }
> > +
> > + if (new_crtc_state->degamma_lut) {
> > + uint32_t new_state_lut_size =
> > + 
> > drm_color_lut_size(new_crtc_state->degamma_lut);
> > + uint64_t supported_lut_size = crtc->degamma_lut_size;
> > +
> > + if (new_state_lut_size != supported_lut_s

[PATCH 2/2] amd/amdgpu_dm: Verify Gamma and Degamma LUT sizes using DRM Core check

2021-10-13 Thread Mark Yacoub
From: Mark Yacoub 

[Why]
drm_atomic_helper_check_crtc now verifies both legacy and non-legacy LUT
sizes. There is no need to check it within amdgpu_dm_atomic_check.

[How]
Remove the local call to verify LUT sizes and use DRM Core function
instead.

Tested on ChromeOS Zork.

v1:
Remove amdgpu_dm_verify_lut_sizes everywhere.

Signed-off-by: Mark Yacoub 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  8 ++---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  1 -
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 35 ---
 3 files changed, 4 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f74663b6b046e..47f8de1cfc3a5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10244,6 +10244,10 @@ static int amdgpu_dm_atomic_check(struct drm_device 
*dev,
}
}
 #endif
+   ret = drm_atomic_helper_check_crtcs(state);
+   if (ret)
+   return ret;
+
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, 
new_crtc_state, i) {
dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
 
@@ -10253,10 +10257,6 @@ static int amdgpu_dm_atomic_check(struct drm_device 
*dev,
dm_old_crtc_state->dsc_force_changed == false)
continue;
 
-   ret = amdgpu_dm_verify_lut_sizes(new_crtc_state);
-   if (ret)
-   goto fail;
-
if (!new_crtc_state->enable)
continue;
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index fcb9c4a629c32..22730e5542092 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -617,7 +617,6 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
 #define MAX_COLOR_LEGACY_LUT_ENTRIES 256
 
 void amdgpu_dm_init_color_mod(void);
-int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
 int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
 int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
  struct dc_plane_state *dc_plane_state);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index a022e5bb30a5c..319f8a8a89835 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -284,37 +284,6 @@ static int __set_input_tf(struct dc_transfer_func *func,
return res ? 0 : -ENOMEM;
 }
 
-/**
- * Verifies that the Degamma and Gamma LUTs attached to the |crtc_state| are of
- * the expected size.
- * Returns 0 on success.
- */
-int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state)
-{
-   const struct drm_color_lut *lut = NULL;
-   uint32_t size = 0;
-
-   lut = __extract_blob_lut(crtc_state->degamma_lut, &size);
-   if (lut && size != MAX_COLOR_LUT_ENTRIES) {
-   DRM_DEBUG_DRIVER(
-   "Invalid Degamma LUT size. Should be %u but got %u.\n",
-   MAX_COLOR_LUT_ENTRIES, size);
-   return -EINVAL;
-   }
-
-   lut = __extract_blob_lut(crtc_state->gamma_lut, &size);
-   if (lut && size != MAX_COLOR_LUT_ENTRIES &&
-   size != MAX_COLOR_LEGACY_LUT_ENTRIES) {
-   DRM_DEBUG_DRIVER(
-   "Invalid Gamma LUT size. Should be %u (or %u for 
legacy) but got %u.\n",
-   MAX_COLOR_LUT_ENTRIES, MAX_COLOR_LEGACY_LUT_ENTRIES,
-   size);
-   return -EINVAL;
-   }
-
-   return 0;
-}
-
 /**
  * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream.
  * @crtc: amdgpu_dm crtc state
@@ -348,10 +317,6 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state 
*crtc)
bool is_legacy;
int r;
 
-   r = amdgpu_dm_verify_lut_sizes(&crtc->base);
-   if (r)
-   return r;
-
	degamma_lut = __extract_blob_lut(crtc->base.degamma_lut, &degamma_size);
	regamma_lut = __extract_blob_lut(crtc->base.gamma_lut, &regamma_size);
 
-- 
2.33.0.882.g93a45727a2-goog


