Re: [PATCH 0/8] drm/i915: HuC loading and authentication for MTL

2023-04-27 Thread Ye, Tony

Acked-by: Tony Ye 

Thanks,

Tony

On 4/27/2023 7:34 PM, Daniele Ceraolo Spurio wrote:

The HuC loading and authentication flow is once again changing and a new
"clear-media only" authentication step is introduced. The flow is as
follows:

1) The HuC is loaded via DMA - same as all non-GSC HuC binaries.

2) The HuC is authenticated by the GuC - this is the same step as
performed for all non-GSC HuC binaries and re-uses the same code, but
it is now resulting in a partial authentication that only allows
clear-media workloads.

3) The HuC is fully authenticated for all workloads by the GSC - this
is done via a new PXP command, submitted via the GSCCS.

The advantage of this new flow is that we can start processing
clear-media workloads without having to wait for the GSC to be ready,
which can take several seconds.

As part of this change, the HuC status getparam has been updated with a
new value to indicate a partial authentication. Note that the media
driver is checking for value > 0 for clear media workloads, so no
changes are required in userspace for that to work.

The SW proxy series [1] has been included, squashed in a single patch,
as some of the patches in this series depend on it. This is not
a functional dependency, the patches just touch the same code; the
proxy patches are planned to be merged first, so it is easier to base
the new patches on top of it to avoid having to rebase them later.

[1]https://patchwork.freedesktop.org/series/115806/
Cc: Alan Previn
Cc: Tony Ye

Daniele Ceraolo Spurio (8):
   DO NOT REVIEW: drm/i915: Add support for MTL GSC SW Proxy
   drm/i915/uc: perma-pin firmwares
   drm/i915/huc: Parse the GSC-enabled HuC binary
   drm/i915/huc: Load GSC-enabled HuC via DMA xfer if the fuse says so
   drm/i915/huc: differentiate the 2 steps of the MTL HuC auth flow
   drm/i915/mtl/huc: auth HuC via GSC
   drm/i915/mtl/huc: Use the media gt for the HuC getparam
   drm/i915/huc: define HuC FW version for MTL

  drivers/gpu/drm/i915/Makefile |   1 +
  drivers/gpu/drm/i915/gt/intel_ggtt.c  |   3 +
  drivers/gpu/drm/i915/gt/intel_gt_irq.c|  22 +-
  drivers/gpu/drm/i915/gt/intel_gt_regs.h   |   3 +
  .../drm/i915/gt/uc/intel_gsc_meu_headers.h|  74 +++
  drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c  | 424 ++
  drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h  |  18 +
  drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c |  89 +++-
  drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h |  17 +-
  .../i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c |   2 +-
  .../i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h |   1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc.c|   2 +-
  drivers/gpu/drm/i915/gt/uc/intel_huc.c| 182 +---
  drivers/gpu/drm/i915/gt/uc/intel_huc.h|  26 +-
  drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c | 214 -
  drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h |   6 +-
  drivers/gpu/drm/i915/gt/uc/intel_huc_print.h  |  21 +
  drivers/gpu/drm/i915/gt/uc/intel_uc.c |  10 +-
  drivers/gpu/drm/i915/gt/uc/intel_uc.h |   2 +
  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 120 ++---
  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h  |   9 +-
  drivers/gpu/drm/i915/i915_getparam.c  |   6 +-
  drivers/gpu/drm/i915/i915_reg.h   |   3 +
  .../drm/i915/pxp/intel_pxp_cmd_interface_43.h |  14 +-
  drivers/gpu/drm/i915/pxp/intel_pxp_huc.c  |   2 +-
  drivers/misc/mei/Kconfig  |   2 +-
  drivers/misc/mei/Makefile |   1 +
  drivers/misc/mei/gsc_proxy/Kconfig|  14 +
  drivers/misc/mei/gsc_proxy/Makefile   |   7 +
  drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c| 208 +
  include/drm/i915_component.h  |   3 +-
  include/drm/i915_gsc_proxy_mei_interface.h|  53 +++
  include/uapi/drm/i915_drm.h   |   3 +-
  33 files changed, 1428 insertions(+), 134 deletions(-)
  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_gsc_meu_headers.h
  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h
  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_huc_print.h
  create mode 100644 drivers/misc/mei/gsc_proxy/Kconfig
  create mode 100644 drivers/misc/mei/gsc_proxy/Makefile
  create mode 100644 drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c
  create mode 100644 include/drm/i915_gsc_proxy_mei_interface.h



Re: [PATCH v4 12/15] drm/i915/huc: stall media submission until HuC is loaded

2022-09-09 Thread Ye, Tony



On 9/8/2022 5:16 PM, Daniele Ceraolo Spurio wrote:

Wait on the fence to be signalled to avoid the submissions finding HuC
not yet loaded.

Signed-off-by: Daniele Ceraolo Spurio 
Cc: Tony Ye 
Reviewed-by: Alan Previn 
---
  drivers/gpu/drm/i915/gt/uc/intel_huc.h |  6 ++
  drivers/gpu/drm/i915/i915_request.c| 24 
  2 files changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index 915d281c1c72..52db03620c60 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -81,6 +81,12 @@ static inline bool intel_huc_is_loaded_by_gsc(const struct 
intel_huc *huc)
return huc->fw.loaded_via_gsc;
  }
  
+static inline bool intel_huc_wait_required(struct intel_huc *huc)

+{
+   return intel_huc_is_used(huc) && intel_huc_is_loaded_by_gsc(huc) &&
+  !intel_huc_is_authenticated(huc);
+}
+
  void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
  
  #endif

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 62fad16a55e8..77f45a3cb01f 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1621,6 +1621,20 @@ i915_request_await_object(struct i915_request *to,
return ret;
  }
  
+static void i915_request_await_huc(struct i915_request *rq)

+{
+   struct intel_huc *huc = &rq->context->engine->gt->uc.huc;
+
+   /* don't stall kernel submissions! */
+   if (!rcu_access_pointer(rq->context->gem_context))
+   return;
+
+   if (intel_huc_wait_required(huc))
+   i915_sw_fence_await_sw_fence(&rq->submit,
+&huc->delayed_load.fence,
+&rq->submitq);
+}
+
  static struct i915_request *
  __i915_request_ensure_parallel_ordering(struct i915_request *rq,
struct intel_timeline *timeline)
@@ -1702,6 +1716,16 @@ __i915_request_add_to_timeline(struct i915_request *rq)
struct intel_timeline *timeline = i915_request_timeline(rq);
struct i915_request *prev;
  
+	/*

+* Media workloads may require HuC, so stall them until HuC loading is
+* complete. Note that HuC not being loaded when a user submission
+* arrives can only happen when HuC is loaded via GSC and in that case
+* we still expect the window between us starting to accept submissions
+* and HuC loading completion to be small (a few hundred ms).
+*/
+   if (rq->engine->class == VIDEO_DECODE_CLASS)
+   i915_request_await_huc(rq);
+


Acked-by: Tony Ye 

Thanks,

Tony


/*
 * Dependency tracking and request ordering along the timeline
 * is special cased so that we can eliminate redundant ordering


Re: [PATCH v4 14/15] drm/i915/huc: define gsc-compatible HuC fw for DG2

2022-09-09 Thread Ye, Tony



On 9/8/2022 5:16 PM, Daniele Ceraolo Spurio wrote:

The fw name is different and we need to record the fact that the blob is
gsc-loaded, so add a new macro to help.

Note: A-step DG2 G10 does not support HuC loading via GSC and would
require a separate firmware to be loaded the legacy way, but that's
not a production stepping so we're not going to bother.

v2: rebase on new fw fetch logic

Signed-off-by: Daniele Ceraolo Spurio 
Cc: Tony Ye 
Reviewed-by: Alan Previn  #v1
---
  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 23 ---
  1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 4792960d9c04..09e06ac8bcf1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -91,7 +91,8 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
fw_def(BROXTON,  0, guc_mmp(bxt,  70, 1, 1)) \
fw_def(SKYLAKE,  0, guc_mmp(skl,  70, 1, 1))
  
-#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp) \

+#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp, huc_gsc) \
+   fw_def(DG2,  0, huc_gsc(dg2)) \
fw_def(ALDERLAKE_P,  0, huc_mmp(tgl,  7, 9, 3)) \
fw_def(ALDERLAKE_S,  0, huc_mmp(tgl,  7, 9, 3)) \
fw_def(DG1,  0, huc_mmp(dg1,  7, 9, 3)) \
@@ -137,6 +138,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
  #define MAKE_HUC_FW_PATH_BLANK(prefix_) \
__MAKE_UC_FW_PATH_BLANK(prefix_, "_huc")
  
+#define MAKE_HUC_FW_PATH_GSC(prefix_) \

+   __MAKE_UC_FW_PATH_BLANK(prefix_, "_huc_gsc")
+
  #define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \
__MAKE_UC_FW_PATH_MMP(prefix_, "_huc_", major_, minor_, patch_)
  
@@ -149,7 +153,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,

MODULE_FIRMWARE(uc_);
  
  INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP)

-INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, 
MAKE_HUC_FW_PATH_MMP)
+INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, 
MAKE_HUC_FW_PATH_MMP, MAKE_HUC_FW_PATH_GSC)
  
  /*

   * The next expansion of the table macros (in __uc_fw_auto_select below) 
provides
@@ -164,6 +168,7 @@ struct __packed uc_fw_blob {
u8 major;
u8 minor;
u8 patch;
+   bool loaded_via_gsc;
  };
  
  #define UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \

@@ -172,16 +177,16 @@ struct __packed uc_fw_blob {
.patch = patch_, \
.path = path_,
  
-#define UC_FW_BLOB_NEW(major_, minor_, patch_, path_) \

+#define UC_FW_BLOB_NEW(major_, minor_, patch_, gsc_, path_) \
{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
- .legacy = false }
+ .legacy = false, .loaded_via_gsc = gsc_ }
  
  #define UC_FW_BLOB_OLD(major_, minor_, patch_, path_) \

{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
  .legacy = true }
  
  #define GUC_FW_BLOB(prefix_, major_, minor_) \

-   UC_FW_BLOB_NEW(major_, minor_, 0, \
+   UC_FW_BLOB_NEW(major_, minor_, 0, false, \
   MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_))
  
  #define GUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \

@@ -189,12 +194,15 @@ struct __packed uc_fw_blob {
   MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_))
  
  #define HUC_FW_BLOB(prefix_) \

-   UC_FW_BLOB_NEW(0, 0, 0, MAKE_HUC_FW_PATH_BLANK(prefix_))
+   UC_FW_BLOB_NEW(0, 0, 0, false, MAKE_HUC_FW_PATH_BLANK(prefix_))
  
  #define HUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \

UC_FW_BLOB_OLD(major_, minor_, patch_, \
   MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_))
  
+#define HUC_FW_BLOB_GSC(prefix_) \

+   UC_FW_BLOB_NEW(0, 0, 0, true, MAKE_HUC_FW_PATH_GSC(prefix_))
+
  struct __packed uc_fw_platform_requirement {
enum intel_platform p;
u8 rev; /* first platform rev using this FW */
@@ -220,7 +228,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct 
intel_uc_fw *uc_fw)
INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, 
GUC_FW_BLOB_MMP)
};
static const struct uc_fw_platform_requirement blobs_huc[] = {
-   INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, 
HUC_FW_BLOB_MMP)
+   INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, 
HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC)
};
static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = 
{
[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
@@ -266,6 +274,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct 
intel_uc_fw *uc_fw)
uc_fw->file_wanted.path = blob->path;
uc_fw->file_wanted.major_ver = blob->major;
uc_fw->file_wanted.minor_ver = blob->minor;
+   uc_fw->loaded_via_gsc = 

Re: [RFC] drm/i915/huc: better define HuC status getparam possible return values.

2022-07-08 Thread Ye, Tony



On 7/8/2022 4:48 PM, Daniele Ceraolo Spurio wrote:

The current HuC status getparam return values are a bit confusing in
regards to what happens in some scenarios. In particular, most of the
error cases cause the ioctl to return an error, but a couple of them,
INIT_FAIL and LOAD_FAIL, are not explicitly handled and neither is
their expected return value documented; these 2 error cases therefore
end up into the catch-all umbrella of the "HuC not loaded" case, with
this case therefore including both some error scenarios and the load
in progress one.

The updates included in this patch change the handling so that all
error cases behave the same way, i.e. return an errno code, and so
that the HuC load in progress case is unambiguous.

The patch also includes a small change to the FW init path to make sure
we always transition to an error state if something goes wrong.

This is an RFC because this is a minor change in behavior for an ioctl.
I'm arguing that this is not an API breakage because the expected return
for the cases I've changed was not well defined, but I want to make sure
no one is in opposition to this. From comments from media driver devs
on a different patch [1], it sounds like the media driver already
expected an errno value for all error cases and is therefore already
compatible with the proposed changes.

[1] https://lists.freedesktop.org/archives/intel-gfx/2022-July/300990.html

Signed-off-by: Daniele Ceraolo Spurio 
Cc: Tvrtko Ursulin 
Cc: Tony Ye 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc.c   |  1 +
  drivers/gpu/drm/i915/gt/uc/intel_huc.c   | 14 +++---
  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c |  1 -
  include/uapi/drm/i915_drm.h  | 16 
  4 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 2706a8c65090..42cb244587f1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -455,6 +455,7 @@ int intel_guc_init(struct intel_guc *guc)
  err_fw:
intel_uc_fw_fini(&guc->fw);
  out:
+   intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_INIT_FAIL);
i915_probe_error(gt->i915, "failed with %d\n", ret);
return ret;
  }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 3bb8838e325a..bddcd3242ad0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -113,6 +113,7 @@ int intel_huc_init(struct intel_huc *huc)
return 0;
  
  out:

+   intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_INIT_FAIL);
drm_info(&i915->drm, "HuC init failed with %d\n", err);
return err;
  }
@@ -200,13 +201,8 @@ static bool huc_is_authenticated(struct intel_huc *huc)
   * This function reads status register to verify if HuC
   * firmware was successfully loaded.
   *
- * Returns:
- *  * -ENODEV if HuC is not present on this platform,
- *  * -EOPNOTSUPP if HuC firmware is disabled,
- *  * -ENOPKG if HuC firmware was not installed,
- *  * -ENOEXEC if HuC firmware is invalid or mismatched,
- *  * 0 if HuC firmware is not running,
- *  * 1 if HuC firmware is authenticated and running.
+ * The return values match what is expected for the I915_PARAM_HUC_STATUS
+ * getparam.
   */
  int intel_huc_check_status(struct intel_huc *huc)
  {
@@ -219,6 +215,10 @@ int intel_huc_check_status(struct intel_huc *huc)
return -ENOPKG;
case INTEL_UC_FIRMWARE_ERROR:
return -ENOEXEC;
+   case INTEL_UC_FIRMWARE_INIT_FAIL:
+   return -ENOMEM;
+   case INTEL_UC_FIRMWARE_LOAD_FAIL:
+   return -EIO;
default:
break;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 27363091e1af..007401397935 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -707,7 +707,6 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
  out_unpin:
i915_gem_object_unpin_pages(uc_fw->obj);
  out:
-   intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL);
return err;
  }
  
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h

index 094f6e377793..0950ef0d598c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -645,6 +645,22 @@ typedef struct drm_i915_irq_wait {
   */
  #define   I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP  (1ul << 5)
  
+/*

+ * Query the status of HuC load.
+ *
+ * The query can fail in the following scenarios with the listed error codes:
+ *  -ENODEV if HuC is not present on this platform,
+ *  -EOPNOTSUPP if HuC firmware usage is disabled,
+ *  -ENOPKG if HuC firmware fetch failed,
+ *  -ENOEXEC if HuC firmware is invalid or mismatched,
+ *  -ENOMEM if i915 failed to prepare the FW objects for transfer to the uC,
+ *  -EIO if the FW transfer or the FW authentication failed.
+ *

Re: [PATCH v3] uapi/drm/i915: Document memory residency and Flat-CCS capability of obj

2022-05-18 Thread Ye, Tony

Media driver never creates a BO with more than one backing regions.

Acked-by: Tony Ye 

Thanks,

Tony

On 5/2/2022 7:15 AM, Ramalingam C wrote:

Capture the impact of memory region preference list of the objects, on
their memory residency and Flat-CCS capability.

v2:
   Fix the Flat-CCS capability of an obj with {lmem, smem} preference
   list [Thomas]
v3:
   Reworded the doc [Matt]

Signed-off-by: Ramalingam C 
cc: Matthew Auld 
cc: Thomas Hellstrom 
cc: Daniel Vetter 
cc: Jon Bloomfield 
cc: Lionel Landwerlin 
cc: Kenneth Graunke 
cc: mesa-...@lists.freedesktop.org
cc: Jordan Justen 
cc: Tony Ye 
Reviewed-by: Matthew Auld 
---
  include/uapi/drm/i915_drm.h | 16 
  1 file changed, 16 insertions(+)

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a2def7b27009..b7e1c2fe08dc 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3443,6 +3443,22 @@ struct drm_i915_gem_create_ext {
   * At which point we get the object handle in &drm_i915_gem_create_ext.handle,
   * along with the final object size in &drm_i915_gem_create_ext.size, which
   * should account for any rounding up, if required.
+ *
+ * Note that userspace has no means of knowing the current backing region
+ * for objects where @num_regions is larger than one. The kernel will only
+ * ensure that the priority order of the @regions array is honoured, either
+ * when initially placing the object, or when moving memory around due to
+ * memory pressure.
+ *
+ * On Flat-CCS capable HW, compression is supported for the objects residing
+ * in I915_MEMORY_CLASS_DEVICE. When such objects (compressed) have other
+ * memory class in @regions and migrated (by I915, due to memory
+ * constraints) to the non I915_MEMORY_CLASS_DEVICE region, then I915 needs to
+ * decompress the content. But I915 doesn't have the required information to
+ * decompress the userspace compressed objects.
+ *
+ * So I915 supports Flat-CCS, only on the objects which can reside only on
+ * I915_MEMORY_CLASS_DEVICE regions.
   */
  struct drm_i915_gem_create_ext_memory_regions {
/** @base: Extension link. See struct i915_user_extension. */


Re: [PATCH 2/2] drm/doc/rfc: i915 new parallel submission uAPI plan

2021-06-18 Thread Ye, Tony
Acked-by: Tony Ye 

Regards,
Tony

On 6/11/2021 4:40 PM, Matthew Brost wrote:
> Add entry for i915 new parallel submission uAPI plan.
> 
> v2:
>   (Daniel Vetter):
>- Expand logical order explanation
>- Add dummy header
>- Only allow N BBs in execbuf IOCTL
>- Configure parallel submission per slot not per gem context
> v3:
>   (Marcin Ślusarz):
>- Lots of typos / bad English fixed
>   (Tvrtko Ursulin):
>- Consistent pseudo code, clean up wording in descriptions
> v4:
>   (Daniel Vetter)
>- Drop flags
>- Add kernel doc
>- Reword a few things / fix typos
>   (Tvrtko)
>- Reword a few things / fix typos
> 
> Cc: Tvrtko Ursulin 
> Cc: Tony Ye 
> CC: Carl Zhang 
> Cc: Daniel Vetter 
> Cc: Jason Ekstrand 
> Signed-off-by: Matthew Brost 
> Acked-by: Daniel Vetter 
> ---
>   Documentation/gpu/rfc/i915_parallel_execbuf.h | 117 ++
>   Documentation/gpu/rfc/i915_scheduler.rst  |  59 -
>   2 files changed, 175 insertions(+), 1 deletion(-)
>   create mode 100644 Documentation/gpu/rfc/i915_parallel_execbuf.h
> 
> diff --git a/Documentation/gpu/rfc/i915_parallel_execbuf.h 
> b/Documentation/gpu/rfc/i915_parallel_execbuf.h
> new file mode 100644
> index ..c22af3a359e4
> --- /dev/null
> +++ b/Documentation/gpu/rfc/i915_parallel_execbuf.h
> @@ -0,0 +1,117 @@
> +#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see 
> i915_context_engines_parallel_submit */
> +
> +/**
> + * struct drm_i915_context_engines_parallel_submit - Configure engine for
> + * parallel submission.
> + *
> + * Setup a slot in the context engine map to allow multiple BBs to be 
> submitted
> + * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the 
> GPU
> + * in parallel. Multiple hardware contexts are created internally in the i915
> + * to run these BBs. Once a slot is configured for N BBs only N BBs can be
> + * submitted in each execbuf IOCTL and this is implicit behavior e.g. The 
> user
> + * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows 
> how
> + * many BBs there are based on the slot's configuration. The N BBs are the 
> last
> + * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
> + *
> + * The default placement behavior is to create implicit bonds between each
> + * context if each context maps to more than 1 physical engine (e.g. context 
> is
> + * a virtual engine). Also we only allow contexts of same engine class and 
> these
> + * contexts must be in logically contiguous order. Examples of the placement
> + * behavior described below. Lastly, the default is to not allow BBs to
> + * be preempted mid-BB, rather insert coordinated preemption on all hardware
> + * contexts between each set of BBs. Flags may be added in the future to 
> change
> + * both of these default behaviors.
> + *
> + * Returns -EINVAL if hardware context placement configuration is invalid or 
> if
> + * the placement configuration isn't supported on the platform / submission
> + * interface.
> + * Returns -ENODEV if extension isn't supported on the platform / submission
> + * interface.
> + *
> + * .. code-block::
> + *
> + *   Example 1 pseudo code:
> + *   CS[X] = generic engine of same class, logical instance X
> + *   INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
> + *   set_engines(INVALID)
> + *   set_parallel(engine_index=0, width=2, num_siblings=1,
> + *engines=CS[0],CS[1])
> + *
> + *   Results in the following valid placement:
> + *   CS[0], CS[1]
> + *
> + *   Example 2 pseudo code:
> + *   CS[X] = generic engine of same class, logical instance X
> + *   INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
> + *   set_engines(INVALID)
> + *   set_parallel(engine_index=0, width=2, num_siblings=2,
> + *engines=CS[0],CS[2],CS[1],CS[3])
> + *
> + *   Results in the following valid placements:
> + *   CS[0], CS[1]
> + *   CS[2], CS[3]
> + *
> + *   This can also be thought of as 2 virtual engines described by a 2-D array
> + *   in the engines field with bonds placed between each index of the
> + *   virtual engines. e.g. CS[0] is bonded to CS[1], CS[2] is bonded to
> + *   CS[3].
> + *   VE[0] = CS[0], CS[2]
> + *   VE[1] = CS[1], CS[3]
> + *
> + *   Example 3 pseudo code:
> + *   CS[X] = generic engine of same class, logical instance X
> + *   INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
> + *   set_engines(INVALID)
> + *   set_parallel(engine_index=0, width=2, num_siblings=2,
> + *engines=CS[0],CS[1],CS[1],CS[3])
> + *
> + *   Results in the following valid and invalid placements:
> + *   CS[0], CS[1]
> + *   CS[1], CS[3] - Not logically contiguous, return -EINVAL
> + */
> +struct drm_i915_context_engines_parallel_submit {
> + /**
> +  * @base: base user extension.
> +  */
> + struct i915_user_extension base;
> +
> + /**
> +  * @engine_index: slot for parallel engine
> +  */
> + __u16