[PATCH 16/19] drm/i915/guc: Use a single pass to calculate regset
The ADS initialization was using 2 passes to calculate the regset sent to GuC to initialize each engine: the first pass to just have the final object size and the second to set each register in place in the final gem object. However in order to maintain an ordered set of registers to pass to guc, each register needs to be added and moved in the final array. The second phase may actually happen in IO memory rather than system memory and accessing IO memory by simply dereferencing the pointer doesn't work on all architectures. Other places of the ADS initialization were converted to use the iosys_map API, but here there may be a lot more accesses to IO memory. So, instead of following that same approach, convert the regset initialization to calculate the final array in 1 pass and in the second pass that array is just copied to its final location, updating the pointers for each engine written to the ADS blob. One important thing is that struct temp_regset now has different semantics: `registers` continues to track the registers of a single engine, however the other fields are updated together, according to the newly added `storage`, which tracks the memory allocated for all the registers. So rename some of these fields and add a __mmio_reg_add(): this function (possibly) allocates memory and operates on the storage pointer while guc_mmio_reg_add() continues to manage the registers pointer. On a Tiger Lake system using enable_guc=3, the following log message is now seen: [ 187.334310] i915 :00:02.0: [drm:intel_guc_ads_create [i915]] Used 4 KB for temporary ADS regset This change has also been tested on an ARM64 host with DG2 and other discrete graphics cards. 
v2 (Daniele): - Fix leaking tempset on error path - Add comments on struct temp_regset to document the meaning of each field Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 7 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 128 + 2 files changed, 90 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 9b9ba79f7594..f857e9190750 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -152,6 +152,13 @@ struct intel_guc { struct iosys_map ads_map; /** @ads_regset_size: size of the save/restore regsets in the ADS */ u32 ads_regset_size; + /** +* @ads_regset_count: number of save/restore registers in the ADS for +* each engine +*/ + u32 ads_regset_count[I915_NUM_ENGINES]; + /** @ads_regset: save/restore regsets in the ADS */ + struct guc_mmio_reg *ads_regset; /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; /** @ads_engine_usage_size: size of engine usage in the ADS */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 3a4558948c31..c040d8d8d7a4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -226,14 +226,18 @@ static void guc_mapping_table_init(struct intel_gt *gt, /* * The save/restore register list must be pre-calculated to a temporary - * buffer of driver defined size before it can be generated in place - * inside the ADS. + * buffer before it can be copied inside the ADS. 
*/ -#define MAX_MMIO_REGS 128 /* Arbitrary size, increase as needed */ struct temp_regset { + /* +* ptr to the section of the storage for the engine currently being +* worked on +*/ struct guc_mmio_reg *registers; - u32 used; - u32 size; + /* ptr to the base of the allocated storage for all engines */ + struct guc_mmio_reg *storage; + u32 storage_used; + u32 storage_max; }; static int guc_mmio_reg_cmp(const void *a, const void *b) @@ -244,18 +248,44 @@ static int guc_mmio_reg_cmp(const void *a, const void *b) return (int)ra->offset - (int)rb->offset; } +static struct guc_mmio_reg * __must_check +__mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg) +{ + u32 pos = regset->storage_used; + struct guc_mmio_reg *slot; + + if (pos >= regset->storage_max) { + size_t size = ALIGN((pos + 1) * sizeof(*slot), PAGE_SIZE); + struct guc_mmio_reg *r = krealloc(regset->storage, + size, GFP_KERNEL); + if (!r) { + WARN_ONCE(1, "Incomplete regset list: can't add register (%d)\n", + -ENOMEM); + return ERR_PTR(-ENOMEM); + } + + regset->registers = r + (regset->registers - regset->storage); +
Re: [PATCH 16/19] drm/i915/guc: Use a single pass to calculate regset
On Tue, Feb 01, 2022 at 02:42:20PM -0800, Daniele Ceraolo Spurio wrote: On 1/26/2022 12:36 PM, Lucas De Marchi wrote: The ADS initialitazion was using 2 passes to calculate the regset sent to GuC to initialize each engine: the first pass to just have the final object size and the second to set each register in place in the final gem object. However in order to maintain an ordered set of registers to pass to guc, each register needs to be added and moved in the final array. The second phase may actually happen in IO memory rather than system memory and accessing IO memory by simply dereferencing the pointer doesn't work on all architectures. Other places of the ADS initializaition were converted to use the dma_buf_map API, but here there may be a lot more accesses to IO memory. So, instead of following that same approach, convert the regset initialization to calculate the final array in 1 pass and in the second pass that array is just copied to its final location, updating the pointers for each engine written to the ADS blob. One important thing is that struct temp_regset now have different semantics: `registers` continues to track the registers of a single engine, however the other fields are updated together, according to the newly added `storage`, which tracks the memory allocated for all the registers. So rename some of these fields and add a __mmio_reg_add(): this function (possibly) allocates memory and operates on the storage pointer while guc_mmio_reg_add() continues to manage the registers pointer. On a Tiger Lake system using enable_guc=3, the following log message is now seen: [ 187.334310] i915 :00:02.0: [drm:intel_guc_ads_create [i915]] Used 4 KB for temporary ADS regset This change has also been tested on an ARM64 host with DG2 and other discrete graphics cards. 
Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 7 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 117 + 2 files changed, 79 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index e2e0df1c3d91..4c852eee3ad8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -152,6 +152,13 @@ struct intel_guc { struct dma_buf_map ads_map; /** @ads_regset_size: size of the save/restore regsets in the ADS */ u32 ads_regset_size; + /** +* @ads_regset_count: number of save/restore registers in the ADS for +* each engine +*/ + u32 ads_regset_count[I915_NUM_ENGINES]; + /** @ads_regset: save/restore regsets in the ADS */ + struct guc_mmio_reg *ads_regset; /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; /** @ads_engine_usage_size: size of engine usage in the ADS */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 73ca34de44f7..390101ee3661 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -226,14 +226,13 @@ static void guc_mapping_table_init(struct intel_gt *gt, /* * The save/restore register list must be pre-calculated to a temporary - * buffer of driver defined size before it can be generated in place - * inside the ADS. + * buffer before it can be copied inside the ADS. */ -#define MAX_MMIO_REGS 128 /* Arbitrary size, increase as needed */ struct temp_regset { struct guc_mmio_reg *registers; - u32 used; - u32 size; + struct guc_mmio_reg *storage; I think this could use a comment to distinguish between registers and storage. 
Something like.: /* ptr to the base of the allocated storage for all engines */ struct guc_mmio_reg *storage; /* ptr to the section of the storage for the engine currently being worked on */ struct guc_mmio_reg *registers; agreed, I will add that + u32 storage_used; + u32 storage_max; }; static int guc_mmio_reg_cmp(const void *a, const void *b) @@ -244,18 +243,44 @@ static int guc_mmio_reg_cmp(const void *a, const void *b) return (int)ra->offset - (int)rb->offset; } +static struct guc_mmio_reg * __must_check +__mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg) +{ + u32 pos = regset->storage_used; + struct guc_mmio_reg *slot; + + if (pos >= regset->storage_max) { + size_t size = ALIGN((pos + 1) * sizeof(*slot), PAGE_SIZE); + struct guc_mmio_reg *r = krealloc(regset->storage, + size, GFP_KERNEL); + if (!r) { + WARN_ONCE(1, "Incomplete regset list: can't add register (%d)\n", + -ENOMEM); +
Re: [PATCH 16/19] drm/i915/guc: Use a single pass to calculate regset
On 1/26/2022 12:36 PM, Lucas De Marchi wrote: The ADS initialitazion was using 2 passes to calculate the regset sent to GuC to initialize each engine: the first pass to just have the final object size and the second to set each register in place in the final gem object. However in order to maintain an ordered set of registers to pass to guc, each register needs to be added and moved in the final array. The second phase may actually happen in IO memory rather than system memory and accessing IO memory by simply dereferencing the pointer doesn't work on all architectures. Other places of the ADS initializaition were converted to use the dma_buf_map API, but here there may be a lot more accesses to IO memory. So, instead of following that same approach, convert the regset initialization to calculate the final array in 1 pass and in the second pass that array is just copied to its final location, updating the pointers for each engine written to the ADS blob. One important thing is that struct temp_regset now have different semantics: `registers` continues to track the registers of a single engine, however the other fields are updated together, according to the newly added `storage`, which tracks the memory allocated for all the registers. So rename some of these fields and add a __mmio_reg_add(): this function (possibly) allocates memory and operates on the storage pointer while guc_mmio_reg_add() continues to manage the registers pointer. On a Tiger Lake system using enable_guc=3, the following log message is now seen: [ 187.334310] i915 :00:02.0: [drm:intel_guc_ads_create [i915]] Used 4 KB for temporary ADS regset This change has also been tested on an ARM64 host with DG2 and other discrete graphics cards. 
Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 7 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 117 + 2 files changed, 79 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index e2e0df1c3d91..4c852eee3ad8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -152,6 +152,13 @@ struct intel_guc { struct dma_buf_map ads_map; /** @ads_regset_size: size of the save/restore regsets in the ADS */ u32 ads_regset_size; + /** +* @ads_regset_count: number of save/restore registers in the ADS for +* each engine +*/ + u32 ads_regset_count[I915_NUM_ENGINES]; + /** @ads_regset: save/restore regsets in the ADS */ + struct guc_mmio_reg *ads_regset; /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; /** @ads_engine_usage_size: size of engine usage in the ADS */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 73ca34de44f7..390101ee3661 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -226,14 +226,13 @@ static void guc_mapping_table_init(struct intel_gt *gt, /* * The save/restore register list must be pre-calculated to a temporary - * buffer of driver defined size before it can be generated in place - * inside the ADS. + * buffer before it can be copied inside the ADS. */ -#define MAX_MMIO_REGS 128 /* Arbitrary size, increase as needed */ struct temp_regset { struct guc_mmio_reg *registers; - u32 used; - u32 size; + struct guc_mmio_reg *storage; I think this could use a comment to distinguish between registers and storage. 
Something like.: /* ptr to the base of the allocated storage for all engines */ struct guc_mmio_reg *storage; /* ptr to the section of the storage for the engine currently being worked on */ struct guc_mmio_reg *registers; + u32 storage_used; + u32 storage_max; }; static int guc_mmio_reg_cmp(const void *a, const void *b) @@ -244,18 +243,44 @@ static int guc_mmio_reg_cmp(const void *a, const void *b) return (int)ra->offset - (int)rb->offset; } +static struct guc_mmio_reg * __must_check +__mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg) +{ + u32 pos = regset->storage_used; + struct guc_mmio_reg *slot; + + if (pos >= regset->storage_max) { + size_t size = ALIGN((pos + 1) * sizeof(*slot), PAGE_SIZE); + struct guc_mmio_reg *r = krealloc(regset->storage, + size, GFP_KERNEL); + if (!r) { + WARN_ONCE(1, "Incomplete regset list: can't add register (%d)\n", + -ENOMEM); + return ERR_PTR(-ENOMEM); + } + + regset->registers
Re: [PATCH 16/19] drm/i915/guc: Use a single pass to calculate regset
Hi Lucas, Thank you for the patch! Yet something to improve: [auto build test ERROR on drm-tip/drm-tip] [also build test ERROR on next-20220125] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next drm/drm-next tegra-drm/drm/tegra/for-next linus/master airlied/drm-next v5.17-rc1] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Lucas-De-Marchi/drm-i915-guc-Refactor-ADS-access-to-use-dma_buf_map/20220127-043912 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: i386-allyesconfig (https://download.01.org/0day-ci/archive/20220127/202201271208.kelpe3mn-...@intel.com/config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/313757d9ed833acea4ee2bb0e3f3565d6efcf3cc git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Lucas-De-Marchi/drm-i915-guc-Refactor-ADS-access-to-use-dma_buf_map/20220127-043912 git checkout 313757d9ed833acea4ee2bb0e3f3565d6efcf3cc # save the config file to linux build tree mkdir build_dir make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All errors (new ones prefixed by >>): In file included from include/drm/drm_mm.h:51, from drivers/gpu/drm/i915/i915_vma.h:31, from drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h:13, from drivers/gpu/drm/i915/gt/uc/intel_guc.h:20, from drivers/gpu/drm/i915/gt/uc/intel_uc.h:9, from drivers/gpu/drm/i915/gt/intel_gt_types.h:18, from drivers/gpu/drm/i915/gt/intel_gt.h:10, from drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c:9: drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c: In function 'guc_mmio_reg_state_create': >> drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c:369:38: error: format '%lu' >> expects argument of type 'long unsigned int', 
but argument 4 has type 'u32' >> {aka 'unsigned int'} [-Werror=format=] 369 | drm_dbg(&guc_to_gt(guc)->i915->drm, "Used %lu KB for temporary ADS regset\n", | ^~~~ 370 | (temp_set.storage_max * sizeof(struct guc_mmio_reg)) >> 10); | ~~ || |u32 {aka unsigned int} include/drm/drm_print.h:461:56: note: in definition of macro 'drm_dbg' 461 | drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_DRIVER, fmt, ##__VA_ARGS__) |^~~ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c:369:46: note: format string is defined here 369 | drm_dbg(&guc_to_gt(guc)->i915->drm, "Used %lu KB for temporary ADS regset\n", |~~^ | | | long unsigned int |%u cc1: all warnings being treated as errors vim +369 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 348 349 static long guc_mmio_reg_state_create(struct intel_guc *guc) 350 { 351 struct intel_gt *gt = guc_to_gt(guc); 352 struct intel_engine_cs *engine; 353 enum intel_engine_id id; 354 struct temp_regset temp_set = {}; 355 long total = 0; 356 357 for_each_engine(engine, gt, id) { 358 u32 used = temp_set.storage_used; 359 360 if (guc_mmio_regset_init(&temp_set, engine) < 0) 361 return -1; 362 363 guc->ads_regset_count[id] = temp_set.storage_used - used; 364 total += guc->ads_regset_count[id]; 365 } 366 367 guc->ads_regset = temp_set.storage; 368 > 369 drm_dbg(&guc_to_gt(guc)->i915->drm, "Used %lu KB for temporary > ADS regset\n", 370 (temp_set.storage_max * sizeof(struct guc_mmio_reg)) >> 10); 371 372 return total * sizeof(struct guc_mmio_reg); 373 } 374 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org
Re: [PATCH 16/19] drm/i915/guc: Use a single pass to calculate regset
Hi Lucas, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on drm-tip/drm-tip] [also build test WARNING on next-20220125] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next drm/drm-next tegra-drm/drm/tegra/for-next linus/master airlied/drm-next v5.17-rc1] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Lucas-De-Marchi/drm-i915-guc-Refactor-ADS-access-to-use-dma_buf_map/20220127-043912 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: i386-randconfig-a011 (https://download.01.org/0day-ci/archive/20220127/202201270902.hcre2frp-...@intel.com/config) compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 2a1b7aa016c0f4b5598806205bdfbab1ea2d92c4) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/313757d9ed833acea4ee2bb0e3f3565d6efcf3cc git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Lucas-De-Marchi/drm-i915-guc-Refactor-ADS-access-to-use-dma_buf_map/20220127-043912 git checkout 313757d9ed833acea4ee2bb0e3f3565d6efcf3cc # save the config file to linux build tree mkdir build_dir COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/gpu/drm/i915/ If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All warnings (new ones prefixed by >>): >> drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c:370:3: warning: format specifies >> type 'unsigned long' but the argument has type 'unsigned int' [-Wformat] (temp_set.storage_max * sizeof(struct guc_mmio_reg)) >> 10); ^~ include/drm/drm_print.h:461:63: note: expanded from macro 'drm_dbg' 
drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_DRIVER, fmt, ##__VA_ARGS__) ~~~ ^~~ 1 warning generated. vim +370 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 348 349 static long guc_mmio_reg_state_create(struct intel_guc *guc) 350 { 351 struct intel_gt *gt = guc_to_gt(guc); 352 struct intel_engine_cs *engine; 353 enum intel_engine_id id; 354 struct temp_regset temp_set = {}; 355 long total = 0; 356 357 for_each_engine(engine, gt, id) { 358 u32 used = temp_set.storage_used; 359 360 if (guc_mmio_regset_init(&temp_set, engine) < 0) 361 return -1; 362 363 guc->ads_regset_count[id] = temp_set.storage_used - used; 364 total += guc->ads_regset_count[id]; 365 } 366 367 guc->ads_regset = temp_set.storage; 368 369 drm_dbg(&guc_to_gt(guc)->i915->drm, "Used %lu KB for temporary ADS regset\n", > 370 (temp_set.storage_max * sizeof(struct guc_mmio_reg)) >> > 10); 371 372 return total * sizeof(struct guc_mmio_reg); 373 } 374 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org
Re: [PATCH 16/19] drm/i915/guc: Use a single pass to calculate regset
Hi Lucas, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on drm-tip/drm-tip] [also build test WARNING on next-20220125] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next drm/drm-next tegra-drm/drm/tegra/for-next linus/master airlied/drm-next v5.17-rc1] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Lucas-De-Marchi/drm-i915-guc-Refactor-ADS-access-to-use-dma_buf_map/20220127-043912 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: i386-randconfig-m021-20220124 (https://download.01.org/0day-ci/archive/20220127/202201270827.clihfdpe-...@intel.com/config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/313757d9ed833acea4ee2bb0e3f3565d6efcf3cc git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Lucas-De-Marchi/drm-i915-guc-Refactor-ADS-access-to-use-dma_buf_map/20220127-043912 git checkout 313757d9ed833acea4ee2bb0e3f3565d6efcf3cc # save the config file to linux build tree mkdir build_dir make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/gpu/drm/i915/ If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All warnings (new ones prefixed by >>): In file included from include/drm/drm_mm.h:51, from drivers/gpu/drm/i915/i915_vma.h:31, from drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h:13, from drivers/gpu/drm/i915/gt/uc/intel_guc.h:20, from drivers/gpu/drm/i915/gt/uc/intel_uc.h:9, from drivers/gpu/drm/i915/gt/intel_gt_types.h:18, from drivers/gpu/drm/i915/gt/intel_gt.h:10, from drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c:9: drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c: In function 'guc_mmio_reg_state_create': >> drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c:369:38: warning: format '%lu' >> 
expects argument of type 'long unsigned int', but argument 4 has type 'u32' >> {aka 'unsigned int'} [-Wformat=] 369 | drm_dbg(&guc_to_gt(guc)->i915->drm, "Used %lu KB for temporary ADS regset\n", | ^~~~ 370 | (temp_set.storage_max * sizeof(struct guc_mmio_reg)) >> 10); | ~~ || |u32 {aka unsigned int} include/drm/drm_print.h:461:56: note: in definition of macro 'drm_dbg' 461 | drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_DRIVER, fmt, ##__VA_ARGS__) |^~~ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c:369:46: note: format string is defined here 369 | drm_dbg(&guc_to_gt(guc)->i915->drm, "Used %lu KB for temporary ADS regset\n", |~~^ | | | long unsigned int |%u vim +369 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 348 349 static long guc_mmio_reg_state_create(struct intel_guc *guc) 350 { 351 struct intel_gt *gt = guc_to_gt(guc); 352 struct intel_engine_cs *engine; 353 enum intel_engine_id id; 354 struct temp_regset temp_set = {}; 355 long total = 0; 356 357 for_each_engine(engine, gt, id) { 358 u32 used = temp_set.storage_used; 359 360 if (guc_mmio_regset_init(&temp_set, engine) < 0) 361 return -1; 362 363 guc->ads_regset_count[id] = temp_set.storage_used - used; 364 total += guc->ads_regset_count[id]; 365 } 366 367 guc->ads_regset = temp_set.storage; 368 > 369 drm_dbg(&guc_to_gt(guc)->i915->drm, "Used %lu KB for temporary > ADS regset\n", 370 (temp_set.storage_max * sizeof(struct guc_mmio_reg)) >> 10); 371 372 return total * sizeof(struct guc_mmio_reg); 373 } 374 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org
[PATCH 16/19] drm/i915/guc: Use a single pass to calculate regset
The ADS initialization was using 2 passes to calculate the regset sent to GuC to initialize each engine: the first pass to just have the final object size and the second to set each register in place in the final gem object. However in order to maintain an ordered set of registers to pass to guc, each register needs to be added and moved in the final array. The second phase may actually happen in IO memory rather than system memory and accessing IO memory by simply dereferencing the pointer doesn't work on all architectures. Other places of the ADS initialization were converted to use the dma_buf_map API, but here there may be a lot more accesses to IO memory. So, instead of following that same approach, convert the regset initialization to calculate the final array in 1 pass and in the second pass that array is just copied to its final location, updating the pointers for each engine written to the ADS blob. One important thing is that struct temp_regset now has different semantics: `registers` continues to track the registers of a single engine, however the other fields are updated together, according to the newly added `storage`, which tracks the memory allocated for all the registers. So rename some of these fields and add a __mmio_reg_add(): this function (possibly) allocates memory and operates on the storage pointer while guc_mmio_reg_add() continues to manage the registers pointer. On a Tiger Lake system using enable_guc=3, the following log message is now seen: [ 187.334310] i915 :00:02.0: [drm:intel_guc_ads_create [i915]] Used 4 KB for temporary ADS regset This change has also been tested on an ARM64 host with DG2 and other discrete graphics cards. 
Cc: Matt Roper Cc: Thomas Hellström Cc: Daniel Vetter Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 7 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 117 + 2 files changed, 79 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index e2e0df1c3d91..4c852eee3ad8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -152,6 +152,13 @@ struct intel_guc { struct dma_buf_map ads_map; /** @ads_regset_size: size of the save/restore regsets in the ADS */ u32 ads_regset_size; + /** +* @ads_regset_count: number of save/restore registers in the ADS for +* each engine +*/ + u32 ads_regset_count[I915_NUM_ENGINES]; + /** @ads_regset: save/restore regsets in the ADS */ + struct guc_mmio_reg *ads_regset; /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; /** @ads_engine_usage_size: size of engine usage in the ADS */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 73ca34de44f7..390101ee3661 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -226,14 +226,13 @@ static void guc_mapping_table_init(struct intel_gt *gt, /* * The save/restore register list must be pre-calculated to a temporary - * buffer of driver defined size before it can be generated in place - * inside the ADS. + * buffer before it can be copied inside the ADS. 
*/ -#define MAX_MMIO_REGS 128 /* Arbitrary size, increase as needed */ struct temp_regset { struct guc_mmio_reg *registers; - u32 used; - u32 size; + struct guc_mmio_reg *storage; + u32 storage_used; + u32 storage_max; }; static int guc_mmio_reg_cmp(const void *a, const void *b) @@ -244,18 +243,44 @@ static int guc_mmio_reg_cmp(const void *a, const void *b) return (int)ra->offset - (int)rb->offset; } +static struct guc_mmio_reg * __must_check +__mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg) +{ + u32 pos = regset->storage_used; + struct guc_mmio_reg *slot; + + if (pos >= regset->storage_max) { + size_t size = ALIGN((pos + 1) * sizeof(*slot), PAGE_SIZE); + struct guc_mmio_reg *r = krealloc(regset->storage, + size, GFP_KERNEL); + if (!r) { + WARN_ONCE(1, "Incomplete regset list: can't add register (%d)\n", + -ENOMEM); + return ERR_PTR(-ENOMEM); + } + + regset->registers = r + (regset->registers - regset->storage); + regset->storage = r; + regset->storage_max = size / sizeof(*slot); + } + + slot = &regset->storage[pos]; + regset->storage_used++; + *slot = *reg; + + return slot; +} + static long __must_check guc_mmio_reg_add(struct temp_regset *regset,