Module: Mesa Branch: staging/21.3 Commit: bdb84ef2a02a65faffe3d7b9e564a54e43df4b18 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bdb84ef2a02a65faffe3d7b9e564a54e43df4b18
Author: Lionel Landwerlin <[email protected]> Date: Sat Apr 3 13:28:50 2021 +0300 intel/dev: fix subslice/eu total computations with some fused configurations When a device has its first slice/subslice fused off, we can't use the number of slices/subslices to iterate the mask array. v2: Fix spelling (Marcin) Use size_t for iterator (Marcin) Signed-off-by: Lionel Landwerlin <[email protected]> Reported-by: Matt Roper <[email protected]> Cc: <[email protected]> Reviewed-by: Francisco Jerez <[email protected]> Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5601 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10015> (cherry picked from commit a543a9440437df8d7498bc76353e828fcc66f5e5) --- .pick_status.json | 2 +- src/gallium/drivers/iris/iris_screen.c | 5 +---- src/intel/dev/intel_dev_info.c | 8 ++++---- src/intel/dev/intel_device_info.c | 5 ++++- src/intel/dev/intel_device_info.h | 31 +++++++++++++++++++++++++++++++ src/intel/dev/intel_device_info_test.c | 33 +++++++++++++++++++++++++++++++++ 6 files changed, 74 insertions(+), 10 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 5016cbb0415..982d34499ba 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -85,7 +85,7 @@ "description": "intel/dev: fix subslice/eu total computations with some fused configurations", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c index afc83fa614e..f7a4adfdd7b 100644 --- a/src/gallium/drivers/iris/iris_screen.c +++ b/src/gallium/drivers/iris/iris_screen.c @@ -572,10 +572,7 @@ iris_get_compute_param(struct pipe_screen *pscreen, RET((uint32_t []) { 400 }); /* TODO */ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: { - unsigned total_num_subslices = 0; - for (unsigned i = 0; i < devinfo->num_slices; i++) - total_num_subslices += devinfo->num_subslices[i]; - RET((uint32_t []) { total_num_subslices }); + RET((uint32_t []) { intel_device_info_subslice_total(devinfo) }); } case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: diff --git a/src/intel/dev/intel_dev_info.c b/src/intel/dev/intel_dev_info.c index 2c067319b7a..40258b48171 100644 --- a/src/intel/dev/intel_dev_info.c +++ b/src/intel/dev/intel_dev_info.c @@ -79,18 +79,18 @@ main(int argc, char *argv[]) const char *subslice_name = devinfo.ver >= 12 ? "dualsubslice" : "subslice"; uint32_t n_s = 0, n_ss = 0, n_eus = 0; - for (unsigned s = 0; s < devinfo.num_slices; s++) { + for (unsigned s = 0; s < devinfo.max_slices; s++) { n_s += (devinfo.slice_masks & (1u << s)) ? 1 : 0; - for (unsigned ss = 0; ss < devinfo.num_subslices[s]; ss++) { + for (unsigned ss = 0; ss < devinfo.max_subslices_per_slice; ss++) { fprintf(stdout, " slice%u.%s%u: ", s, subslice_name, ss); if (intel_device_info_subslice_available(&devinfo, s, ss)) { n_ss++; - for (unsigned eu = 0; eu < devinfo.num_eu_per_subslice; eu++) { + for (unsigned eu = 0; eu < devinfo.max_eu_per_subslice; eu++) { n_eus += intel_device_info_eu_available(&devinfo, s, ss, eu) ? 1 : 0; fprintf(stdout, "%s", intel_device_info_eu_available(&devinfo, s, ss, eu) ? "1" : "0"); } } else { - fprintf(stderr, "fused"); + fprintf(stdout, "fused"); } fprintf(stdout, "\n"); } diff --git a/src/intel/dev/intel_device_info.c b/src/intel/dev/intel_device_info.c index 06c15661ff4..8c3cbe9c47e 100644 --- a/src/intel/dev/intel_device_info.c +++ b/src/intel/dev/intel_device_info.c @@ -1101,6 +1101,9 @@ update_from_topology(struct intel_device_info *devinfo, assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8)); memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8)); devinfo->num_slices = __builtin_popcount(devinfo->slice_masks); + devinfo->max_slices = topology->max_slices; + devinfo->max_subslices_per_slice = topology->max_subslices; + devinfo->max_eu_per_subslice = topology->max_eus_per_subslice; uint32_t subslice_mask_len = topology->max_slices * topology->subslice_stride; @@ -1654,7 +1657,7 @@ intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo) devinfo->has_tiling_uapi = has_get_tiling(fd); devinfo->subslice_total = 0; - for (uint32_t i = 0; i < devinfo->num_slices; i++) + for (uint32_t i = 0; i < devinfo->max_slices; i++) devinfo->subslice_total += __builtin_popcount(devinfo->subslice_masks[i]); /* Gfx7 and older do not support EU/Subslice info */ diff --git a/src/intel/dev/intel_device_info.h b/src/intel/dev/intel_device_info.h index b26296bcb9d..2e3e459d9a2 100644 --- a/src/intel/dev/intel_device_info.h +++ b/src/intel/dev/intel_device_info.h @@ -135,11 +135,24 @@ struct intel_device_info */ unsigned num_slices; + /** + * Maximum number of slices present on this device (can be more than + * num_slices if some slices are fused). + */ + unsigned max_slices; + /** * Number of subslices for each slice (used to be uniform until CNL). */ unsigned num_subslices[INTEL_DEVICE_MAX_SUBSLICES]; + /** + * Maximum number of subslices per slice present on this device (can be + * more than the maximum value in the num_subslices[] array if some + * subslices are fused). + */ + unsigned max_subslices_per_slice; + /** * Number of subslices on each pixel pipe (ICL). */ @@ -152,6 +165,12 @@ struct intel_device_info */ unsigned num_eu_per_subslice; + /** + * Maximum number of EUs per subslice (can be more than num_eu_per_subslice + * if some EUs are fused off). + */ + unsigned max_eu_per_subslice; + /** * Number of threads per eu, varies between 4 and 8 between generations. */ @@ -354,6 +373,18 @@ intel_device_info_eu_available(const struct intel_device_info *devinfo, return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0; } +static inline uint32_t +intel_device_info_subslice_total(const struct intel_device_info *devinfo) +{ + uint32_t total = 0; + + for (size_t i = 0; i < ARRAY_SIZE(devinfo->subslice_masks); i++) { + total += __builtin_popcount(devinfo->subslice_masks[i]); + } + + return total; +} + static inline uint32_t intel_device_info_eu_total(const struct intel_device_info *devinfo) { diff --git a/src/intel/dev/intel_device_info_test.c b/src/intel/dev/intel_device_info_test.c index 06e51ace785..db2e65cea8f 100644 --- a/src/intel/dev/intel_device_info_test.c +++ b/src/intel/dev/intel_device_info_test.c @@ -30,6 +30,39 @@ main(int argc, char *argv[]) assert(devinfo.cs_prefetch_size > 0); assert(devinfo.ver < 7 || devinfo.max_constant_urb_size_kb > 0); + + assert(devinfo.num_slices <= ARRAY_SIZE(devinfo.subslice_masks)); + + assert(devinfo.num_slices <= devinfo.max_slices); + assert(intel_device_info_subslice_total(&devinfo) <= + (devinfo.max_slices * devinfo.max_subslices_per_slice)); + + for (uint32_t s = 0; s < ARRAY_SIZE(devinfo.num_subslices); s++) + assert(devinfo.num_subslices[s] <= devinfo.max_subslices_per_slice); + + assert(__builtin_popcount(devinfo.slice_masks) <= devinfo.max_slices); + + uint32_t total_subslices = 0; + for (size_t i = 0; i < ARRAY_SIZE(devinfo.subslice_masks); i++) + total_subslices += __builtin_popcount(devinfo.subslice_masks[i]); + assert(total_subslices <= + (devinfo.max_slices * devinfo.max_subslices_per_slice)); + + assert(intel_device_info_eu_total(&devinfo) > 0); + assert(intel_device_info_subslice_total(&devinfo) > 0); + + total_subslices = 0; + for (uint32_t s = 0; s < devinfo.max_slices; s++) + for (uint32_t ss = 0; ss < devinfo.max_subslices_per_slice; ss++) + total_subslices += intel_device_info_subslice_available(&devinfo, s, ss); + assert(total_subslices == intel_device_info_subslice_total(&devinfo)); + + uint32_t total_eus = 0; + for (uint32_t s = 0; s < devinfo.max_slices; s++) + for (uint32_t ss = 0; ss < devinfo.max_subslices_per_slice; ss++) + for (uint32_t eu = 0; eu < devinfo.max_eu_per_subslice; eu++) + total_eus += intel_device_info_eu_available(&devinfo, s, ss, eu); + assert(total_eus == intel_device_info_eu_total(&devinfo)); } return 0;
