Re: [PATCH] drm/amdgpu: Fix the iounmap error of rmmio

2024-03-15 Thread Christian König

Am 15.03.24 um 06:17 schrieb Ma Jun:

Set the rmmio pointer to NULL to fix the following
iounmap error and call trace:
iounmap: bad address d0b3631f

Fixes: 923f7a82d2e1 ("drm/amd/amdgpu: Fix potential ioremap() memory leaks in 
amdgpu_device_init()")
Signed-off-by: Ma Jun 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 39dd76e57154..d65a6aabefbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4383,6 +4383,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  
  unmap_memory:

iounmap(adev->rmmio);
+   adev->rmmio = NULL;


Well, that doesn't look correct to me. You seem to be working around 
broken initialisation code here.



return r;
  }
  
@@ -4514,9 +4515,11 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)

  #endif
  
  	if (drm_dev_enter(adev_to_drm(adev), &idx)) {


Ok, well that alone doesn't look correct to me. The MMIO region needs 
to be unmapped regardless of whether the driver is disconnected or not.



+   if (adev->rmmio) {


That looks just like a hack to me.

Regards,
Christian.


+   iounmap(adev->rmmio);
+   adev->rmmio = NULL;
+   }
  
-		iounmap(adev->rmmio);

-   adev->rmmio = NULL;
amdgpu_doorbell_fini(adev);
drm_dev_exit(idx);
}




[PATCH v2 1/2] drm/amd/pm: Update SMUv13.0.6 PMFW headers

2024-03-15 Thread Asad Kamal
Update PMFW interface headers for updated metrics table
with pcie link speed and pcie link width

Signed-off-by: Asad Kamal 
---
 drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
index 7b812b9994d7..0b3c2f54a343 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
@@ -123,7 +123,7 @@ typedef enum {
   VOLTAGE_GUARDBAND_COUNT
 } GFX_GUARDBAND_e;
 
-#define SMU_METRICS_TABLE_VERSION 0xB
+#define SMU_METRICS_TABLE_VERSION 0xC
 
 typedef struct __attribute__((packed, aligned(4))) {
   uint32_t AccumulationCounter;
@@ -223,6 +223,10 @@ typedef struct __attribute__((packed, aligned(4))) {
   // VCN/JPEG ACTIVITY
   uint32_t VcnBusy[4];
   uint32_t JpegBusy[32];
+
+  // PCIE LINK Speed and width
+  uint32_t PCIeLinkSpeed;
+  uint32_t PCIeLinkWidth;
 } MetricsTableX_t;
 
 typedef struct __attribute__((packed, aligned(4))) {
-- 
2.42.0



[PATCH v2 2/2] drm/amd/pm: Use metric table for pcie speed/width

2024-03-15 Thread Asad Kamal
Report the PCIe link speed/width from the metrics table in the one-VF
case when PMFW support is available; otherwise report it directly from
registers in the PF case. Skip reporting it for other cases.

v2: Skip multi-vf check(Lijo)

Signed-off-by: Asad Kamal 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 744c84f3029f..c1dcbbd88464 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2229,7 +2229,15 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct 
smu_context *smu, void **table
gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak) >> 
GET_INST(GC, 0);
 
if (!(adev->flags & AMD_IS_APU)) {
-   if (!amdgpu_sriov_vf(adev)) {
+   /*Check smu version, PCIE link speed and width will be reported 
from pmfw metric
+* table for both pf & one vf for smu version 85.99.0 or higher 
else report only
+* for pf from registers
+*/
+   if (smu->smc_fw_version >= 0x556300) {
+   gpu_metrics->pcie_link_width = metrics_x->PCIeLinkWidth;
+   gpu_metrics->pcie_link_speed =
+   pcie_gen_to_speed(metrics_x->PCIeLinkSpeed);
+   } else if (!amdgpu_sriov_vf(adev)) {
link_width_level = 
smu_v13_0_6_get_current_pcie_link_width_level(smu);
if (link_width_level > MAX_LINK_WIDTH)
link_width_level = 0;
@@ -2239,6 +2247,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct 
smu_context *smu, void **table
gpu_metrics->pcie_link_speed =
smu_v13_0_6_get_current_pcie_link_speed(smu);
}
+
gpu_metrics->pcie_bandwidth_acc =
SMUQ10_ROUND(metrics_x->PcieBandwidthAcc[0]);
gpu_metrics->pcie_bandwidth_inst =
-- 
2.42.0



Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()

2024-03-15 Thread Alison Schofield
On Fri, Feb 23, 2024 at 12:56:34PM -0500, Steven Rostedt wrote:
> From: "Steven Rostedt (Google)" 
> 
> [
>This is a treewide change. I will likely re-create this patch again in
>the second week of the merge window of v6.9 and submit it then. Hoping
>to keep the conflicts that it will cause to a minimum.
> ]
> 
> With the rework of how the __string() handles dynamic strings where it
> saves off the source string in field in the helper structure[1], the
> assignment of that value to the trace event field is stored in the helper
> value and does not need to be passed in again.
> 
> This means that with:
> 
>   __string(field, mystring)
> 
> Which used to be assigned with __assign_str(field, mystring), no longer
> needs the second parameter, as it is unused. With this, __assign_str()
> will now only take a single parameter.
> 
> There's over 700 users of __assign_str() and because coccinelle does not
> handle the TRACE_EVENT() macro I ended up using the following sed script:
> 
>   git grep -l __assign_str | while read a ; do
>   sed -e 's/\(__assign_str([^,]*[^ ,]\) *,[^;]*/\1)/' $a > /tmp/test-file;
>   mv /tmp/test-file $a;
>   done
> 
> I then searched for __assign_str() that did not end with ';' as those
> were multi line assignments that the sed script above would fail to catch.
> 
> Note, the same updates will need to be done for:
> 
>   __assign_str_len()
>   __assign_rel_str()
>   __assign_rel_str_len()
>   __assign_bitmask()
>   __assign_rel_bitmask()
>   __assign_cpumask()
>   __assign_rel_cpumask()
> 
> [1] 
> https://lore.kernel.org/linux-trace-kernel/2024011442.634192...@goodmis.org/
> 
> Signed-off-by: Steven Rostedt (Google) 
> ---
>  arch/arm64/kernel/trace-events-emulation.h|   2 +-
>  arch/powerpc/include/asm/trace.h  |   4 +-
>  arch/x86/kvm/trace.h  |   2 +-
>  drivers/base/regmap/trace.h   |  18 +--
>  drivers/base/trace.h  |   2 +-
>  drivers/block/rnbd/rnbd-srv-trace.h   |  12 +-
>  drivers/cxl/core/trace.h  |  24 ++--

snip to CXL


> diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
> index bdf117a33744..07ba4e033347 100644
> --- a/drivers/cxl/core/trace.h
> +++ b/drivers/cxl/core/trace.h

snip to poison

> @@ -668,8 +668,8 @@ TRACE_EVENT(cxl_poison,
>   ),
>  
>   TP_fast_assign(
> - __assign_str(memdev, dev_name(&cxlmd->dev));
> - __assign_str(host, dev_name(cxlmd->dev.parent));
> + __assign_str(memdev);
> + __assign_str(host);

I think I get that the above changes work because the TP_STRUCT__entry for
these did:
__string(memdev, dev_name(&cxlmd->dev))
__string(host, dev_name(cxlmd->dev.parent))

>   __entry->serial = cxlmd->cxlds->serial;
>   __entry->overflow_ts = cxl_poison_overflow(flags, overflow_ts);
>   __entry->dpa = cxl_poison_record_dpa(record);
> @@ -678,12 +678,12 @@ TRACE_EVENT(cxl_poison,
>   __entry->trace_type = trace_type;
>   __entry->flags = flags;
>   if (region) {
> - __assign_str(region, dev_name(®ion->dev));
> + __assign_str(region);
>   memcpy(__entry->uuid, ®ion->params.uuid, 16);
>   __entry->hpa = cxl_trace_hpa(region, cxlmd,
>__entry->dpa);
>   } else {
> - __assign_str(region, "");
> + __assign_str(region);
>   memset(__entry->uuid, 0, 16);
>   __entry->hpa = ULLONG_MAX;

For the above 2, there was no helper in TP_STRUCT__entry. A recently
posted patch is fixing that up to be __string(region, NULL). See [1];
the actual assignment still happens in TP_fast_assign.

Does that assign logic need to move to the TP_STRUCT__entry definition
when you merge these changes? I'm not clear how much logic can be
included there, i.e. whether C-style code is allowed in TP_STRUCT__entry.

[1]
https://lore.kernel.org/linux-cxl/20240314044301.2108650-1-alison.schofi...@intel.com/

Thanks for helping,
Alison


>   }





Re: [RFC PATCH v4 25/42] drm/vkms: Add tests for CTM handling

2024-03-15 Thread Pekka Paalanen
On Mon, 26 Feb 2024 16:10:39 -0500
Harry Wentland  wrote:

> A whole slew of tests for CTM handling that greatly helped in
> debugging the CTM code. The extent of tests might seem a bit
> silly but they're fast and might someday help save someone
> else's day when debugging this.
> 
> v4:
>  - Comment on origin of bt709_enc matrix (Pekka)
>  - Use full opaque alpha (Pekka)
>  - Add additional check for Y < 0x (Pekka)
>  - Remove unused code (Pekka)
>  - Rename red, green, blue to Y, U, V where applicable
> 
> Signed-off-by: Harry Wentland 
> ---
>  drivers/gpu/drm/vkms/tests/vkms_color_tests.c | 251 ++
>  drivers/gpu/drm/vkms/vkms_composer.c  |   2 +-
>  2 files changed, 252 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/vkms/tests/vkms_color_tests.c 
> b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c
> index e6ac01dee830..83d07f7bae37 100644
> --- a/drivers/gpu/drm/vkms/tests/vkms_color_tests.c
> +++ b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c
> @@ -3,6 +3,7 @@
>  #include 
>  
>  #include 
> +#include 
>  
>  #define TEST_LUT_SIZE 16
>  
> @@ -181,11 +182,261 @@ static void vkms_color_srgb_inv_srgb(struct kunit 
> *test)
>   }
>  }
>  
> +#define FIXPT_HALF(DRM_FIXED_ONE >> 1)
> +#define FIXPT_QUARTER (DRM_FIXED_ONE >> 2)
> +
> +const struct drm_color_ctm_3x4 test_matrix_3x4_50_desat = { {
> + FIXPT_HALF, FIXPT_QUARTER, FIXPT_QUARTER, 0,
> + FIXPT_QUARTER, FIXPT_HALF, FIXPT_QUARTER, 0,
> + FIXPT_QUARTER, FIXPT_QUARTER, FIXPT_HALF, 0

These are supposed to be sign-magnitude, not fixed-point, to my
understanding. It just happens that these specific values have the same
bit pattern in both representations.
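
As a quick illustration of that (a sketch, assuming the S31.32 layout with
bit 63 as the sign bit in the sign-magnitude form): non-negative values keep
their magnitude in the low 63 bits in both encodings, so the bit patterns
coincide, and only negative values differ.

    /* Illustrative constants only, not taken from the patch. */
    #define FIX_HALF       0x0000000080000000ULL  /* +0.5:  same bits in both forms */
    #define FIX_QUARTER    0x0000000040000000ULL  /* +0.25: same bits in both forms */
    #define SM_MINUS_HALF  0x8000000080000000ULL  /* -0.5 in sign-magnitude */
    #define TC_MINUS_HALF  0xFFFFFFFF80000000ULL  /* -0.5 in two's complement */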


> +} };
> +
> +static void vkms_color_ctm_3x4_50_desat(struct kunit *test)
> +{
> + struct pixel_argb_s32 ref, out;
> +
> + /* full white */
> + ref.a = 0xffff;
> + ref.r = 0xffff;
> + ref.g = 0xffff;
> + ref.b = 0xffff;
> +
> + memcpy(&out, &ref, sizeof(out));
> + apply_3x4_matrix(&out, &test_matrix_3x4_50_desat);
> +
> + KUNIT_EXPECT_MEMEQ(test, &ref, &out, sizeof(out));
> +
> + /* full black */
> + ref.a = 0xffff;
> + ref.r = 0x0;
> + ref.g = 0x0;
> + ref.b = 0x0;
> +
> + memcpy(&out, &ref, sizeof(out));
> + apply_3x4_matrix(&out, &test_matrix_3x4_50_desat);
> +
> + KUNIT_EXPECT_MEMEQ(test, &ref, &out, sizeof(out));
> +
> + /* 50% grey */
> + ref.a = 0xffff;
> + ref.r = 0x8000;
> + ref.g = 0x8000;
> + ref.b = 0x8000;
> +
> + memcpy(&out, &ref, sizeof(out));
> + apply_3x4_matrix(&out, &test_matrix_3x4_50_desat);
> +
> + KUNIT_EXPECT_MEMEQ(test, &ref, &out, sizeof(out));
> +
> + /* full red to 50% desat */
> + ref.a = 0xffff;
> + ref.r = 0x7fff;
> + ref.g = 0x3fff;
> + ref.b = 0x3fff;
> +
> + out.a = 0xffff;
> + out.r = 0xffff;
> + out.g = 0x0;
> + out.b = 0x0;
> +
> + apply_3x4_matrix(&out, &test_matrix_3x4_50_desat);
> +
> + KUNIT_EXPECT_MEMEQ(test, &ref, &out, sizeof(out));
> +}
> +
> +/*
> + * BT.709 encoding matrix
> + *
> + * Values printed from within IGT when converting
> + * igt_matrix_3x4_bt709_enc to the fixed-point format expected
> + * by DRM/KMS.

Shouldn't that be sign-magnitude, not fixed point?

I was hoping to get a reference to a spec like BT.709 and a formula for
getting the values below out of it. IGT can change over time, and I don't
know where IGT got that matrix from.

But ok, this is a test with an arbitrary matrix, not necessarily the
BT.709 matrix specifically.

Btw. which BT.709 matrix is this? YUV->RGB, RGB->XYZ, or the inverse of
one of those? Limited or full quantization range?

Judging by the tests, I guess RGB->YUV full range.
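
For reference, the usual full-range BT.709 RGB->YCbCr encoding (assuming
that is indeed what the matrix above implements) is:

    Y'  = 0.2126 R' + 0.7152 G' + 0.0722 B'
    Cb  = (B' - Y') / 1.8556
    Cr  = (R' - Y') / 1.5748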

> + */
> +const struct drm_color_ctm_3x4 test_matrix_3x4_bt709_enc = { {
> + 0x366cf400ull, 0xb7175900ull, 0x000127bb300ull, 0,
> + 0x80001993b3a0ull, 0x80005609fe80ull, 0x6f9db200ull, 0,
> + 0x9d70a400ull, 0x80008f011100ull, 0x8e6f9330ull, 0
> +} };
> +
> +static void vkms_color_ctm_3x4_bt709(struct kunit *test)
> +{
> + struct pixel_argb_s32 out;
> +
> + /* full white to bt709 */
> + out.a = 0xffff;
> + out.r = 0xffff;
> + out.g = 0xffff;
> + out.b = 0xffff;
> +
> + apply_3x4_matrix(&out, &test_matrix_3x4_bt709_enc);
> +
> + /* Y 255 */
> + KUNIT_EXPECT_GT(test, out.r, 0xfe00);
> + KUNIT_EXPECT_LT(test, out.r, 0x10000);
> +
> + /* U 0 */
> + KUNIT_EXPECT_LT(test, out.g, 0x0100);

For all of these U/V too, you may want to enforce a range. The value
must be approximately 0x100. Something like 0x17 would be very wrong.

Hmm, wait...

Neutral color should have U and V neutral, that is, zero, right? So
what is zero in this integer representation?

You don't seem to be testing negative U or V...


Thanks,
pq

> +
> + /* V 0 */
> + KUNIT_EXPECT_LT(test, out.b, 0x0100);
> +
> + /* full black to bt709 */
> + out.a = 0xf

Re: [RFC PATCH v4 23/42] drm/vkms: add 3x4 matrix in color pipeline

2024-03-15 Thread Pekka Paalanen
On Mon, 26 Feb 2024 16:10:37 -0500
Harry Wentland  wrote:

> We add two 3x4 matrices into the VKMS color pipeline. The reason
> we're adding matrices is so that we can test that application
> of a matrix and its inverse yields an output equal to the input
> image.

You will also test cases where the matrix configuration will not
produce output equal to the input, right?

Otherwise one can accidentally pass the matrix test by ignoring all
matrices.

> One complication with the matrix implementation has to do with
> the fact that the matrix entries are in signed-magnitude fixed
> point, whereas the drm_fixed.h implementation uses 2s-complement.
> The latter one is the one that we want for easy addition and
> subtraction, so we convert all entries to 2s-complement.
> 
> Signed-off-by: Harry Wentland 
> ---
>  drivers/gpu/drm/vkms/vkms_colorop.c  | 32 +++-
>  drivers/gpu/drm/vkms/vkms_composer.c | 27 +++
>  include/drm/drm_colorop.h|  2 ++
>  3 files changed, 60 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/vkms/vkms_colorop.c 
> b/drivers/gpu/drm/vkms/vkms_colorop.c
> index d2db366da6d3..a0e54b2c1f7a 100644
> --- a/drivers/gpu/drm/vkms/vkms_colorop.c
> +++ b/drivers/gpu/drm/vkms/vkms_colorop.c
> @@ -35,7 +35,37 @@ const int vkms_initialize_color_pipeline(struct drm_plane 
> *plane, struct drm_pro
>  
>   prev_op = op;
>  
> - /* 2nd op: 1d curve */
> + /* 2nd op: 3x4 matrix */
> + op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
> + if (!op) {
> + DRM_ERROR("KMS: Failed to allocate colorop\n");
> + return -ENOMEM;
> + }
> +
> + ret = drm_colorop_ctm_3x4_init(dev, op, plane);
> + if (ret)
> + return ret;
> +
> + drm_colorop_set_next_property(prev_op, op);
> +
> + prev_op = op;
> +
> + /* 3rd op: 3x4 matrix */
> + op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
> + if (!op) {
> + DRM_ERROR("KMS: Failed to allocate colorop\n");
> + return -ENOMEM;
> + }
> +
> + ret = drm_colorop_ctm_3x4_init(dev, op, plane);
> + if (ret)
> + return ret;
> +
> + drm_colorop_set_next_property(prev_op, op);
> +
> + prev_op = op;
> +
> + /* 4th op: 1d curve */
>   op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
>   if (!op) {
>   DRM_ERROR("KMS: Failed to allocate colorop\n");
> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
> b/drivers/gpu/drm/vkms/vkms_composer.c
> index d2101fa55aa3..8bbfce651526 100644
> --- a/drivers/gpu/drm/vkms/vkms_composer.c
> +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> @@ -164,6 +164,30 @@ static void apply_lut(const struct vkms_crtc_state 
> *crtc_state, struct line_buff
>   }
>  }
>  
> +static void apply_3x4_matrix(struct pixel_argb_s32 *pixel, const struct 
> drm_color_ctm_3x4 *matrix)
> +{
> + s64 rf, gf, bf;
> +
> + rf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[0]), 
> drm_int2fixp(pixel->r)) +
> +  drm_fixp_mul(drm_sm2fixp(matrix->matrix[1]), 
> drm_int2fixp(pixel->g)) +
> +  drm_fixp_mul(drm_sm2fixp(matrix->matrix[2]), 
> drm_int2fixp(pixel->b)) +
> +  drm_sm2fixp(matrix->matrix[3]);

It would be nice if the driver had its private data for the matrix
colorop state, so it could convert the matrix only once when userspace
sets it.
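
Something along these lines, as a rough sketch (all names invented here,
not the proposed colorop API): convert the twelve sign-magnitude entries to
two's complement once when the property is set, instead of per pixel.

    /* Sketch only: pre-convert a 3x4 sign-magnitude CTM to two's-complement
     * S31.32 at property-set time.  Names are illustrative. */
    #include <stdint.h>

    #define SM_SIGN_BIT (1ULL << 63)

    static int64_t sm_to_s64(uint64_t sm)
    {
        int64_t mag = (int64_t)(sm & ~SM_SIGN_BIT);

        return (sm & SM_SIGN_BIT) ? -mag : mag;
    }

    struct cached_ctm_3x4 {
        int64_t m[12];  /* two's-complement S31.32 entries */
    };

    static void cache_ctm_3x4(struct cached_ctm_3x4 *dst, const uint64_t src[12])
    {
        for (int i = 0; i < 12; i++)
            dst->m[i] = sm_to_s64(src[i]);
    }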


Thanks,
pq

> +
> + gf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[4]), 
> drm_int2fixp(pixel->r)) +
> +  drm_fixp_mul(drm_sm2fixp(matrix->matrix[5]), 
> drm_int2fixp(pixel->g)) +
> +  drm_fixp_mul(drm_sm2fixp(matrix->matrix[6]), 
> drm_int2fixp(pixel->b)) +
> +  drm_sm2fixp(matrix->matrix[7]);
> +
> + bf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[8]), 
> drm_int2fixp(pixel->r)) +
> +  drm_fixp_mul(drm_sm2fixp(matrix->matrix[9]), 
> drm_int2fixp(pixel->g)) +
> +  drm_fixp_mul(drm_sm2fixp(matrix->matrix[10]), 
> drm_int2fixp(pixel->b)) +
> +  drm_sm2fixp(matrix->matrix[11]);
> +
> + pixel->r = drm_fixp2int(rf);
> + pixel->g = drm_fixp2int(gf);
> + pixel->b = drm_fixp2int(bf);
> +}
> +
>  static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop 
> *colorop)
>  {
>   /* TODO is this right? */
> @@ -185,6 +209,9 @@ static void apply_colorop(struct pixel_argb_s32 *pixel, 
> struct drm_colorop *colo
>   DRM_DEBUG_DRIVER("unkown colorop 1D curve type 
> %d\n", colorop_state->curve_1d_type);
>   break;
>   }
> + } else if (colorop->type == DRM_COLOROP_CTM_3X4) {
> + if (colorop_state->data)
> + apply_3x4_matrix(pixel, (struct drm_color_ctm_3x4 *) 
> colorop_state->data->data);
>   }
>  
>  }
> diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h
> index 4aee29e161d6..8710e550790c 100644
> --- a/include/drm/drm_colorop.h
> +++ b/include/drm/drm_colorop.h
> @@ -224,6 +224,8 @@ int drm_col

Re: [RFC PATCH v4 24/42] drm/tests: Add a few tests around drm_fixed.h

2024-03-15 Thread Pekka Paalanen
On Mon, 26 Feb 2024 16:10:38 -0500
Harry Wentland  wrote:

> While working on the CTM implementation of VKMS I had to ascertain
> myself of a few assumptions. One of those is whether drm_fixed.h
> treats its numbers using signed-magnitude or twos-complement. It is
> twos-complement.
> 
> In order to make someone else's day easier I am adding the
> drm_test_int2fixp test that validates the above assumption.
> 
> I am also adding a test for the new sm2fixp function that converts
> from a signed-magnitude fixed point to the twos-complement fixed
> point.
> 
> Signed-off-by: Harry Wentland 
> ---
>  drivers/gpu/drm/tests/Makefile|  3 +-
>  drivers/gpu/drm/tests/drm_fixp_test.c | 69 +++
>  2 files changed, 71 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/tests/drm_fixp_test.c
> 
> diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
> index d6183b3d7688..98468f7908dd 100644
> --- a/drivers/gpu/drm/tests/Makefile
> +++ b/drivers/gpu/drm/tests/Makefile
> @@ -19,6 +19,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
>   drm_modes_test.o \
>   drm_plane_helper_test.o \
>   drm_probe_helper_test.o \
> - drm_rect_test.o
> + drm_rect_test.o \
> + drm_fixp_test.o
>  
>  CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
> diff --git a/drivers/gpu/drm/tests/drm_fixp_test.c 
> b/drivers/gpu/drm/tests/drm_fixp_test.c
> new file mode 100644
> index ..f420f173ff66
> --- /dev/null
> +++ b/drivers/gpu/drm/tests/drm_fixp_test.c
> @@ -0,0 +1,69 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + */
> +
> +#include 
> +#include 
> +
> +static void drm_test_sm2fixp(struct kunit *test)
> +{
> + KUNIT_EXPECT_EQ(test, 0x7fffffffffffffffll, ((1LL << 63) - 1));
> +
> + /* 1 */
> + KUNIT_EXPECT_EQ(test, drm_int2fixp(1), drm_sm2fixp(1ull << 
> DRM_FIXED_POINT));

This sounds like confusing two different APIs.

DRM_FIXED_POINT is for the two's complement representation, and sm is
not.

It does not look like there is a #define for the signed-magnitude UAPI
construct, otherwise drm_color_ctm_s31_32_to_qm_n() would be using it.

It's just an accident that DRM_FIXED_POINT has the right value for sm.

Hm, drm_color_ctm_s31_32_to_qm_n() produces two's complement. Why not
use that for implementing drm_sm2fixp()?


Thanks,
pq

> +
> + /* -1 */
> + KUNIT_EXPECT_EQ(test, drm_int2fixp(-1), drm_sm2fixp((1ull << 63) | 
> (1ull << DRM_FIXED_POINT)));
> +
> + /* 0.5 */
> + KUNIT_EXPECT_EQ(test, drm_fixp_from_fraction(1, 2), drm_sm2fixp(1ull << 
> (DRM_FIXED_POINT - 1)));
> +
> + /* -0.5 */
> + KUNIT_EXPECT_EQ(test, drm_fixp_from_fraction(-1, 2), drm_sm2fixp((1ull 
> << 63) | (1ull << (DRM_FIXED_POINT - 1))));
> +
> +}
> +
> +static void drm_test_int2fixp(struct kunit *test)
> +{
> + /* 1 */
> + KUNIT_EXPECT_EQ(test, 1ll << 32, drm_int2fixp(1));
> +
> + /* -1 */
> + KUNIT_EXPECT_EQ(test, -(1ll << 32), drm_int2fixp(-1));
> +
> + /* 1 + (-1) = 0 */
> + KUNIT_EXPECT_EQ(test, 0, drm_int2fixp(1) + drm_int2fixp(-1));
> +
> + /* 1 / 2 */
> + KUNIT_EXPECT_EQ(test, 1ll << 31, drm_fixp_from_fraction(1, 2));
> +
> + /* -0.5 */
> + KUNIT_EXPECT_EQ(test, -(1ll << 31), drm_fixp_from_fraction(-1, 2));
> +
> + /* (1 / 2) + (-1) = 0.5 */
> + KUNIT_EXPECT_EQ(test, 1ll << 31, drm_fixp_from_fraction(-1, 2) + 
> drm_int2fixp(1));
> +
> + /* (1 / 2) - 1) = 0.5 */
> + KUNIT_EXPECT_EQ(test, -(1ll << 31), drm_fixp_from_fraction(1, 2) + 
> drm_int2fixp(-1));
> +
> + /* (1 / 2) - 1) = 0.5 */
> + KUNIT_EXPECT_EQ(test, -(1ll << 31), drm_fixp_from_fraction(1, 2) - 
> drm_int2fixp(1));
> +
> +}
> +
> +static struct kunit_case drm_fixp_tests[] = {
> + KUNIT_CASE(drm_test_int2fixp),
> + KUNIT_CASE(drm_test_sm2fixp),
> + { }
> +};
> +
> +static struct kunit_suite drm_rect_test_suite = {
> + .name = "drm_fixp",
> + .test_cases = drm_fixp_tests,
> +};
> +
> +kunit_test_suite(drm_rect_test_suite);
> +
> +MODULE_AUTHOR("AMD");
> +MODULE_LICENSE("GPL and additional rights");
> \ No newline at end of file





Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()

2024-03-15 Thread Steven Rostedt
On Thu, 14 Mar 2024 09:57:57 -0700
Alison Schofield  wrote:

> On Fri, Feb 23, 2024 at 12:56:34PM -0500, Steven Rostedt wrote:
> > From: "Steven Rostedt (Google)" 
> > 
> > [
> >This is a treewide change. I will likely re-create this patch again in
> >the second week of the merge window of v6.9 and submit it then. Hoping
> >to keep the conflicts that it will cause to a minimum.
> > ]

Note, change of plans. I plan on sending this in the next merge window, as
this merge window I have this patch:

  
https://lore.kernel.org/linux-trace-kernel/20240312113002.00031...@gandalf.local.home/

That will warn if the source string of __string() is different than the
source string of __assign_str(). I want to make sure they are identical
before just dropping one of them.


> 
> > diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
> > index bdf117a33744..07ba4e033347 100644
> > --- a/drivers/cxl/core/trace.h
> > +++ b/drivers/cxl/core/trace.h  
> 
> snip to poison
> 
> > @@ -668,8 +668,8 @@ TRACE_EVENT(cxl_poison,
> > ),
> >  
> > TP_fast_assign(
> > -   __assign_str(memdev, dev_name(&cxlmd->dev));
> > -   __assign_str(host, dev_name(cxlmd->dev.parent));
> > +   __assign_str(memdev);
> > +   __assign_str(host);  
> 
> I think I get that the above changes work because the TP_STRUCT__entry for
> these did:
>   __string(memdev, dev_name(&cxlmd->dev))
>   __string(host, dev_name(cxlmd->dev.parent))

That's the point. They have to be identical or you likely have a bug.

The __string(name, src) is used to find the string length of src, which is
used to allocate the necessary space on the ring buffer. The
__assign_str(name, src) will then copy src into the ring buffer.

Similar to:

len = strlen(src);
buf = malloc(len);
	strcpy(buf, src);

Where __string() is strlen() and __assign_str() is strcpy(). It doesn't
make sense to use two different strings, and if you did, it would likely be
a bug.

But the magic behind __string() does much more than just get the length of
the string, and it could easily save the pointer to the string (along with
its length) and have it copy that in the __assign_str() call, making the
src parameter of __assign_str() useless.
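
As a rough userspace analogy of that idea (purely illustrative, not the
actual tracing macros):

    #include <stdlib.h>
    #include <string.h>

    struct str_helper {
        const char *src;   /* saved when __string() runs */
        size_t len;        /* length measured at the same time */
    };

    static void helper_string(struct str_helper *h, const char *src)
    {
        h->src = src;
        h->len = strlen(src) + 1;
    }

    static char *helper_assign_str(const struct str_helper *h)
    {
        char *buf = malloc(h->len);   /* stands in for the ring-buffer space */

        if (buf)
            memcpy(buf, h->src, h->len);   /* no second src argument needed */
        return buf;
    }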

> 
> > __entry->serial = cxlmd->cxlds->serial;
> > __entry->overflow_ts = cxl_poison_overflow(flags, overflow_ts);
> > __entry->dpa = cxl_poison_record_dpa(record);
> > @@ -678,12 +678,12 @@ TRACE_EVENT(cxl_poison,
> > __entry->trace_type = trace_type;
> > __entry->flags = flags;
> > if (region) {
> > -   __assign_str(region, dev_name(®ion->dev));
> > +   __assign_str(region);
> > memcpy(__entry->uuid, ®ion->params.uuid, 16);
> > __entry->hpa = cxl_trace_hpa(region, cxlmd,
> >  __entry->dpa);
> > } else {
> > -   __assign_str(region, "");
> > +   __assign_str(region);
> > memset(__entry->uuid, 0, 16);
> > __entry->hpa = ULLONG_MAX;  
> 
> For the above 2, there was no helper in TP_STRUCT__entry. A recently
> posted patch is fixing that up to be __string(region, NULL) See [1],
> with the actual assignment still happening in TP_fast_assign.

__string(region, NULL) doesn't make sense. It's like:

len = strlen(NULL);
buf = malloc(len);
strcpy(buf, NULL);

??

I'll reply to that email.

-- Steve

> 
> Does that assign logic need to move to the TP_STRUCT__entry definition
> when you merge these changes? I'm not clear how much logic is able to be
> included, ie like 'C' style code in the TP_STRUCT__entry.
> 
> [1]
> https://lore.kernel.org/linux-cxl/20240314044301.2108650-1-alison.schofi...@intel.com/


Re: [PATCH] drm/amdgpu: fix deadlock while reading mqd from debugfs

2024-03-15 Thread Johannes Weiner
Hello,

On Fri, Mar 08, 2024 at 12:32:33PM +0100, Christian König wrote:
> Am 07.03.24 um 23:07 schrieb Johannes Weiner:
> > Lastly I went with an open loop instead of a memcpy() as I wasn't
> > sure if that memory is safe to address a byte at a time.

Shashank pointed out to me in private that byte access would indeed be
safe. However, after actually trying it, it won't work because memcpy()
doesn't play nice with mqd being volatile:

/home/hannes/src/linux/linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c: In 
function 'amdgpu_debugfs_mqd_read':
/home/hannes/src/linux/linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c:550:22: 
warning: passing argument 1 of '__builtin_dynamic_object_size' discards 
'volatile' qualifier from pointer target type [-Wdiscarded-qualifiers]
  550 | memcpy(kbuf, mqd, ring->mqd_size);
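
For comparison, the open-coded loop mentioned above -- reading the volatile
MQD one byte at a time -- would look roughly like this (a sketch, not the
exact hunk from the patch):

    /* Sketch only: copy from a volatile source without memcpy(). */
    #include <stddef.h>

    static void mqd_copy_bytes(char *dst, const volatile char *src, size_t size)
    {
        size_t i;

        for (i = 0; i < size; i++)
            dst[i] = src[i];   /* byte reads keep the volatile qualifier */
    }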

So I would propose leaving the patch as-is. Shashank, does that sound
good to you?

(Please keep me CC'd on replies, as I'm not subscribed to the graphics
lists.)

Thanks!


Re: [PATCH v2 1/2] drm/amd/pm: Update SMUv13.0.6 PMFW headers

2024-03-15 Thread Lazar, Lijo



On 3/15/2024 1:13 PM, Asad Kamal wrote:
> Update PMFW interface headers for updated metrics table
> with pcie link speed and pcie link width
> 
> Signed-off-by: Asad Kamal 

Series is -

Reviewed-by: Lijo Lazar 

Thanks,
Lijo

> ---
>  drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h 
> b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
> index 7b812b9994d7..0b3c2f54a343 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
> @@ -123,7 +123,7 @@ typedef enum {
>VOLTAGE_GUARDBAND_COUNT
>  } GFX_GUARDBAND_e;
>  
> -#define SMU_METRICS_TABLE_VERSION 0xB
> +#define SMU_METRICS_TABLE_VERSION 0xC
>  
>  typedef struct __attribute__((packed, aligned(4))) {
>uint32_t AccumulationCounter;
> @@ -223,6 +223,10 @@ typedef struct __attribute__((packed, aligned(4))) {
>// VCN/JPEG ACTIVITY
>uint32_t VcnBusy[4];
>uint32_t JpegBusy[32];
> +
> +  // PCIE LINK Speed and width
> +  uint32_t PCIeLinkSpeed;
> +  uint32_t PCIeLinkWidth;
>  } MetricsTableX_t;
>  
>  typedef struct __attribute__((packed, aligned(4))) {


[PATCH 1/1] drm/amdgpu: drop setting buffer funcs in sdma442

2024-03-15 Thread Le Ma
To fix the entity rq NULL issue. This setting has been moved to the upper level.

Fixes: b70438004a14 ("drm/amdgpu: move buffer funcs setting up a level")

Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 20 +---
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index eaa4f5f49949..589a734982a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -431,16 +431,11 @@ static void sdma_v4_4_2_inst_gfx_stop(struct 
amdgpu_device *adev,
struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 doorbell_offset, doorbell;
u32 rb_cntl, ib_cntl;
-   int i, unset = 0;
+   int i;
 
for_each_inst(i, inst_mask) {
sdma[i] = &adev->sdma.instance[i].ring;
 
-   if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
-   amdgpu_ttm_set_buffer_funcs_status(adev, false);
-   unset = 1;
-   }
-
rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 
0);
WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
@@ -490,17 +485,10 @@ static void sdma_v4_4_2_inst_page_stop(struct 
amdgpu_device *adev,
struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
int i;
-   bool unset = false;
 
for_each_inst(i, inst_mask) {
sdma[i] = &adev->sdma.instance[i].page;
 
-   if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
-   (!unset)) {
-   amdgpu_ttm_set_buffer_funcs_status(adev, false);
-   unset = true;
-   }
-
rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
RB_ENABLE, 0);
@@ -950,13 +938,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device 
*adev,
r = amdgpu_ring_test_helper(page);
if (r)
return r;
-
-   if (adev->mman.buffer_funcs_ring == page)
-   amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
-
-   if (adev->mman.buffer_funcs_ring == ring)
-   amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
 
return r;
-- 
2.43.2



RE: [PATCH 1/1] drm/amdgpu: drop setting buffer funcs in sdma442

2024-03-15 Thread Zhang, Hawking
[AMD Official Use Only - General]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: Ma, Le 
Sent: Friday, March 15, 2024 17:16
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Song, Asher ; 
Deucher, Alexander ; Ma, Le 
Subject: [PATCH 1/1] drm/amdgpu: drop setting buffer funcs in sdma442

To fix the entity rq NULL issue. This setting has been moved to the upper level.

Fixes: b70438004a14 ("drm/amdgpu: move buffer funcs setting up a level")

Signed-off-by: Le Ma 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 20 +---
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index eaa4f5f49949..589a734982a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -431,16 +431,11 @@ static void sdma_v4_4_2_inst_gfx_stop(struct 
amdgpu_device *adev,
struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 doorbell_offset, doorbell;
u32 rb_cntl, ib_cntl;
-   int i, unset = 0;
+   int i;

for_each_inst(i, inst_mask) {
sdma[i] = &adev->sdma.instance[i].ring;

-   if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
-   amdgpu_ttm_set_buffer_funcs_status(adev, false);
-   unset = 1;
-   }
-
rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 
0);
WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl); @@ -490,17 
+485,10 @@ static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev,
struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
int i;
-   bool unset = false;

for_each_inst(i, inst_mask) {
sdma[i] = &adev->sdma.instance[i].page;

-   if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
-   (!unset)) {
-   amdgpu_ttm_set_buffer_funcs_status(adev, false);
-   unset = true;
-   }
-
rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
RB_ENABLE, 0);
@@ -950,13 +938,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device 
*adev,
r = amdgpu_ring_test_helper(page);
if (r)
return r;
-
-   if (adev->mman.buffer_funcs_ring == page)
-   amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
-
-   if (adev->mman.buffer_funcs_ring == ring)
-   amdgpu_ttm_set_buffer_funcs_status(adev, true);
}

return r;
--
2.43.2



Re: [PATCH 1/1] drm/amdgpu: drop setting buffer funcs in sdma442

2024-03-15 Thread Lazar, Lijo



On 3/15/2024 2:46 PM, Le Ma wrote:
> To fix the entity rq NULL issue. This setting has been moved to upper level.
> 

Need to call amdgpu_ttm_set_buffer_funcs_status(adev, true/false) in
mode-2 reset handlers as well.

Thanks,
Lijo

> Fixes b70438004a14 ("drm/amdgpu: move buffer funcs setting up a level")
> 
> Signed-off-by: Le Ma 
> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 20 +---
>  1 file changed, 1 insertion(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index eaa4f5f49949..589a734982a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -431,16 +431,11 @@ static void sdma_v4_4_2_inst_gfx_stop(struct 
> amdgpu_device *adev,
>   struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
>   u32 doorbell_offset, doorbell;
>   u32 rb_cntl, ib_cntl;
> - int i, unset = 0;
> + int i;
>  
>   for_each_inst(i, inst_mask) {
>   sdma[i] = &adev->sdma.instance[i].ring;
>  
> - if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
> - amdgpu_ttm_set_buffer_funcs_status(adev, false);
> - unset = 1;
> - }
> -
>   rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
>   rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 
> 0);
>   WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
> @@ -490,17 +485,10 @@ static void sdma_v4_4_2_inst_page_stop(struct 
> amdgpu_device *adev,
>   struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
>   u32 rb_cntl, ib_cntl;
>   int i;
> - bool unset = false;
>  
>   for_each_inst(i, inst_mask) {
>   sdma[i] = &adev->sdma.instance[i].page;
>  
> - if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
> - (!unset)) {
> - amdgpu_ttm_set_buffer_funcs_status(adev, false);
> - unset = true;
> - }
> -
>   rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
>   rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
>   RB_ENABLE, 0);
> @@ -950,13 +938,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device 
> *adev,
>   r = amdgpu_ring_test_helper(page);
>   if (r)
>   return r;
> -
> - if (adev->mman.buffer_funcs_ring == page)
> - amdgpu_ttm_set_buffer_funcs_status(adev, true);
>   }
> -
> - if (adev->mman.buffer_funcs_ring == ring)
> - amdgpu_ttm_set_buffer_funcs_status(adev, true);
>   }
>  
>   return r;


Re: [PATCH 1/1] drm/amdgpu: drop setting buffer funcs in sdma442

2024-03-15 Thread Lazar, Lijo



On 3/15/2024 3:43 PM, Lazar, Lijo wrote:
> 
> 
> On 3/15/2024 2:46 PM, Le Ma wrote:
>> To fix the entity rq NULL issue. This setting has been moved to upper level.
>>
> 
> Need to call amdgpu_ttm_set_buffer_funcs_status(adev, true/false) in
> mode-2 reset handlers as well.
> 

Please also check if it's required here as well -
amdgpu_device_ip_reinit_late_sriov()

Thanks,
Lijo

> Thanks,
> Lijo
> 
>> Fixes b70438004a14 ("drm/amdgpu: move buffer funcs setting up a level")
>>
>> Signed-off-by: Le Ma 
>> ---
>>  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 20 +---
>>  1 file changed, 1 insertion(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>> index eaa4f5f49949..589a734982a7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>> @@ -431,16 +431,11 @@ static void sdma_v4_4_2_inst_gfx_stop(struct 
>> amdgpu_device *adev,
>>  struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
>>  u32 doorbell_offset, doorbell;
>>  u32 rb_cntl, ib_cntl;
>> -int i, unset = 0;
>> +int i;
>>  
>>  for_each_inst(i, inst_mask) {
>>  sdma[i] = &adev->sdma.instance[i].ring;
>>  
>> -if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
>> -amdgpu_ttm_set_buffer_funcs_status(adev, false);
>> -unset = 1;
>> -}
>> -
>>  rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
>>  rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 
>> 0);
>>  WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
>> @@ -490,17 +485,10 @@ static void sdma_v4_4_2_inst_page_stop(struct 
>> amdgpu_device *adev,
>>  struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
>>  u32 rb_cntl, ib_cntl;
>>  int i;
>> -bool unset = false;
>>  
>>  for_each_inst(i, inst_mask) {
>>  sdma[i] = &adev->sdma.instance[i].page;
>>  
>> -if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
>> -(!unset)) {
>> -amdgpu_ttm_set_buffer_funcs_status(adev, false);
>> -unset = true;
>> -}
>> -
>>  rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
>>  rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
>>  RB_ENABLE, 0);
>> @@ -950,13 +938,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device 
>> *adev,
>>  r = amdgpu_ring_test_helper(page);
>>  if (r)
>>  return r;
>> -
>> -if (adev->mman.buffer_funcs_ring == page)
>> -amdgpu_ttm_set_buffer_funcs_status(adev, true);
>>  }
>> -
>> -if (adev->mman.buffer_funcs_ring == ring)
>> -amdgpu_ttm_set_buffer_funcs_status(adev, true);
>>  }
>>  
>>  return r;


Re: [PATCH] drm/amdgpu: trigger flr_work if reading pf2vf data failed

2024-03-15 Thread Lazar, Lijo



On 3/14/2024 10:24 PM, Zhigang Luo wrote:
> if reading pf2vf data failed 5 times continuously, it means something is
> wrong. Need to trigger flr_work to recover the issue.
> 
> also use dev_err to print the error message to get which device has
> issue and add warning message if waiting IDH_FLR_NOTIFICATION_CMPL
> timeout.
> 
> Signed-off-by: Zhigang Luo 
> Change-Id: Ia7ce934d0c3068ad3934715c14bbffdfcbafc4c2
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 29 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |  1 +
>  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c  |  2 ++
>  drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c  |  2 ++
>  5 files changed, 39 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 1e9454e6e4cb..9bb04a56d020 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -143,6 +143,8 @@ const char *amdgpu_asic_name[] = {
>   "LAST",
>  };
>  
> +static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device 
> *adev);
> +
>  /**
>   * DOC: pcie_replay_count
>   *
> @@ -4972,6 +4974,8 @@ static int amdgpu_device_reset_sriov(struct 
> amdgpu_device *adev,
>  retry:
>   amdgpu_amdkfd_pre_reset(adev);
>  
> + amdgpu_device_stop_pending_resets(adev);
> +
>   if (from_hypervisor)
>   r = amdgpu_virt_request_full_gpu(adev, true);
>   else
> @@ -5689,11 +5693,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
>   tmp_adev->asic_reset_res = r;
>   }
>  
> - /*
> -  * Drop all pending non scheduler resets. Scheduler resets
> -  * were already dropped during drm_sched_stop
> -  */
> - amdgpu_device_stop_pending_resets(tmp_adev);
> + if (!amdgpu_sriov_vf(tmp_adev))
> + /*
> + * Drop all pending non scheduler resets. Scheduler 
> resets
> + * were already dropped during drm_sched_stop
> + */
> + amdgpu_device_stop_pending_resets(tmp_adev);
>   }
>  

For better consistency - you may make this path common for both VF and
bare metal. Call this right before "skip_hw_reset" after a successful
reset. All pending reset jobs submitted may be cancelled at that point
once a successful reset of the device/hive is completed.

Thanks,
Lijo

>   /* Actual ASIC resets if needed.*/
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 7a4eae36778a..4e8364a46d4e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -32,6 +32,7 @@
>  
>  #include "amdgpu.h"
>  #include "amdgpu_ras.h"
> +#include "amdgpu_reset.h"
>  #include "vi.h"
>  #include "soc15.h"
>  #include "nv.h"
> @@ -424,7 +425,7 @@ static int amdgpu_virt_read_pf2vf_data(struct 
> amdgpu_device *adev)
>   return -EINVAL;
>  
>   if (pf2vf_info->size > 1024) {
> - DRM_ERROR("invalid pf2vf message size\n");
> + dev_err(adev->dev, "invalid pf2vf message size: 0x%x\n", 
> pf2vf_info->size);
>   return -EINVAL;
>   }
>  
> @@ -435,7 +436,9 @@ static int amdgpu_virt_read_pf2vf_data(struct 
> amdgpu_device *adev)
>   adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
>   adev->virt.fw_reserve.checksum_key, checksum);
>   if (checksum != checkval) {
> - DRM_ERROR("invalid pf2vf message\n");
> + dev_err(adev->dev,
> + "invalid pf2vf message: header checksum=0x%x 
> calculated checksum=0x%x\n",
> + checksum, checkval);
>   return -EINVAL;
>   }
>  
> @@ -449,7 +452,9 @@ static int amdgpu_virt_read_pf2vf_data(struct 
> amdgpu_device *adev)
>   adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
>   0, checksum);
>   if (checksum != checkval) {
> - DRM_ERROR("invalid pf2vf message\n");
> + dev_err(adev->dev,
> + "invalid pf2vf message: header checksum=0x%x 
> calculated checksum=0x%x\n",
> + checksum, checkval);
>   return -EINVAL;
>   }
>  
> @@ -485,7 +490,7 @@ static int amdgpu_virt_read_pf2vf_data(struct 
> amdgpu_device *adev)
>   ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid;
>   break;
>   default:
> - DRM_ERROR("invalid pf2vf version\n");
> + dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", 
> pf2vf_info->version);
>   return -EINVAL;
>   }
>  
> @@ -584,8 +589,21 @@ st

[PATCH] drm/amdgpu: add the hw_ip version of all IP's

2024-03-15 Thread Sunil Khatri
Add the IP version information of all IPs on a SoC to the
devcoredump.

Signed-off-by: Sunil Khatri 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 62 +++
 1 file changed, 62 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index a0dbccad2f53..3d4bfe0a5a7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -29,6 +29,43 @@
 #include "sienna_cichlid.h"
 #include "smu_v13_0_10.h"
 
+const char *hw_ip_names[MAX_HWIP] = {
+   [GC_HWIP]   = "GC",
+   [HDP_HWIP]  = "HDP",
+   [SDMA0_HWIP]= "SDMA0",
+   [SDMA1_HWIP]= "SDMA1",
+   [SDMA2_HWIP]= "SDMA2",
+   [SDMA3_HWIP]= "SDMA3",
+   [SDMA4_HWIP]= "SDMA4",
+   [SDMA5_HWIP]= "SDMA5",
+   [SDMA6_HWIP]= "SDMA6",
+   [SDMA7_HWIP]= "SDMA7",
+   [LSDMA_HWIP]= "LSDMA",
+   [MMHUB_HWIP]= "MMHUB",
+   [ATHUB_HWIP]= "ATHUB",
+   [NBIO_HWIP] = "NBIO",
+   [MP0_HWIP]  = "MP0",
+   [MP1_HWIP]  = "MP1",
+   [UVD_HWIP]  = "UVD/JPEG/VCN",
+   [VCN1_HWIP] = "VCN1",
+   [VCE_HWIP]  = "VCE",
+   [VPE_HWIP]  = "VPE",
+   [DF_HWIP]   = "DF",
+   [DCE_HWIP]  = "DCE",
+   [OSSSYS_HWIP]   = "OSSSYS",
+   [SMUIO_HWIP]= "SMUIO",
+   [PWR_HWIP]  = "PWR",
+   [NBIF_HWIP] = "NBIF",
+   [THM_HWIP]  = "THM",
+   [CLK_HWIP]  = "CLK",
+   [UMC_HWIP]  = "UMC",
+   [RSMU_HWIP] = "RSMU",
+   [XGMI_HWIP] = "XGMI",
+   [DCI_HWIP]  = "DCI",
+   [PCIE_HWIP] = "PCIE",
+};
+
+
 int amdgpu_reset_init(struct amdgpu_device *adev)
 {
int ret = 0;
@@ -196,6 +233,31 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, 
size_t count,
   coredump->reset_task_info.process_name,
   coredump->reset_task_info.pid);
 
+   /* GPU IP's information of the SOC */
+   if (coredump->adev) {
+
+   drm_printf(&p, "\nIP Information\n");
+   drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
+   drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
+   drm_printf(&p, "SOC External Revision id: %d\n",
+  coredump->adev->external_rev_id);
+
+   for (int i = 1; i < MAX_HWIP; i++) {
+   for (int j = 0; j < HWIP_MAX_INSTANCE; j++) {
+   int ver = coredump->adev->ip_versions[i][j];
+
+   if (ver)
+   drm_printf(&p, "HWIP: %s[%d][%d]: 
v%d.%d.%d.%d.%d\n",
+  hw_ip_names[i], i, j,
+  IP_VERSION_MAJ(ver),
+  IP_VERSION_MIN(ver),
+  IP_VERSION_REV(ver),
+  IP_VERSION_VARIANT(ver),
+  IP_VERSION_SUBREV(ver));
+   }
+   }
+   }
+
if (coredump->ring) {
drm_printf(&p, "\nRing timed out details\n");
drm_printf(&p, "IP Type: %d Ring Name: %s\n",
-- 
2.34.1



RE: [PATCH 1/1] drm/amdgpu: drop setting buffer funcs in sdma442

2024-03-15 Thread Ma, Le
[AMD Official Use Only - General]


> -Original Message-
> From: Lazar, Lijo mailto:lijo.la...@amd.com>>
> Sent: Friday, March 15, 2024 6:14 PM
> To: Ma, Le mailto:le...@amd.com>>; 
> amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking mailto:hawking.zh...@amd.com>>; 
> Song, Asher
> mailto:asher.s...@amd.com>>; Deucher, Alexander 
> mailto:alexander.deuc...@amd.com>>
> Subject: Re: [PATCH 1/1] drm/amdgpu: drop setting buffer funcs in sdma442
>
>
>
> On 3/15/2024 2:46 PM, Le Ma wrote:
> > To fix the entity rq NULL issue. This setting has been moved to upper level.
> >
>
> Need to call amdgpu_ttm_set_buffer_funcs_status(adev, true/false) in
> mode-2 reset handlers as well.

Thanks for pointing this out. I think we can make a separate patch to
handle it for mode-2, since this patch is for alignment purposes. Actually,
the buffer funcs will not be unset/set in the reset case because of the
conditions below:

  void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool 
enable)
  {
  struct ttm_resource_manager *man = 
ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
  uint64_t size;
  int r;

  if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
  adev->mman.buffer_funcs_enabled == enable || 
adev->gmc.is_app_apu)
  return;


>
> Thanks,
> Lijo
>
> > Fixes b70438004a14 ("drm/amdgpu: move buffer funcs setting up a
> > level")
> >
> > Signed-off-by: Le Ma mailto:le...@amd.com>>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 20 +---
> >  1 file changed, 1 insertion(+), 19 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> > index eaa4f5f49949..589a734982a7 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> > @@ -431,16 +431,11 @@ static void sdma_v4_4_2_inst_gfx_stop(struct
> amdgpu_device *adev,
> > struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
> > u32 doorbell_offset, doorbell;
> > u32 rb_cntl, ib_cntl;
> > -   int i, unset = 0;
> > +   int i;
> >
> > for_each_inst(i, inst_mask) {
> > sdma[i] = &adev->sdma.instance[i].ring;
> >
> > -   if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
> > -   amdgpu_ttm_set_buffer_funcs_status(adev, false);
> > -   unset = 1;
> > -   }
> > -
> > rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
> > rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL,
> RB_ENABLE, 0);
> > WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl); @@ -
> 490,17 +485,10 @@
> > static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev,
> > struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
> > u32 rb_cntl, ib_cntl;
> > int i;
> > -   bool unset = false;
> >
> > for_each_inst(i, inst_mask) {
> > sdma[i] = &adev->sdma.instance[i].page;
> >
> > -   if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
> > -   (!unset)) {
> > -   amdgpu_ttm_set_buffer_funcs_status(adev, false);
> > -   unset = true;
> > -   }
> > -
> > rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
> > rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
> > RB_ENABLE, 0);
> > @@ -950,13 +938,7 @@ static int sdma_v4_4_2_inst_start(struct
> amdgpu_device *adev,
> > r = amdgpu_ring_test_helper(page);
> > if (r)
> > return r;
> > -
> > -   if (adev->mman.buffer_funcs_ring == page)
> > -   amdgpu_ttm_set_buffer_funcs_status(adev,
> true);
> > }
> > -
> > -   if (adev->mman.buffer_funcs_ring == ring)
> > -   amdgpu_ttm_set_buffer_funcs_status(adev, true);
> > }
> >
> > return r;


RE: [PATCH] drm/amdgpu: add the hw_ip version of all IP's

2024-03-15 Thread Khatri, Sunil
[AMD Official Use Only - General]

Hello Alex

Added the information directly from ip_versions and also added names for
each IP so the version information makes more sense to the user.
Below is the output in devcoredump now:
IP Information
SOC Family: 143
SOC Revision id: 0
SOC External Revision id: 50
HWIP: GC[1][0]: v10.3.2.0.0
HWIP: HDP[2][0]: v5.0.3.0.0
HWIP: SDMA0[3][0]: v5.2.2.0.0
HWIP: SDMA1[4][0]: v5.2.2.0.0
HWIP: MMHUB[12][0]: v2.1.0.0.0
HWIP: ATHUB[13][0]: v2.1.0.0.0
HWIP: NBIO[14][0]: v3.3.1.0.0
HWIP: MP0[15][0]: v11.0.11.0.0
HWIP: MP1[16][0]: v11.0.11.0.0
HWIP: UVD/JPEG/VCN[17][0]: v3.0.0.0.0
HWIP: UVD/JPEG/VCN[17][1]: v3.0.1.0.0
HWIP: DF[21][0]: v3.7.3.0.0
HWIP: DCE[22][0]: v3.0.0.0.0
HWIP: OSSSYS[23][0]: v5.0.3.0.0
HWIP: SMUIO[24][0]: v11.0.6.0.0
HWIP: NBIF[26][0]: v3.3.1.0.0
HWIP: THM[27][0]: v11.0.5.0.0
HWIP: CLK[28][0]: v11.0.3.0.0
HWIP: CLK[28][1]: v11.0.3.0.0
HWIP: CLK[28][2]: v11.0.3.0.0
HWIP: CLK[28][3]: v11.0.3.0.0
HWIP: CLK[28][4]: v11.0.3.0.0
HWIP: CLK[28][5]: v11.0.3.0.0
HWIP: CLK[28][6]: v11.0.3.0.0
HWIP: CLK[28][7]: v11.0.3.0.0
HWIP: UMC[29][0]: v8.7.1.0.0
HWIP: UMC[29][1]: v8.7.1.0.0
HWIP: UMC[29][2]: v8.7.1.0.0
HWIP: UMC[29][3]: v8.7.1.0.0
HWIP: UMC[29][4]: v8.7.1.0.0
HWIP: UMC[29][5]: v8.7.1.0.0
HWIP: PCIE[33][0]: v6.5.0.0.0


-Original Message-
From: Sunil Khatri 
Sent: Friday, March 15, 2024 5:43 PM
To: Deucher, Alexander ; Koenig, Christian 
; Sharma, Shashank 
Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; 
linux-ker...@vger.kernel.org; Khatri, Sunil 
Subject: [PATCH] drm/amdgpu: add the hw_ip version of all IP's

Add the IP version information of all IPs on a SoC to the devcoredump.

Signed-off-by: Sunil Khatri 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 62 +++
 1 file changed, 62 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index a0dbccad2f53..3d4bfe0a5a7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -29,6 +29,43 @@
 #include "sienna_cichlid.h"
 #include "smu_v13_0_10.h"

+const char *hw_ip_names[MAX_HWIP] = {
+   [GC_HWIP]   = "GC",
+   [HDP_HWIP]  = "HDP",
+   [SDMA0_HWIP]= "SDMA0",
+   [SDMA1_HWIP]= "SDMA1",
+   [SDMA2_HWIP]= "SDMA2",
+   [SDMA3_HWIP]= "SDMA3",
+   [SDMA4_HWIP]= "SDMA4",
+   [SDMA5_HWIP]= "SDMA5",
+   [SDMA6_HWIP]= "SDMA6",
+   [SDMA7_HWIP]= "SDMA7",
+   [LSDMA_HWIP]= "LSDMA",
+   [MMHUB_HWIP]= "MMHUB",
+   [ATHUB_HWIP]= "ATHUB",
+   [NBIO_HWIP] = "NBIO",
+   [MP0_HWIP]  = "MP0",
+   [MP1_HWIP]  = "MP1",
+   [UVD_HWIP]  = "UVD/JPEG/VCN",
+   [VCN1_HWIP] = "VCN1",
+   [VCE_HWIP]  = "VCE",
+   [VPE_HWIP]  = "VPE",
+   [DF_HWIP]   = "DF",
+   [DCE_HWIP]  = "DCE",
+   [OSSSYS_HWIP]   = "OSSSYS",
+   [SMUIO_HWIP]= "SMUIO",
+   [PWR_HWIP]  = "PWR",
+   [NBIF_HWIP] = "NBIF",
+   [THM_HWIP]  = "THM",
+   [CLK_HWIP]  = "CLK",
+   [UMC_HWIP]  = "UMC",
+   [RSMU_HWIP] = "RSMU",
+   [XGMI_HWIP] = "XGMI",
+   [DCI_HWIP]  = "DCI",
+   [PCIE_HWIP] = "PCIE",
+};
+
+
 int amdgpu_reset_init(struct amdgpu_device *adev)  {
int ret = 0;
@@ -196,6 +233,31 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, 
size_t count,
   coredump->reset_task_info.process_name,
   coredump->reset_task_info.pid);

+   /* GPU IP's information of the SOC */
+   if (coredump->adev) {
+
+   drm_printf(&p, "\nIP Information\n");
+   drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
+   drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
+   drm_printf(&p, "SOC External Revision id: %d\n",
+  coredump->adev->external_rev_id);
+
+   for (int i = 1; i < MAX_HWIP; i++) {
+   for (int j = 0; j < HWIP_MAX_INSTANCE; j++) {
+   int ver = coredump->adev->ip_versions[i][j];
+
+   if (ver)
+   drm_printf(&p, "HWIP: %s[%d][%d]: 
v%d.%d.%d.%d.%d\n",
+  hw_ip_names[i], i, j,
+  IP_VERSION_MAJ(ver),
+  IP_VERSION_MIN(ver),
+  IP_VERSION_REV(ver),
+  IP_VERSION_VARIANT(ver),
+

Re: [PATCH 1/1] drm/amdgpu: drop setting buffer funcs in sdma442

2024-03-15 Thread Lazar, Lijo



On 3/15/2024 5:45 PM, Ma, Le wrote:
> [AMD Official Use Only - General]
> 
>  
>  
>> -Original Message-
>> From: Lazar, Lijo <_Lijo.Lazar@amd.com_ >
>> Sent: Friday, March 15, 2024 6:14 PM
>> To: Ma, Le <_Le.Ma@amd.com_ >; 
>> _amd-gfx@lists.freedesktop.org_
> 
>> Cc: Zhang, Hawking <_Hawking.Zhang@amd.com_ >; 
>> Song, Asher
>> <_Asher.Song@amd.com_ >; Deucher, Alexander
> <_Alexander.Deucher@amd.com_ >
>> Subject: Re: [PATCH 1/1] drm/amdgpu: drop setting buffer funcs in sdma442
>> 
>> 
>> 
>> On 3/15/2024 2:46 PM, Le Ma wrote:
>> > To fix the entity rq NULL issue. This setting has been moved to upper 
>> > level.
>> >
>> 
>> Need to call amdgpu_ttm_set_buffer_funcs_status(adev, true/false) in
>> mode-2 reset handlers as well.
>  
> Thanks for pointing out this. I think we can make another separated
> patch to handle it for mode2 since this patch is for alignment purpose.
> Actually, the set_buffer_funcs will not be unset/set in reset case as
> the conditions below:
>  
> void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool
> enable)
> {
>     struct ttm_resource_manager *man =
> ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
>     uint64_t size;
>     int r;
>  
>     if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
>     adev->mman.buffer_funcs_enabled == enable ||
> adev->gmc.is_app_apu)
>     return;
>  
>  

Thanks for clarifying.

In this case, we don't require that since the in-reset condition is set. I
saw amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true) getting called in
amdgpu_do_asic_reset() and thought it was affected by the reset.

Thanks,
Lijo

>> 
>> Thanks,
>> Lijo
>> 
>> > Fixes b70438004a14 ("drm/amdgpu: move buffer funcs setting up a
>> > level")
>> >
>> > Signed-off-by: Le Ma <_le.ma@amd.com_ >
>> > ---
>> >  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 20 +---
>> >  1 file changed, 1 insertion(+), 19 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>> > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>> > index eaa4f5f49949..589a734982a7 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
>> > @@ -431,16 +431,11 @@ static void sdma_v4_4_2_inst_gfx_stop(struct
>> amdgpu_device *adev,
>> >  struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
>> >  u32 doorbell_offset, doorbell;
>> >  u32 rb_cntl, ib_cntl;
>> > -   int i, unset = 0;
>> > +   int i;
>> >
>> >  for_each_inst(i, inst_mask) {
>> >  sdma[i] = &adev->sdma.instance[i].ring;
>> >
>> > -   if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
>> > -   amdgpu_ttm_set_buffer_funcs_status(adev, false);
>> > -   unset = 1;
>> > -   }
>> > -
>> >  rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
>> >  rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL,
>> RB_ENABLE, 0);
>> >  WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl); @@ -
>> 490,17 +485,10 @@
>> > static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev,
>> >  struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
>> >  u32 rb_cntl, ib_cntl;
>> >  int i;
>> > -   bool unset = false;
>> >
>> >  for_each_inst(i, inst_mask) {
>> >  sdma[i] = &adev->sdma.instance[i].page;
>> >
>> > -   if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
>> > -   (!unset)) {
>> > -   amdgpu_ttm_set_buffer_funcs_status(adev, false);
>> > -   unset = true;
>> > -   }
>> > -
>> >  rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
>> >  rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
>> >  RB_ENABLE, 0);
>> > @@ -950,13 +938,7 @@ static int sdma_v4_4_2_inst_start(struct
>> amdgpu_device *adev,
>> >  r = amdgpu_ring_test_helper(page);
>> >  if (r)
>> >  return r;
>> > -
>> > -   if (adev->mman.buffer_funcs_ring == page)
>> > -   amdgpu_ttm_set_buffer_funcs_status(adev,
>> true);
>> >  }
>> > -
>> > -   if (adev->mman.buffer_funcs_ring == ring)
>> > -   amdgpu_ttm_set_buffer_funcs_status(adev, true);
>> >  }
>> >
>> >  return r;


[PATCH 05/10] drivers: use new capable_any functionality

2024-03-15 Thread Christian Göttsche
Use the newly added capable_any function in appropriate cases, where a
task is required to have either of two capabilities.

Reorder CAP_SYS_ADMIN last.

Signed-off-by: Christian Göttsche 
Acked-by: Alexander Gordeev  (s390 portion)
---
v4:
   Additional usage in kfd_ioctl()
v3:
   rename to capable_any()
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 +--
 drivers/net/caif/caif_serial.c   | 2 +-
 drivers/s390/block/dasd_eckd.c   | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index dfa8c69532d4..8c7ebca01c17 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -3290,8 +3290,7 @@ static long kfd_ioctl(struct file *filep, unsigned int 
cmd, unsigned long arg)
 * more priviledged access.
 */
if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
-   if (!capable(CAP_CHECKPOINT_RESTORE) &&
-   !capable(CAP_SYS_ADMIN)) {
+   if (!capable_any(CAP_CHECKPOINT_RESTORE, CAP_SYS_ADMIN)) {
retcode = -EACCES;
goto err_i1;
}
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index ed3a589def6b..e908b9ce57dc 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -326,7 +326,7 @@ static int ldisc_open(struct tty_struct *tty)
/* No write no play */
if (tty->ops->write == NULL)
return -EOPNOTSUPP;
-   if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_TTY_CONFIG))
+   if (!capable_any(CAP_SYS_TTY_CONFIG, CAP_SYS_ADMIN))
return -EPERM;
 
/* release devices to avoid name collision */
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 373c1a86c33e..8f9a5136306a 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -5384,7 +5384,7 @@ static int dasd_symm_io(struct dasd_device *device, void 
__user *argp)
char psf0, psf1;
int rc;
 
-   if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RAWIO))
+   if (!capable_any(CAP_SYS_RAWIO, CAP_SYS_ADMIN))
return -EACCES;
psf0 = psf1 = 0;
 
-- 
2.43.0
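
For readers not yet familiar with the helper, its assumed semantics are roughly
the following (illustrative sketch only; the real capable_any() from this
series presumably also takes care of audit logging so the first check does not
record a denial when the second one succeeds):

static inline bool capable_any(int cap1, int cap2)
{
	/* Sketch: true if the task has either capability. */
	return capable(cap1) || capable(cap2);
}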



Re: [PATCH] drm/amdgpu/vpe: power on vpe when hw_init

2024-03-15 Thread Alex Deucher
On Thu, Mar 14, 2024 at 9:40 PM Lee, Peyton  wrote:
>
> [AMD Official Use Only - General]
>
> Hi Alex
>
> > I think it will continue to be powered up until a VPE job comes in and 
> > completes and the idle handler gets scheduled.  If a VPE job doesn't come 
> > in, it will stay powered up I think.
> Yes, correct.
> And after the VPE is called to do initialization, a simple test is executed 
> to check the VPE status, and the idle handler gets scheduled to power off the 
> VPE when the test finishes.

Does the IB test take care of this?  I just want to confirm that we
aren't leaving it powered up unless a user runs a VPE workload which
might not happen.

Alex


>
> Thanks,
> Peyton
>
> -Original Message-
> From: Alex Deucher 
> Sent: Thursday, March 14, 2024 8:58 PM
> To: Lee, Peyton 
> Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander 
> ; Zhang, Yifan ; Ma, Li 
> ; Yu, Lang 
> Subject: Re: [PATCH] drm/amdgpu/vpe: power on vpe when hw_init
>
> On Wed, Mar 13, 2024 at 9:33 PM Lee, Peyton  wrote:
> >
> > [AMD Official Use Only - General]
> >
> > Hi Alex,
> >
> > There are two places where VPE will lose power: When there is a system call 
> > to vpe_hw_fini(), and the vpe-thread finds that VEP has no jobs in the 
> > queue.
> > This patch is to make sure that VPE is power up before loading firmware.
>
> I think it will continue to be powered up until a VPE job comes in and 
> completes and the idle handler gets scheduled.  If a VPE job doesn't come in, 
> it will stay powered up I think.
>
> Alex
>
> >
> > Thanks,
> > Peyton
> > -Original Message-
> > From: Alex Deucher 
> > Sent: Wednesday, March 13, 2024 9:17 PM
> > To: Lee, Peyton 
> > Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> > ; Zhang, Yifan ; Ma,
> > Li ; Yu, Lang 
> > Subject: Re: [PATCH] drm/amdgpu/vpe: power on vpe when hw_init
> >
> > On Wed, Mar 13, 2024 at 7:41 AM Peyton Lee  wrote:
> > >
> > > To fix mode2 reset failure.
> > > Should power on VPE when hw_init.
> >
> > When does it get powered down again?  Won't this leave it powered up?
> >
> > Alex
> >
> > >
> > > Signed-off-by: Peyton Lee 
> > > ---
> > >  drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 6 ++
> > >  1 file changed, 6 insertions(+)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
> > > index 70c5cc80ecdc..ecfe0f36e83e 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
> > > @@ -396,6 +396,12 @@ static int vpe_hw_init(void *handle)
> > > struct amdgpu_vpe *vpe = &adev->vpe;
> > > int ret;
> > >
> > > +   /* Power on VPE */
> > > +   ret = amdgpu_device_ip_set_powergating_state(adev, 
> > > AMD_IP_BLOCK_TYPE_VPE,
> > > +AMD_PG_STATE_UNGATE);
> > > +   if (ret)
> > > +   return ret;
> > > +
> > > ret = vpe_load_microcode(vpe);
> > > if (ret)
> > > return ret;
> > > --
> > > 2.34.1
> > >


Re: [PATCH] drm/amdgpu: add the hw_ip version of all IP's

2024-03-15 Thread Alex Deucher
On Fri, Mar 15, 2024 at 8:13 AM Sunil Khatri  wrote:
>
> Add all the IP's version information on a SOC to the
> devcoredump.
>
> Signed-off-by: Sunil Khatri 

This looks great.

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 62 +++
>  1 file changed, 62 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> index a0dbccad2f53..3d4bfe0a5a7c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> @@ -29,6 +29,43 @@
>  #include "sienna_cichlid.h"
>  #include "smu_v13_0_10.h"
>
> +const char *hw_ip_names[MAX_HWIP] = {
> +   [GC_HWIP]   = "GC",
> +   [HDP_HWIP]  = "HDP",
> +   [SDMA0_HWIP]= "SDMA0",
> +   [SDMA1_HWIP]= "SDMA1",
> +   [SDMA2_HWIP]= "SDMA2",
> +   [SDMA3_HWIP]= "SDMA3",
> +   [SDMA4_HWIP]= "SDMA4",
> +   [SDMA5_HWIP]= "SDMA5",
> +   [SDMA6_HWIP]= "SDMA6",
> +   [SDMA7_HWIP]= "SDMA7",
> +   [LSDMA_HWIP]= "LSDMA",
> +   [MMHUB_HWIP]= "MMHUB",
> +   [ATHUB_HWIP]= "ATHUB",
> +   [NBIO_HWIP] = "NBIO",
> +   [MP0_HWIP]  = "MP0",
> +   [MP1_HWIP]  = "MP1",
> +   [UVD_HWIP]  = "UVD/JPEG/VCN",
> +   [VCN1_HWIP] = "VCN1",
> +   [VCE_HWIP]  = "VCE",
> +   [VPE_HWIP]  = "VPE",
> +   [DF_HWIP]   = "DF",
> +   [DCE_HWIP]  = "DCE",
> +   [OSSSYS_HWIP]   = "OSSSYS",
> +   [SMUIO_HWIP]= "SMUIO",
> +   [PWR_HWIP]  = "PWR",
> +   [NBIF_HWIP] = "NBIF",
> +   [THM_HWIP]  = "THM",
> +   [CLK_HWIP]  = "CLK",
> +   [UMC_HWIP]  = "UMC",
> +   [RSMU_HWIP] = "RSMU",
> +   [XGMI_HWIP] = "XGMI",
> +   [DCI_HWIP]  = "DCI",
> +   [PCIE_HWIP] = "PCIE",
> +};
> +
> +
>  int amdgpu_reset_init(struct amdgpu_device *adev)
>  {
> int ret = 0;
> @@ -196,6 +233,31 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, 
> size_t count,
>coredump->reset_task_info.process_name,
>coredump->reset_task_info.pid);
>
> +   /* GPU IP's information of the SOC */
> +   if (coredump->adev) {
> +
> +   drm_printf(&p, "\nIP Information\n");
> +   drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
> +   drm_printf(&p, "SOC Revision id: %d\n", 
> coredump->adev->rev_id);
> +   drm_printf(&p, "SOC External Revision id: %d\n",
> +  coredump->adev->external_rev_id);
> +
> +   for (int i = 1; i < MAX_HWIP; i++) {
> +   for (int j = 0; j < HWIP_MAX_INSTANCE; j++) {
> +   int ver = coredump->adev->ip_versions[i][j];
> +
> +   if (ver)
> +   drm_printf(&p, "HWIP: %s[%d][%d]: 
> v%d.%d.%d.%d.%d\n",
> +  hw_ip_names[i], i, j,
> +  IP_VERSION_MAJ(ver),
> +  IP_VERSION_MIN(ver),
> +  IP_VERSION_REV(ver),
> +  IP_VERSION_VARIANT(ver),
> +  IP_VERSION_SUBREV(ver));
> +   }
> +   }
> +   }
> +
> if (coredump->ring) {
> drm_printf(&p, "\nRing timed out details\n");
> drm_printf(&p, "IP Type: %d Ring Name: %s\n",
> --
> 2.34.1
>


Re: [PATCH] drm/amdgpu: add the hw_ip version of all IP's

2024-03-15 Thread Khatri, Sunil



On 3/15/2024 6:45 PM, Alex Deucher wrote:

On Fri, Mar 15, 2024 at 8:13 AM Sunil Khatri  wrote:

Add all the IP's version information on a SOC to the
devcoredump.

Signed-off-by: Sunil Khatri 

This looks great.

Reviewed-by: Alex Deucher 


Thanks Alex




---
  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 62 +++
  1 file changed, 62 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index a0dbccad2f53..3d4bfe0a5a7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -29,6 +29,43 @@
  #include "sienna_cichlid.h"
  #include "smu_v13_0_10.h"

+const char *hw_ip_names[MAX_HWIP] = {
+   [GC_HWIP]   = "GC",
+   [HDP_HWIP]  = "HDP",
+   [SDMA0_HWIP]= "SDMA0",
+   [SDMA1_HWIP]= "SDMA1",
+   [SDMA2_HWIP]= "SDMA2",
+   [SDMA3_HWIP]= "SDMA3",
+   [SDMA4_HWIP]= "SDMA4",
+   [SDMA5_HWIP]= "SDMA5",
+   [SDMA6_HWIP]= "SDMA6",
+   [SDMA7_HWIP]= "SDMA7",
+   [LSDMA_HWIP]= "LSDMA",
+   [MMHUB_HWIP]= "MMHUB",
+   [ATHUB_HWIP]= "ATHUB",
+   [NBIO_HWIP] = "NBIO",
+   [MP0_HWIP]  = "MP0",
+   [MP1_HWIP]  = "MP1",
+   [UVD_HWIP]  = "UVD/JPEG/VCN",
+   [VCN1_HWIP] = "VCN1",
+   [VCE_HWIP]  = "VCE",
+   [VPE_HWIP]  = "VPE",
+   [DF_HWIP]   = "DF",
+   [DCE_HWIP]  = "DCE",
+   [OSSSYS_HWIP]   = "OSSSYS",
+   [SMUIO_HWIP]= "SMUIO",
+   [PWR_HWIP]  = "PWR",
+   [NBIF_HWIP] = "NBIF",
+   [THM_HWIP]  = "THM",
+   [CLK_HWIP]  = "CLK",
+   [UMC_HWIP]  = "UMC",
+   [RSMU_HWIP] = "RSMU",
+   [XGMI_HWIP] = "XGMI",
+   [DCI_HWIP]  = "DCI",
+   [PCIE_HWIP] = "PCIE",
+};
+
+
  int amdgpu_reset_init(struct amdgpu_device *adev)
  {
 int ret = 0;
@@ -196,6 +233,31 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, 
size_t count,
coredump->reset_task_info.process_name,
coredump->reset_task_info.pid);

+   /* GPU IP's information of the SOC */
+   if (coredump->adev) {
+
+   drm_printf(&p, "\nIP Information\n");
+   drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
+   drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
+   drm_printf(&p, "SOC External Revision id: %d\n",
+  coredump->adev->external_rev_id);
+
+   for (int i = 1; i < MAX_HWIP; i++) {
+   for (int j = 0; j < HWIP_MAX_INSTANCE; j++) {
+   int ver = coredump->adev->ip_versions[i][j];
+
+   if (ver)
+   drm_printf(&p, "HWIP: %s[%d][%d]: 
v%d.%d.%d.%d.%d\n",
+  hw_ip_names[i], i, j,
+  IP_VERSION_MAJ(ver),
+  IP_VERSION_MIN(ver),
+  IP_VERSION_REV(ver),
+  IP_VERSION_VARIANT(ver),
+  IP_VERSION_SUBREV(ver));
+   }
+   }
+   }
+
 if (coredump->ring) {
 drm_printf(&p, "\nRing timed out details\n");
 drm_printf(&p, "IP Type: %d Ring Name: %s\n",
--
2.34.1



[PATCH v6 1/2] drm/amdgpu: implement TLB flush fence

2024-03-15 Thread Shashank Sharma
From: Christian Koenig 

The problem is that when (for example) 4k pages are replaced
with a single 2M page we need to wait for the change to be flushed
out by invalidating the TLB before the PT can be freed.

Solve this by moving the TLB flush into a DMA-fence object which
can be used to delay the freeing of the PT BOs until it is signaled.
Also remove the existing TLB flush callback and the vm->last_tlb_flush fence
and replace them with the new method of flushing the TLB.

V2: (Shashank)
- rebase
- set dma_fence_error only in case of error
- add tlb_flush fence only when PT/PD BO is locked (Felix)
- use vm->pasid when f is NULL (Mukul)

V4: - add a wait for (f->dependency) in tlb_fence_work (Christian)
- move the misplaced fence_create call to the end (Philip)

V5: - free the f->dependency properly (Christian)

V6: (Shashank)
- added some cleanup from the HW seq patch in this patch.
- introduce params.needs_flush and its usage in this patch.
- remove vm->last_tlb_flush and tlb_cb.
- rebase without TLB HW seq patch.

Cc: Christian Koenig 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Cc: Alex Deucher 
Acked-by: Felix Kuehling 
Acked-by: Rajneesh Bhardwaj 
Tested-by: Rajneesh Bhardwaj 
Reviewed-by: Shashank Sharma 
Signed-off-by: Christian König 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  77 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  26 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c|   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c   |   4 +
 .../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c  | 112 ++
 7 files changed, 141 insertions(+), 87 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..f24f11ac3e92 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
-   amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
+   amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+   amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 81fb3465e197..3b64623f32ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -111,21 +111,6 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
 };
 
-/**
- * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence
- */
-struct amdgpu_vm_tlb_seq_struct {
-   /**
-* @vm: pointer to the amdgpu_vm structure to set the fence sequence on
-*/
-   struct amdgpu_vm *vm;
-
-   /**
-* @cb: callback
-*/
-   struct dma_fence_cb cb;
-};
-
 /**
  * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
  *
@@ -868,23 +853,6 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
return r;
 }
 
-/**
- * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
- * @fence: unused
- * @cb: the callback structure
- *
- * Increments the tlb sequence to make sure that future CS execute a VM flush.
- */
-static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
-struct dma_fence_cb *cb)
-{
-   struct amdgpu_vm_tlb_seq_struct *tlb_cb;
-
-   tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
-   atomic64_inc(&tlb_cb->vm->tlb_seq);
-   kfree(tlb_cb);
-}
-
 /**
  * amdgpu_vm_update_range - update a range in the vm page table
  *
@@ -917,7 +885,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
   struct dma_fence **fence)
 {
struct amdgpu_vm_update_params params;
-   struct amdgpu_vm_tlb_seq_struct *tlb_cb;
struct amdgpu_res_cursor cursor;
enum amdgpu_sync_mode sync_mode;
int r, idx;
@@ -925,12 +892,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
if (!drm_dev_enter(adev_to_drm(adev), &idx))
return -ENODEV;
 
-   tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
-   if (!tlb_cb) {
-   r = -ENOMEM;
-   goto error_unlock;
-   }
-
/* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
 * heavy-weight flush TLB unconditionally.
 */
@@ -948,6 +909,7 @@ int amdgpu_vm_update
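
Since the archive cuts the patch off before the new amdgpu_vm_tlb_fence.c file,
the following is only a rough sketch of the general shape such a TLB-flush
fence can take; all names and details here are illustrative assumptions, not
the actual implementation:

/* Sketch: a dma_fence that is signaled from a work item once the TLB
 * invalidation has completed, so that freeing of PT BOs can wait on it. */
struct tlb_flush_fence {
	struct dma_fence base;
	spinlock_t lock;
	struct work_struct work;
	struct dma_fence *dependency;	/* e.g. the page table update fence */
};

static const char *tlb_fence_get_driver_name(struct dma_fence *f)
{
	return "amdgpu";
}

static const char *tlb_fence_get_timeline_name(struct dma_fence *f)
{
	return "tlb_flush";
}

static const struct dma_fence_ops tlb_fence_ops = {
	.get_driver_name = tlb_fence_get_driver_name,
	.get_timeline_name = tlb_fence_get_timeline_name,
};

static void tlb_fence_work_func(struct work_struct *work)
{
	struct tlb_flush_fence *f = container_of(work, typeof(*f), work);

	/* Wait for the page table update to complete before invalidating. */
	if (f->dependency) {
		dma_fence_wait(f->dependency, false);
		dma_fence_put(f->dependency);
	}

	/* ... issue the actual TLB invalidation here ... */

	/* Signal so that the deferred PT BO freeing can proceed. */
	dma_fence_signal(&f->base);
	dma_fence_put(&f->base);
}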

[PATCH v6 2/2] drm/amdgpu: sync page table freeing with tlb flush

2024-03-15 Thread Shashank Sharma
The idea behind this patch is to delay the freeing of PT entry objects
until the TLB flush is done.

This patch:
- Adds a tlb_flush_waitlist in amdgpu_vm_update_params which will keep the
  objects that need to be freed after tlb_flush.
- Adds PT entries in this list in amdgpu_vm_ptes_update after finding
  the PT entry.
- Changes functionality of amdgpu_vm_pt_free_dfs from (df_search + free)
  to simply freeing the BOs, and renames it to
  amdgpu_vm_pt_free_list to reflect this change.
- Exports function amdgpu_vm_pt_free_list to be called directly.
- Calls amdgpu_vm_pt_free_list directly from amdgpu_vm_update_range.

V2: rebase
V4: Addressed review comments from Christian
- add only locked PTEs entries in TLB flush waitlist.
- do not create a separate function for list flush.
- do not create a new lock for TLB flush.
- there is no need to wait on tlb_flush_fence exclusively.

V5: Addressed review comments from Christian
- change the amdgpu_vm_pt_free_dfs's functionality to simple freeing
  of the objects and rename it.
- add all the PTE objects in params->tlb_flush_waitlist
- let amdgpu_vm_pt_free_root handle the freeing of BOs independently
- call amdgpu_vm_pt_free_list directly

V6: Rebase

Cc: Christian König 
Cc: Alex Deucher 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Acked-by: Felix Kuehling 
Acked-by: Rajneesh Bhardwaj 
Tested-by: Rajneesh Bhardwaj 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 53 +--
 3 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3b64623f32ea..9845d5077750 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -911,6 +911,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
params.unlocked = unlocked;
params.needs_flush = flush_tlb;
params.allow_override = allow_override;
+   INIT_LIST_HEAD(¶ms.tlb_flush_waitlist);
 
/* Implicitly sync to command submissions in the same VM before
 * unmapping. Sync to moving fences before mapping.
@@ -1003,6 +1004,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
   DMA_RESV_USAGE_BOOKKEEP);
}
 
+   if (params.needs_flush)
+   amdgpu_vm_pt_free_list(adev, ¶ms);
+
 error_unlock:
amdgpu_vm_eviction_unlock(vm);
drm_dev_exit(idx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ba92f431f4e0..cc6a74a79f52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -266,6 +266,11 @@ struct amdgpu_vm_update_params {
 * to be overridden for NUMA local memory.
 */
bool allow_override;
+
+   /**
+* @tlb_flush_waitlist: temporary storage for BOs until tlb_flush
+*/
+   struct list_head tlb_flush_waitlist;
 };
 
 struct amdgpu_vm_update_funcs {
@@ -546,6 +551,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params 
*params,
  uint64_t start, uint64_t end,
  uint64_t dst, uint64_t flags);
 void amdgpu_vm_pt_free_work(struct work_struct *work);
+void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
+   struct amdgpu_vm_update_params *params);
 
 #if defined(CONFIG_DEBUG_FS)
 void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 601df0ce8290..440dc8c581fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -622,40 +622,30 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)
 }
 
 /**
- * amdgpu_vm_pt_free_dfs - free PD/PT levels
+ * amdgpu_vm_pt_free_list - free PD/PT levels
  *
  * @adev: amdgpu device structure
- * @vm: amdgpu vm structure
- * @start: optional cursor where to start freeing PDs/PTs
- * @unlocked: vm resv unlock status
+ * @params: see amdgpu_vm_update_params definition
  *
- * Free the page directory or page table level and all sub levels.
+ * Free the page directory objects saved in the flush list
  */
-static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *start,
- bool unlocked)
+void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
+   struct amdgpu_vm_update_params *params)
 {
-   struct amdgpu_vm_pt_cursor cursor;
-   struct amdgpu_vm_bo_base *entry;
+   struct amdgpu_vm_bo_base *entry, *next;
+   struct amdgpu_vm *vm = params->vm

Re: [PATCH v6 1/2] drm/amdgpu: implement TLB flush fence

2024-03-15 Thread Christian König

Am 15.03.24 um 14:25 schrieb Shashank Sharma:

From: Christian Koenig 

The problem is that when (for example) 4k pages are replaced
with a single 2M page we need to wait for change to be flushed
out by invalidating the TLB before the PT can be freed.

Solve this by moving the TLB flush into a DMA-fence object which
can be used to delay the freeing of the PT BOs until it is signaled.
Also remove existing TLB flush cb and vm->last_tlb_flush fence and
replace it with new method of flushing tlb.

V2: (Shashank)
 - rebase
 - set dma_fence_error only in case of error
 - add tlb_flush fence only when PT/PD BO is locked (Felix)
 - use vm->pasid when f is NULL (Mukul)

V4: - add a wait for (f->dependency) in tlb_fence_work (Christian)
 - move the misplaced fence_create call to the end (Philip)

V5: - free the f->dependency properly (Christian)

V6: (Shashank)
 - added some cleanup from the HW seq patch in this patch.
 - introduce params.needs_flush and its usage in this patch.
 - remove vm->last_tlb_flush and tlb_cb.
 - rebase without TLB HW seq patch.

Cc: Christian Koenig 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Cc: Alex Deucher 
Acked-by: Felix Kuehling 
Acked-by: Rajneesh Bhardwaj 
Tested-by: Rajneesh Bhardwaj 
Reviewed-by: Shashank Sharma 
Signed-off-by: Christian König 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  77 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  26 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c|   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c   |   4 +
  .../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c  | 112 ++
  7 files changed, 141 insertions(+), 87 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..f24f11ac3e92 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
-   amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
+   amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+   amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 81fb3465e197..3b64623f32ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -111,21 +111,6 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
  };
  
-/**

- * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence
- */
-struct amdgpu_vm_tlb_seq_struct {
-   /**
-* @vm: pointer to the amdgpu_vm structure to set the fence sequence on
-*/
-   struct amdgpu_vm *vm;
-
-   /**
-* @cb: callback
-*/
-   struct dma_fence_cb cb;
-};
-
  /**
   * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
   *
@@ -868,23 +853,6 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
return r;
  }
  
-/**

- * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
- * @fence: unused
- * @cb: the callback structure
- *
- * Increments the tlb sequence to make sure that future CS execute a VM flush.
- */
-static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
-struct dma_fence_cb *cb)
-{
-   struct amdgpu_vm_tlb_seq_struct *tlb_cb;
-
-   tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
-   atomic64_inc(&tlb_cb->vm->tlb_seq);
-   kfree(tlb_cb);
-}
-
  /**
   * amdgpu_vm_update_range - update a range in the vm page table
   *
@@ -917,7 +885,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
   struct dma_fence **fence)
  {
struct amdgpu_vm_update_params params;
-   struct amdgpu_vm_tlb_seq_struct *tlb_cb;
struct amdgpu_res_cursor cursor;
enum amdgpu_sync_mode sync_mode;
int r, idx;
@@ -925,12 +892,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
if (!drm_dev_enter(adev_to_drm(adev), &idx))
return -ENODEV;
  
-	tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);

-   if (!tlb_cb) {
-   r = -ENOMEM;
-   goto error_unlock;
-   }
-
/* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
 * heavy-weight flus

Re: [PATCH] drm/sched: fix null-ptr-deref in init entity

2024-03-15 Thread Christian König

Am 15.03.24 um 03:39 schrieb vitaly.pros...@amd.com:

From: Vitaly Prosyak 

The bug can be triggered by sending an amdgpu_cs_wait_ioctl
to the AMDGPU DRM driver on any ASIC with a valid context.
The bug was reported by Joonkyo Jung .
For example the following code:

 static void Syzkaller2(int fd)
 {
union drm_amdgpu_ctx arg1;
union drm_amdgpu_wait_cs arg2;

arg1.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
ret = drmIoctl(fd, 0x140106442 /* amdgpu_ctx_ioctl */, &arg1);

arg2.in.handle = 0x0;
arg2.in.timeout = 0x2;
arg2.in.ip_type = AMD_IP_VPE /* 0x9 */;
	arg2.in.ip_instance = 0x0;
arg2.in.ring = 0x0;
arg2.in.ctx_id = arg1.out.alloc.ctx_id;

	drmIoctl(fd, 0xc0206449 /* AMDGPU_WAIT_CS */, &arg2);
 }

One could assume that the ioctl AMDGPU_WAIT_CS returns an error when no job
was previously submitted, but the following commit
1decbf6bb0b4dc56c9da6c5e57b994ebfc2be3aa
modified the logic and allowed sched_rq to be NULL.

As a result, when there is no job, the ioctl AMDGPU_WAIT_CS returns success.
The change fixes a null-ptr-deref in entity init, and the stack trace below
demonstrates the error condition:

[  +0.07] BUG: kernel NULL pointer dereference, address: 0028
[  +0.007086] #PF: supervisor read access in kernel mode
[  +0.005234] #PF: error_code(0x) - not-present page
[  +0.005232] PGD 0 P4D 0
[  +0.002501] Oops:  [#1] PREEMPT SMP KASAN NOPTI
[  +0.005034] CPU: 10 PID: 9229 Comm: amd_basic Tainted: GB   WL 
6.7.0+ #4
[  +0.007797] Hardware name: ASUS System Product Name/ROG STRIX B550-F GAMING 
(WI-FI), BIOS 1401 12/03/2020
[  +0.009798] RIP: 0010:drm_sched_entity_init+0x2d3/0x420 [gpu_sched]
[  +0.006426] Code: 80 00 00 00 00 00 00 00 e8 1a 81 82 e0 49 89 9c 24 c0 00 00 00 4c 
89 ef e8 4a 80 82 e0 49 8b 5d 00 48 8d 7b 28 e8 3d 80 82 e0 <48> 83 7b 28 00 0f 
84 28 01 00 00 4d 8d ac 24 98 00 00 00 49 8d 5c
[  +0.019094] RSP: 0018:c90014c1fa40 EFLAGS: 00010282
[  +0.005237] RAX: 0001 RBX:  RCX: 8113f3fa
[  +0.007326] RDX: fbfff0a7889d RSI: 0008 RDI: 853c44e0
[  +0.007264] RBP: c90014c1fa80 R08: 0001 R09: fbfff0a7889c
[  +0.007266] R10: 853c44e7 R11: 0001 R12: 8881a719b010
[  +0.007263] R13: 88810d412748 R14: 0002 R15: 
[  +0.007264] FS:  77045540() GS:8883cc90() 
knlGS:
[  +0.008236] CS:  0010 DS:  ES:  CR0: 80050033
[  +0.005851] CR2: 0028 CR3: 00011912e000 CR4: 00350ef0
[  +0.007175] Call Trace:
[  +0.002561]  
[  +0.002141]  ? show_regs+0x6a/0x80
[  +0.003473]  ? __die+0x25/0x70
[  +0.003124]  ? page_fault_oops+0x214/0x720
[  +0.004179]  ? preempt_count_sub+0x18/0xc0
[  +0.004093]  ? __pfx_page_fault_oops+0x10/0x10
[  +0.004590]  ? srso_return_thunk+0x5/0x5f
[  +0.004000]  ? vprintk_default+0x1d/0x30
[  +0.004063]  ? srso_return_thunk+0x5/0x5f
[  +0.004087]  ? vprintk+0x5c/0x90
[  +0.003296]  ? drm_sched_entity_init+0x2d3/0x420 [gpu_sched]
[  +0.005807]  ? srso_return_thunk+0x5/0x5f
[  +0.004090]  ? _printk+0xb3/0xe0
[  +0.003293]  ? __pfx__printk+0x10/0x10
[  +0.003735]  ? asm_sysvec_apic_timer_interrupt+0x1b/0x20
[  +0.005482]  ? do_user_addr_fault+0x345/0x770
[  +0.004361]  ? exc_page_fault+0x64/0xf0
[  +0.003972]  ? asm_exc_page_fault+0x27/0x30
[  +0.004271]  ? add_taint+0x2a/0xa0
[  +0.003476]  ? drm_sched_entity_init+0x2d3/0x420 [gpu_sched]
[  +0.005812]  amdgpu_ctx_get_entity+0x3f9/0x770 [amdgpu]
[  +0.009530]  ? finish_task_switch.isra.0+0x129/0x470
[  +0.005068]  ? __pfx_amdgpu_ctx_get_entity+0x10/0x10 [amdgpu]
[  +0.010063]  ? __kasan_check_write+0x14/0x20
[  +0.004356]  ? srso_return_thunk+0x5/0x5f
[  +0.004001]  ? mutex_unlock+0x81/0xd0
[  +0.003802]  ? srso_return_thunk+0x5/0x5f
[  +0.004096]  amdgpu_cs_wait_ioctl+0xf6/0x270 [amdgpu]
[  +0.009355]  ? __pfx_amdgpu_cs_wait_ioctl+0x10/0x10 [amdgpu]
[  +0.009981]  ? srso_return_thunk+0x5/0x5f
[  +0.004089]  ? srso_return_thunk+0x5/0x5f
[  +0.004090]  ? __srcu_read_lock+0x20/0x50
[  +0.004096]  drm_ioctl_kernel+0x140/0x1f0 [drm]
[  +0.005080]  ? __pfx_amdgpu_cs_wait_ioctl+0x10/0x10 [amdgpu]
[  +0.009974]  ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
[  +0.005618]  ? srso_return_thunk+0x5/0x5f
[  +0.004088]  ? __kasan_check_write+0x14/0x20
[  +0.004357]  drm_ioctl+0x3da/0x730 [drm]
[  +0.004461]  ? __pfx_amdgpu_cs_wait_ioctl+0x10/0x10 [amdgpu]
[  +0.009979]  ? __pfx_drm_ioctl+0x10/0x10 [drm]
[  +0.004993]  ? srso_return_thunk+0x5/0x5f
[  +0.004090]  ? __kasan_check_write+0x14/0x20
[  +0.004356]  ? srso_return_thunk+0x5/0x5f
[  +0.004090]  ? _raw_spin_lock_irqsave+0x99/0x100
[  +0.004712]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
[  +0.005063]  ? __pfx_arch_do_signal_or_restart+0x10/0x10
[  +0.005477]  ? srso_return_thunk+0x5/0x5f
[  +0.004000]  ? preempt_count_sub+0x18/0xc0
[  +0.004237]  ? srs

Re: [PATCH] drm/sched: fix null-ptr-deref in init entity

2024-03-15 Thread Alex Deucher
On Fri, Mar 15, 2024 at 10:12 AM Christian König
 wrote:
>
> Am 15.03.24 um 03:39 schrieb vitaly.pros...@amd.com:
> > From: Vitaly Prosyak 
> >
> > The bug can be triggered by sending an amdgpu_cs_wait_ioctl
> > to the AMDGPU DRM driver on any ASICs with valid context.
> > The bug was reported by Joonkyo Jung .
> > For example the following code:
> >
> >  static void Syzkaller2(int fd)
> >  {
> >   union drm_amdgpu_ctx arg1;
> >   union drm_amdgpu_wait_cs arg2;
> >
> >   arg1.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
> >   ret = drmIoctl(fd, 0x140106442 /* amdgpu_ctx_ioctl */, &arg1);
> >
> >   arg2.in.handle = 0x0;
> >   arg2.in.timeout = 0x2;
> >   arg2.in.ip_type = AMD_IP_VPE /* 0x9 */;
> >   arg2->in.ip_instance = 0x0;
> >   arg2.in.ring = 0x0;
> >   arg2.in.ctx_id = arg1.out.alloc.ctx_id;
> >
> >   drmIoctl(fd, 0xc0206449 /* AMDGPU_WAIT_CS * /, &arg2);
> >  }
> >
> > The ioctl AMDGPU_WAIT_CS without previously submitted job could be assumed 
> > that
> > the error should be returned, but the following commit 
> > 1decbf6bb0b4dc56c9da6c5e57b994ebfc2be3aa
> > modified the logic and allowed to have sched_rq equal to NULL.
> >
> > As a result when there is no job the ioctl AMDGPU_WAIT_CS returns success.
> > The change fixes null-ptr-deref in init entity and the stack below 
> > demonstrates
> > the error condition:
> >
> > [  +0.07] BUG: kernel NULL pointer dereference, address: 
> > 0028
> > [  +0.007086] #PF: supervisor read access in kernel mode
> > [  +0.005234] #PF: error_code(0x) - not-present page
> > [  +0.005232] PGD 0 P4D 0
> > [  +0.002501] Oops:  [#1] PREEMPT SMP KASAN NOPTI
> > [  +0.005034] CPU: 10 PID: 9229 Comm: amd_basic Tainted: GB   WL
> >  6.7.0+ #4
> > [  +0.007797] Hardware name: ASUS System Product Name/ROG STRIX B550-F 
> > GAMING (WI-FI), BIOS 1401 12/03/2020
> > [  +0.009798] RIP: 0010:drm_sched_entity_init+0x2d3/0x420 [gpu_sched]
> > [  +0.006426] Code: 80 00 00 00 00 00 00 00 e8 1a 81 82 e0 49 89 9c 24 c0 
> > 00 00 00 4c 89 ef e8 4a 80 82 e0 49 8b 5d 00 48 8d 7b 28 e8 3d 80 82 e0 
> > <48> 83 7b 28 00 0f 84 28 01 00 00 4d 8d ac 24 98 00 00 00 49 8d 5c
> > [  +0.019094] RSP: 0018:c90014c1fa40 EFLAGS: 00010282
> > [  +0.005237] RAX: 0001 RBX:  RCX: 
> > 8113f3fa
> > [  +0.007326] RDX: fbfff0a7889d RSI: 0008 RDI: 
> > 853c44e0
> > [  +0.007264] RBP: c90014c1fa80 R08: 0001 R09: 
> > fbfff0a7889c
> > [  +0.007266] R10: 853c44e7 R11: 0001 R12: 
> > 8881a719b010
> > [  +0.007263] R13: 88810d412748 R14: 0002 R15: 
> > 
> > [  +0.007264] FS:  77045540() GS:8883cc90() 
> > knlGS:
> > [  +0.008236] CS:  0010 DS:  ES:  CR0: 80050033
> > [  +0.005851] CR2: 0028 CR3: 00011912e000 CR4: 
> > 00350ef0
> > [  +0.007175] Call Trace:
> > [  +0.002561]  
> > [  +0.002141]  ? show_regs+0x6a/0x80
> > [  +0.003473]  ? __die+0x25/0x70
> > [  +0.003124]  ? page_fault_oops+0x214/0x720
> > [  +0.004179]  ? preempt_count_sub+0x18/0xc0
> > [  +0.004093]  ? __pfx_page_fault_oops+0x10/0x10
> > [  +0.004590]  ? srso_return_thunk+0x5/0x5f
> > [  +0.004000]  ? vprintk_default+0x1d/0x30
> > [  +0.004063]  ? srso_return_thunk+0x5/0x5f
> > [  +0.004087]  ? vprintk+0x5c/0x90
> > [  +0.003296]  ? drm_sched_entity_init+0x2d3/0x420 [gpu_sched]
> > [  +0.005807]  ? srso_return_thunk+0x5/0x5f
> > [  +0.004090]  ? _printk+0xb3/0xe0
> > [  +0.003293]  ? __pfx__printk+0x10/0x10
> > [  +0.003735]  ? asm_sysvec_apic_timer_interrupt+0x1b/0x20
> > [  +0.005482]  ? do_user_addr_fault+0x345/0x770
> > [  +0.004361]  ? exc_page_fault+0x64/0xf0
> > [  +0.003972]  ? asm_exc_page_fault+0x27/0x30
> > [  +0.004271]  ? add_taint+0x2a/0xa0
> > [  +0.003476]  ? drm_sched_entity_init+0x2d3/0x420 [gpu_sched]
> > [  +0.005812]  amdgpu_ctx_get_entity+0x3f9/0x770 [amdgpu]
> > [  +0.009530]  ? finish_task_switch.isra.0+0x129/0x470
> > [  +0.005068]  ? __pfx_amdgpu_ctx_get_entity+0x10/0x10 [amdgpu]
> > [  +0.010063]  ? __kasan_check_write+0x14/0x20
> > [  +0.004356]  ? srso_return_thunk+0x5/0x5f
> > [  +0.004001]  ? mutex_unlock+0x81/0xd0
> > [  +0.003802]  ? srso_return_thunk+0x5/0x5f
> > [  +0.004096]  amdgpu_cs_wait_ioctl+0xf6/0x270 [amdgpu]
> > [  +0.009355]  ? __pfx_amdgpu_cs_wait_ioctl+0x10/0x10 [amdgpu]
> > [  +0.009981]  ? srso_return_thunk+0x5/0x5f
> > [  +0.004089]  ? srso_return_thunk+0x5/0x5f
> > [  +0.004090]  ? __srcu_read_lock+0x20/0x50
> > [  +0.004096]  drm_ioctl_kernel+0x140/0x1f0 [drm]
> > [  +0.005080]  ? __pfx_amdgpu_cs_wait_ioctl+0x10/0x10 [amdgpu]
> > [  +0.009974]  ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
> > [  +0.005618]  ? srso_return_thunk+0x5/0x5f
> > [  +0.004088]  ? __kasan_check_write+0x14/0x20
> > [  +0.004357]  drm_ioctl+0x3da/0x730 [drm]
> > [  +0.004461]  ? __pfx_amdgpu_cs_wait_ioctl+0x10/0x10 [amdgp

Re: [PATCH] drm/sched: fix null-ptr-deref in init entity

2024-03-15 Thread Christian König

Am 15.03.24 um 15:12 schrieb Alex Deucher:

On Fri, Mar 15, 2024 at 10:12 AM Christian König
 wrote:

Am 15.03.24 um 03:39 schrieb vitaly.pros...@amd.com:

From: Vitaly Prosyak 

The bug can be triggered by sending an amdgpu_cs_wait_ioctl
to the AMDGPU DRM driver on any ASICs with valid context.
The bug was reported by Joonkyo Jung .
For example the following code:

  static void Syzkaller2(int fd)
  {
   union drm_amdgpu_ctx arg1;
   union drm_amdgpu_wait_cs arg2;

   arg1.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
   ret = drmIoctl(fd, 0x140106442 /* amdgpu_ctx_ioctl */, &arg1);

   arg2.in.handle = 0x0;
   arg2.in.timeout = 0x2;
   arg2.in.ip_type = AMD_IP_VPE /* 0x9 */;
   arg2->in.ip_instance = 0x0;
   arg2.in.ring = 0x0;
   arg2.in.ctx_id = arg1.out.alloc.ctx_id;

   drmIoctl(fd, 0xc0206449 /* AMDGPU_WAIT_CS * /, &arg2);
  }

The ioctl AMDGPU_WAIT_CS without previously submitted job could be assumed that
the error should be returned, but the following commit 
1decbf6bb0b4dc56c9da6c5e57b994ebfc2be3aa
modified the logic and allowed to have sched_rq equal to NULL.

As a result when there is no job the ioctl AMDGPU_WAIT_CS returns success.
The change fixes null-ptr-deref in init entity and the stack below demonstrates
the error condition:

[  +0.07] BUG: kernel NULL pointer dereference, address: 0028
[  +0.007086] #PF: supervisor read access in kernel mode
[  +0.005234] #PF: error_code(0x) - not-present page
[  +0.005232] PGD 0 P4D 0
[  +0.002501] Oops:  [#1] PREEMPT SMP KASAN NOPTI
[  +0.005034] CPU: 10 PID: 9229 Comm: amd_basic Tainted: GB   WL 
6.7.0+ #4
[  +0.007797] Hardware name: ASUS System Product Name/ROG STRIX B550-F GAMING 
(WI-FI), BIOS 1401 12/03/2020
[  +0.009798] RIP: 0010:drm_sched_entity_init+0x2d3/0x420 [gpu_sched]
[  +0.006426] Code: 80 00 00 00 00 00 00 00 e8 1a 81 82 e0 49 89 9c 24 c0 00 00 00 4c 
89 ef e8 4a 80 82 e0 49 8b 5d 00 48 8d 7b 28 e8 3d 80 82 e0 <48> 83 7b 28 00 0f 
84 28 01 00 00 4d 8d ac 24 98 00 00 00 49 8d 5c
[  +0.019094] RSP: 0018:c90014c1fa40 EFLAGS: 00010282
[  +0.005237] RAX: 0001 RBX:  RCX: 8113f3fa
[  +0.007326] RDX: fbfff0a7889d RSI: 0008 RDI: 853c44e0
[  +0.007264] RBP: c90014c1fa80 R08: 0001 R09: fbfff0a7889c
[  +0.007266] R10: 853c44e7 R11: 0001 R12: 8881a719b010
[  +0.007263] R13: 88810d412748 R14: 0002 R15: 
[  +0.007264] FS:  77045540() GS:8883cc90() 
knlGS:
[  +0.008236] CS:  0010 DS:  ES:  CR0: 80050033
[  +0.005851] CR2: 0028 CR3: 00011912e000 CR4: 00350ef0
[  +0.007175] Call Trace:
[  +0.002561]  
[  +0.002141]  ? show_regs+0x6a/0x80
[  +0.003473]  ? __die+0x25/0x70
[  +0.003124]  ? page_fault_oops+0x214/0x720
[  +0.004179]  ? preempt_count_sub+0x18/0xc0
[  +0.004093]  ? __pfx_page_fault_oops+0x10/0x10
[  +0.004590]  ? srso_return_thunk+0x5/0x5f
[  +0.004000]  ? vprintk_default+0x1d/0x30
[  +0.004063]  ? srso_return_thunk+0x5/0x5f
[  +0.004087]  ? vprintk+0x5c/0x90
[  +0.003296]  ? drm_sched_entity_init+0x2d3/0x420 [gpu_sched]
[  +0.005807]  ? srso_return_thunk+0x5/0x5f
[  +0.004090]  ? _printk+0xb3/0xe0
[  +0.003293]  ? __pfx__printk+0x10/0x10
[  +0.003735]  ? asm_sysvec_apic_timer_interrupt+0x1b/0x20
[  +0.005482]  ? do_user_addr_fault+0x345/0x770
[  +0.004361]  ? exc_page_fault+0x64/0xf0
[  +0.003972]  ? asm_exc_page_fault+0x27/0x30
[  +0.004271]  ? add_taint+0x2a/0xa0
[  +0.003476]  ? drm_sched_entity_init+0x2d3/0x420 [gpu_sched]
[  +0.005812]  amdgpu_ctx_get_entity+0x3f9/0x770 [amdgpu]
[  +0.009530]  ? finish_task_switch.isra.0+0x129/0x470
[  +0.005068]  ? __pfx_amdgpu_ctx_get_entity+0x10/0x10 [amdgpu]
[  +0.010063]  ? __kasan_check_write+0x14/0x20
[  +0.004356]  ? srso_return_thunk+0x5/0x5f
[  +0.004001]  ? mutex_unlock+0x81/0xd0
[  +0.003802]  ? srso_return_thunk+0x5/0x5f
[  +0.004096]  amdgpu_cs_wait_ioctl+0xf6/0x270 [amdgpu]
[  +0.009355]  ? __pfx_amdgpu_cs_wait_ioctl+0x10/0x10 [amdgpu]
[  +0.009981]  ? srso_return_thunk+0x5/0x5f
[  +0.004089]  ? srso_return_thunk+0x5/0x5f
[  +0.004090]  ? __srcu_read_lock+0x20/0x50
[  +0.004096]  drm_ioctl_kernel+0x140/0x1f0 [drm]
[  +0.005080]  ? __pfx_amdgpu_cs_wait_ioctl+0x10/0x10 [amdgpu]
[  +0.009974]  ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
[  +0.005618]  ? srso_return_thunk+0x5/0x5f
[  +0.004088]  ? __kasan_check_write+0x14/0x20
[  +0.004357]  drm_ioctl+0x3da/0x730 [drm]
[  +0.004461]  ? __pfx_amdgpu_cs_wait_ioctl+0x10/0x10 [amdgpu]
[  +0.009979]  ? __pfx_drm_ioctl+0x10/0x10 [drm]
[  +0.004993]  ? srso_return_thunk+0x5/0x5f
[  +0.004090]  ? __kasan_check_write+0x14/0x20
[  +0.004356]  ? srso_return_thunk+0x5/0x5f
[  +0.004090]  ? _raw_spin_lock_irqsave+0x99/0x100
[  +0.004712]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
[  +0.005063]  ? __pfx_arch_do_signal_or_restart+0x10/0x10
[  +0.005477]  

Re: [PATCH 05/10] drivers: use new capable_any functionality

2024-03-15 Thread Felix Kuehling

On 2024-03-15 7:37, Christian Göttsche wrote:

Use the new added capable_any function in appropriate cases, where a
task is required to have any of two capabilities.

Reorder CAP_SYS_ADMIN last.

Signed-off-by: Christian Göttsche 
Acked-by: Alexander Gordeev  (s390 portion)


Acked-by: Felix Kuehling  (amdkfd portion)



---
v4:
Additional usage in kfd_ioctl()
v3:
rename to capable_any()
---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 +--
  drivers/net/caif/caif_serial.c   | 2 +-
  drivers/s390/block/dasd_eckd.c   | 2 +-
  3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index dfa8c69532d4..8c7ebca01c17 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -3290,8 +3290,7 @@ static long kfd_ioctl(struct file *filep, unsigned int 
cmd, unsigned long arg)
 * more priviledged access.
 */
if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
-   if (!capable(CAP_CHECKPOINT_RESTORE) &&
-   !capable(CAP_SYS_ADMIN)) {
+   if (!capable_any(CAP_CHECKPOINT_RESTORE, CAP_SYS_ADMIN)) {
retcode = -EACCES;
goto err_i1;
}
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index ed3a589def6b..e908b9ce57dc 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -326,7 +326,7 @@ static int ldisc_open(struct tty_struct *tty)
/* No write no play */
if (tty->ops->write == NULL)
return -EOPNOTSUPP;
-   if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_TTY_CONFIG))
+   if (!capable_any(CAP_SYS_TTY_CONFIG, CAP_SYS_ADMIN))
return -EPERM;
  
  	/* release devices to avoid name collision */

diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 373c1a86c33e..8f9a5136306a 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -5384,7 +5384,7 @@ static int dasd_symm_io(struct dasd_device *device, void 
__user *argp)
char psf0, psf1;
int rc;
  
-	if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RAWIO))

+   if (!capable_any(CAP_SYS_RAWIO, CAP_SYS_ADMIN))
return -EACCES;
psf0 = psf1 = 0;
  


[linux-next:master] BUILD REGRESSION a1e7655b77e3391b58ac28256789ea45b1685abb

2024-03-15 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: a1e7655b77e3391b58ac28256789ea45b1685abb  Add linux-next specific 
files for 20240315

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- alpha-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arc-allmodconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arc-allyesconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arm-allmodconfig
|   |-- 
arch-arm-mach-omap2-prm33xx.c:warning:expecting-prototype-for-am33xx_prm_global_warm_sw_reset().-Prototype-was-for-am33xx_prm_global_sw_reset()-instead
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arm-allyesconfig
|   |-- 
arch-arm-mach-omap2-prm33xx.c:warning:expecting-prototype-for-am33xx_prm_global_warm_sw_reset().-Prototype-was-for-am33xx_prm_global_sw_reset()-instead
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arm-vexpress_defconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arm64-defconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- arm64-randconfig-004-20240315
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|-- csky-allmodconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- csky-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- csky-randconfig-001-20240315
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|-- csky-randconfig-002-20240315
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- i386-allmodconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- i386-allyesconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- i386-buildonly-randconfig-003-20240315
|   `-- 
drivers-accessibility-speakup-devsynth.c:error:label-at-end-of-compound-statement
|-- loongarch-allmodconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- loongarch-defconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- loongarch-randconfig-001-20240315
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|-- m68k-allmodconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- m68k-allyesconfig
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue()-instead
|-- microblaze-allmodconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_vcn.c:warning:.bin-directive-output-may-be-truncated-writing-bytes-into-a-region-of-size-between-and
|   `-- 
fs-ubifs-journal.c:warning:expecting-prototype-for-wake_up_reservation().-Prototype-was-for-add_or_start_queue

[PATCH] Documentation: add a page on amdgpu debugging

2024-03-15 Thread Alex Deucher
Covers GPU page fault debugging and adds a reference
to umr.

v2: update client ids to include SQC/G

Signed-off-by: Alex Deucher 
---
 Documentation/gpu/amdgpu/debugging.rst | 79 ++
 Documentation/gpu/amdgpu/index.rst |  1 +
 2 files changed, 80 insertions(+)
 create mode 100644 Documentation/gpu/amdgpu/debugging.rst

diff --git a/Documentation/gpu/amdgpu/debugging.rst 
b/Documentation/gpu/amdgpu/debugging.rst
new file mode 100644
index ..8b7fdcdf1158
--- /dev/null
+++ b/Documentation/gpu/amdgpu/debugging.rst
@@ -0,0 +1,79 @@
+===
+ GPU Debugging
+===
+
+GPUVM Debugging
+===
+
+To aid in debugging GPU virtual memory related problems, the driver supports a
+number of module parameters:
+
+`vm_fault_stop` - If non-0, halt the GPU memory controller on a GPU page fault.
+
+`vm_update_mode` - If non-0, use the CPU to update GPU page tables rather than
+the GPU.
+
+
+Decoding a GPUVM Page Fault
+===
+
+If you see a GPU page fault in the kernel log, you can decode it to figure
+out what is going wrong in your application.  A page fault in your kernel
+log may look something like this:
+
+::
+
+ [gfxhub0] no-retry page fault (src_id:0 ring:24 vmid:3 pasid:32777, for 
process glxinfo pid 2424 thread glxinfo:cs0 pid 2425)
+   in page starting at address 0x80010280 from IH client 0x1b (UTCL2)
+ VM_L2_PROTECTION_FAULT_STATUS:0x00301030
+   Faulty UTCL2 client ID: TCP (0x8)
+   MORE_FAULTS: 0x0
+   WALKER_ERROR: 0x0
+   PERMISSION_FAULTS: 0x3
+   MAPPING_ERROR: 0x0
+   RW: 0x0
+
+First you have the memory hub, gfxhub and mmhub.  gfxhub is the memory
+hub used for graphics, compute, and sdma on some chips.  mmhub is the
+memory hub used for multi-media and sdma on some chips.
+
+Next you have the vmid and pasid.  If the vmid is 0, this fault was likely
+caused by the kernel driver or firmware.  If the vmid is non-0, it is generally
+a fault in a user application.  The pasid is used to link a vmid to a system
+process id.  If the process is active when the fault happens, the process
+information will be printed.
+
+The GPU virtual address that caused the fault comes next.
+
+The client ID indicates the GPU block that caused the fault.
+Some common client IDs:
+
+- CB/DB: The color/depth backend of the graphics pipe
+- CPF: Command Processor Frontend
+- CPC: Command Processor Compute
+- CPG: Command Processor Graphics
+- TCP/SQC/SQG: Shaders
+- SDMA: SDMA engines
+- VCN: Video encode/decode engines
+- JPEG: JPEG engines
+
+PERMISSION_FAULTS describe what faults were encountered:
+
+- bit 0: the PTE was not valid
+- bit 1: the PTE read bit was not set
+- bit 2: the PTE write bit was not set
+- bit 3: the PTE execute bit was not set
+
+Finally, RW, indicates whether the access was a read (0) or a write (1).
+
+In the example above, a shader (client id = TCP) generated a read (RW = 0x0) to
+an invalid page (PERMISSION_FAULTS = 0x3) at GPU virtual address
+0x80010280.  The user can then inspect can then inspect their shader
+code and resource descriptor state to determine what caused the GPU page fault.
+
+UMR
+===
+
+`umr `_ is a general purpose
+GPU debugging and diagnostics tool.  Please see the umr documentation for
+more information about its capabilities.
diff --git a/Documentation/gpu/amdgpu/index.rst 
b/Documentation/gpu/amdgpu/index.rst
index 912e699fd373..847e04924030 100644
--- a/Documentation/gpu/amdgpu/index.rst
+++ b/Documentation/gpu/amdgpu/index.rst
@@ -15,4 +15,5 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) 
architectures.
ras
thermal
driver-misc
+   debugging
amdgpu-glossary
-- 
2.44.0
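
As a small aid to the decoding walk-through in the patch, the PERMISSION_FAULTS
value can be turned into text mechanically. A minimal user-space sketch that
assumes nothing beyond the bit meanings documented above:

/* Decode a PERMISSION_FAULTS value from a GPUVM page fault report. */
#include <stdio.h>

static void decode_permission_faults(unsigned int faults)
{
	if (faults & 0x1)
		printf("  PTE not valid\n");
	if (faults & 0x2)
		printf("  PTE read bit not set\n");
	if (faults & 0x4)
		printf("  PTE write bit not set\n");
	if (faults & 0x8)
		printf("  PTE execute bit not set\n");
}

int main(void)
{
	decode_permission_faults(0x3);	/* value from the example fault above */
	return 0;
}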



[bug report] drm/amd/display: Add debug counters to IPS exit prints

2024-03-15 Thread Dan Carpenter
Hello Nicholas Kazlauskas,

Commit 2dfaea1d715a ("drm/amd/display: Add debug counters to IPS exit
prints") from Feb 21, 2024 (linux-next), leads to the following
Smatch static checker warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:1398 
dc_dmub_srv_exit_low_power_state() error: uninitialized symbol 
'ips1_exit_count'.
drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:1398 
dc_dmub_srv_exit_low_power_state() error: uninitialized symbol 
'ips2_exit_count'.
drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:1398 
dc_dmub_srv_exit_low_power_state() error: uninitialized symbol 'rcg_exit_count'.

drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c
1279 static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
1280 {
1281 struct dc_dmub_srv *dc_dmub_srv;
1282 uint32_t rcg_exit_count, ips1_exit_count, ips2_exit_count;
1283 
1284 if (dc->debug.dmcub_emulation)
1285 return;
1286 
1287 if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub)
1288 return;
1289 
1290 dc_dmub_srv = dc->ctx->dmub_srv;
1291 
1292 if (dc->clk_mgr->funcs->exit_low_power_state) {
1293 volatile const struct dmub_shared_state_ips_fw *ips_fw 
=
1294 
&dc_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_FW].data.ips_fw;
1295 volatile struct dmub_shared_state_ips_driver 
*ips_driver =
1296 
&dc_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_DRIVER].data.ips_driver;
1297 union dmub_shared_state_ips_driver_signals 
prev_driver_signals = ips_driver->signals;
1298 
1299 rcg_exit_count = ips_fw->rcg_exit_count;
1300 ips1_exit_count = ips_fw->ips1_exit_count;
1301 ips2_exit_count = ips_fw->ips2_exit_count;
1302 
1303 ips_driver->signals.all = 0;
1304 
1305 DC_LOG_IPS(
1306 "%s (allow ips1=%d ips2=%d) (commit ips1=%d 
ips2=%d) (count rcg=%d ips1=%d ips2=%d)",
1307 __func__,
1308 ips_driver->signals.bits.allow_ips1,
1309 ips_driver->signals.bits.allow_ips2,
1310 ips_fw->signals.bits.ips1_commit,
1311 ips_fw->signals.bits.ips2_commit,
1312 ips_fw->rcg_entry_count,
1313 ips_fw->ips1_entry_count,
1314 ips_fw->ips2_entry_count);
1315 
1316 /* Note: register access has technically not resumed 
for DCN here, but we
1317  * need to be message PMFW through our standard 
register interface.
1318  */
1319 dc_dmub_srv->needs_idle_wake = false;
1320 
1321 if (prev_driver_signals.bits.allow_ips2) {
1322 DC_LOG_IPS(
1323 "wait IPS2 eval (ips1_commit=%d 
ips2_commit=%d)",
1324 ips_fw->signals.bits.ips1_commit,
1325 ips_fw->signals.bits.ips2_commit);
1326 
1327 udelay(dc->debug.ips2_eval_delay_us);
1328 
1329 if (ips_fw->signals.bits.ips2_commit) {
1330 DC_LOG_IPS(
1331 "exit IPS2 #1 (ips1_commit=%d 
ips2_commit=%d)",
1332 
ips_fw->signals.bits.ips1_commit,
1333 
ips_fw->signals.bits.ips2_commit);
1334 
1335 // Tell PMFW to exit low power state
1336 
dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
1337 
1338 DC_LOG_IPS(
1339 "wait IPS2 entry delay 
(ips1_commit=%d ips2_commit=%d)",
1340 
ips_fw->signals.bits.ips1_commit,
1341 
ips_fw->signals.bits.ips2_commit);
1342 
1343 // Wait for IPS2 entry upper bound
1344 udelay(dc->debug.ips2_entry_delay_us);
1345 
1346 DC_LOG_IPS(
1347 "exit IPS2 #2 (ips1_commit=%d 
ips2_commit=%d)",
1348 
ips_fw->signals.bits.ips1_commit,
1349 
ips_fw->signals.bits.ips2_commit);
1350 
1351 
dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
1352 
1353   
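
One low-risk way to silence the warning would be to give the counters a defined
value at declaration (sketch only; the maintainers may instead prefer to move
or guard the logging at line 1398):

	/* Sketch: ensure the counters are defined even when the
	 * exit_low_power_state branch above is not taken. */
	uint32_t rcg_exit_count = 0, ips1_exit_count = 0, ips2_exit_count = 0;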

[bug report] drm/amdgpu: add ring buffer information in devcoredump

2024-03-15 Thread Dan Carpenter
Hello Sunil Khatri,

Commit 42742cc541bb ("drm/amdgpu: add ring buffer information in
devcoredump") from Mar 11, 2024 (linux-next), leads to the following
Smatch static checker warning:

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c:219 amdgpu_devcoredump_read()
error: we previously assumed 'coredump->adev' could be null (see line 
206)

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
171 static ssize_t
172 amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
173 void *data, size_t datalen)
174 {
175 struct drm_printer p;
176 struct amdgpu_coredump_info *coredump = data;
177 struct drm_print_iterator iter;
178 int i;
179 
180 iter.data = buffer;
181 iter.offset = 0;
182 iter.start = offset;
183 iter.remain = count;
184 
185 p = drm_coredump_printer(&iter);
186 
187 drm_printf(&p, " AMDGPU Device Coredump \n");
188 drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n");
189 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
190 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
191 drm_printf(&p, "time: %lld.%09ld\n", 
coredump->reset_time.tv_sec,
192 coredump->reset_time.tv_nsec);
193 
194 if (coredump->reset_task_info.pid)
195 drm_printf(&p, "process_name: %s PID: %d\n",
196coredump->reset_task_info.process_name,
197coredump->reset_task_info.pid);
198 
199 if (coredump->ring) {
200 drm_printf(&p, "\nRing timed out details\n");
201 drm_printf(&p, "IP Type: %d Ring Name: %s\n",
202coredump->ring->funcs->type,
203coredump->ring->name);
204 }
205 
206 if (coredump->adev) {
^^
Check for NULL

207 struct amdgpu_vm_fault_info *fault_info =
208 &coredump->adev->vm_manager.fault_info;
209 
210 drm_printf(&p, "\n[%s] Page fault observed\n",
211fault_info->vmhub ? "mmhub" : "gfxhub");
212 drm_printf(&p, "Faulty page starting at address: 
0x%016llx\n",
213fault_info->addr);
214 drm_printf(&p, "Protection fault status register: 
0x%x\n\n",
215fault_info->status);
216 }
217 
218 drm_printf(&p, "Ring buffer information\n");
--> 219 for (int i = 0; i < coredump->adev->num_rings; i++) {
^^
Unchecked dereference

220 int j = 0;
221 struct amdgpu_ring *ring = coredump->adev->rings[i];
222 
223 drm_printf(&p, "ring name: %s\n", ring->name);
224 drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: 
%x\n",
225amdgpu_ring_get_rptr(ring),
226amdgpu_ring_get_wptr(ring),
227ring->buf_mask);
228 drm_printf(&p, "Ring size in dwords: %d\n",
229ring->ring_size / 4);
230 drm_printf(&p, "Ring contents\n");
231 drm_printf(&p, "Offset \t Value\n");
232 
233 while (j < ring->ring_size) {
234 drm_printf(&p, "0x%x \t 0x%x\n", j, 
ring->ring[j/4]);
235 j += 4;
236 }
237 }
238 
239 if (coredump->reset_vram_lost)
240 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
241 if (coredump->adev->reset_info.num_regs) {
^^
Here too

242 drm_printf(&p, "AMDGPU register dumps:\nOffset: 
Value:\n");
243 
244 for (i = 0; i < coredump->adev->reset_info.num_regs; 
i++)
245 drm_printf(&p, "0x%08x: 0x%08x\n",
246
coredump->adev->reset_info.reset_dump_reg_list[i],
247
coredump->adev->reset_info.reset_dump_reg_value[i]);
248 }
249 
250 return count - iter.remain;
251 }

regards,
dan carpenter
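
One possible shape of a fix, sketched under the assumption that the
adev-dependent dumps should simply be skipped when coredump->adev is NULL
(illustrative only, not the actual fix):

	/* Sketch: mirror the NULL check from line 206 around the ring and
	 * register dumps that currently dereference coredump->adev unchecked. */
	if (coredump->adev) {
		drm_printf(&p, "Ring buffer information\n");
		for (int i = 0; i < coredump->adev->num_rings; i++) {
			/* ... existing ring dump ... */
		}

		if (coredump->adev->reset_info.num_regs) {
			/* ... existing register dump ... */
		}
	}

	if (coredump->reset_vram_lost)
		drm_printf(&p, "VRAM is lost due to GPU reset!\n");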


Re: [PATCH] Documentation: add a page on amdgpu debugging

2024-03-15 Thread Alex Deucher
On Fri, Mar 15, 2024 at 12:07 PM Alex Deucher  wrote:
>
> Covers GPU page fault debugging and adds a reference
> to umr.
>
> v2: update client ids to include SQC/G
>
> Signed-off-by: Alex Deucher 
> ---
>  Documentation/gpu/amdgpu/debugging.rst | 79 ++
>  Documentation/gpu/amdgpu/index.rst |  1 +
>  2 files changed, 80 insertions(+)
>  create mode 100644 Documentation/gpu/amdgpu/debugging.rst
>
> diff --git a/Documentation/gpu/amdgpu/debugging.rst 
> b/Documentation/gpu/amdgpu/debugging.rst
> new file mode 100644
> index ..8b7fdcdf1158
> --- /dev/null
> +++ b/Documentation/gpu/amdgpu/debugging.rst
> @@ -0,0 +1,79 @@
> +===
> + GPU Debugging
> +===
> +
> +GPUVM Debugging
> +===
> +
> +To aid in debugging GPU virtual memory related problems, the driver supports 
> a
> +number of options module paramters:
> +
> +`vm_fault_stop` - If non-0, halt the GPU memory controller on a GPU page 
> fault.
> +
> +`vm_update_mode` - If non-0, use the CPU to update GPU page tables rather 
> than
> +the GPU.
> +
> +
> +Decoding a GPUVM Page Fault
> +===
> +
> +If you see a GPU page fault in the kernel log, you can decode it to figure
> +out what is going wrong in your application.  A page fault in your kernel
> +log may look something like this:
> +
> +::
> +
> + [gfxhub0] no-retry page fault (src_id:0 ring:24 vmid:3 pasid:32777, for 
> process glxinfo pid 2424 thread glxinfo:cs0 pid 2425)
> +   in page starting at address 0x80010280 from IH client 0x1b (UTCL2)
> + VM_L2_PROTECTION_FAULT_STATUS:0x00301030
> +   Faulty UTCL2 client ID: TCP (0x8)
> +   MORE_FAULTS: 0x0
> +   WALKER_ERROR: 0x0
> +   PERMISSION_FAULTS: 0x3
> +   MAPPING_ERROR: 0x0
> +   RW: 0x0
> +
> +First you have the memory hub, gfxhub and mmhub.  gfxhub is the memory
> +hub used for graphics, compute, and sdma on some chips.  mmhub is the
> +memory hub used for multi-media and sdma on some chips.
> +
> +Next you have the vmid and pasid.  If the vmid is 0, this fault was likely
> +caused by the kernel driver or firmware.  If the vmid is non-0, it is 
> generally
> +a fault in a user application.  The pasid is used to link a vmid to a system
> +process id.  If the process is active when the fault happens, the process
> +information will be printed.
> +
> +The GPU virtual address that caused the fault comes next.
> +
> +The client ID indicates the GPU block that caused the fault.
> +Some common client IDs:
> +
> +- CB/DB: The color/depth backend of the graphics pipe
> +- CPF: Command Processor Frontend
> +- CPC: Command Processor Compute
> +- CPG: Command Processor Graphics
> +- TCP/SQC/SQG: Shaders
> +- SDMA: SDMA engines
> +- VCN: Video encode/decode engines
> +- JPEG: JPEG engines
> +
> +PERMISSION_FAULTS describe what faults were encountered:
> +
> +- bit 0: the PTE was not valid
> +- bit 1: the PTE read bit was not set
> +- bit 2: the PTE write bit was not set
> +- bit 3: the PTE execute bit was not set
> +
> +Finally, RW indicates whether the access was a read (0) or a write (1).
> +
> +In the example above, a shader (client id = TCP) generated a read (RW = 0x0) 
> to
> +an invalid page (PERMISSION_FAULTS = 0x3) at GPU virtual address
> +0x80010280.  The user can then inspect can then inspect their shader

removed the duplicated text above locally.

Alex

> +code and resource descriptor state to determine what caused the GPU page 
> fault.
> +
> +UMR
> +===
> +
> +`umr `_ is a general purpose
> +GPU debugging and diagnostics tool.  Please see the umr documentation for
> +more information about its capabilities.
> diff --git a/Documentation/gpu/amdgpu/index.rst 
> b/Documentation/gpu/amdgpu/index.rst
> index 912e699fd373..847e04924030 100644
> --- a/Documentation/gpu/amdgpu/index.rst
> +++ b/Documentation/gpu/amdgpu/index.rst
> @@ -15,4 +15,5 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) 
> architectures.
> ras
> thermal
> driver-misc
> +   debugging
> amdgpu-glossary
> --
> 2.44.0
>
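
Taking the VM_L2_PROTECTION_FAULT_STATUS example above, the same fields can
also be pulled out of the raw register value programmatically.  A small
stand-alone sketch for the example value 0x00301030; the bit positions here
assume the gfx9-era register layout, so double-check the register headers
for your ASIC:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t status = 0x00301030;   /* value from the example fault above */

        printf("MORE_FAULTS:       0x%x\n", status & 0x1);          /* bit 0 */
        printf("WALKER_ERROR:      0x%x\n", (status >> 1) & 0x7);   /* bits 3:1 */
        printf("PERMISSION_FAULTS: 0x%x\n", (status >> 4) & 0xf);   /* bits 7:4 -> 0x3 */
        printf("MAPPING_ERROR:     0x%x\n", (status >> 8) & 0x1);   /* bit 8 */
        printf("CID:               0x%x\n", (status >> 9) & 0x1ff); /* bits 17:9 -> 0x8 (TCP) */
        printf("RW:                0x%x\n", (status >> 18) & 0x1);  /* bit 18 -> read */
        return 0;
}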


[PATCH 0/2] drm/amdgpu/display: Make multi-plane configurations more flexible

2024-03-15 Thread sunpeng.li
From: Leo Li 

These patches aim to make the amdgpgu KMS driver play nicer with compositors
when building multi-plane scanout configurations. They do so by:

1. Making cursor behavior more sensible.
2. Allowing placement of DRM OVERLAY planes underneath the PRIMARY plane for
   'underlay' configurations (perhaps more of a RFC, see below).

Please see the commit messages for details.


For #2, the simplest way to accomplish this was to increase the value of the
immutable zpos property for the PRIMARY plane. This allowed OVERLAY planes with
a mutable zpos range of (0-254) to be positioned underneath the PRIMARY for an
underlay scanout configuration.

Technically speaking, DCN hardware does not have a concept of primary or overlay
planes - there are simply 4 general purpose hardware pipes that can be maped in
any configuration. So the immutable zpos restriction on the PRIMARY plane is
kind of arbitrary; it can have a mutable range of (0-254) just like the
OVERLAYs. The distinction between PRIMARY and OVERLAY planes is also somewhat
arbitrary. We can interpret PRIMARY as the first plane that should be enabled on
a CRTC, but beyond that, it doesn't mean much for amdgpu.

Therefore, I'm curious about how compositor devs understand KMS planes and
their zpos properties, and how we would like to use them. It isn't clear to me
how compositors wish to interpret and use the DRM zpos property, or
differentiate between OVERLAY and PRIMARY planes, when it comes to setting up
multi-plane scanout.

Ultimately, what I'd like to answer is "What can we do on the KMS driver and DRM
plane API side, that can make building multi-plane scanout configurations easier
for compositors?" I'm hoping we can converge on something, whether that be
updating the existing documentation to better define the usage, or updating the
API to provide support for something that is lacking.
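
To make the question concrete, this is roughly what requesting an underlay
looks like from the compositor side with libdrm's atomic API, assuming the
driver exposes a mutable "zpos" on the OVERLAY plane (sketch only, error
handling trimmed):

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

static uint32_t find_plane_prop_id(int fd, uint32_t plane_id, const char *name)
{
        drmModeObjectProperties *props =
                drmModeObjectGetProperties(fd, plane_id, DRM_MODE_OBJECT_PLANE);
        uint32_t id = 0;

        for (uint32_t i = 0; props && i < props->count_props; i++) {
                drmModePropertyRes *prop = drmModeGetProperty(fd, props->props[i]);

                if (prop && !strcmp(prop->name, name))
                        id = prop->prop_id;
                drmModeFreeProperty(prop);
        }
        drmModeFreeObjectProperties(props);
        return id;
}

/* Place an OVERLAY plane underneath a PRIMARY whose immutable zpos is > 0. */
static int test_underlay(int fd, uint32_t overlay_plane_id)
{
        drmModeAtomicReq *req = drmModeAtomicAlloc();
        uint32_t zpos_prop = find_plane_prop_id(fd, overlay_plane_id, "zpos");
        int ret;

        drmModeAtomicAddProperty(req, overlay_plane_id, zpos_prop, 0);
        ret = drmModeAtomicCommit(fd, req, DRM_MODE_ATOMIC_TEST_ONLY, NULL);
        drmModeAtomicFree(req);
        return ret;
}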

Thanks,
Leo


Some links to provide context and details:
* What is underlay?: 
https://gitlab.freedesktop.org/emersion/libliftoff/-/issues/76
* Discussion on how to implement underlay on Weston: 
https://gitlab.freedesktop.org/wayland/weston/-/merge_requests/1258#note_2325164

Cc: Joshua Ashton 
Cc: Michel Dänzer 
Cc: Chao Guo 
Cc: Xaver Hugl 
Cc: Vikas Korjani 
Cc: Robert Mader 
Cc: Pekka Paalanen 
Cc: Sean Paul 
Cc: Simon Ser 
Cc: Shashank Sharma 
Cc: Harry Wentland 
Cc: Sebastian Wick 

Leo Li (2):
  drm/amd/display: Introduce overlay cursor mode
  drm/amd/display: Move PRIMARY plane zpos higher

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 405 --
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |   7 +
 .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c|   1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   |  28 +-
 4 files changed, 391 insertions(+), 50 deletions(-)

-- 
2.44.0



[PATCH 1/2] drm/amd/display: Introduce overlay cursor mode

2024-03-15 Thread sunpeng.li
From: Leo Li 

[Why]

DCN is the display hardware for amdgpu. DRM planes are backed by DCN
hardware pipes, which carry pixel data from one end (memory), to the
other (output encoder).

Each DCN pipe has the ability to blend in a cursor early on in the
pipeline. In other words, there are no dedicated cursor planes in DCN,
which makes cursor behavior somewhat unintuitive for compositors.

For example, if the cursor is in RGB format, but the top-most DRM plane
is in YUV format, DCN will not be able to blend them. Because of this,
amdgpu_dm rejects all configurations where a cursor needs to be enabled
on top of a YUV formatted plane.

From a compositor's perspective, when computing an allocation for
hardware plane offloading, this cursor-on-yuv configuration results in an
atomic test failure. Since the failure reason is not obvious at all,
compositors will likely fall back to full rendering, which is not ideal.

Instead, amdgpu_dm can try to accommodate the cursor-on-yuv
configuration by opportunistically reserving a separate DCN pipe just
for the cursor. We can refer to this as "overlay cursor mode". It is
contrasted with "native cursor mode", where the native DCN per-pipe
cursor is used.

[How]

On each crtc, compute whether the cursor plane should be enabled in
overlay mode (which currently is iff the immediate plane below has a
YUV format). If it is, mark the CRTC as requesting overlay cursor mode.

During DC validation, attempt to enable a separate DCN pipe for the
cursor if it's in overlay mode. If that fails, or if no overlay mode is
requested, then fall back to native mode.
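
For reference, the per-CRTC decision described above boils down to roughly
the following (illustrative sketch; only the DM_CURSOR_* values come from the
patch, the helper shape is a placeholder):

static int dm_crtc_pick_cursor_mode(const struct drm_plane_state *plane_below_cursor)
{
        /* Request a dedicated DCN pipe for the cursor when the plane
         * immediately below it scans out a YUV format that the native
         * per-pipe cursor cannot be blended with. */
        if (plane_below_cursor && plane_below_cursor->fb &&
            plane_below_cursor->fb->format->is_yuv)
                return DM_CURSOR_OVERLAY_MODE;

        /* Otherwise keep using the native DCN per-pipe cursor. */
        return DM_CURSOR_NATIVE_MODE;
}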

Signed-off-by: Leo Li 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 309 +++---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |   7 +
 .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c|   1 +
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   |  13 +-
 4 files changed, 288 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 21a61454c878..09ab330aed17 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8359,8 +8359,19 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
 * Disable the cursor first if we're disabling all the planes.
 * It'll remain on the screen after the planes are re-enabled
 * if we don't.
+*
+* If the cursor is transitioning from native to overlay mode, the
+* native cursor needs to be disabled first.
 */
-   if (acrtc_state->active_planes == 0)
+   if (acrtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE &&
+   dm_old_crtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE) {
+   struct dc_cursor_position cursor_position = {0};
+   dc_stream_set_cursor_position(acrtc_state->stream,
+ &cursor_position);
+   }
+
+   if (acrtc_state->active_planes == 0 &&
+   dm_old_crtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE)
amdgpu_dm_commit_cursors(state);
 
/* update planes when needed */
@@ -8374,7 +8385,8 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
struct dm_plane_state *dm_new_plane_state = 
to_dm_plane_state(new_plane_state);
 
/* Cursor plane is handled after stream updates */
-   if (plane->type == DRM_PLANE_TYPE_CURSOR) {
+   if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+   acrtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE) {
if ((fb && crtc == pcrtc) ||
(old_plane_state->fb && old_plane_state->crtc == 
pcrtc))
cursor_update = true;
@@ -8727,7 +8739,8 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
 * This avoids redundant programming in the case where we're going
 * to be disabling a single plane - those pipes are being disabled.
 */
-   if (acrtc_state->active_planes)
+   if (acrtc_state->active_planes &&
+   acrtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE)
amdgpu_dm_commit_cursors(state);
 
 cleanup:
@@ -10039,7 +10052,8 @@ static bool should_reset_plane(struct drm_atomic_state 
*state,
 {
struct drm_plane *other;
struct drm_plane_state *old_other_state, *new_other_state;
-   struct drm_crtc_state *new_crtc_state;
+   struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+   struct dm_crtc_state *old_dm_crtc_state, *new_dm_crtc_state;
struct amdgpu_device *adev = drm_to_adev(plane->dev);
int i;
 
@@ -10061,10 +10075,24 @@ static bool should_reset_plane(struct 
drm_atomic_state *state,
 
new_crtc_state =
drm_atomic_get_new_crtc_state(state, new_plane_state->crtc);
+   old_crtc_state =
+   drm_atomic_g

[PATCH 2/2] drm/amd/display: Move PRIMARY plane zpos higher

2024-03-15 Thread sunpeng.li
From: Leo Li 

[Why]

Compositors have different ways of assigning surfaces to DRM planes for
render offloading. They may decide between various strategies: overlay,
underlay, or a mix of both.

One way for compositors to implement the underlay strategy is to assign
a higher zpos to the DRM_PRIMARY plane than the DRM_OVERLAY planes,
effectively turning the DRM_OVERLAY plane into an underlay plane.

Today, amdgpu attaches an immutable zpos of 0 to the DRM_PRIMARY plane.
This however, is an arbitrary restriction. DCN pipes are general
purpose, and can be arranged in any z-order. To support compositors
using this allocation scheme, we can set a non-zero immutable zpos for
the PRIMARY, allowing the placement of OVERLAYS (mutable zpos range
0-254) beneath the PRIMARY.

[How]

Assign a zpos = #no of OVERLAY planes to the PRIMARY plane. Then, clean
up any assumptions in the driver of PRIMARY plane having the lowest
zpos.
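
At plane init time this amounts to something like the following, using the
core DRM zpos helpers (sketch; n_overlays stands in for the driver's overlay
plane count):

if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
        /* OVERLAYs keep the full mutable range so they can sit above
         * or below the PRIMARY. */
        drm_plane_create_zpos_property(plane, 0, 0, 254);
} else if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
        /* Pin the PRIMARY above the overlay default so an OVERLAY left
         * at zpos 0 scans out underneath it. */
        drm_plane_create_zpos_immutable_property(plane, n_overlays);
}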

Signed-off-by: Leo Li 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 96 ++-
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 17 +++-
 2 files changed, 104 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 09ab330aed17..01b00f587701 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -80,6 +80,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -369,6 +370,20 @@ static inline void reverse_planes_order(struct 
dc_surface_update *array_of_surfa
swap(array_of_surface_update[i], array_of_surface_update[j]);
 }
 
+/*
+ * DC will program planes with their z-order determined by their ordering
+ * in the dc_surface_updates array. This comparator is used to sort them
+ * by descending zpos.
+ */
+static int dm_plane_layer_index_cmp(const void *a, const void *b)
+{
+   const struct dc_surface_update *sa = (struct dc_surface_update *)a;
+   const struct dc_surface_update *sb = (struct dc_surface_update *)b;
+
+   /* Sort by descending dc_plane layer_index (i.e. normalized_zpos) */
+   return sb->surface->layer_index - sa->surface->layer_index;
+}
+
 /**
  * update_planes_and_stream_adapter() - Send planes to be updated in DC
  *
@@ -393,7 +408,8 @@ static inline bool update_planes_and_stream_adapter(struct 
dc *dc,
struct dc_stream_update 
*stream_update,
struct dc_surface_update 
*array_of_surface_update)
 {
-   reverse_planes_order(array_of_surface_update, planes_count);
+   sort(array_of_surface_update, planes_count,
+sizeof(*array_of_surface_update), dm_plane_layer_index_cmp, NULL);
 
/*
 * Previous frame finished and HW is ready for optimization.
@@ -9363,6 +9379,8 @@ static void amdgpu_dm_atomic_commit_tail(struct 
drm_atomic_state *state)
for (j = 0; j < status->plane_count; j++)
dummy_updates[j].surface = status->plane_states[0];
 
+   sort(dummy_updates, status->plane_count,
+sizeof(*dummy_updates), dm_plane_layer_index_cmp, NULL);
 
mutex_lock(&dm->dc_lock);
dc_update_planes_and_stream(dm->dc,
@@ -10097,6 +10115,17 @@ static bool should_reset_plane(struct drm_atomic_state 
*state,
if (new_crtc_state->color_mgmt_changed)
return true;
 
+   /*
+* On zpos change, planes need to be reordered by removing and re-adding
+* them one by one to the dc state, in order of descending zpos.
+*
+* TODO: We can likely skip bandwidth validation if the only thing that
+* changed about the plane was its z-ordering.
+*/
+   if (new_crtc_state->zpos_changed) {
+   return true;
+   }
+
if (drm_atomic_crtc_needs_modeset(new_crtc_state))
return true;
 
@@ -10509,6 +10538,65 @@ dm_get_plane_scale(struct drm_plane_state *plane_state,
*out_plane_scale_h = plane_state->crtc_h * 1000 / plane_src_h;
 }
 
+/*
+ * The normalized_zpos value cannot be used by this iterator directly. It's 
only
+ * calculated for enabled planes, potentially causing normalized_zpos 
collisions
+ * between enabled/disabled planes in the atomic state. We need a unique value
+ * so that the iterator will not generate the same object twice, or loop
+ * indefinitely.
+ */
+static inline struct __drm_planes_state *__get_next_zpos(
+   struct drm_atomic_state *state,
+   struct __drm_planes_state *prev)
+{
+   unsigned int highest_zpos = 0, prev_zpos = 256;
+   uint32_t highest_id = 0, prev_id = UINT_MAX;
+   struct drm_plane_state *new_plane_state;
+   struct drm_plane *plane;
+   int i, highest_i = -1;
+
+   if (prev != NULL) {
+   prev_zpos = prev->new_state->zpos;
+   prev_id = prev->ptr

[PATCH] drm/amdkfd: Check cgroup when returning DMABuf info

2024-03-15 Thread Mukul Joshi
Check cgroup permissions when returning DMA-buf info and
based on cgroup check return the id of the GPU that has
access to the BO.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index dfa8c69532d4..f9631f4b1a02 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1523,7 +1523,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
 
/* Find a KFD GPU device that supports the get_dmabuf_info query */
for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
-   if (dev)
+   if (dev && !kfd_devcgroup_check_permission(dev))
break;
if (!dev)
return -EINVAL;
@@ -1545,7 +1545,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
if (xcp_id >= 0)
args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id;
else
-   args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id;
+   args->gpu_id = dev->id;
args->flags = flags;
 
/* Copy metadata buffer to user mode */
-- 
2.35.1



Re: Proposal to add CRIU support to DRM render nodes

2024-03-15 Thread Tvrtko Ursulin



On 15/03/2024 02:33, Felix Kuehling wrote:


On 2024-03-12 5:45, Tvrtko Ursulin wrote:


On 11/03/2024 14:48, Tvrtko Ursulin wrote:


Hi Felix,

On 06/12/2023 21:23, Felix Kuehling wrote:
Executive Summary: We need to add CRIU support to DRM render nodes 
in order to maintain CRIU support for ROCm applications once they 
start relying on render nodes for more GPU memory management. In 
this email I'm providing some background why we are doing this, and 
outlining some of the problems we need to solve to checkpoint and 
restore render node state and shared memory (DMABuf) state. I have 
some thoughts on the API design, leaning on what we did for KFD, but 
would like to get feedback from the DRI community regarding that API 
and to what extent there is interest in making that generic.


We are working on using DRM render nodes for virtual address 
mappings in ROCm applications to implement the CUDA11-style VM API 
and improve interoperability between graphics and compute. This uses 
DMABufs for sharing buffer objects between KFD and multiple render 
node devices, as well as between processes. In the long run this 
also provides a path to moving all or most memory management from 
the KFD ioctl API to libdrm.


Once ROCm user mode starts using render nodes for virtual address 
management, that creates a problem for checkpointing and restoring 
ROCm applications with CRIU. Currently there is no support for 
checkpointing and restoring render node state, other than CPU 
virtual address mappings. Support will be needed for checkpointing 
GEM buffer objects and handles, their GPU virtual address mappings 
and memory sharing relationships between devices and processes.


Eventually, if full CRIU support for graphics applications is 
desired, more state would need to be captured, including scheduler 
contexts and BO lists. Most of this state is driver-specific.


After some internal discussions we decided to take our design 
process public as this potentially touches DRM GEM and DMABuf APIs 
and may have implications for other drivers in the future.


One basic question before going into any API details: Is there a 
desire to have CRIU support for other DRM drivers?


This sounds like a very interesting feature overall, although 
I cannot answer the last question here.


I forgot to finish this thought. I cannot answer / don't know of any 
concrete plans, but I think the feature is pretty cool and if amdgpu gets 
it working I wouldn't be surprised if other drivers would get interested.


Thanks, that's good to hear!




Funnily enough, it has a tiny relation to an i915 feature I recently 
implemented on Mesa's request, which is to be able to "upload" the 
GPU context from the GPU hang error state and replay the hanging 
request. It is kind of (at a stretch) a very special tiny subset of 
checkpoint and restore, so I am mentioning it only as a curiosity.


And there is also another partial conceptual intersect with the (at 
the moment not yet upstream) i915 online debugger. This part being in 
the area of discovering and enumerating GPU resources belonging to the 
client.


I don't see any immediate design or code sharing opportunities though, 
just mentioning.


I did spend some time reading your plugin and kernel implementation 
out of curiosity and have some comments and questions.


With that out of the way, some considerations for a possible DRM 
CRIU API (either generic of AMDGPU driver specific): The API goes 
through several phases during checkpoint and restore:


Checkpoint:

 1. Process-info (enumerates objects and sizes so user mode can allocate
    memory for the checkpoint, stops execution on the GPU)
 2. Checkpoint (store object metadata for BOs, queues, etc.)
 3. Unpause (resumes execution after the checkpoint is complete)

Restore:

 1. Restore (restore objects, VMAs are not in the right place at 
this time)
 2. Resume (final fixups after the VMAs are sorted out, resume 
execution)
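
(For reference, this phase split could be expressed the same way the KFD CRIU
ioctl does it, as ops on a single entry point; a DRM-generic equivalent might
look like the sketch below, with all names hypothetical.)

enum drm_criu_op {
        DRM_CRIU_OP_PROCESS_INFO,       /* enumerate objects and sizes, stop GPU execution */
        DRM_CRIU_OP_CHECKPOINT,         /* store object metadata for BOs, queues, etc. */
        DRM_CRIU_OP_UNPAUSE,            /* resume execution after the checkpoint */
        DRM_CRIU_OP_RESTORE,            /* recreate objects, VMAs not yet in final place */
        DRM_CRIU_OP_RESUME,             /* final fixups once VMAs are sorted out, resume */
};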


Btw is check-pointing guaranteeing all relevant activity is idled? 
For instance dma_resv objects are free of fences which would need to 
be restored for things to continue executing sensibly? Or how is that 
handled?


In our compute use cases, we suspend user mode queues. This can include 
CWSR (compute-wave-save-restore) where the state of in-flight waves is 
stored in memory and can be reloaded and resumed from memory later. We 
don't use any fences other than "eviction fences", that are signaled 
after the queues are suspended. And those fences are never handed to 
user mode. So we don't need to worry about any fence state in the 
checkpoint.


If we extended this to support the kernel mode command submission APIs, 
I would expect that we'd wait for all current submissions to complete, 
and stop new ones from being sent to the HW before taking the 
checkpoint. When we take the checkpoint in the CRIU plugin, the CPU 
threads are already frozen and cannot submit any more work. If we wait 
for all currently pendi

Re: [bug report] drm/amdgpu: add ring buffer information in devcoredump

2024-03-15 Thread Khatri, Sunil

Thanks for pointing these out. I do have some doubts and I raised them inline.

On 3/15/2024 8:46 PM, Dan Carpenter wrote:

Hello Sunil Khatri,

Commit 42742cc541bb ("drm/amdgpu: add ring buffer information in
devcoredump") from Mar 11, 2024 (linux-next), leads to the following
Smatch static checker warning:

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c:219 amdgpu_devcoredump_read()
error: we previously assumed 'coredump->adev' could be null (see line 
206)

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
 171 static ssize_t
 172 amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
 173 void *data, size_t datalen)
 174 {
 175 struct drm_printer p;
 176 struct amdgpu_coredump_info *coredump = data;
 177 struct drm_print_iterator iter;
 178 int i;
 179
 180 iter.data = buffer;
 181 iter.offset = 0;
 182 iter.start = offset;
 183 iter.remain = count;
 184
 185 p = drm_coredump_printer(&iter);
 186
 187 drm_printf(&p, " AMDGPU Device Coredump \n");
 188 drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n");
 189 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
 190 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
 191 drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec,
 192 coredump->reset_time.tv_nsec);
 193
 194 if (coredump->reset_task_info.pid)
 195 drm_printf(&p, "process_name: %s PID: %d\n",
 196coredump->reset_task_info.process_name,
 197coredump->reset_task_info.pid);
 198
 199 if (coredump->ring) {
 200 drm_printf(&p, "\nRing timed out details\n");
 201 drm_printf(&p, "IP Type: %d Ring Name: %s\n",
 202coredump->ring->funcs->type,
 203coredump->ring->name);
 204 }
 205
 206 if (coredump->adev) {
 ^^
Check for NULL

This is the check for NULL. Is there any issue here?


 207 struct amdgpu_vm_fault_info *fault_info =
 208 &coredump->adev->vm_manager.fault_info;
 209
 210 drm_printf(&p, "\n[%s] Page fault observed\n",
 211fault_info->vmhub ? "mmhub" : "gfxhub");
 212 drm_printf(&p, "Faulty page starting at address: 
0x%016llx\n",
 213fault_info->addr);
 214 drm_printf(&p, "Protection fault status register: 
0x%x\n\n",
 215fault_info->status);
 216 }
 217
 218 drm_printf(&p, "Ring buffer information\n");
--> 219 for (int i = 0; i < coredump->adev->num_rings; i++) {
 ^^
Unchecked dereference

Agree


 220 int j = 0;
 221 struct amdgpu_ring *ring = coredump->adev->rings[i];
 222
 223 drm_printf(&p, "ring name: %s\n", ring->name);
 224 drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: 
%x\n",
 225amdgpu_ring_get_rptr(ring),
 226amdgpu_ring_get_wptr(ring),
 227ring->buf_mask);
 228 drm_printf(&p, "Ring size in dwords: %d\n",
 229ring->ring_size / 4);
 230 drm_printf(&p, "Ring contents\n");
 231 drm_printf(&p, "Offset \t Value\n");
 232
 233 while (j < ring->ring_size) {
 234 drm_printf(&p, "0x%x \t 0x%x\n", j, 
ring->ring[j/4]);
 235 j += 4;
 236 }
 237 }
 238
 239 if (coredump->reset_vram_lost)
 240 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
 241 if (coredump->adev->reset_info.num_regs) {
 ^^
Here too

Agree.


 242 drm_printf(&p, "AMDGPU register dumps:\nOffset: 
Value:\n");
 243
 244 for (i = 0; i < coredump->adev->reset_info.num_regs; 
i++)
 245 drm_printf(&p, "0x%08x: 0x%08x\n",
 246
coredump->adev->reset_info.reset_dump_reg_list[i],
 247
coredump->adev->reset_info.reset_dump_reg_value[i]);
 248 }
 249
 250 return count - iter.remain;
 251 }



Although adev is a global structure and is never checked for NULL anywhere in 
the code (it won't be NULL until the driver is unloaded), I can 
add a check for adev in the beg

[PATCH v2 1/3] drm/amdgpu: function to read physical xcc_id

2024-03-15 Thread Samir Dhume
For SRIOV CPX mode, the assignment of jpeg doorbells depends on
whether the VF is even/odd numbered. The physical xcc_id provides
info on whether the VF is even/odd.

regCP_PSP_XCP_CTL is RO for VF through rlcg.
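
The later patches in this series consume the new callback as a simple parity
check, roughly (sketch):

/* Even/odd VF detection in SRIOV CPX mode, as used by the SDMA/JPEG patches. */
bool sriov_cpx_odd = false;

if (amdgpu_sriov_vf(adev) && adev->gfx.xcc_mask == 0x1)
        sriov_cpx_odd = adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1;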

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 04a86dff71e6..451192403c24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -297,6 +297,7 @@ struct amdgpu_gfx_funcs {
int (*switch_partition_mode)(struct amdgpu_device *adev,
 int num_xccs_per_xcp);
int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
+   int (*get_xcc_id)(struct amdgpu_device *adev, int inst);
 };
 
 struct sq_work {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index b53c8fd4e8cf..68508c19a9b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -669,6 +669,11 @@ static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device 
*adev, int ih_node)
return xcc - 1;
 }
 
+static int gfx_v9_4_3_get_xcc_id(struct amdgpu_device *adev, int inst)
+{
+   return RREG32_SOC15(GC, GET_INST(GC, inst), regCP_PSP_XCP_CTL);
+}
+
 static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter,
.select_se_sh = &gfx_v9_4_3_xcc_select_se_sh,
@@ -678,6 +683,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = 
{
.select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q,
.switch_partition_mode = &gfx_v9_4_3_switch_compute_partition,
.ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst,
+   .get_xcc_id = &gfx_v9_4_3_get_xcc_id,
 };
 
 static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle,
-- 
2.34.1



[PATCH v2 3/3] drm/amdgpu/jpeg: support for sriov cpx mode

2024-03-15 Thread Samir Dhume
In SRIOV CPX mode, each VF has 4 jpeg engines. The even-
numbered VFs point to the JPEG0 block of the AID and the odd-
numbered VFs point to the JPEG1 block.

                  Even-numbered VFs   Odd-numbered VFs

VCN doorbell 0    VCN Decode ring     VCN Decode ring
VCN doorbell 1-3  Reserved            Reserved
VCN doorbell 4    JPEG0-0 ring
VCN doorbell 5    JPEG0-1 ring
VCN doorbell 6    JPEG0-2 ring
VCN doorbell 7    JPEG0-3 ring
VCN doorbell 8                        JPEG1-0 ring
VCN doorbell 9                        JPEG1-1 ring
VCN doorbell 10                       JPEG1-2 ring
VCN doorbell 11                       JPEG1-3 ring

Changes involve
1. sriov cpx mode - 4 rings
2. sriov cpx mode for odd numbered VFs - register correct src-ids
(starting with JPEG4). Map src-id to correct instance in interrupt-
handler.

v2:
1. removed mmio access from interrupt handler. Use xcc_mask to detect
cpx mode.
2. remove unnecessary sriov variables

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 60 +---
 1 file changed, 53 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 32caeb37cef9..d95ca797412c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -68,6 +68,11 @@ static int jpeg_v4_0_3_early_init(void *handle)
 
adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
 
+   /* check for sriov cpx mode */
+   if (amdgpu_sriov_vf(adev))
+   if (adev->gfx.xcc_mask == 0x1)
+   adev->jpeg.num_jpeg_rings = 4;
+
jpeg_v4_0_3_set_dec_ring_funcs(adev);
jpeg_v4_0_3_set_irq_funcs(adev);
jpeg_v4_0_3_set_ras_funcs(adev);
@@ -87,11 +92,25 @@ static int jpeg_v4_0_3_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
int i, j, r, jpeg_inst;
+   bool sriov_cpx_odd = false;
+
+   /* check for sriov cpx mode odd/even numbered vfs */
+   if (amdgpu_sriov_vf(adev)) {
+   if (adev->gfx.xcc_mask == 0x1) {
+   if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+   sriov_cpx_odd = true;
+   }
+   }
 
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
/* JPEG TRAP */
-   r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+   if (!sriov_cpx_odd)
+   r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
+   else
+   r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+   amdgpu_ih_srcid_jpeg[j+4], 
&adev->jpeg.inst->irq);
+
if (r)
return r;
}
@@ -116,10 +135,14 @@ static int jpeg_v4_0_3_sw_init(void *handle)
(adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
1 + j + 9 * jpeg_inst;
} else {
-   if (j < 4)
+   if ((j < 4) && (!sriov_cpx_odd))
ring->doorbell_index =

(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
4 + j + 32 * jpeg_inst;
+   else if (sriov_cpx_odd)
+   ring->doorbell_index =
+   
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   12 + j + 32 * jpeg_inst;
else
ring->doorbell_index =

(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
@@ -186,6 +209,7 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device 
*adev)
uint32_t size, size_dw, item_offset;
uint32_t init_status;
int i, j, jpeg_inst;
+   bool cpx_odd = false;
 
struct mmsch_v4_0_cmd_direct_write
direct_wt = { {0} };
@@ -197,6 +221,12 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device 
*adev)
end.cmd_header.command_type =
MMSCH_COMMAND__END;
 
+   /* check for cpx mode odd/even numbered vf */
+   if (adev->gfx.xcc_mask == 0x1) {
+   if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+   cpx_odd = true;
+   }
+
for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
jpeg_inst = GET_INST(JPEG, i);
 
@@ -220,10 +250,14 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device 
*adev)
tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JRBC0_UVD_JRBC_RB_SIZE);
 

[PATCH v2 2/3] drm/amdgpu: sdma support for sriov cpx mode

2024-03-15 Thread Samir Dhume
SDMA has 2 instances in SRIOV CPX mode. Odd numbered VFs have
sdma0/sdma1 instances. Even numbered VFs have sdma2/sdma3.
Changes involve
1. identifying odd/even numbered VF
2. registering correct number of instances with irq handler
3. mapping instance number with IH client-id depending upon
whether vf is odd/even numbered.

v2:
1. fix for correct number of instances registered with irq
2. remove mmio access from interrupt handler. Use xcc_mask to
detect cpx mode.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 63 
 1 file changed, 43 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index eaa4f5f49949..117a7c692c0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -66,13 +66,28 @@ static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device 
*adev,
return (adev->reg_offset[SDMA0_HWIP][dev_inst][0] + offset);
 }
 
-static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
+static unsigned sdma_v4_4_2_seq_to_irq_id(struct amdgpu_device *adev, int 
seq_num)
 {
+   bool sriov_cpx_odd = false;
+
+   /* check for sriov cpx mode odd/even vf */
+   if (amdgpu_sriov_vf(adev)) {
+   if (adev->gfx.xcc_mask == 0x1)
+   if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+   sriov_cpx_odd = true;
+   }
+
switch (seq_num) {
case 0:
-   return SOC15_IH_CLIENTID_SDMA0;
+   if (sriov_cpx_odd)
+   return SOC15_IH_CLIENTID_SDMA2;
+   else
+   return SOC15_IH_CLIENTID_SDMA0;
case 1:
-   return SOC15_IH_CLIENTID_SDMA1;
+   if (sriov_cpx_odd)
+   return SOC15_IH_CLIENTID_SDMA3;
+   else
+   return SOC15_IH_CLIENTID_SDMA1;
case 2:
return SOC15_IH_CLIENTID_SDMA2;
case 3:
@@ -82,7 +97,7 @@ static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
}
 }
 
-static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
+static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, unsigned 
client_id)
 {
switch (client_id) {
case SOC15_IH_CLIENTID_SDMA0:
@@ -90,9 +105,15 @@ static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
case SOC15_IH_CLIENTID_SDMA1:
return 1;
case SOC15_IH_CLIENTID_SDMA2:
-   return 2;
+   if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+   return 0;
+   else
+   return 2;
case SOC15_IH_CLIENTID_SDMA3:
-   return 3;
+   if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+   return 1;
+   else
+   return 3;
default:
return -EINVAL;
}
@@ -1300,13 +1321,15 @@ static int sdma_v4_4_2_late_init(void *handle)
 static int sdma_v4_4_2_sw_init(void *handle)
 {
struct amdgpu_ring *ring;
-   int r, i;
+   int r, i, num_irq_inst;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 aid_id;
 
+   num_irq_inst = min(adev->sdma.num_instances, 
adev->sdma.num_inst_per_aid);
+
/* SDMA trap event */
-   for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
-   r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+   for (i = 0; i < num_irq_inst; i++) {
+   r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i),
  SDMA0_4_0__SRCID__SDMA_TRAP,
  &adev->sdma.trap_irq);
if (r)
@@ -1314,8 +1337,8 @@ static int sdma_v4_4_2_sw_init(void *handle)
}
 
/* SDMA SRAM ECC event */
-   for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
-   r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+   for (i = 0; i < num_irq_inst; i++) {
+   r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i),
  SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
  &adev->sdma.ecc_irq);
if (r)
@@ -1323,26 +1346,26 @@ static int sdma_v4_4_2_sw_init(void *handle)
}
 
/* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/
-   for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
-   r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+   for (i = 0; i < num_irq_inst; i++) {
+   r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i),
  SDMA0_4_0__SRCID__SDMA_VM_HOLE,
  &adev->sdma.vm_hole_irq);
if (r)
return r;
 
-   r = amdgpu_irq_add_id

[PATCH] drm/amdgpu: add support for atom fw version v3_5

2024-03-15 Thread Alex Deucher
From: Likun Gao 

Support for atom_firmware_info_v3_5.

Signed-off-by: Likun Gao 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 6857c586ded7..a6d64bdbbb14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -34,6 +34,7 @@ union firmware_info {
struct atom_firmware_info_v3_2 v32;
struct atom_firmware_info_v3_3 v33;
struct atom_firmware_info_v3_4 v34;
+   struct atom_firmware_info_v3_5 v35;
 };
 
 /*
@@ -872,6 +873,10 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct 
amdgpu_device *adev)
fw_reserved_fb_size =
(firmware_info->v34.fw_reserved_size_in_kb << 10);
break;
+   case 5:
+   fw_reserved_fb_size =
+   (firmware_info->v35.fw_reserved_size_in_kb << 10);
+   break;
default:
fw_reserved_fb_size = 0;
break;
-- 
2.44.0



[PATCH] drm/amd/swsmu: add smu 14.0.1 vcn and jpeg msg

2024-03-15 Thread Alex Deucher
From: lima1002 

add new vcn and jpeg msg

v2: squash in updates (Alex)
v3: rework code for better compat with other smu14.x variants (Alex)

Signed-off-by: lima1002 
Signed-off-by: Alex Deucher 
---
 .../pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h  | 28 +--
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  | 10 
 .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c| 50 ---
 .../drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c  | 21 +---
 4 files changed, 82 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
index 8a8a57c56bc0..ca7ce4251482 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
@@ -54,14 +54,14 @@
 #define PPSMC_MSG_TestMessage   0x01 ///< To check if PMFW is 
alive and responding. Requirement specified by PMFW team
 #define PPSMC_MSG_GetPmfwVersion0x02 ///< Get PMFW version
 #define PPSMC_MSG_GetDriverIfVersion0x03 ///< Get PMFW_DRIVER_IF 
version
-#define PPSMC_MSG_SPARE00x04 ///< SPARE
-#define PPSMC_MSG_SPARE10x05 ///< SPARE
-#define PPSMC_MSG_PowerDownVcn  0x06 ///< Power down VCN
-#define PPSMC_MSG_PowerUpVcn0x07 ///< Power up VCN; VCN is 
power gated by default
-#define PPSMC_MSG_SetHardMinVcn 0x08 ///< For wireless display
+#define PPSMC_MSG_PowerDownVcn1 0x04 ///< Power down VCN1
+#define PPSMC_MSG_PowerUpVcn1   0x05 ///< Power up VCN1; VCN1 
is power gated by default
+#define PPSMC_MSG_PowerDownVcn0 0x06 ///< Power down VCN0
+#define PPSMC_MSG_PowerUpVcn0   0x07 ///< Power up VCN0; VCN0 
is power gated by default
+#define PPSMC_MSG_SetHardMinVcn00x08 ///< For wireless display
 #define PPSMC_MSG_SetSoftMinGfxclk  0x09 ///< Set SoftMin for 
GFXCLK, argument is frequency in MHz
-#define PPSMC_MSG_SPARE20x0A ///< SPARE
-#define PPSMC_MSG_SPARE30x0B ///< SPARE
+#define PPSMC_MSG_SetHardMinVcn10x0A ///< For wireless display
+#define PPSMC_MSG_SetSoftMinVcn10x0B ///< Set soft min for 
VCN1 clocks (VCLK1 and DCLK1)
 #define PPSMC_MSG_PrepareMp1ForUnload   0x0C ///< Prepare PMFW for GFX 
driver unload
 #define PPSMC_MSG_SetDriverDramAddrHigh 0x0D ///< Set high 32 bits of 
DRAM address for Driver table transfer
 #define PPSMC_MSG_SetDriverDramAddrLow  0x0E ///< Set low 32 bits of 
DRAM address for Driver table transfer
@@ -71,7 +71,7 @@
 #define PPSMC_MSG_GetEnabledSmuFeatures 0x12 ///< Get enabled features 
in PMFW
 #define PPSMC_MSG_SetHardMinSocclkByFreq0x13 ///< Set hard min for SOC 
CLK
 #define PPSMC_MSG_SetSoftMinFclk0x14 ///< Set hard min for FCLK
-#define PPSMC_MSG_SetSoftMinVcn 0x15 ///< Set soft min for VCN 
clocks (VCLK and DCLK)
+#define PPSMC_MSG_SetSoftMinVcn00x15 ///< Set soft min for 
VCN0 clocks (VCLK0 and DCLK0)
 
 #define PPSMC_MSG_EnableGfxImu  0x16 ///< Enable GFX IMU
 
@@ -84,17 +84,17 @@
 
 #define PPSMC_MSG_SetSoftMaxSocclkByFreq0x1D ///< Set soft max for SOC 
CLK
 #define PPSMC_MSG_SetSoftMaxFclkByFreq  0x1E ///< Set soft max for FCLK
-#define PPSMC_MSG_SetSoftMaxVcn 0x1F ///< Set soft max for VCN 
clocks (VCLK and DCLK)
+#define PPSMC_MSG_SetSoftMaxVcn00x1F ///< Set soft max for 
VCN0 clocks (VCLK0 and DCLK0)
 #define PPSMC_MSG_spare_0x200x20
-#define PPSMC_MSG_PowerDownJpeg 0x21 ///< Power down Jpeg
-#define PPSMC_MSG_PowerUpJpeg   0x22 ///< Power up Jpeg; VCN 
is power gated by default
+#define PPSMC_MSG_PowerDownJpeg00x21 ///< Power down Jpeg of 
VCN0
+#define PPSMC_MSG_PowerUpJpeg0  0x22 ///< Power up Jpeg of 
VCN0; VCN0 is power gated by default
 
 #define PPSMC_MSG_SetHardMinFclkByFreq  0x23 ///< Set hard min for FCLK
 #define PPSMC_MSG_SetSoftMinSocclkByFreq0x24 ///< Set soft min for SOC 
CLK
 #define PPSMC_MSG_AllowZstates  0x25 ///< Inform PMFM of 
allowing Zstate entry, i.e. no Miracast activity
-#define PPSMC_MSG_Reserved  0x26 ///< Not used
-#define PPSMC_MSG_Reserved1 0x27 ///< Not used, previously 
PPSMC_MSG_RequestActiveWgp
-#define PPSMC_MSG_Reserved2 0x28 ///< Not used, previously 
PPSMC_MSG_QueryActiveWgp
+#define PPSMC_MSG_PowerDownJpeg10x26 ///< Power down Jpeg of 
VCN1
+#define PPSMC_MSG_PowerUpJpeg1  0x27 ///< Power up Jpeg of 
VCN1; VCN1 is power gated by default
+#define PPSMC_MSG_SetSoftMaxVcn10x28 ///< Set soft max for 
VCN1 clocks (VCLK1 and DCL