[PATCH 7/8] drm/amdgpu: enable/disable ras_controller_irq and err_event_athub_irq
Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 55da6f5..8b4a568 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -822,6 +822,18 @@ static int gmc_v9_0_ecc_ras_block_late_init(void *handle, goto irq; } + if (adev->nbio.funcs->init_ras_controller_interrupt) { + r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0); + if (r) + goto irq; + } + + if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) { + r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0); + if (r) + goto irq; + } + return 0; irq: amdgpu_ras_sysfs_remove(adev, *ras_if); @@ -1427,6 +1439,10 @@ static int gmc_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + if (adev->nbio.funcs->init_ras_controller_interrupt) + amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0); + if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) + amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0); gmc_v9_0_gart_disable(adev); return 0; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/8] drm/amdgpu: add nbif v7_4 irq source header for vega20
nbif v7_4 interrupt source definition Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher --- .../amd/include/ivsrcid/nbio/irqsrcs_nbif_7_4.h| 42 ++ 1 file changed, 42 insertions(+) create mode 100644 drivers/gpu/drm/amd/include/ivsrcid/nbio/irqsrcs_nbif_7_4.h diff --git a/drivers/gpu/drm/amd/include/ivsrcid/nbio/irqsrcs_nbif_7_4.h b/drivers/gpu/drm/amd/include/ivsrcid/nbio/irqsrcs_nbif_7_4.h new file mode 100644 index 000..79af425 --- /dev/null +++ b/drivers/gpu/drm/amd/include/ivsrcid/nbio/irqsrcs_nbif_7_4.h @@ -0,0 +1,42 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __IRQSRCS_NBIF_7_4_H__ +#define __IRQSRCS_NBIF_7_4_H__ + +#define NBIF_7_4__SRCID__CHIP_ERR_INT_EVENT 0x5E // Error generated +#define NBIF_7_4__SRCID__DOORBELL_INTERRUPT 0x5F // Interrupt for doorbell event during VDDGFX off +#define NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT 0x60 // Interrupt for ras_intr_valid from RAS controller +#define NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT 0x61 // Interrupt for SDP ErrEvent received from ATHUB +#define NBIF_7_4__SRCID__PF_VF_MSGBUF_VALID 0x87 // Valid message in PF->VF mailbox message buffer (The interrupt is sent on behalf of PF) +#define NBIF_7_4__SRCID__PF_VF_MSGBUF_ACK 0x88 // Acknowledge message in PF->VF mailbox message buffer (The interrupt is sent on behalf of VF) +#define NBIF_7_4__SRCID__VF_PF_MSGBUF_VALID 0x89 // Valid message in VF->PF mailbox message buffer (The interrupt is sent on behalf of VF) +#define NBIF_7_4__SRCID__VF_PF_MSGBUF_ACK 0x8A // Acknowledge message in VF->PF mailbox message buffer (The interrupt is sent on behalf of PF) +#define NBIF_7_4__SRCID__CHIP_DPA_INT_EVENT 0xA0 // BIF_CHIP_DPA_INT_EVENT +#define NBIF_7_4__SRCID__CHIP_SLOT_POWER_CHG_INT_EVENT 0xA1 // BIF_CHIP_SLOT_POWER_CHG_INT_EVENT +#define NBIF_7_4__SRCID__ATOMIC_UR_OPCODE 0xCE // BIF receives unsupported atomic opcode from MC +#define NBIF_7_4__SRCID__ATOMIC_REQESTEREN_LOW 0xCF // BIF receive atomic request from MC while AtomicOp Requester is not enabled in PCIE config space + +#endif // __IRQSRCS_NBIF_7_4_H__ -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 6/8] drm/amdgpu: add ras_controller and err_event_athub interrupt support
Ras controller interrupt and Ras err event athub interrupt are two dedicated interrupts for RAS support. Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 4 + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 14 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 125 +++ 3 files changed, 143 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 28417e4..a04c5ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -79,10 +79,14 @@ struct amdgpu_nbio_funcs { void (*remap_hdp_registers)(struct amdgpu_device *adev); void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev); void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev); + int (*init_ras_controller_interrupt)(struct amdgpu_device *adev); + int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev); }; struct amdgpu_nbio { const struct nbio_hdp_flush_reg *hdp_flush_reg; + struct amdgpu_irq_src ras_controller_irq; + struct amdgpu_irq_src ras_err_event_athub_irq; const struct amdgpu_nbio_funcs *funcs; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index df4b9ae..230f7e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -27,6 +27,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" #include "amdgpu_atomfirmware.h" +#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" const char *ras_error_string[] = { "none", @@ -1498,6 +1499,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, int amdgpu_ras_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int r; if (con) return 0; @@ -1525,6 +1527,18 @@ int amdgpu_ras_init(struct amdgpu_device *adev) /* Might need get this flag from vbios. 
*/ con->flags = RAS_DEFAULT_FLAGS; + if (adev->nbio.funcs->init_ras_controller_interrupt) { + r = adev->nbio.funcs->init_ras_controller_interrupt(adev); + if (r) + return r; + } + + if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) { + r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev); + if (r) + return r; + } + if (amdgpu_ras_recovery_init(adev)) goto recovery_out; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 6ecdd5e..faf9300 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -27,6 +27,7 @@ #include "nbio/nbio_7_4_offset.h" #include "nbio/nbio_7_4_sh_mask.h" #include "nbio/nbio_7_4_0_smn.h" +#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c @@ -345,6 +346,128 @@ static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_d } } + +static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + /* The ras_controller_irq enablement should be done in psp bl when it +* tries to enable ras feature. Driver only need to set the correct interrupt +* vector for bare-metal and sriov use case respectively +*/ + uint32_t bif_intr_cntl; + + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (state == AMDGPU_IRQ_STATE_ENABLE) { + /* set interrupt vector select bit to 0 to select +* vetcor 1 for bare metal case */ + bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl, + BIF_INTR_CNTL, + RAS_INTR_VEC_SEL, 0); + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); + } + + return 0; +} + +static int nbio_v7_4_process_ras_controller_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* By design, the ih cookie for ras_controller_irq should be written +* to BIFring instead of general iv ring. 
However, due to known bif ring +* hw bug, it has to be disabled. There is no chance the process function +* will be involked. Just left it as a dummy one. +*/ + return 0; +} + +static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev, +
[PATCH 0/8] drm/amdgpu: enable dedicated ras controller interrupt
ras_controller_interrupt and SDP err_event_athub_interrupt are two dedicated interrupts for the VG20 RAS controller. The ras_controller interrupt is triggered when there is an NBIF error. The driver can leverage this interrupt to query and log the necessary error counts and other information. The err_event_athub interrupt is triggered when an SDP err_event is received from ATHUB. This is a very important RAS interrupt for all hw blocks (except BIF and SDMA) when an uncorrectable error happens and the hw probably freezes. The driver can leverage this interrupt to query and log the necessary error counts before resetting the GPU. The BIF ring was designed and dedicated for both interrupts. However, it can't be enabled due to a known HW bug. The driver has to poll the BIF_DOORBELL_INT_CNTL register to check whether the interrupt is triggered or not. Hawking Zhang (8): drm/amdgpu: add new amdgpu nbio header file drm/amdgpu: switch to new amdgpu_nbio structure drm/amdgpu/nbio: add functions to query ras specific interrupt status drm/amdgpu: add nbif v7_4 irq source header for vega20 drm/amdgpu: update nbio v7_4 ip header files drm/amdgpu: add ras_controller and err_event_athub interrupt support drm/amdgpu: enable/disable ras_controller_irq and err_event_athub_irq drm/amdgpu: poll ras_controller_irq and err_event_athub_irq status drivers/gpu/drm/amd/amdgpu/amdgpu.h| 63 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c| 12 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 93 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c| 14 ++ drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 16 +-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 14 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 20 ++- drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 4 +- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 1 - drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h | 1 + drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 3 +- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h | 1 + drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 1 - 
drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h | 1 + drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 160 - drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h | 1 + drivers/gpu/drm/amd/amdgpu/nv.c| 34 ++--- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/soc15.c | 71 - drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 +- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 2 +- .../amd/include/asic_reg/nbio/nbio_7_4_offset.h| 4 +- .../amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h | 6 +- .../amd/include/ivsrcid/nbio/irqsrcs_nbif_7_4.h| 42 ++ drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 2 +- .../gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c| 2 +- .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c | 2 +- .../gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c | 2 +- .../gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c | 4 +- 33 files changed, 447 insertions(+), 159 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h create mode 100644 drivers/gpu/drm/amd/include/ivsrcid/nbio/irqsrcs_nbif_7_4.h -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 8/8] drm/amdgpu: poll ras_controller_irq and err_event_athub_irq status
For hardware that cannot enable the BIF ring for the IH cookies of both ras_controller_irq and err_event_athub_irq, the driver has to poll the status register in irq handling and ack the hardware properly when an interrupt is triggered. Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 12 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index af4c3b1..3e49aa1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -150,6 +150,18 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg) ret = amdgpu_ih_process(adev, &adev->irq.ih); if (ret == IRQ_HANDLED) pm_runtime_mark_last_busy(dev->dev); + + /* For the hardware that cannot enable bif ring for both ras_controller_irq + * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status +* register to check whether the interrupt is triggered or not, and properly +* ack the interrupt if it is there +*/ + if (adev->nbio.funcs->handle_ras_controller_intr_no_bifring) + adev->nbio.funcs->handle_ras_controller_intr_no_bifring(adev); + + if (adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring) + adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring(adev); + return ret; } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 5/8] drm/amdgpu: update nbio v7_4 ip header files
Add mmBIF_INTR_CNTL and its shift mask. Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h | 4 ++-- drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h index 994e796..ce5830e 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h @@ -2793,8 +2793,8 @@ #define mmBIF_DOORBELL_INT_CNTL_BASE_IDX 2 #define mmBIF_FB_EN 0x00ff #define mmBIF_FB_EN_BASE_IDX 2 -#define mmBIF_BUSY_DELAY_CNTR 0x0100 -#define mmBIF_BUSY_DELAY_CNTR_BASE_IDX 2 +#define mmBIF_INTR_CNTL 0x0100 +#define mmBIF_INTR_CNTL_BASE_IDX 2 #define mmBIF_MST_TRANS_PENDING_VF 0x0109 #define mmBIF_MST_TRANS_PENDING_VF_BASE_IDX 2 #define mmBIF_SLV_TRANS_PENDING_VF 0x010a diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h index d467b93..f9829f5 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h @@ -20420,9 +20420,9 @@ #define BIF_FB_EN__FB_WRITE_EN__SHIFT 0x1 #define BIF_FB_EN__FB_READ_EN_MASK 0x0001L #define BIF_FB_EN__FB_WRITE_EN_MASK 0x0002L -//BIF_BUSY_DELAY_CNTR -#define BIF_BUSY_DELAY_CNTR__DELAY_CNT__SHIFT 0x0 -#define BIF_BUSY_DELAY_CNTR__DELAY_CNT_MASK 0x003FL +//BIF_INTR_CNTL +#define BIF_INTR_CNTL__RAS_INTR_VEC_SEL__SHIFT 0x0 +#define BIF_INTR_CNTL__RAS_INTR_VEC_SEL_MASK 0x0001L //BIF_MST_TRANS_PENDING_VF #define BIF_MST_TRANS_PENDING_VF__BIF_MST_TRANS_PENDING__SHIFT 0x0 #define BIF_MST_TRANS_PENDING_VF__BIF_MST_TRANS_PENDING_MASK 0x7FFFL -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/8] drm/amdgpu: add new amdgpu nbio header file
More nbio functionalities will be added, and nbio could be treated as an ip block like gfx/sdma, etc. Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 87 1 file changed, 87 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h new file mode 100644 index 000..0563476 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -0,0 +1,87 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __AMDGPU_NBIO_H__ +#define __AMDGPU_NBIO_H__ + +/* + * amdgpu nbio functions + */ +struct nbio_hdp_flush_reg { + u32 ref_and_mask_cp0; + u32 ref_and_mask_cp1; + u32 ref_and_mask_cp2; + u32 ref_and_mask_cp3; + u32 ref_and_mask_cp4; + u32 ref_and_mask_cp5; + u32 ref_and_mask_cp6; + u32 ref_and_mask_cp7; + u32 ref_and_mask_cp8; + u32 ref_and_mask_cp9; + u32 ref_and_mask_sdma0; + u32 ref_and_mask_sdma1; + u32 ref_and_mask_sdma2; + u32 ref_and_mask_sdma3; + u32 ref_and_mask_sdma4; + u32 ref_and_mask_sdma5; + u32 ref_and_mask_sdma6; + u32 ref_and_mask_sdma7; +}; + +struct amdgpu_nbio_funcs { + const struct nbio_hdp_flush_reg *hdp_flush_reg; + u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); + u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); + u32 (*get_rev_id)(struct amdgpu_device *adev); + void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); + void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); + u32 (*get_memsize)(struct amdgpu_device *adev); + void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index, int doorbell_size); + void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance); + void (*enable_doorbell_aperture)(struct amdgpu_device *adev, +bool enable); + void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, + bool enable); + void (*ih_doorbell_range)(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, +bool enable); + void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*ih_control)(struct amdgpu_device *adev); + void (*init_registers)(struct 
amdgpu_device *adev); + void (*detect_hw_virt)(struct amdgpu_device *adev); + void (*remap_hdp_registers)(struct amdgpu_device *adev); +}; + +struct amdgpu_nbio { + const struct nbio_hdp_flush_reg *hdp_flush_reg; + const struct amdgpu_nbio_funcs *funcs; +}; + +#endif -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/8] drm/amdgpu/nbio: add functions to query ras specific interrupt status
ras_controller_interrupt and err_event_interrupt are ras specific interrupts. Add functions to check their status and ack them if they are generated. Both functions should only be invoked in the ISR when the BIF ring is disabled or not even initialized. Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 2 ++ drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 32 2 files changed, 34 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 0563476..28417e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -77,6 +77,8 @@ struct amdgpu_nbio_funcs { void (*init_registers)(struct amdgpu_device *adev); void (*detect_hw_virt)(struct amdgpu_device *adev); void (*remap_hdp_registers)(struct amdgpu_device *adev); + void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev); + void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev); }; struct amdgpu_nbio { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index c416ab8..6ecdd5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -315,6 +315,36 @@ static void nbio_v7_4_init_registers(struct amdgpu_device *adev) WREG32_PCIE(smnPCIE_CI_CNTL, data); } +static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev) +{ + uint32_t bif_doorbell_intr_cntl; + + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, + RAS_CNTLR_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + } +} + +static void 
nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev) +{ + uint32_t bif_doorbell_intr_cntl; + + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + } +} + const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset, @@ -336,4 +366,6 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .init_registers = nbio_v7_4_init_registers, .detect_hw_virt = nbio_v7_4_detect_hw_virt, .remap_hdp_registers = nbio_v7_4_remap_hdp_registers, + .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring, + .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring, }; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/8] drm/amdgpu: switch to new amdgpu_nbio structure
no functional change, just switch to new structures Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 63 ++- drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 16 ++--- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 14 ++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 4 +- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 1 - drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h | 1 + drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 3 +- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h | 1 + drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 1 - drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h | 1 + drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 3 +- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h | 1 + drivers/gpu/drm/amd/amdgpu/nv.c| 34 ++- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 +-- drivers/gpu/drm/amd/amdgpu/soc15.c | 71 -- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 +- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 2 +- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 2 +- .../gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c| 2 +- .../gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c | 2 +- .../gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c | 2 +- .../gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c | 4 +- 27 files changed, 108 insertions(+), 154 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f6ae565..bcf169e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -73,6 +73,7 @@ #include "amdgpu_gmc.h" #include "amdgpu_gfx.h" #include "amdgpu_sdma.h" +#include "amdgpu_nbio.h" #include "amdgpu_dm.h" #include "amdgpu_virt.h" #include "amdgpu_csa.h" @@ -647,69 +648,11 @@ typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t); typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, 
uint32_t); typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); - -/* - * amdgpu nbio functions - * - */ -struct nbio_hdp_flush_reg { - u32 ref_and_mask_cp0; - u32 ref_and_mask_cp1; - u32 ref_and_mask_cp2; - u32 ref_and_mask_cp3; - u32 ref_and_mask_cp4; - u32 ref_and_mask_cp5; - u32 ref_and_mask_cp6; - u32 ref_and_mask_cp7; - u32 ref_and_mask_cp8; - u32 ref_and_mask_cp9; - u32 ref_and_mask_sdma0; - u32 ref_and_mask_sdma1; - u32 ref_and_mask_sdma2; - u32 ref_and_mask_sdma3; - u32 ref_and_mask_sdma4; - u32 ref_and_mask_sdma5; - u32 ref_and_mask_sdma6; - u32 ref_and_mask_sdma7; -}; - struct amdgpu_mmio_remap { u32 reg_offset; resource_size_t bus_addr; }; -struct amdgpu_nbio_funcs { - const struct nbio_hdp_flush_reg *hdp_flush_reg; - u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); - u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); - u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); - u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); - u32 (*get_rev_id)(struct amdgpu_device *adev); - void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); - void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); - u32 (*get_memsize)(struct amdgpu_device *adev); - void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index, int doorbell_size); - void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, - int doorbell_index, int instance); - void (*enable_doorbell_aperture)(struct amdgpu_device *adev, -bool enable); - void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, - bool enable); - void (*ih_doorbell_range)(struct amdgpu_device *adev, - bool use_doorbell, int doorbell_index); - void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, -bool enable); - void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, - bool enable); - void (*get_clockgating_state)(struct amdgpu_device 
*adev, - u32 *flags); - void
RE: [PATCH] drm/amdgpu: add dummy read for some GCVM status registers
Reviewed-by: Jack Xiao -Original Message- From: Yuan, Xiaojie Sent: Thursday, August 22, 2019 11:01 AM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Zhang, Hawking ; Xiao, Jack ; Yuan, Xiaojie Subject: [PATCH] drm/amdgpu: add dummy read for some GCVM status registers The GRBM register interface is now capable of bursting 1 cycle per register wr->wr, wr->rd much faster than previous multicycle per transaction done interface. This has caused a problem where status registers requiring HW to update have a 1 cycle delay, due to the register update having to go through GRBM. SW may operate on an incorrect value if they write a register and immediately check the corresponding status register. Registers requiring HW to clear or set fields may be delayed by 1 cycle. For example, 1. write VM_INVALIDATE_ENG0_REQ mask = 5a 2. read VM_INVALIDATE_ENG0_ACK till the ack is same as the request mask = 5a a. HW will reset VM_INVALIDATE_ENG0_ACK = 0 until invalidation is complete 3. write VM_INVALIDATE_ENG0_REQ mask = 5a 4. read VM_INVALIDATE_ENG0_ACK till the ack is same as the request mask = 5a a. First read of VM_INVALIDATE_ENG0_ACK = 5a instead of 0 b. Second read of VM_INVALIDATE_ENG0_ACK = 0 because the remote GRBM h/w register takes one extra cycle to be cleared c. 
In this case, SW will see a false ACK if they exit on first read Affected registers (only GC variant) | Recommended Dummy Read --+ VM_INVALIDATE_ENG*_ACK| VM_INVALIDATE_ENG*_REQ VM_L2_STATUS | VM_L2_STATUS VM_L2_PROTECTION_FAULT_STATUS | VM_L2_PROTECTION_FAULT_STATUS VM_L2_PROTECTION_FAULT_ADDR_HI/LO32 | VM_L2_PROTECTION_FAULT_ADDR_HI/LO32 VM_L2_IH_LOG_BUSY | VM_L2_IH_LOG_BUSY MC_VM_L2_PERFCOUNTER_HI/LO| MC_VM_L2_PERFCOUNTER_HI/LO ATC_L2_PERFCOUNTER_HI/LO | ATC_L2_PERFCOUNTER_HI/LO ATC_L2_PERFCOUNTER2_HI/LO | ATC_L2_PERFCOUNTER2_HI/LO Signed-off-by: Xiaojie Yuan --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 15 +++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 2 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d83d8a6a1fc0..56f76a1f32ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -135,6 +135,14 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, addr |= ((u64)entry->src_data[1] & 0xf) << 44; if (!amdgpu_sriov_vf(adev)) { + /* +* Issue a dummy read to wait for the status register to +* be updated to avoid reading an incorrect value due to +* the new fast GRBM interface. +*/ + if (entry->vmid_src == AMDGPU_GFXHUB_0) + RREG32(hub->vm_l2_pro_fault_status); + status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); } @@ -228,6 +236,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + /* +* Issue a dummy read to wait for the ACK register to be cleared +* to avoid a false ACK due to the new fast GRBM interface. 
+*/ + if (vmhub == AMDGPU_GFXHUB_0) + RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + /* Wait for ACK with a delay.*/ for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 6de17267fc35..17700606f54b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -352,6 +352,14 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, /* If it's the first fault for this address, process it normally */ if (!amdgpu_sriov_vf(adev)) { + /* +* Issue a dummy read to wait for the status register to +* be updated to avoid reading an incorrect value due to +* the new fast GRBM interface. +*/ + if (entry->vmid_src == AMDGPU_GFXHUB_0) + RREG32(hub->vm_l2_pro_fault_status); + status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); } @@ -480,6 +488,14 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, spin_lock(>gmc.invalidate_lock); WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + + /* +* Issue a dummy read to wait for the ACK register to be cleared +* to avoid a false ACK due to the new fast GRBM interface. +*/
RE: [PATCH] drm/powerplay: Fix Vega20 power reading again
Reviewed-by: Evan Quan > -Original Message- > From: amd-gfx On Behalf Of > Russell, Kent > Sent: Friday, August 23, 2019 9:37 PM > To: amd-gfx@lists.freedesktop.org > Cc: Russell, Kent > Subject: [PATCH] drm/powerplay: Fix Vega20 power reading again > > For the 40.46 SMU release, they changed CurrSocketPower to > AverageSocketPower, but this was changed back in 40.47 so just check if > it's 40.46 and make the appropriate change > > Tested with 40.45, 40.46 and 40.47 successfully > > Change-Id: Icbbe6fd3381b8ad6298c2d0852a726ffac98f93a > Signed-off-by: Kent Russell > --- > drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 7 --- > drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 7 --- > 2 files changed, 8 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > index 9f50a12f5c03..98a6f5305974 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > @@ -2101,10 +2101,11 @@ static int vega20_get_gpu_power(struct > pp_hwmgr *hwmgr, > if (ret) > return ret; > > - if (hwmgr->smu_version < 0x282e00) > - *query = metrics_table.CurrSocketPower << 8; > - else > + /* For the 40.46 release, they changed the value name */ > + if (hwmgr->smu_version == 0x282e00) > *query = metrics_table.AverageSocketPower << 8; > + else > + *query = metrics_table.CurrSocketPower << 8; > > return ret; > } > diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c > b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c > index 0fac824490d7..899bf96b23e1 100644 > --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c > +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c > @@ -2932,10 +2932,11 @@ static int vega20_get_gpu_power(struct > smu_context *smu, uint32_t *value) > if (ret) > return ret; > > - if (smu_version < 0x282e00) > - *value = metrics.CurrSocketPower << 8; > - else > + /* For the 40.46 release, they changed the value name */ > + if 
(smu_version == 0x282e00) > *value = metrics.AverageSocketPower << 8; > + else > + *value = metrics.CurrSocketPower << 8; > > return 0; > } > -- > 2.17.1 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] Revert "drm/amdgpu: free up the first paging queue v2"
Am 25.08.19 um 17:29 schrieb Michel Dänzer: On 2019-08-24 1:10 p.m., Christian König wrote: Am 23.08.19 um 22:01 schrieb Gang Ba: This reverts commit 4f8bc72fbf10f2dc8bca74d5da08b3a981b2e5cd. Change-Id: I577ba236e0571d11400a51f9d95840234aef678a Missing Signed-off-by line here. With that fixed Reviewed-by: Christian König Every revert needs to explain in the commit log why the commit is being reverted. Something like: It turned out that a single reserved queue wouldn't be sufficient for page fault handling. Christian. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] Revert "drm/amdgpu: free up the first paging queue v2"
On 2019-08-24 1:10 p.m., Christian König wrote: > Am 23.08.19 um 22:01 schrieb Gang Ba: >> This reverts commit 4f8bc72fbf10f2dc8bca74d5da08b3a981b2e5cd. >> >> Change-Id: I577ba236e0571d11400a51f9d95840234aef678a > > Missing Signed-off-by line here. With that fixed Reviewed-by: Christian > König Every revert needs to explain in the commit log why the commit is being reverted. -- Earthling Michel Dänzer | https://redhat.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
gnome-shell stuck because of amdgpu driver [5.3 RC5]
Hi folks, I left gnome-shell unlocked at noon, and when I returned in the evening I discovered that the monitor was not sleeping and was still showing the open GNOME activities screen. At first, I thought that some application was preventing the system from going to sleep. But when I tried to move the mouse, I realized that the system had hung. So I connected via ssh and tried to investigate the problem. I did not see anything strange in the kernel logs. My last idea before trying to kill the gnome-shell process was to dump the tasks that are in uninterruptible (blocked) state. After [Alt + PrnScr + W] I saw this: [32840.701909] sysrq: Show Blocked State [32840.701976] taskPC stack pid father [32840.702407] gnome-shell D11240 1900 1830 0x [32840.702438] Call Trace: [32840.702446] ? __schedule+0x352/0x900 [32840.702453] schedule+0x3a/0xb0 [32840.702457] schedule_timeout+0x289/0x3c0 [32840.702461] ? find_held_lock+0x32/0x90 [32840.702464] ? find_held_lock+0x32/0x90 [32840.702469] ? mark_held_locks+0x50/0x80 [32840.702473] ? _raw_spin_unlock_irqrestore+0x4b/0x60 [32840.702478] dma_fence_default_wait+0x1f5/0x340 [32840.702482] ? dma_fence_free+0x20/0x20 [32840.702487] dma_fence_wait_timeout+0x182/0x1e0 [32840.702533] amdgpu_fence_wait_empty+0xe7/0x210 [amdgpu] [32840.702577] amdgpu_pm_compute_clocks+0x70/0x5f0 [amdgpu] [32840.702641] dm_pp_apply_display_requirements+0x19e/0x1c0 [amdgpu] [32840.702705] dce12_update_clocks+0xd8/0x110 [amdgpu] [32840.702766] dc_commit_state+0x414/0x590 [amdgpu] [32840.702834] amdgpu_dm_atomic_commit_tail+0xd1e/0x1cf0 [amdgpu] [32840.702840] ? reacquire_held_locks+0xed/0x210 [32840.702848] ? ttm_eu_backoff_reservation+0xa5/0x160 [ttm] [32840.702853] ? find_held_lock+0x32/0x90 [32840.702855] ? find_held_lock+0x32/0x90 [32840.702860] ? __lock_acquire+0x247/0x1910 [32840.702867] ? find_held_lock+0x32/0x90 [32840.702871] ? mark_held_locks+0x50/0x80 [32840.702874] ? _raw_spin_unlock_irq+0x29/0x40 [32840.702877] ? lockdep_hardirqs_on+0xf0/0x180 [32840.702881] ? 
_raw_spin_unlock_irq+0x29/0x40 [32840.702884] ? wait_for_completion_timeout+0x75/0x190 [32840.702895] ? commit_tail+0x3c/0x70 [drm_kms_helper] [32840.702902] commit_tail+0x3c/0x70 [drm_kms_helper] [32840.702909] drm_atomic_helper_commit+0xe3/0x150 [drm_kms_helper] [32840.702922] drm_atomic_connector_commit_dpms+0xd7/0x100 [drm] [32840.702936] set_property_atomic+0xcc/0x140 [drm] [32840.702955] drm_mode_obj_set_property_ioctl+0xcb/0x1c0 [drm] [32840.702968] ? drm_mode_obj_find_prop_id+0x40/0x40 [drm] [32840.702978] drm_ioctl_kernel+0xaa/0xf0 [drm] [32840.702990] drm_ioctl+0x208/0x390 [drm] [32840.703003] ? drm_mode_obj_find_prop_id+0x40/0x40 [drm] [32840.703007] ? sched_clock_cpu+0xc/0xc0 [32840.703012] ? lockdep_hardirqs_on+0xf0/0x180 [32840.703053] amdgpu_drm_ioctl+0x49/0x80 [amdgpu] [32840.703058] do_vfs_ioctl+0x411/0x750 [32840.703065] ksys_ioctl+0x5e/0x90 [32840.703069] __x64_sys_ioctl+0x16/0x20 [32840.703072] do_syscall_64+0x5c/0xb0 [32840.703076] entry_SYSCALL_64_after_hwframe+0x49/0xbe [32840.703079] RIP: 0033:0x7f8bcab0f00b [32840.703084] Code: Bad RIP value. 
[32840.703086] RSP: 002b:7ffe76c62338 EFLAGS: 0246 ORIG_RAX: 0010 [32840.703089] RAX: ffda RBX: 7ffe76c62370 RCX: 7f8bcab0f00b [32840.703092] RDX: 7ffe76c62370 RSI: c01864ba RDI: 0009 [32840.703094] RBP: c01864ba R08: 0003 R09: c0c0c0c0 [32840.703096] R10: 56476c86a018 R11: 0246 R12: 56476c8ad940 [32840.703098] R13: 0009 R14: 0002 R15: 0003 [root@localhost ~]# [root@localhost ~]# ps aux | grep gnome-shell mikhail 1900 0.3 1.1 6447496 378696 tty2 Dl+ Aug24 2:10 /usr/bin/gnome-shell mikhail 2099 0.0 0.0 519984 23392 ?Ssl Aug24 0:00 /usr/libexec/gnome-shell-calendar-server mikhail12214 0.0 0.0 399484 29660 pts/2Sl+ Aug24 0:00 /usr/bin/python3 /usr/bin/chrome-gnome-shell chrome-extension://gphhapmejobijbbhgpjhcjognlahblep/ root 22957 0.0 0.0 216120 2456 pts/10 S+ 03:59 0:00 grep --color=auto gnome-shell After that, I tried to kill the gnome-shell process with signal 9, but the process would not terminate even after several attempts. Only [Alt + PrnScr + B] helped to reboot the hung system. I am writing here because I hope some amdgpu hackers can look at the trace and understand what is happening. Sorry, I don’t know how to reproduce this bug, but the problem itself is very annoying. Thanks. GPU: AMD Radeon VII Kernel: 5.3 RC5 -- Best Regards, Mike Gavrilov. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx