[PATCH 13/13] drm/amd/display: move FPU related code from dcn316 to dml/dcn31 folder
From: Melissa Wen Moves FPU-related structs and dcn316_update_bw_bounding_box from dcn316 driver to dml/dcn31 that centralize FPU operations for DCN 3.1x Signed-off-by: Melissa Wen Reviewed-by: Alex Hung --- .../gpu/drm/amd/display/dc/dcn316/Makefile| 26 -- .../amd/display/dc/dcn316/dcn316_resource.c | 231 +- .../amd/display/dc/dcn316/dcn316_resource.h | 3 + .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 229 + .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h | 2 + 5 files changed, 235 insertions(+), 256 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile index cd87b687c5e2..819d44a9439b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile @@ -25,32 +25,6 @@ DCN316 = dcn316_resource.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -mhard-float -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -msse2 -endif -endif - AMD_DAL_DCN316 = $(addprefix $(AMDDALPATH)/dc/dcn316/,$(DCN316)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN316) diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c index 8decc3ccf8ca..d73145dab173 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c @@ -66,6 +66,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" #include "dcn31/dcn31_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31/dcn31_panel_cntl.h" @@ -123,157 +124,10 @@ #include "link_enc_cfg.h" -#define DC_LOGGER_INIT(logger) - -#define DCN3_16_DEFAULT_DET_SIZE 192 #define DCN3_16_MAX_DET_SIZE 384 #define DCN3_16_MIN_COMPBUF_SIZE_KB 128 #define DCN3_16_CRB_SEGMENT_SIZE_KB 64 -struct _vcs_dpi_ip_params_st dcn3_16_ip = { - .gpuvm_enable = 1, - .gpuvm_max_page_table_levels = 1, - .hostvm_enable = 1, - .hostvm_max_page_table_levels = 2, - .rob_buffer_size_kbytes = 64, - .det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1024, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 32, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 3, - .maximum_dsc_bits_per_component = 10, - .dsc422_native_support = false, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 48, - .line_buffer_size_bits = 789504, - .max_line_buffer_lines = 12, - 
.writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 46, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 27, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 119, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, -}; - -struct _vcs_dpi_soc_bounding_box_st
[PATCH 12/13] drm/amd/display: move FPU related code from dcn315 to dml/dcn31 folder
From: Melissa Wen Moves related structs and dcn315_update_bw_bounding_box from dcn315 driver code to dml/dcn31_fpu that centralizes FPU code for DCN 3.1x. Signed-off-by: Melissa Wen Reviewed-by: Alex Hung --- .../gpu/drm/amd/display/dc/dcn315/Makefile| 26 -- .../amd/display/dc/dcn315/dcn315_resource.c | 232 +- .../amd/display/dc/dcn315/dcn315_resource.h | 3 + .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 228 + .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h | 3 + 5 files changed, 235 insertions(+), 257 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile index c831ad46e81c..59381d24800b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile @@ -25,32 +25,6 @@ DCN315 = dcn315_resource.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -mhard-float -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -msse2 -endif -endif - AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN315) diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c index 06adb77c206b..fadb89326999 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c @@ -66,6 +66,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" #include "dcn31/dcn31_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31/dcn31_panel_cntl.h" @@ -133,158 +134,9 @@ #include "link_enc_cfg.h" -#define DC_LOGGER_INIT(logger) - -#define DCN3_15_DEFAULT_DET_SIZE 192 #define DCN3_15_MAX_DET_SIZE 384 -#define DCN3_15_MIN_COMPBUF_SIZE_KB 128 #define DCN3_15_CRB_SEGMENT_SIZE_KB 64 -struct _vcs_dpi_ip_params_st dcn3_15_ip = { - .gpuvm_enable = 1, - .gpuvm_max_page_table_levels = 1, - .hostvm_enable = 1, - .hostvm_max_page_table_levels = 2, - .rob_buffer_size_kbytes = 64, - .det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE, - .min_comp_buffer_size_kbytes = DCN3_15_MIN_COMPBUF_SIZE_KB, - .config_return_buffer_size_in_kbytes = 1024, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 32, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 3, - .maximum_dsc_bits_per_component = 10, - .dsc422_native_support = false, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 49, - 
.line_buffer_size_bits = 789504, - .max_line_buffer_lines = 12, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 9, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 46, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 27, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 119, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, -
[PATCH 11/13] drm/amd/display: move FPU related code from dcn31 to dml/dcn31 folder
From: Melissa Wen Creates FPU files in dml/dcn31 folder to centralize FPU operations from 3.1x drivers and moves all FPU-associated code from dcn31 driver to there. It includes the struct _vcs_dpi_ip_params_st and _vcs_dpi_soc_bounding_box_st and functions: - dcn31_calculate_wm_and_dlg_fp() - dcn31_update_bw_bounding_box() adding dc_assert_fp_enabled to them and drop DC_FP_START/END inside functions that was moved to dml folder, as required. Signed-off-by: Melissa Wen Reviewed-by: Alex Hung --- drivers/gpu/drm/amd/display/dc/dcn31/Makefile | 26 -- .../drm/amd/display/dc/dcn31/dcn31_resource.c | 355 +-- .../drm/amd/display/dc/dcn31/dcn31_resource.h | 4 +- drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 + .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 406 ++ .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h | 39 ++ 6 files changed, 451 insertions(+), 381 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile index d20e3b8ccc30..ec041e3cda30 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile @@ -15,32 +15,6 @@ DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_hwseq.o dcn31_init.o dcn31_hubp.o dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \ dcn31_afmt.o dcn31_vpg.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mhard-float -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. 
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -msse2 -endif -endif - AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN31) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 338235bcef4a..bf130b2435ab 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -65,6 +65,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" #include "dcn31/dcn31_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31_panel_cntl.h" @@ -102,152 +103,6 @@ #define DC_LOGGER_INIT(logger) -#define DCN3_1_DEFAULT_DET_SIZE 384 - -struct _vcs_dpi_ip_params_st dcn3_1_ip = { - .gpuvm_enable = 1, - .gpuvm_max_page_table_levels = 1, - .hostvm_enable = 1, - .hostvm_max_page_table_levels = 2, - .rob_buffer_size_kbytes = 64, - .det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1792, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 32, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 3, - .maximum_dsc_bits_per_component = 10, - .dsc422_native_support = false, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 48, - .line_buffer_size_bits = 789504, - .max_line_buffer_lines = 12, - .writeback_interface_buffer_size_kbytes = 90, - 
.max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1,
Re: [PATCH 0/1] Title: DC Patches March 18, 2022
[AMD Official Use Only] My apology to send incorrect one. Please ignore this. An updated one will be sent. From: Hung, Alex Sent: 18 March 2022 15:44 To: amd-gfx@lists.freedesktop.org Cc: Hung, Alex Subject: [PATCH 0/1] Title: DC Patches March 18, 2022 This DC patchset brings improvements in multiple areas. In summary, we highlight: * HDCP SEND AKI INIT error * fix audio format not updated after edid updated * Reduce stack size * FEC check in timing validation * Add fSMC_MSG_SetDtbClk support * Update VTEM Infopacket definition * [FW Promotion] Release 0.0.109.0 * Add support for zstate during extended vblank * remove destructive verify link for TMDS * move FPU related code from dcn31 to dml/dcn31 folder * move FPU related code from dcn315 to dml/dcn31 folder * move FPU related code from dcn316 to dml/dcn31 folder Aric Cyr (1): drm/amd/display: 3.2.178 drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- 2.35.1
[PATCH 10/13] drm/amd/display: remove destructive verify link for TMDS
From: Charlene Liu [why and how] TMDS not need destructive verify link Reviewed-by: Aric Cyr Acked-by: Alan Liu Acked-by: Alex Hung Signed-off-by: Charlene Liu --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index cb87dd643180..bbaa5abdf888 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -983,8 +983,7 @@ static bool should_verify_link_capability_destructively(struct dc_link *link, destrictive = false; } } - } else if (dc_is_hdmi_signal(link->local_sink->sink_signal)) - destrictive = true; + } return destrictive; } -- 2.35.1
[PATCH 09/13] drm/amd/display: Add support for zstate during extended vblank
From: Gabe Teeger [why] When we enter FREESYNC_STATE_VIDEO, we want to use the extra vblank portion to enter zstate if possible. [how] When we enter freesync, a full update is triggered and the new vtotal with extra lines is passed to dml in a stream update. The time gained from extra vblank lines is calculated in microseconds. We allow zstate entry if the time gained is greater than 5 ms, which is the current policy. Furthermore, an optimized value for min_dst_y_next_start is calculated and written to its register. When exiting freesync, another full update is triggered and default values are restored. Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Gabe Teeger --- drivers/gpu/drm/amd/display/dc/core/dc.c | 19 +++ drivers/gpu/drm/amd/display/dc/dc.h | 6 +- drivers/gpu/drm/amd/display/dc/dc_stream.h| 2 ++ .../drm/amd/display/dc/dcn20/dcn20_hwseq.c| 12 .../gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c | 8 .../drm/amd/display/dc/dcn31/dcn31_resource.c | 1 + .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 18 +++--- .../dc/dml/dcn31/display_rq_dlg_calc_31.c | 13 + .../amd/display/dc/dml/display_mode_structs.h | 2 ++ drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 3 +++ 10 files changed, 80 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 75f9c97bebb0..f2ad8f58e69c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2393,6 +2393,8 @@ static enum surface_update_type check_update_surfaces_for_stream( if (stream_update->mst_bw_update) su_flags->bits.mst_bw = 1; + if (stream_update->crtc_timing_adjust && dc_extended_blank_supported(dc)) + su_flags->bits.crtc_timing_adjust = 1; if (su_flags->raw != 0) overall_type = UPDATE_TYPE_FULL; @@ -2654,6 +2656,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_infopacket) stream->vrr_infopacket = *update->vrr_infopacket; + if (update->crtc_timing_adjust) + 
stream->adjust = *update->crtc_timing_adjust; + if (update->dpms_off) stream->dpms_off = *update->dpms_off; @@ -4055,3 +4060,17 @@ void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bo if (pipe->stream_res.abm && pipe->stream_res.abm->funcs->set_abm_pause) pipe->stream_res.abm->funcs->set_abm_pause(pipe->stream_res.abm, !enable, i, pipe->stream_res.tg->inst); } +/* + * dc_extended_blank_supported: Decide whether extended blank is supported + * + * Extended blank is a freesync optimization feature to be enabled in the future. + * During the extra vblank period gained from freesync, we have the ability to enter z9/z10. + * + * @param [in] dc: Current DC state + * @return: Indicate whether extended blank is supported (true or false) + */ +bool dc_extended_blank_supported(struct dc *dc) +{ + return dc->debug.extended_blank_optimization && !dc->debug.disable_z10 + && dc->caps.zstate_support && dc->caps.is_apu; +} diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 116967b96b01..ced40fe218ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -188,6 +188,7 @@ struct dc_caps { bool psp_setup_panel_mode; bool extended_aux_timeout_support; bool dmcub_support; + bool zstate_support; uint32_t num_of_internal_disp; enum dp_protocol_version max_dp_protocol_version; unsigned int mall_size_per_mem_channel; @@ -703,13 +704,14 @@ struct dc_debug_options { bool enable_driver_sequence_debug; enum det_size crb_alloc_policy; int crb_alloc_policy_min_disp_count; -#if defined(CONFIG_DRM_AMD_DC_DCN) bool disable_z10; +#if defined(CONFIG_DRM_AMD_DC_DCN) bool enable_z9_disable_interface; bool enable_sw_cntl_psr; union dpia_debug_options dpia_debug; #endif bool apply_vendor_specific_lttpr_wa; + bool extended_blank_optimization; bool ignore_dpref_ss; uint8_t psr_power_use_phy_fsm; }; @@ -1369,6 +1371,8 @@ struct dc_sink_init_data { bool converter_disable_audio; }; +bool 
dc_extended_blank_supported(struct dc *dc); + struct dc_sink *dc_sink_create(const struct dc_sink_init_data *init_params); /* Newer interfaces */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 99a750f561f8..c4168c11257c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -131,6 +131,7 @@ union stream_update_flags { uint32_t wb_update:1;
[PATCH 08/13] drm/amd/display: 3.2.178
From: Aric Cyr This version brings along following fixes: - HDCP SEND AKI INIT error - fix audio format not updated after edid updated - Reduce stack size - FEC check in timing validation - Add fSMC_MSG_SetDtbClk support - Update VTEM Infopacket definition - [FW Promotion] Release 0.0.109.0 Acked-by: Alex Hung Signed-off-by: Aric Cyr --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4ffab7bb1098..116967b96b01 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.177" +#define DC_VER "3.2.178" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- 2.35.1
[PATCH 07/13] drm/amd/display: [FW Promotion] Release 0.0.109.0
From: Anthony Koo Reviewed-by: Aric Cyr Acked-by: Alex Hung Signed-off-by: Anthony Koo --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 71214c7a60fc..ce773b56a778 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -46,10 +46,10 @@ /* Firmware versioning. */ #ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0x929554ba +#define DMUB_FW_VERSION_GIT_HASH 0x51b95a35 #define DMUB_FW_VERSION_MAJOR 0 #define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 108 +#define DMUB_FW_VERSION_REVISION 109 #define DMUB_FW_VERSION_TEST 0 #define DMUB_FW_VERSION_VBIOS 0 #define DMUB_FW_VERSION_HOTFIX 0 -- 2.35.1
[PATCH 06/13] drm/amd/display: Update VTEM Infopacket definition
From: "Leo (Hanghong) Ma" [Why & How] The latest HDMI SPEC has updated the VTEM packet structure, so change the VTEM Infopacket defined in the driver side to align with the SPEC. Reviewed-by: Chris Park Acked-by: Alex Hung Signed-off-by: Leo (Hanghong) Ma --- .../gpu/drm/amd/display/modules/info_packet/info_packet.c| 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index b691aa45e84f..79bc207415bc 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -100,7 +100,8 @@ enum vsc_packet_revision { //PB7 = MD0 #define MASK_VTEM_MD0__VRR_EN 0x01 #define MASK_VTEM_MD0__M_CONST0x02 -#define MASK_VTEM_MD0__RESERVED2 0x0C +#define MASK_VTEM_MD0__QMS_EN 0x04 +#define MASK_VTEM_MD0__RESERVED2 0x08 #define MASK_VTEM_MD0__FVA_FACTOR_M1 0xF0 //MD1 @@ -109,7 +110,7 @@ enum vsc_packet_revision { //MD2 #define MASK_VTEM_MD2__BASE_REFRESH_RATE_98 0x03 #define MASK_VTEM_MD2__RB0x04 -#define MASK_VTEM_MD2__RESERVED3 0xF8 +#define MASK_VTEM_MD2__NEXT_TFR 0xF8 //MD3 #define MASK_VTEM_MD3__BASE_REFRESH_RATE_07 0xFF -- 2.35.1
[PATCH 05/13] drm/amd/display: Add fSMC_MSG_SetDtbClk support
From: Oliver Logush [why] Needed to support dcn315 Reviewed-by: Charlene Liu Acked-by: Alex Hung Signed-off-by: Oliver Logush --- .../display/dc/clk_mgr/dcn315/dcn315_smu.c| 19 +++ .../display/dc/clk_mgr/dcn315/dcn315_smu.h| 4 +++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c index 880ffea2afc6..2600313fea57 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c @@ -80,8 +80,8 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x, 0x0014, 0x0D #define VBIOSSMC_MSG_SetDppclkFreq0x06 ///< Set DPP clock frequency in MHZ #define VBIOSSMC_MSG_SetHardMinDcfclkByFreq 0x07 ///< Set DCF clock frequency hard min in MHZ #define VBIOSSMC_MSG_SetMinDeepSleepDcfclk0x08 ///< Set DCF clock minimum frequency in deep sleep in MHZ -#define VBIOSSMC_MSG_SetPhyclkVoltageByFreq 0x09 ///< Set display phy clock frequency in MHZ in case VMIN does not support phy frequency -#define VBIOSSMC_MSG_GetFclkFrequency 0x0A ///< Get FCLK frequency, return frequemcy in MHZ +#define VBIOSSMC_MSG_GetDtbclkFreq0x09 ///< Get display dtb clock frequency in MHZ in case VMIN does not support phy frequency +#define VBIOSSMC_MSG_SetDtbClk0x0A ///< Set dtb clock frequency, return frequemcy in MHZ #define VBIOSSMC_MSG_SetDisplayCount 0x0B ///< Inform PMFW of number of display connected #define VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown 0x0C ///< To ask PMFW turn off TMDP 48MHz refclk during display off to save power #define VBIOSSMC_MSG_UpdatePmeRestore 0x0D ///< To ask PMFW to write into Azalia for PME wake up event @@ -324,15 +324,26 @@ int dcn315_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr) return (dprefclk_get_mhz * 1000); } -int dcn315_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr) +int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr) { int fclk_get_mhz = -1; if 
(clk_mgr->smu_present) { fclk_get_mhz = dcn315_smu_send_msg_with_param( clk_mgr, - VBIOSSMC_MSG_GetFclkFrequency, + VBIOSSMC_MSG_GetDtbclkFreq, 0); } return (fclk_get_mhz * 1000); } + +void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable) +{ + if (!clk_mgr->smu_present) + return; + + dcn315_smu_send_msg_with_param( + clk_mgr, + VBIOSSMC_MSG_SetDtbClk, + enable); +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h index 66fa42f8dd18..5aa3275ac7d8 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h @@ -37,6 +37,7 @@ #define NUM_SOC_VOLTAGE_LEVELS 4 #define NUM_DF_PSTATE_LEVELS4 + typedef struct { uint16_t MinClock; // This is either DCFCLK or SOCCLK (in MHz) uint16_t MaxClock; // This is either DCFCLK or SOCCLK (in MHz) @@ -124,5 +125,6 @@ void dcn315_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr); void dcn315_smu_request_voltage_via_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz); void dcn315_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr); int dcn315_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr); -int dcn315_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr); +int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr); +void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable); #endif /* DAL_DC_315_SMU_H_ */ -- 2.35.1
[PATCH 04/13] drm/amd/display: FEC check in timing validation
From: Chiawen Huang [Why] disable/enable leads fec mismatch between hw/sw fec state. [How] check fec status to fastboot on/off. Reviewed-by: Anthony Koo Acked-by: Alex Hung Signed-off-by: Chiawen Huang --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f6e19efea756..75f9c97bebb0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1496,6 +1496,10 @@ bool dc_validate_boot_timing(const struct dc *dc, if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) return false; + /* Check for FEC status*/ + if (link->link_enc->funcs->fec_is_active(link->link_enc)) + return false; + enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc); if (enc_inst == ENGINE_ID_UNKNOWN) -- 2.35.1
[PATCH 03/13] drm/amd/display: Reduce stack size
From: Rodrigo Siqueira Linux kernel enabled more compilation restrictions related to the stack size, which caused compilation failures in our code. This commit reduces the allocation size by allocating the required memory dynamically. Reviewed-by: Harry Wentland Reviewed-by: Aric Cyr Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index c3e141c19a77..ad757b59e00e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2056,7 +2056,7 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, { struct dc_context *dc_ctx = dc->ctx; int i, master = -1, embedded = -1; - struct dc_crtc_timing hw_crtc_timing[MAX_PIPES] = {0}; + struct dc_crtc_timing *hw_crtc_timing; uint64_t phase[MAX_PIPES]; uint64_t modulo[MAX_PIPES]; unsigned int pclk; @@ -2067,6 +2067,10 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, uint32_t dp_ref_clk_100hz = dc->res_pool->dp_clock_source->ctx->dc->clk_mgr->dprefclk_khz*10; + hw_crtc_timing = kcalloc(MAX_PIPES, sizeof(*hw_crtc_timing), GFP_KERNEL); + if (!hw_crtc_timing) + return master; + if (dc->config.vblank_alignment_dto_params && dc->res_pool->dp_clock_source->funcs->override_dp_pix_clk) { embedded_h_total = @@ -2130,6 +2134,8 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, } } + + kfree(hw_crtc_timing); return master; } -- 2.35.1
[PATCH 02/13] drm/amd/display: fix audio format not updated after edid updated
From: Charlene Liu [why] for the case edid change only changed audio format. driver still need to update stream. Reviewed-by: Alvin Lee Reviewed-by: Aric Cyr Acked-by: Alex Hung Signed-off-by: Charlene Liu --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 7af153434e9e..d251c3f3a714 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1685,8 +1685,8 @@ bool dc_is_stream_unchanged( if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param) return false; - // Only Have Audio left to check whether it is same or not. This is a corner case for Tiled sinks - if (old_stream->audio_info.mode_count != stream->audio_info.mode_count) + /*compare audio info*/ + if (memcmp(_stream->audio_info, >audio_info, sizeof(stream->audio_info)) != 0) return false; return true; -- 2.35.1
[PATCH 01/13] drm/amd/display: HDCP SEND AKI INIT error
From: Ahmad Othman [why] HDCP sends AKI INIT error in case of multiple display on dock [how] Added new checks and method to handfle display adjustment for multiple display cases Reviewed-by: Wenjing Liu Acked-by: Alex Hung Signed-off-by: Ahmad Othman --- .../gpu/drm/amd/display/modules/hdcp/hdcp.c | 38 ++- .../gpu/drm/amd/display/modules/hdcp/hdcp.h | 8 .../drm/amd/display/modules/inc/mod_hdcp.h| 2 +- 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c index 3e81850a7ffe..5e01c6e24cbc 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c @@ -251,6 +251,33 @@ static enum mod_hdcp_status reset_connection(struct mod_hdcp *hdcp, return status; } +static enum mod_hdcp_status update_display_adjustments(struct mod_hdcp *hdcp, + struct mod_hdcp_display *display, + struct mod_hdcp_display_adjustment *adj) +{ + enum mod_hdcp_status status = MOD_HDCP_STATUS_NOT_IMPLEMENTED; + + if (is_in_authenticated_states(hdcp) && + is_dp_mst_hdcp(hdcp) && + display->adjust.disable == true && + adj->disable == false) { + display->adjust.disable = false; + if (is_hdcp1(hdcp)) + status = mod_hdcp_hdcp1_enable_dp_stream_encryption(hdcp); + else if (is_hdcp2(hdcp)) + status = mod_hdcp_hdcp2_enable_dp_stream_encryption(hdcp); + + if (status != MOD_HDCP_STATUS_SUCCESS) + display->adjust.disable = true; + } + + if (status == MOD_HDCP_STATUS_SUCCESS && + memcmp(adj, >adjust, + sizeof(struct mod_hdcp_display_adjustment)) != 0) + status = MOD_HDCP_STATUS_NOT_IMPLEMENTED; + + return status; +} /* * Implementation of functions in mod_hdcp.h */ @@ -391,7 +418,7 @@ enum mod_hdcp_status mod_hdcp_remove_display(struct mod_hdcp *hdcp, return status; } -enum mod_hdcp_status mod_hdcp_update_authentication(struct mod_hdcp *hdcp, +enum mod_hdcp_status mod_hdcp_update_display(struct mod_hdcp *hdcp, uint8_t index, struct mod_hdcp_link_adjustment 
*link_adjust, struct mod_hdcp_display_adjustment *display_adjust, @@ -419,6 +446,15 @@ enum mod_hdcp_status mod_hdcp_update_authentication(struct mod_hdcp *hdcp, goto out; } + if (memcmp(link_adjust, &hdcp->connection.link.adjust, + sizeof(struct mod_hdcp_link_adjustment)) == 0 && + memcmp(display_adjust, &display->adjust, + sizeof(struct mod_hdcp_display_adjustment)) != 0) { + status = update_display_adjustments(hdcp, display, display_adjust); + if (status != MOD_HDCP_STATUS_NOT_IMPLEMENTED) + goto out; + } + /* stop current authentication */ status = reset_authentication(hdcp, output); if (status != MOD_HDCP_STATUS_SUCCESS) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h index 399fbca8947b..6b195207de90 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h @@ -445,6 +445,14 @@ static inline uint8_t is_in_hdcp2_dp_states(struct mod_hdcp *hdcp) current_state(hdcp) <= HDCP2_DP_STATE_END); } +static inline uint8_t is_in_authenticated_states(struct mod_hdcp *hdcp) +{ + return (current_state(hdcp) == D1_A4_AUTHENTICATED || + current_state(hdcp) == H1_A45_AUTHENTICATED || + current_state(hdcp) == D2_A5_AUTHENTICATED || + current_state(hdcp) == H2_A5_AUTHENTICATED); +} + static inline uint8_t is_hdcp1(struct mod_hdcp *hdcp) { return (is_in_hdcp1_states(hdcp) || is_in_hdcp1_dp_states(hdcp)); diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h index f7420c3f5672..3348bb97ef81 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h @@ -294,7 +294,7 @@ enum mod_hdcp_status mod_hdcp_remove_display(struct mod_hdcp *hdcp, uint8_t index, struct mod_hdcp_output *output); /* called per display to apply new authentication adjustment */ -enum mod_hdcp_status mod_hdcp_update_authentication(struct mod_hdcp *hdcp, +enum mod_hdcp_status 
mod_hdcp_update_display(struct mod_hdcp *hdcp, uint8_t index, struct mod_hdcp_link_adjustment *link_adjust, struct mod_hdcp_display_adjustment *display_adjust, -- 2.35.1
[PATCH 00/13] Title: DC Patches March 18, 2022
This DC patchset brings improvements in multiple areas. In summary, we highlight: * HDCP SEND AKI INIT error * fix audio format not updated after edid updated * Reduce stack size * FEC check in timing validation * Add fSMC_MSG_SetDtbClk support * Update VTEM Infopacket definition * [FW Promotion] Release 0.0.109.0 * Add support for zstate during extended vblank * remove destructive verify link for TMDS * move FPU related code from dcn31 to dml/dcn31 folder * move FPU related code from dcn315 to dml/dcn31 folder * move FPU related code from dcn316 to dml/dcn31 folder Ahmad Othman (1): drm/amd/display: HDCP SEND AKI INIT error Anthony Koo (1): drm/amd/display: [FW Promotion] Release 0.0.109.0 Aric Cyr (1): drm/amd/display: 3.2.178 Charlene Liu (2): drm/amd/display: fix audio format not updated after edid updated drm/amd/display: remove destructive verify link for TMDS Chiawen Huang (1): drm/amd/display: FEC check in timing validation Gabe Teeger (1): drm/amd/display: Add support for zstate during extended vblank Leo (Hanghong) Ma (1): drm/amd/display: Update VTEM Infopacket definition Melissa Wen (3): drm/amd/display: move FPU related code from dcn31 to dml/dcn31 folder drm/amd/display: move FPU related code from dcn315 to dml/dcn31 folder drm/amd/display: move FPU related code from dcn316 to dml/dcn31 folder Oliver Logush (1): drm/amd/display: Add fSMC_MSG_SetDtbClk support Rodrigo Siqueira (1): drm/amd/display: Reduce stack size .../display/dc/clk_mgr/dcn315/dcn315_smu.c| 19 +- .../display/dc/clk_mgr/dcn315/dcn315_smu.h| 4 +- drivers/gpu/drm/amd/display/dc/core/dc.c | 23 + drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 +- .../gpu/drm/amd/display/dc/core/dc_resource.c | 4 +- drivers/gpu/drm/amd/display/dc/dc.h | 8 +- drivers/gpu/drm/amd/display/dc/dc_stream.h| 2 + .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 8 +- .../drm/amd/display/dc/dcn20/dcn20_hwseq.c| 12 + drivers/gpu/drm/amd/display/dc/dcn31/Makefile | 26 - .../gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c | 
8 + .../drm/amd/display/dc/dcn31/dcn31_resource.c | 356 +--- .../drm/amd/display/dc/dcn31/dcn31_resource.h | 4 +- .../gpu/drm/amd/display/dc/dcn315/Makefile| 26 - .../amd/display/dc/dcn315/dcn315_resource.c | 232 + .../amd/display/dc/dcn315/dcn315_resource.h | 3 + .../gpu/drm/amd/display/dc/dcn316/Makefile| 26 - .../amd/display/dc/dcn316/dcn316_resource.c | 231 + .../amd/display/dc/dcn316/dcn316_resource.h | 3 + drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 + .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 18 +- .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 863 ++ .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h | 44 + .../dc/dml/dcn31/display_rq_dlg_calc_31.c | 13 + .../amd/display/dc/dml/display_mode_structs.h | 2 + drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 3 + .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 +- .../gpu/drm/amd/display/modules/hdcp/hdcp.c | 38 +- .../gpu/drm/amd/display/modules/hdcp/hdcp.h | 8 + .../drm/amd/display/modules/inc/mod_hdcp.h| 2 +- .../display/modules/info_packet/info_packet.c | 5 +- 31 files changed, 1085 insertions(+), 915 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h -- 2.35.1
[PATCH 0/1] Title: DC Patches March 18, 2022
This DC patchset brings improvements in multiple areas. In summary, we highlight: * HDCP SEND AKI INIT error * fix audio format not updated after edid updated * Reduce stack size * FEC check in timing validation * Add fSMC_MSG_SetDtbClk support * Update VTEM Infopacket definition * [FW Promotion] Release 0.0.109.0 * Add support for zstate during extended vblank * remove destructive verify link for TMDS * move FPU related code from dcn31 to dml/dcn31 folder * move FPU related code from dcn315 to dml/dcn31 folder * move FPU related code from dcn316 to dml/dcn31 folder Aric Cyr (1): drm/amd/display: 3.2.178 drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- 2.35.1
[pull] amdgpu, amdkfd drm-next-5.18
Hi Dave, Daniel, Fixes for 5.18. The following changes since commit f6d790e5a7fe42706756c7fa1686d08d230610fc: Merge tag 'drm-intel-next-fixes-2022-03-10' of git://anongit.freedesktop.org/drm/drm-intel into drm-next (2022-03-11 13:27:00 +1000) are available in the Git repository at: https://gitlab.freedesktop.org/agd5f/linux.git tags/amd-drm-next-5.18-2022-03-18 for you to fetch changes up to 426c89aa203bcec9d9cf6eea36735eafa1b1f099: drm/amdgpu: Use drm_mode_copy() (2022-03-15 15:01:12 -0400) amd-drm-next-5.18-2022-03-18: amdgpu: - Aldebaran fixes - SMU 13.0.5 fixes - DCN 3.1.5 fixes - DCN 3.1.6 fixes - Pipe split fixes - More display FP cleanup - DP 2.0 UHBR fix - DC GPU reset fix - DC deep color ratio fix - SMU robustness fixes - Runtime PM fix for APUs - IGT reload fixes - SR-IOV fix - Misc fixes and cleanups amdkfd: - CRIU fixes - SVM fixes UAPI: - Properly handle SDMA transfers with CRIU Proposed user mode change: https://github.com/checkpoint-restore/criu/pull/1709 Alex Deucher (2): drm/amdgpu/display: enable scatter/gather display for DCN 3.1.6 drm/amdgpu: only check for _PR3 on dGPUs Anthony Koo (2): drm/amd/display: [FW Promotion] Release 0.0.107.0 drm/amd/display: [FW Promotion] Release 0.0.108.0 Aric Cyr (2): drm/amd/display: 3.2.176 drm/amd/display: 3.2.177 Becle Lee (1): drm/amd/display: Wait for hubp read line for Pollock Charlene Liu (5): drm/amd/display: add debug option to bypass ssinfo from bios for dcn315 drm/amd/display: fix the clock source contruct for dcn315 drm/amd/display: merge two duplicated clock_source_create drm/amd/display: enable dcn315/316 s0i2 support drm/amd/display: Add save/restore PANEL_PWRSEQ_REF_DIV2 Chris Park (1): drm/amd/display: Add NULL check Dale Zhao (1): drm/amd/display: Add new enum for EDID status Dan Carpenter (1): drm/amd/pm: fix indenting in __smu_cmn_reg_print_error() David Yat Sin (3): drm/amdkfd: CRIU remove sync and TLB flush on restore drm/amdkfd: CRIU Refactor restore BO function drm/amdkfd: CRIU export 
dmabuf handles for GTT BOs Dillon Varone (1): drm/amd/display: Add minimal pipe split transition state Eric Yang (1): drm/amd/display: Block zstate when more than one plane enabled George Shen (1): drm/amd/display: Clean up fixed VS PHY test w/a function Hansen Dsouza (1): drm/amd/display: fix deep color ratio Hawking Zhang (1): drm/amdgpu: drop xmgi23 error query/reset support Jasdeep Dhillon (1): drm/amd/display: move FPU associated DCN303 code to DML folder JinZe.Xu (1): drm/amd/display: Add I2C escape to support query device exist. Jing Zhou (2): drm/amd/display: Update engine ddc drm/amd/display: Add null pointer filter Jingwen Zhu (1): drm/amd/display: add gamut coefficient set A and B Jonathan Kim (1): drm/amdgpu: fix aldebaran xgmi topology for vf Julia Lawall (3): drm/amd/pm: fix typos in comments drm/amdgpu: fix typos in comments drm/amdgpu/dc: fix typos in comments Lang Yu (1): drm/amdgpu: only allow secure submission on rings which support that Leo (Hanghong) Ma (2): drm/amd/display: Add link dp trace support drm/amd/display: Add function to get the pipe from the stream context Leo Li (1): drm/amd/display: Fix compile error from TO_CLK_MGR_INTERNAL Leung, Martin (1): drm/amd/display: cleaning up smu_if to add future flexibility Lijo Lazar (2): drm/amdgpu: Disable baco dummy mode drm/amd/pm: Send message when resp status is 0xFC Melissa Wen (3): drm/amd/display: move FPU-related code from dcn20 to dml folder drm/amd/display: move FPU operations from dcn21 to dml/dcn20 folder drm/amd/display: move FPU code from dcn10 to dml/dcn10 folder Nicholas Kazlauskas (2): drm/amd/display: Fix double free during GPU reset on DC streams drm/amd/display: Add pstate verification and recovery for DCN31 Paul Menzel (1): drm/amdgpu: Use ternary operator in `vcn_v1_0_start()` Philip Yang (2): drm/amdgpu: Move reset domain init before calling RREG32 drm/amdkfd: evict svm bo worker handle error Stanley.Yang (3): drm/amd/pm: add send bad channel info function drm/amdgpu: 
message smu to update bad channel info drm/amd/pm: use pm mutex to protect ecc info table Sung Joon Kim (1): drm/amd/display: disable HPD SW timer for passive dongle type 1 only Tianci Yin (1): drm/amdgpu/vcn: fix vcn ring test failure in igt reload test Tianci.Yin (2): drm/amd/display: fix dp kvm can't light up drm/amd: fix gfx hang on renoir in IGT reload test Ville Syrjälä (3): drm/amdgpu: Remove pointless on stack mode copies drm/radeon: Use drm_mode_copy() drm/amdgpu:
[PATCH] drm/amd/display: Fix p-state allow debug index on dcn31
[Why] It changed since dcn30 but the hubbub31 constructor hasn't been modified to reflect this. [How] Update the value in the constructor to 0x6 so we're checking the right bits for p-state allow. It worked before by accident, but can falsely assert 0 depending on HW state transitions. The most frequent of which appears to be when all pipes turn off during IGT tests. Cc: Harry Wentland Fixes: d158560fc0e1 ("drm/amd/display: Add pstate verification and recovery for DCN31") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Eric Yang --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c index 3e6d6ebd199e..51c5f3685470 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c @@ -1042,5 +1042,7 @@ void hubbub31_construct(struct dcn20_hubbub *hubbub31, hubbub31->detile_buf_size = det_size_kb * 1024; hubbub31->pixel_chunk_size = pixel_chunk_size_kb * 1024; hubbub31->crb_size_segs = config_return_buffer_size_kb / DCN31_CRB_SEGMENT_SIZE_KB; + + hubbub31->debug_test_index_pstate = 0x6; } -- 2.25.1
Re: [PATCH 4/7] drm/amdgpu: rework TLB flushing
On 2022-03-17 9:50 a.m., Christian König wrote: Instead of tracking the VM updates through the dependencies just use a sequence counter for page table updates which indicates the need to flush the TLB. This reduces the need to flush the TLB drastically. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 6 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 20 - drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 56 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++ 6 files changed, 75 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e8a3078a85cc..2d4a89fb264e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -810,7 +810,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_vm_fence(>job->sync, fpriv->prt_va->last_pt_update); + r = amdgpu_sync_fence(>job->sync, fpriv->prt_va->last_pt_update); if (r) return r; @@ -821,7 +821,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_vm_fence(>job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(>job->sync, bo_va->last_pt_update); if (r) return r; } @@ -840,7 +840,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_vm_fence(>job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(>job->sync, bo_va->last_pt_update); if (r) return r; } @@ -853,7 +853,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_vm_fence(>job->sync, vm->last_update); + r = amdgpu_sync_fence(>job->sync, vm->last_update); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index b05c5fcb168d..93be290fc327 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -277,7 +277,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, unsigned vmhub = ring->funcs->vmhub; uint64_t fence_context = adev->fence_context + ring->idx; bool needs_flush = vm->use_cpu_for_update; - uint64_t updates = sync->last_vm_update; + uint64_t updates = amdgpu_vm_tlb_seq(vm); int r; *id = vm->reserved_vmid[vmhub]; @@ -338,7 +338,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, unsigned vmhub = ring->funcs->vmhub; struct amdgpu_vmid_mgr *id_mgr = >vm_manager.id_mgr[vmhub]; uint64_t fence_context = adev->fence_context + ring->idx; - uint64_t updates = sync->last_vm_update; + uint64_t updates = amdgpu_vm_tlb_seq(vm); int r; job->vm_needs_flush = vm->use_cpu_for_update; @@ -426,7 +426,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - id->flushed_updates = sync->last_vm_update; + id->flushed_updates = amdgpu_vm_tlb_seq(vm); job->vm_needs_flush = true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index bc5ab44c5830..ff9229819b79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -51,7 +51,6 @@ static struct kmem_cache *amdgpu_sync_slab; void amdgpu_sync_create(struct amdgpu_sync *sync) { hash_init(sync->fences); - sync->last_vm_update = 0; } /** @@ -171,23 +170,6 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) return 0; } -/** - * amdgpu_sync_vm_fence - remember to sync to this VM fence - * - * @sync: sync object to add fence to - * @fence: the VM fence to add - * - * Add the fence to the sync object and remember it as VM update. 
- */ -int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) -{ - if (!fence) - return 0; - - sync->last_vm_update = max(sync->last_vm_update, fence->seqno); - return amdgpu_sync_fence(sync, fence); -} - /* Determine based on the owner and mode if we should sync to a fence or not */ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, enum amdgpu_sync_mode mode, @@ -377,8 +359,6 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) } } - clone->last_vm_update = source->last_vm_update; - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 876c1ee8869c..2d5c613cda10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -43,12 +43,10 @@ enum amdgpu_sync_mode { */ struct amdgpu_sync { DECLARE_HASHTABLE(fences, 4); - uint64_t last_vm_update; }; void amdgpu_sync_create(struct amdgpu_sync *sync); int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
Re: [REGRESSION] Too-low frequency limit for AMD GPU PCI-passed-through to Windows VM
On Fri, 18 Mar 2022 11:06:00 -0400 Alex Deucher wrote: > On Fri, Mar 18, 2022 at 10:46 AM Alex Williamson > wrote: > > > > On Fri, 18 Mar 2022 08:01:31 +0100 > > Thorsten Leemhuis wrote: > > > > > On 18.03.22 06:43, Paul Menzel wrote: > > > > > > > > Am 17.03.22 um 13:54 schrieb Thorsten Leemhuis: > > > >> On 13.03.22 19:33, James Turner wrote: > > > >>> > > > My understanding at this point is that the root problem is probably > > > not in the Linux kernel but rather something else (e.g. the machine > > > firmware or AMD Windows driver) and that the change in f9b7f3703ff9 > > > ("drm/amdgpu/acpi: make ATPX/ATCS structures global (v2)") simply > > > exposed the underlying problem. > > > >> > > > >> FWIW: that in the end is irrelevant when it comes to the Linux kernel's > > > >> 'no regressions' rule. For details see: > > > >> > > > >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/admin-guide/reporting-regressions.rst > > > >> > > > >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/process/handling-regressions.rst > > > >> > > > >> > > > >> That being said: sometimes for the greater good it's better to not > > > >> insist on that. And I guess that might be the case here. > > > > > > > > But who decides that? > > > > > > In the end afaics: Linus. But he can't watch each and every discussion, > > > so it partly falls down to people discussing a regression, as they can > > > always decide to get him involved in case they are unhappy with how a > > > regression is handled. That obviously includes me in this case. I simply > > > use my best judgement in such situations. I'm still undecided if that > > > path is appropriate here, that's why I wrote above to see what James > > > would say, as he afaics was the only one that reported this regression. > > > > > > > Running stuff in a virtual machine is not that uncommon. 
> > > > No, it's about passing through a GPU to a VM, which is a lot less common > > > -- and afaics an area where blacklisting GPUs on the host to pass them > > > through is not uncommon (a quick internet search confirmed that, but I > > > might be wrong there). > > > > Right, interference from host drivers and pre-boot environments is > > always a concern with GPU assignment in particular. AMD GPUs have a > > long history of poor behavior relative to things like PCI secondary bus > > resets which we use to try to get devices to clean, reusable states for > > assignment. Here a device is being bound to a host driver that > > initiates some sort of power control, unbound from that driver and > > exposed to new drivers far beyond the scope of the kernel's regression > > policy. Perhaps it's possible to undo such power control when > > unbinding the device, but it's not necessarily a given that such a > > thing is possible for this device without a cold reset. > > > > IMO, it's not fair to restrict the kernel from such advancements. If > > the use case is within a VM, don't bind host drivers. It's difficult > > to make promises when dynamically switching between host and userspace > > drivers for devices that don't have functional reset mechanisms. > > Thanks, > > Additionally, operating the isolated device in a VM on a constrained > environment like a laptop may have other adverse side effects. The > driver in the guest would ideally know that this is a laptop and needs > to properly interact with ACPI to handle power management on the > device. If that is not the case, the driver in the guest may end up > running the device out of spec with what the platform supports. It's > also likely to break suspend and resume, especially on systems which > use S0ix since the firmware will generally only turn off certain power > rails if all of the devices on the rails have been put into the proper > state. That state may vary depending on the platform requirements. 
Good point, devices with platform dependencies to manage thermal budgets, etc. should be considered "use at your own risk" relative to device assignment currently. Thanks, Alex
Re: [PATCH v2 1/2] drm: Add GPU reset sysfs event
On Fri, Mar 18, 2022 at 12:42 AM Christian König wrote: > > Am 17.03.22 um 18:31 schrieb Rob Clark: > > On Thu, Mar 17, 2022 at 10:27 AM Daniel Vetter wrote: > >> [SNIP] > >>> (At some point, I'd like to use scheduler for the replay, and actually > >>> use drm_sched_stop()/etc.. but last time I looked there were still > >>> some sched bugs in that area which prevented me from deleting a bunch > >>> of code ;-)) > >> Not sure about your hw, but at least on intel replaying tends to just > >> result in follow-on fun. And that holds even more so the more complex a > >> workload is. This is why vk just dies immediately and does not try to > >> replay anything, offloading it to the app. Same with arb robusteness. > >> Afaik it's really only media and classic gl which insist that the driver > >> stack somehow recover. > > At least for us, each submit must be self-contained (ie. not rely on > > previous GPU hw state), so in practice replay works out pretty well. > > The worst case is subsequent submits from same process fail as well > > (if they depended on something that crashing submit failed to write > > back to memory.. but in that case they just crash as well and we move > > on to the next one.. the recent gens (a5xx+ at least) are pretty good > > about quickly detecting problems and giving us an error irq. > > Well I absolutely agree with Daniel. > > The whole replay thing AMD did in the scheduler is an absolutely mess > and should probably be killed with fire. > > I strongly recommend not to do the same mistake in other drivers. > > If you want to have some replay feature then please make it driver > specific and don't use anything from the infrastructure in the DRM > scheduler. hmm, perhaps I was not clear, but I'm only talking about re-emitting jobs *following* the faulting one (which could be from other contexts, etc).. not trying to restart the faulting job. 
You *absolutely* need to replay jobs following the faulting one, they could be from unrelated contexts/processes. You can't just drop them on the floor. Currently it is all driver specific, but I wanted to delete a lot of code and move to using scheduler to handle faults/timeouts (but blocked on that until [1] is resolved) [1] https://patchwork.kernel.org/project/dri-devel/patch/1630457207-13107-2-git-send-email-monk@amd.com/ BR, -R > Thanks, > Christian. > > > > > BR, > > -R > > > >> And recovering from a mess in userspace is a lot simpler than trying to > >> pull of the same magic in the kernel. Plus it also helps with a few of the > >> dma_fence rules, which is a nice bonus. > >> -Daniel > >> >
Re: [REGRESSION] Too-low frequency limit for AMD GPU PCI-passed-through to Windows VM
On Fri, Mar 18, 2022 at 10:46 AM Alex Williamson wrote: > > On Fri, 18 Mar 2022 08:01:31 +0100 > Thorsten Leemhuis wrote: > > > On 18.03.22 06:43, Paul Menzel wrote: > > > > > > Am 17.03.22 um 13:54 schrieb Thorsten Leemhuis: > > >> On 13.03.22 19:33, James Turner wrote: > > >>> > > My understanding at this point is that the root problem is probably > > not in the Linux kernel but rather something else (e.g. the machine > > firmware or AMD Windows driver) and that the change in f9b7f3703ff9 > > ("drm/amdgpu/acpi: make ATPX/ATCS structures global (v2)") simply > > exposed the underlying problem. > > >> > > >> FWIW: that in the end is irrelevant when it comes to the Linux kernel's > > >> 'no regressions' rule. For details see: > > >> > > >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/admin-guide/reporting-regressions.rst > > >> > > >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/process/handling-regressions.rst > > >> > > >> > > >> That being said: sometimes for the greater good it's better to not > > >> insist on that. And I guess that might be the case here. > > > > > > But who decides that? > > > > In the end afaics: Linus. But he can't watch each and every discussion, > > so it partly falls down to people discussing a regression, as they can > > always decide to get him involved in case they are unhappy with how a > > regression is handled. That obviously includes me in this case. I simply > > use my best judgement in such situations. I'm still undecided if that > > path is appropriate here, that's why I wrote above to see what James > > would say, as he afaics was the only one that reported this regression. > > > > > Running stuff in a virtual machine is not that uncommon. 
> > > > No, it's about passing through a GPU to a VM, which is a lot less common > > -- and afaics an area where blacklisting GPUs on the host to pass them > > through is not uncommon (a quick internet search confirmed that, but I > > might be wrong there). > > Right, interference from host drivers and pre-boot environments is > always a concern with GPU assignment in particular. AMD GPUs have a > long history of poor behavior relative to things like PCI secondary bus > resets which we use to try to get devices to clean, reusable states for > assignment. Here a device is being bound to a host driver that > initiates some sort of power control, unbound from that driver and > exposed to new drivers far beyond the scope of the kernel's regression > policy. Perhaps it's possible to undo such power control when > unbinding the device, but it's not necessarily a given that such a > thing is possible for this device without a cold reset. > > IMO, it's not fair to restrict the kernel from such advancements. If > the use case is within a VM, don't bind host drivers. It's difficult > to make promises when dynamically switching between host and userspace > drivers for devices that don't have functional reset mechanisms. > Thanks, Additionally, operating the isolated device in a VM on a constrained environment like a laptop may have other adverse side effects. The driver in the guest would ideally know that this is a laptop and needs to properly interact with ACPI to handle power management on the device. If that is not the case, the driver in the guest may end up running the device out of spec with what the platform supports. It's also likely to break suspend and resume, especially on systems which use S0ix since the firmware will generally only turn off certain power rails if all of the devices on the rails have been put into the proper state. That state may vary depending on the platform requirements. Alex > > Alex >
Re: [REGRESSION] Too-low frequency limit for AMD GPU PCI-passed-through to Windows VM
On Fri, 18 Mar 2022 08:01:31 +0100 Thorsten Leemhuis wrote: > On 18.03.22 06:43, Paul Menzel wrote: > > > > Am 17.03.22 um 13:54 schrieb Thorsten Leemhuis: > >> On 13.03.22 19:33, James Turner wrote: > >>> > My understanding at this point is that the root problem is probably > not in the Linux kernel but rather something else (e.g. the machine > firmware or AMD Windows driver) and that the change in f9b7f3703ff9 > ("drm/amdgpu/acpi: make ATPX/ATCS structures global (v2)") simply > exposed the underlying problem. > >> > >> FWIW: that in the end is irrelevant when it comes to the Linux kernel's > >> 'no regressions' rule. For details see: > >> > >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/admin-guide/reporting-regressions.rst > >> > >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/process/handling-regressions.rst > >> > >> > >> That being said: sometimes for the greater good it's better to not > >> insist on that. And I guess that might be the case here. > > > > But who decides that? > > In the end afaics: Linus. But he can't watch each and every discussion, > so it partly falls down to people discussing a regression, as they can > always decide to get him involved in case they are unhappy with how a > regression is handled. That obviously includes me in this case. I simply > use my best judgement in such situations. I'm still undecided if that > path is appropriate here, that's why I wrote above to see what James > would say, as he afaics was the only one that reported this regression. > > > Running stuff in a virtual machine is not that uncommon. > > No, it's about passing through a GPU to a VM, which is a lot less common > -- and afaics an area where blacklisting GPUs on the host to pass them > through is not uncommon (a quick internet search confirmed that, but I > might be wrong there). 
Right, interference from host drivers and pre-boot environments is always a concern with GPU assignment in particular. AMD GPUs have a long history of poor behavior relative to things like PCI secondary bus resets which we use to try to get devices to clean, reusable states for assignment. Here a device is being bound to a host driver that initiates some sort of power control, unbound from that driver and exposed to new drivers far beyond the scope of the kernel's regression policy. Perhaps it's possible to undo such power control when unbinding the device, but it's not necessarily a given that such a thing is possible for this device without a cold reset. IMO, it's not fair to restrict the kernel from such advancements. If the use case is within a VM, don't bind host drivers. It's difficult to make promises when dynamically switching between host and userspace drivers for devices that don't have functional reset mechanisms. Thanks, Alex
Re: [RFC PATCH 1/4] drm/amdkfd: Improve amdgpu_vm_handle_moved
Am 2022-03-18 um 08:38 schrieb Christian König: Am 17.03.22 um 20:11 schrieb Felix Kuehling: Am 2022-03-17 um 04:21 schrieb Christian König: Am 17.03.22 um 01:20 schrieb Felix Kuehling: Let amdgpu_vm_handle_moved update all BO VA mappings of BOs reserved by the caller. This will be useful for handling extra BO VA mappings in KFD VMs that are managed through the render node API. Yes, that change is on my TODO list for quite a while as well. TODO: This may also allow simplification of amdgpu_cs_vm_handling. See the TODO comment in the code. No, that won't work just yet. We need to change the TLB flush detection for that, but I'm already working on those as well. Your TLB flushing patch series looks good to me. There is one other issue, though. amdgpu_vm_handle_moved doesn't update the sync object, so I couldn't figure out I can wait for all the page table updates to finish. Yes, and inside the CS we still need to go over all the BOs and gather the VM updates to wait for. Not sure if you can do that in the KFD code as well. How exactly do you want to use it? Before resuming user mode queues after an eviction, KFD currently updates all the BOs and their mappings that it knows about. But it doesn't know about the mappings made using the render node API. So my plan was to use amdgpu_vm_handle_moved for that. But I don't get any fences for the page table operations queues by amdgpu_vm_handle_moved. I think amdgpu_cs has the same problem. So how do I reliably wait for those to finish before I resume user mode queues? If amdgpu_vm_handle_moved were able to update the sync object, then I also wouldn't need explicit amdgpu_vm_bo_update calls any more, similar to what I suggested in the TODO comment in amdgpu_cs_vm_handling. Regards, Felix Regards, Christian. 
Regards, Felix Signed-off-by: Felix Kuehling Please update the TODO, with that done: Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 ++- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d162243d8e78..10941f0d8dde 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -826,6 +826,10 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) return r; } + /* TODO: Is this loop still needed, or could this be handled by + * amdgpu_vm_handle_moved, now that it can handle all BOs that are + * reserved under p->ticket? + */ amdgpu_bo_list_for_each_entry(e, p->bo_list) { /* ignore duplicates */ bo = ttm_to_amdgpu_bo(e->tv.bo); @@ -845,7 +849,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) return r; } - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_handle_moved(adev, vm, >ticket); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 579adfafe4d0..50805613c38c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -414,7 +414,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) r = amdgpu_vm_clear_freed(adev, vm, NULL); if (!r) - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_handle_moved(adev, vm, ticket); if (r && r != -EBUSY) DRM_ERROR("Failed to invalidate VM page tables (%d))\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index fc4563cf2828..726b42c6d606 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2190,11 +2190,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * PTs 
have to be reserved! */ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm) + struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket) { struct amdgpu_bo_va *bo_va, *tmp; struct dma_resv *resv; - bool clear; + bool clear, unlock; int r; list_for_each_entry_safe(bo_va, tmp, >moved, base.vm_status) { @@ -2212,17 +2213,24 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, spin_unlock(>invalidated_lock); /* Try to reserve the BO to avoid clearing its ptes */ - if (!amdgpu_vm_debug && dma_resv_trylock(resv)) + if (!amdgpu_vm_debug && dma_resv_trylock(resv)) { clear = false; + unlock = true; + /* The caller is already holding the reservation lock */ + } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { +
Re: radeon ring 0 test failed on arm64
On Fri, Mar 18, 2022 at 4:35 AM Christian König wrote: > > > > Am 18.03.22 um 08:51 schrieb Kever Yang: > > > On 2022/3/17 20:19, Peter Geis wrote: > > On Wed, Mar 16, 2022 at 11:08 PM Kever Yang wrote: > > Hi Peter, > > On 2022/3/17 08:14, Peter Geis wrote: > > Good Evening, > > I apologize for raising this email chain from the dead, but there have > been some developments that have introduced even more questions. > I've looped the Rockchip mailing list into this too, as this affects > rk356x, and likely the upcoming rk3588 if [1] is to be believed. > > TLDR for those not familiar: It seems the rk356x series (and possibly > the rk3588) were built without any outer coherent cache. > This means (unless Rockchip wants to clarify here) devices such as the > ITS and PCIe cannot utilize cache snooping. > This is based on the results of the email chain [2]. > > The new circumstances are as follows: > The RPi CM4 Adventure Team as I've taken to calling them has been > attempting to get a dGPU working with the very broken Broadcom > controller in the RPi CM4. > Recently they acquired a SoQuartz rk3566 module which is pin > compatible with the CM4, and have taken to trying it out as well. > > This is how I got involved. > It seems they found a trivial way to force the Radeon R600 driver to > use Non-Cached memory for everything. > This single line change, combined with using memset_io instead of > memset, allows the ring tests to pass and the card probes successfully > (minus the DMA limitations of the rk356x due to the 32 bit > interconnect). > I discovered using this method that we start having unaligned io > memory access faults (bus errors) when running glmark2-drm (running > glmark2 directly was impossible, as both X and Wayland crashed too > early). > I traced this to using what I thought at the time was an unsafe memcpy > in the mesa stack. > Rewriting this function to force aligned writes solved the problem and > allows glmark2-drm to run to completion. 
> With some extensive debugging, I found about half a dozen memcpy > functions in mesa that if forced to be aligned would allow Wayland to > start, but with hilarious display corruption (see [3]. [4]). > The CM4 team is convinced this is an issue with memcpy in glibc, but > I'm not convinced it's that simple. > > On my two hour drive in to work this morning, I got to thinking. > If this was an memcpy fault, this would be universally broken on arm64 > which is obviously not the case. > So I started thinking, what is different here than with systems known to work: > 1. No IOMMU for the PCIe controller. > 2. The Outer Cache Issue. > > Robin: > My questions for you, since you're the smartest person I know about > arm64 memory management: > Could cache snooping permit unaligned accesses to IO to be safe? > Or > Is it the lack of an IOMMU that's causing the ali gnment faults to become > fatal? > Or > Am I insane here? > > Rockchip: > Please update on the status for the Outer Cache errata for ITS services. > > Our SoC design team has double check with ARM GIC/ITS IP team for many > times, and the GITS_CBASER > of GIC600 IP does not support hardware bind or config to a fix value, so > they insist this is an IP > limitation instead of a SoC bug, software should take care of it :( > I will check again if we can provide errata for this issue. > > Thanks. This is necessary as the mbi-alias provides an imperfect > implementation of the ITS and causes certain PCIe cards (eg x520 Intel > 10G NIC) to misbehave. > > Please provide an answer to the errata of the PCIe controller, in > regard to cache snooping and buffering, for both the rk356x and the > upcoming rk3588. > > > Sorry, what is this? > > Part of the ITS bug is it expects to be cache coherent with the CPU > cluster by design. > Due to the rk356x being implemented without an outer accessible cache, > the ITS and other devices that require cache coherency (PCIe for > example) crash in fun ways. 
> > Then this is still the ITS issue, not PCIe issue. > PCIe is a peripheral bus controller like USB and other device, the driver > should maintain the "cache coherency" if there is any, and there is no > requirement for hardware cache coherency between PCIe and CPU. Kever, These issues are one and the same. Certain hardware blocks *require* cache coherency as part of their design. All of the *interesting* things PCIe can do stem from it. When I saw you bumped the available window to the PCIe controller to 1GB I was really excited, because that meant we could finally support devices that used these interesting features. However, without cache coherency, having more than a 256MB window is a waste, as any card that can take advantage of it *requires* coherency. The same thing goes for a resizable BAR. EP mode is the same, having the ability to connect one CPU to another CPU over a PCIe bus loses the advantages when you don't have coherency. At that point, you might as well toss in a 2.5GB ethernet port and just use
[PATCH v9 3/4] drm/msm: init panel orientation property
Init panel orientation property after connector is initialized. Let the panel driver decide the orientation value later. Signed-off-by: Hsin-Yi Wang --- drivers/gpu/drm/msm/dsi/dsi_manager.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 0c1b7dde377c..b5dc86ebcab9 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -627,6 +627,10 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id) connector->interlace_allowed = 0; connector->doublescan_allowed = 0; + ret = drm_connector_init_panel_orientation_property(connector); + if (ret) + goto fail; + drm_connector_attach_encoder(connector, msm_dsi->encoder); ret = msm_dsi_manager_panel_init(connector, id); -- 2.35.1.894.gb6a874cedc-goog
Re: [Intel-gfx] [PATCH v8 1/3] gpu: drm: separate panel orientation property creating and value setting
On Fri, Feb 18, 2022 at 11:57 PM Harry Wentland wrote: > > On 2022-02-18 07:12, Simon Ser wrote: > > On Friday, February 18th, 2022 at 12:54, Hans de Goede > > wrote: > > > >> On 2/18/22 12:39, Simon Ser wrote: > >>> On Friday, February 18th, 2022 at 11:38, Hans de Goede > >>> wrote: > >>> > What I'm reading in the above is that it is being considered to allow > changing the panel-orientation value after the connector has been made > available to userspace; and let userspace know about this through a > uevent. > > I believe that this is a bad idea, it is important to keep in mind here > what userspace (e.g. plymouth) uses this prorty for. This property is > used to rotate the image being rendered / shown on the framebuffer to > adjust for the panel orientation. > > So now lets assume we apply the correct upside-down orientation later > on a device with an upside-down mounted LCD panel. Then on boot the > following could happen: > > 1. amdgpu exports a connector for the LCD panel to userspace without > setting panel-orient=upside-down > 2. plymouth sees this and renders its splash normally, but since the > panel is upside-down it will now actually show upside-down > >>> > >>> At this point amdgpu hasn't probed the connector yet. So the connector > >>> will be marked as disconnected, and plymouth shouldn't render anything. > >> > >> If before the initial probe of the connector there is a /dev/dri/card0 > >> which plymouth can access, then plymouth may at this point decide > >> to disable any seemingly unused crtcs, which will make the screen go > >> black... > >> > >> I'm not sure if plymouth will actually do this, but AFAICT this would > >> not be invalid behavior for a userspace kms consumer to do and I > >> believe it is likely that mutter will disable unused crtcs. > >> > >> IMHO it is just a bad idea to register /dev/dri/card0 with userspace > >> before the initial connector probe is done. Nothing good can come > >> of that. 
> >> > >> If all the exposed connectors initially are going to show up as > >> disconnected anyways what is the value in registering /dev/dri/card0 > >> with userspace early ? > > > > OK. I'm still unsure how I feel about this, but I think I agree with > > you. That said, the amdgpu architecture is quite involved with multiple > > abstraction levels, so I don't think I'm equipped to write a patch to > > fix this... > > > > amdgpu_dm's connector registration already triggers a detection. See the > calls to dc_link_detect and amdgpu_dm_update_connector_after_detect in > amdgpu_dm_initialize_drm_device. > > dc_link_detect is supposed to read the edid via > dm_helpers_read_local_edid and amdgpu_dm_update_connector_after_detect > will update the EDID on the connector via a > drm_connector_update_edid_property call. > > This all happens at driver load. > > I don't know why you're seeing the embedded connector as disconnected > unless the DP-MIPI bridge for some reason doesn't indicate that the panel > is connected at driver load. > > Harry > > > cc Daniel Vetter: can you confirm probing all connectors is a good thing > > to do on driver module load? > > > I guess the initial modeline is inherited from the video-bios, but > what about the physical size? Note that you cannot just change the > physical size later either, that gets used to determine the hidpi > scaling factor in the bootsplash, and changing that after the initial > bootsplash dislay will also look ugly > > b) Why you need the edid for the panel-orientation property at all, > typically the edid prom is part of the panel and the panel does not > know that it is mounted e.g. upside down at all, that is a property > of the system as a whole not of the panel as a standalone unit so > in my experience getting panel-orient info is something which comes > from the firmware /video-bios not from edid ? > >>> > >>> This is an internal DRM thing. 
The orientation quirks logic uses the > >>> mode size advertised by the EDID. > >> > >> The DMI based quirking does, yes. But e.g. the quirk code directly > >> reading this from the Intel VBT does not rely on the mode. > >> > >> But if you are planning on using a DMI based quirk for the steamdeck > >> then yes that needs the mode. > >> > >> Thee mode check is there for 2 reasons: > >> > >> 1. To avoid also applying the quirk to external displays, but > >> I think that that is also solved in most drivers by only checking for > >> a quirk at all on the eDP connector > >> > >> 2. Some laptop models ship with different panels in different badges > >> some of these are portrait (so need a panel-orient) setting and others > >> are landscape. > > > > That makes sense. So yeah the EDID mode based matching logic needs to > > stay to accomodate for these cases. > > > >>> I agree that at least in the Steam > >>> Deck case it may not make a lot of
Re: radeon ring 0 test failed on arm64
On Fri, Mar 18, 2022 at 8:31 AM Christian König wrote: > > Am 18.03.22 um 12:24 schrieb Peter Geis: > > On Fri, Mar 18, 2022 at 4:35 AM Christian König > > wrote: > >> > >> > >> Am 18.03.22 um 08:51 schrieb Kever Yang: > >> > >> > >> On 2022/3/17 20:19, Peter Geis wrote: > >> > >> On Wed, Mar 16, 2022 at 11:08 PM Kever Yang > >> wrote: > >> > >> Hi Peter, > >> > >> On 2022/3/17 08:14, Peter Geis wrote: > >> > >> Good Evening, > >> > >> I apologize for raising this email chain from the dead, but there have > >> been some developments that have introduced even more questions. > >> I've looped the Rockchip mailing list into this too, as this affects > >> rk356x, and likely the upcoming rk3588 if [1] is to be believed. > >> > >> TLDR for those not familiar: It seems the rk356x series (and possibly > >> the rk3588) were built without any outer coherent cache. > >> This means (unless Rockchip wants to clarify here) devices such as the > >> ITS and PCIe cannot utilize cache snooping. > >> This is based on the results of the email chain [2]. > >> > >> The new circumstances are as follows: > >> The RPi CM4 Adventure Team as I've taken to calling them has been > >> attempting to get a dGPU working with the very broken Broadcom > >> controller in the RPi CM4. > >> Recently they acquired a SoQuartz rk3566 module which is pin > >> compatible with the CM4, and have taken to trying it out as well. > >> > >> This is how I got involved. > >> It seems they found a trivial way to force the Radeon R600 driver to > >> use Non-Cached memory for everything. > >> This single line change, combined with using memset_io instead of > >> memset, allows the ring tests to pass and the card probes successfully > >> (minus the DMA limitations of the rk356x due to the 32 bit > >> interconnect). 
> >> I discovered using this method that we start having unaligned io > >> memory access faults (bus errors) when running glmark2-drm (running > >> glmark2 directly was impossible, as both X and Wayland crashed too > >> early). > >> I traced this to using what I thought at the time was an unsafe memcpy > >> in the mesa stack. > >> Rewriting this function to force aligned writes solved the problem and > >> allows glmark2-drm to run to completion. > >> With some extensive debugging, I found about half a dozen memcpy > >> functions in mesa that if forced to be aligned would allow Wayland to > >> start, but with hilarious display corruption (see [3]. [4]). > >> The CM4 team is convinced this is an issue with memcpy in glibc, but > >> I'm not convinced it's that simple. > >> > >> On my two hour drive in to work this morning, I got to thinking. > >> If this was an memcpy fault, this would be universally broken on arm64 > >> which is obviously not the case. > >> So I started thinking, what is different here than with systems known to > >> work: > >> 1. No IOMMU for the PCIe controller. > >> 2. The Outer Cache Issue. > >> > >> Robin: > >> My questions for you, since you're the smartest person I know about > >> arm64 memory management: > >> Could cache snooping permit unaligned accesses to IO to be safe? > >> Or > >> Is it the lack of an IOMMU that's causing the ali gnment faults to become > >> fatal? > >> Or > >> Am I insane here? > >> > >> Rockchip: > >> Please update on the status for the Outer Cache errata for ITS services. > >> > >> Our SoC design team has double check with ARM GIC/ITS IP team for many > >> times, and the GITS_CBASER > >> of GIC600 IP does not support hardware bind or config to a fix value, so > >> they insist this is an IP > >> limitation instead of a SoC bug, software should take care of it :( > >> I will check again if we can provide errata for this issue. > >> > >> Thanks. 
This is necessary as the mbi-alias provides an imperfect > >> implementation of the ITS and causes certain PCIe cards (eg x520 Intel > >> 10G NIC) to misbehave. > >> > >> Please provide an answer to the errata of the PCIe controller, in > >> regard to cache snooping and buffering, for both the rk356x and the > >> upcoming rk3588. > >> > >> > >> Sorry, what is this? > >> > >> Part of the ITS bug is it expects to be cache coherent with the CPU > >> cluster by design. > >> Due to the rk356x being implemented without an outer accessible cache, > >> the ITS and other devices that require cache coherency (PCIe for > >> example) crash in fun ways. > >> > >> Then this is still the ITS issue, not PCIe issue. > >> PCIe is a peripheral bus controller like USB and other device, the driver > >> should maintain the "cache coherency" if there is any, and there is no > >> requirement for hardware cache coherency between PCIe and CPU. > > Kever, > > > > These issues are one and the same. > > Well, that's not correct. You are still mixing two things up here: > > 1. The memory accesses from the device to the system memory must be > coherent with the CPU cache. E.g. we root complex must snoop the CPU cache. > That's a requirement of the PCIe spec. If you don't get that
[PATCH v9 4/4] arm64: dts: mt8183: Add panel rotation
krane, kakadu, and kodama boards have a default panel rotation. Signed-off-by: Hsin-Yi Wang Reviewed-by: Enric Balletbo i Serra Tested-by: Enric Balletbo i Serra --- arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index 0f9480f91261..c7c6be106e2e 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -276,6 +276,7 @@ panel: panel@0 { avee-supply = <&ppvarn_lcd>; pp1800-supply = <&pp1800_lcd>; backlight = <&backlight_lcd0>; + rotation = <270>; port { panel_in: endpoint { remote-endpoint = <&dsi_out>; -- 2.35.1.894.gb6a874cedc-goog
[PATCH v9 2/4] drm/mediatek: init panel orientation property
Init panel orientation property after connector is initialized. Let the panel driver decide the orientation value later. Signed-off-by: Hsin-Yi Wang Acked-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_dsi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index ccb0511b9cd5..0376b33e9651 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -810,6 +810,13 @@ static int mtk_dsi_encoder_init(struct drm_device *drm, struct mtk_dsi *dsi) ret = PTR_ERR(dsi->connector); goto err_cleanup_encoder; } + + ret = drm_connector_init_panel_orientation_property(dsi->connector); + if (ret) { + DRM_ERROR("Unable to init panel orientation\n"); + goto err_cleanup_encoder; + } + drm_connector_attach_encoder(dsi->connector, &dsi->encoder); return 0; -- 2.35.1.894.gb6a874cedc-goog
[PATCH v9 0/4] Separate panel orientation property creating and value setting
Some drivers, eg. mtk_drm and msm_drm, rely on the panel to set the orientation. Panel calls drm_connector_set_panel_orientation() to create orientation property and sets the value. However, connector properties can't be created after drm_dev_register() is called. The goal is to separate the orientation property creation, so drm drivers can create it earlier before drm_dev_register(). After this series, drm_connector_set_panel_orientation() works like before, so it won't affect other drm drivers. The only difference is that some drm drivers can call drm_connector_init_panel_orientation_property() earlier. Hsin-Yi Wang (4): gpu: drm: separate panel orientation property creating and value setting drm/mediatek: init panel orientation property drm/msm: init panel orientation property arm64: dts: mt8183: Add panel rotation .../arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 1 + drivers/gpu/drm/drm_connector.c | 58 ++- drivers/gpu/drm/mediatek/mtk_dsi.c| 7 +++ drivers/gpu/drm/msm/dsi/dsi_manager.c | 4 ++ include/drm/drm_connector.h | 2 + 5 files changed, 59 insertions(+), 13 deletions(-) -- 2.35.1.894.gb6a874cedc-goog
Re: radeon ring 0 test failed on arm64
On 2022/3/17 20:19, Peter Geis wrote: On Wed, Mar 16, 2022 at 11:08 PM Kever Yang wrote: Hi Peter, On 2022/3/17 08:14, Peter Geis wrote: Good Evening, I apologize for raising this email chain from the dead, but there have been some developments that have introduced even more questions. I've looped the Rockchip mailing list into this too, as this affects rk356x, and likely the upcoming rk3588 if [1] is to be believed. TLDR for those not familiar: It seems the rk356x series (and possibly the rk3588) were built without any outer coherent cache. This means (unless Rockchip wants to clarify here) devices such as the ITS and PCIe cannot utilize cache snooping. This is based on the results of the email chain [2]. The new circumstances are as follows: The RPi CM4 Adventure Team as I've taken to calling them has been attempting to get a dGPU working with the very broken Broadcom controller in the RPi CM4. Recently they acquired a SoQuartz rk3566 module which is pin compatible with the CM4, and have taken to trying it out as well. This is how I got involved. It seems they found a trivial way to force the Radeon R600 driver to use Non-Cached memory for everything. This single line change, combined with using memset_io instead of memset, allows the ring tests to pass and the card probes successfully (minus the DMA limitations of the rk356x due to the 32 bit interconnect). I discovered using this method that we start having unaligned io memory access faults (bus errors) when running glmark2-drm (running glmark2 directly was impossible, as both X and Wayland crashed too early). I traced this to using what I thought at the time was an unsafe memcpy in the mesa stack. Rewriting this function to force aligned writes solved the problem and allows glmark2-drm to run to completion. With some extensive debugging, I found about half a dozen memcpy functions in mesa that if forced to be aligned would allow Wayland to start, but with hilarious display corruption (see [3]. [4]). 
The CM4 team is convinced this is an issue with memcpy in glibc, but I'm not convinced it's that simple. On my two hour drive in to work this morning, I got to thinking. If this was an memcpy fault, this would be universally broken on arm64 which is obviously not the case. So I started thinking, what is different here than with systems known to work: 1. No IOMMU for the PCIe controller. 2. The Outer Cache Issue. Robin: My questions for you, since you're the smartest person I know about arm64 memory management: Could cache snooping permit unaligned accesses to IO to be safe? Or Is it the lack of an IOMMU that's causing the alignment faults to become fatal? Or Am I insane here? Rockchip: Please update on the status for the Outer Cache errata for ITS services. Our SoC design team has double check with ARM GIC/ITS IP team for many times, and the GITS_CBASER of GIC600 IP does not support hardware bind or config to a fix value, so they insist this is an IP limitation instead of a SoC bug, software should take care of it :( I will check again if we can provide errata for this issue. Thanks. This is necessary as the mbi-alias provides an imperfect implementation of the ITS and causes certain PCIe cards (eg x520 Intel 10G NIC) to misbehave. Please provide an answer to the errata of the PCIe controller, in regard to cache snooping and buffering, for both the rk356x and the upcoming rk3588. Sorry, what is this? Part of the ITS bug is it expects to be cache coherent with the CPU cluster by design. Due to the rk356x being implemented without an outer accessible cache, the ITS and other devices that require cache coherency (PCIe for example) crash in fun ways. Then this is still the ITS issue, not PCIe issue. PCIe is a peripheral bus controller like USB and other device, the driver should maintain the "cache coherency" if there is any, and there is no requirement for hardware cache coherency between PCIe and CPU. 
We didn't see any transfer error on rk356x PCIe till now, we can take a look if it's easy to reproduce. Thanks, - Kever This means that rk356x cannot implement a specification compliant ITS or PCIe. From the rk3588 source dump it appears it was produced without an outer accessible cache, which means if true it also will be unable to use any PCIe cards that implement cache coherency as part of their design. Thanks, - Kever [1] https://github.com/JeffyCN/mirrors/commit/0b985f29304dcb9d644174edacb67298e8049d4f [2] https://lore.kernel.org/lkml/871rbdt4tu.wl-...@kernel.org/T/ [3] https://cdn.discordapp.com/attachments/926487797844541510/953414755970850816/unknown.png [4] https://cdn.discordapp.com/attachments/926487797844541510/953424952042852422/unknown.png Thank you everyone for your time. Very Respectfully, Peter Geis On Wed, May 26, 2021 at 7:21 AM Christian König wrote: Hi Robin, Am 26.05.21 um 12:59 schrieb Robin Murphy: On 2021-05-26 10:42, Christian König wrote: Hi Robin, Am 25.05.21 um 22:09 schrieb Robin
Re: [REGRESSION] Too-low frequency limit for AMD GPU PCI-passed-through to Windows VM
On 18.03.22 06:43, Paul Menzel wrote: > > Am 17.03.22 um 13:54 schrieb Thorsten Leemhuis: >> On 13.03.22 19:33, James Turner wrote: >>> My understanding at this point is that the root problem is probably not in the Linux kernel but rather something else (e.g. the machine firmware or AMD Windows driver) and that the change in f9b7f3703ff9 ("drm/amdgpu/acpi: make ATPX/ATCS structures global (v2)") simply exposed the underlying problem. >> >> FWIW: that in the end is irrelevant when it comes to the Linux kernel's >> 'no regressions' rule. For details see: >> >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/admin-guide/reporting-regressions.rst >> >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/process/handling-regressions.rst >> >> >> That being said: sometimes for the greater good it's better to not >> insist on that. And I guess that might be the case here. > > But who decides that? In the end afaics: Linus. But he can't watch each and every discussion, so it partly falls down to people discussing a regression, as they can always decide to get him involved in case they are unhappy with how a regression is handled. That obviously includes me in this case. I simply use my best judgement in such situations. I'm still undecided if that path is appropriate here, that's why I wrote above to see what James would say, as he afaics was the only one that reported this regression. > Running stuff in a virtual machine is not that uncommon. No, it's about passing through a GPU to a VM, which is a lot less common -- and afaics an area where blacklisting GPUs on the host to pass them through is not uncommon (a quick internet search confirmed that, but I might be wrong there). > Should the commit be reverted, and re-added with a more elaborate commit > message documenting the downsides? > > Could the user be notified somehow? 
Can PCI passthrough and a loaded > amdgpu driver be detected, so Linux warns about this? > > Also, should this be documented in the code? > >>> I'm not sure where to go from here. This issue isn't much of a concern >>> for me anymore, since blacklisting `amdgpu` works for my machine. At >>> this point, my understanding is that the root problem needs to be fixed >>> in AMD's Windows GPU driver or Dell's firmware, not the Linux kernel. If >>> any of the AMD developers on this thread would like to forward it to the >>> AMD Windows driver team, I'd be happy to work with AMD to fix the issue >>> properly. > > (Thorsten, your mailer mangled the quote somehow Kinda, but it IIRC was more me doing something stupid with my mailer. Sorry about that. > – I reformatted it –, thx! > which is too bad, as this message is shown when clicking on the link > *marked invalid* in the regzbot Web page [1]. (The link is a very nice > feature.) > >> In that case I'll drop it from the list of regressions, unless what I >> wrote above makes you change your mind. >> >> #regzbot invalid: firmware issue exposed by kernel change, user seems to >> be happy with a workaround >> >> Thx everyone who participated in handling this. > > Should the regression issue be re-opened until the questions above are > answered, and a more user friendly solution is found? I'll for now will just continue to watch this discussion and see what happens. > [1]: https://linux-regtracking.leemhuis.info/regzbot/resolved/ Ciao, Thorsten
[PATCH v9 1/4] gpu: drm: separate panel orientation property creating and value setting
drm_dev_register() sets connector->registration_state to DRM_CONNECTOR_REGISTERED and dev->registered to true. If drm_connector_set_panel_orientation() is first called after drm_dev_register(), it will fail several checks and results in following warning. Add a function to create panel orientation property and set default value to UNKNOWN, so drivers can call this function to init the property earlier , and let the panel set the real value later. [4.480976] [ cut here ] [4.485603] WARNING: CPU: 5 PID: 369 at drivers/gpu/drm/drm_mode_object.c:45 __drm_mode_object_add+0xb4/0xbc [4.609772] Call trace: [4.612208] __drm_mode_object_add+0xb4/0xbc [4.616466] drm_mode_object_add+0x20/0x2c [4.620552] drm_property_create+0xdc/0x174 [4.624723] drm_property_create_enum+0x34/0x98 [4.629241] drm_connector_set_panel_orientation+0x64/0xa0 [4.634716] boe_panel_get_modes+0x88/0xd8 [4.638802] drm_panel_get_modes+0x2c/0x48 [4.642887] panel_bridge_get_modes+0x1c/0x28 [4.647233] drm_bridge_connector_get_modes+0xa0/0xd4 [4.652273] drm_helper_probe_single_connector_modes+0x218/0x700 [4.658266] drm_mode_getconnector+0x1b4/0x45c [4.662699] drm_ioctl_kernel+0xac/0x128 [4.11] drm_ioctl+0x268/0x410 [4.670002] drm_compat_ioctl+0xdc/0xf0 [4.673829] __arm64_compat_sys_ioctl+0xc8/0x100 [4.678436] el0_svc_common+0xf4/0x1c0 [4.682174] do_el0_svc_compat+0x28/0x3c [4.686088] el0_svc_compat+0x10/0x1c [4.689738] el0_sync_compat_handler+0xa8/0xcc [4.694171] el0_sync_compat+0x178/0x180 [4.698082] ---[ end trace b4f2db9d9c88610b ]--- [4.702721] [ cut here ] [4.707329] WARNING: CPU: 5 PID: 369 at drivers/gpu/drm/drm_mode_object.c:243 drm_object_attach_property+0x48/0xb8 [4.833830] Call trace: [4.836266] drm_object_attach_property+0x48/0xb8 [4.840958] drm_connector_set_panel_orientation+0x84/0xa0 [4.846432] boe_panel_get_modes+0x88/0xd8 [4.850516] drm_panel_get_modes+0x2c/0x48 [4.854600] panel_bridge_get_modes+0x1c/0x28 [4.858946] drm_bridge_connector_get_modes+0xa0/0xd4 [4.863984] 
drm_helper_probe_single_connector_modes+0x218/0x700 [4.869978] drm_mode_getconnector+0x1b4/0x45c [4.874410] drm_ioctl_kernel+0xac/0x128 [4.878320] drm_ioctl+0x268/0x410 [4.881711] drm_compat_ioctl+0xdc/0xf0 [4.885536] __arm64_compat_sys_ioctl+0xc8/0x100 [4.890142] el0_svc_common+0xf4/0x1c0 [4.893879] do_el0_svc_compat+0x28/0x3c [4.897791] el0_svc_compat+0x10/0x1c [4.901441] el0_sync_compat_handler+0xa8/0xcc [4.905873] el0_sync_compat+0x178/0x180 [4.909783] ---[ end trace b4f2db9d9c88610c ]--- Signed-off-by: Hsin-Yi Wang Reviewed-by: Sean Paul --- drivers/gpu/drm/drm_connector.c | 58 + include/drm/drm_connector.h | 2 ++ 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 76a8c707c34b..149709e05622 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1252,7 +1252,7 @@ static const struct drm_prop_enum_list dp_colorspaces[] = { * INPUT_PROP_DIRECT) will still map 1:1 to the actual LCD panel * coordinates, so if userspace rotates the picture to adjust for * the orientation it must also apply the same transformation to the - * touchscreen input coordinates. This property is initialized by calling + * touchscreen input coordinates. This property value is set by calling * drm_connector_set_panel_orientation() or * drm_connector_set_panel_orientation_with_quirk() * @@ -2344,8 +2344,8 @@ EXPORT_SYMBOL(drm_connector_set_vrr_capable_property); * @connector: connector for which to set the panel-orientation property. * @panel_orientation: drm_panel_orientation value to set * - * This function sets the connector's panel_orientation and attaches - * a "panel orientation" property to the connector. + * This function sets the connector's panel_orientation value. If the property + * doesn't exist, it will try to create one. * * Calling this function on a connector where the panel_orientation has * already been set is a no-op (e.g. 
the orientation has been overridden with @@ -2377,18 +2377,13 @@ int drm_connector_set_panel_orientation( prop = dev->mode_config.panel_orientation_property; if (!prop) { - prop = drm_property_create_enum(dev, DRM_MODE_PROP_IMMUTABLE, - "panel orientation", - drm_panel_orientation_enum_list, - ARRAY_SIZE(drm_panel_orientation_enum_list)); - if (!prop) + if (drm_connector_init_panel_orientation_property(connector) < 0) return -ENOMEM; - - dev->mode_config.panel_orientation_property = prop; +
Re: [RFC PATCH 1/4] drm/amdkfd: Improve amdgpu_vm_handle_moved
Am 17.03.22 um 20:11 schrieb Felix Kuehling: Am 2022-03-17 um 04:21 schrieb Christian König: Am 17.03.22 um 01:20 schrieb Felix Kuehling: Let amdgpu_vm_handle_moved update all BO VA mappings of BOs reserved by the caller. This will be useful for handling extra BO VA mappings in KFD VMs that are managed through the render node API. Yes, that change is on my TODO list for quite a while as well. TODO: This may also allow simplification of amdgpu_cs_vm_handling. See the TODO comment in the code. No, that won't work just yet. We need to change the TLB flush detection for that, but I'm already working on those as well. Your TLB flushing patch series looks good to me. There is one other issue, though. amdgpu_vm_handle_moved doesn't update the sync object, so I couldn't figure out I can wait for all the page table updates to finish. Yes, and inside the CS we still need to go over all the BOs and gather the VM updates to wait for. Not sure if you can do that in the KFD code as well. How exactly do you want to use it? Regards, Christian. Regards, Felix Signed-off-by: Felix Kuehling Please update the TODO, with that done: Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 ++- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d162243d8e78..10941f0d8dde 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -826,6 +826,10 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) return r; } + /* TODO: Is this loop still needed, or could this be handled by + * amdgpu_vm_handle_moved, now that it can handle all BOs that are + * reserved under p->ticket? 
+ */ amdgpu_bo_list_for_each_entry(e, p->bo_list) { /* ignore duplicates */ bo = ttm_to_amdgpu_bo(e->tv.bo); @@ -845,7 +849,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) return r; } - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_handle_moved(adev, vm, &p->ticket); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 579adfafe4d0..50805613c38c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -414,7 +414,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) r = amdgpu_vm_clear_freed(adev, vm, NULL); if (!r) - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_handle_moved(adev, vm, ticket); if (r && r != -EBUSY) DRM_ERROR("Failed to invalidate VM page tables (%d))\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index fc4563cf2828..726b42c6d606 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2190,11 +2190,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * PTs have to be reserved! 
*/ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm) + struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket) { struct amdgpu_bo_va *bo_va, *tmp; struct dma_resv *resv; - bool clear; + bool clear, unlock; int r; list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { @@ -2212,17 +2213,24 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, spin_unlock(&vm->invalidated_lock); /* Try to reserve the BO to avoid clearing its ptes */ - if (!amdgpu_vm_debug && dma_resv_trylock(resv)) + if (!amdgpu_vm_debug && dma_resv_trylock(resv)) { clear = false; + unlock = true; + /* The caller is already holding the reservation lock */ + } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { + clear = false; + unlock = false; /* Somebody else is using the BO right now */ - else + } else { clear = true; + unlock = false; + } r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL); if (r) return r; - if (!clear) + if (unlock) dma_resv_unlock(resv); spin_lock(&vm->invalidated_lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index a40a6a993bb0..120a76aaae75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -396,7 +396,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
Re: radeon ring 0 test failed on arm64
Am 18.03.22 um 12:24 schrieb Peter Geis: On Fri, Mar 18, 2022 at 4:35 AM Christian König wrote: Am 18.03.22 um 08:51 schrieb Kever Yang: On 2022/3/17 20:19, Peter Geis wrote: On Wed, Mar 16, 2022 at 11:08 PM Kever Yang wrote: Hi Peter, On 2022/3/17 08:14, Peter Geis wrote: Good Evening, I apologize for raising this email chain from the dead, but there have been some developments that have introduced even more questions. I've looped the Rockchip mailing list into this too, as this affects rk356x, and likely the upcoming rk3588 if [1] is to be believed. TLDR for those not familiar: It seems the rk356x series (and possibly the rk3588) were built without any outer coherent cache. This means (unless Rockchip wants to clarify here) devices such as the ITS and PCIe cannot utilize cache snooping. This is based on the results of the email chain [2]. The new circumstances are as follows: The RPi CM4 Adventure Team as I've taken to calling them has been attempting to get a dGPU working with the very broken Broadcom controller in the RPi CM4. Recently they acquired a SoQuartz rk3566 module which is pin compatible with the CM4, and have taken to trying it out as well. This is how I got involved. It seems they found a trivial way to force the Radeon R600 driver to use Non-Cached memory for everything. This single line change, combined with using memset_io instead of memset, allows the ring tests to pass and the card probes successfully (minus the DMA limitations of the rk356x due to the 32 bit interconnect). I discovered using this method that we start having unaligned io memory access faults (bus errors) when running glmark2-drm (running glmark2 directly was impossible, as both X and Wayland crashed too early). I traced this to using what I thought at the time was an unsafe memcpy in the mesa stack. Rewriting this function to force aligned writes solved the problem and allows glmark2-drm to run to completion. 
With some extensive debugging, I found about half a dozen memcpy functions in mesa that if forced to be aligned would allow Wayland to start, but with hilarious display corruption (see [3]. [4]). The CM4 team is convinced this is an issue with memcpy in glibc, but I'm not convinced it's that simple. On my two hour drive in to work this morning, I got to thinking. If this was an memcpy fault, this would be universally broken on arm64 which is obviously not the case. So I started thinking, what is different here than with systems known to work: 1. No IOMMU for the PCIe controller. 2. The Outer Cache Issue. Robin: My questions for you, since you're the smartest person I know about arm64 memory management: Could cache snooping permit unaligned accesses to IO to be safe? Or Is it the lack of an IOMMU that's causing the alignment faults to become fatal? Or Am I insane here? Rockchip: Please update on the status for the Outer Cache errata for ITS services. Our SoC design team has double check with ARM GIC/ITS IP team for many times, and the GITS_CBASER of GIC600 IP does not support hardware bind or config to a fix value, so they insist this is an IP limitation instead of a SoC bug, software should take care of it :( I will check again if we can provide errata for this issue. Thanks. This is necessary as the mbi-alias provides an imperfect implementation of the ITS and causes certain PCIe cards (eg x520 Intel 10G NIC) to misbehave. Please provide an answer to the errata of the PCIe controller, in regard to cache snooping and buffering, for both the rk356x and the upcoming rk3588. Sorry, what is this? Part of the ITS bug is it expects to be cache coherent with the CPU cluster by design. Due to the rk356x being implemented without an outer accessible cache, the ITS and other devices that require cache coherency (PCIe for example) crash in fun ways. Then this is still the ITS issue, not PCIe issue. 
PCIe is a peripheral bus controller like USB and other device, the driver should maintain the "cache coherency" if there is any, and there is no requirement for hardware cache coherency between PCIe and CPU. Kever, These issues are one and the same. Well, that's not correct. You are still mixing two things up here: 1. The memory accesses from the device to the system memory must be coherent with the CPU cache. E.g. we root complex must snoop the CPU cache. That's a requirement of the PCIe spec. If you don't get that right a whole bunch of PCIe devices won't work correctly. 2. The memory accesses from the CPU to the devices PCIe BAR can be unaligned. E.g. a 64bit read can be aligned on a 32bit address. That is a requirement of the graphics stack. Other devices still might work fine without that. Regards, Christian. Certain hardware blocks *require* cache coherency as part of their design. All of the *interesting* things PCIe can do stem from it. When I saw you bumped the available window to the PCIe controller to 1GB I was really excited, because that meant we could finally
Re: radeon ring 0 test failed on arm64
Am 18.03.22 um 08:51 schrieb Kever Yang: On 2022/3/17 20:19, Peter Geis wrote: On Wed, Mar 16, 2022 at 11:08 PM Kever Yang wrote: Hi Peter, On 2022/3/17 08:14, Peter Geis wrote: Good Evening, I apologize for raising this email chain from the dead, but there have been some developments that have introduced even more questions. I've looped the Rockchip mailing list into this too, as this affects rk356x, and likely the upcoming rk3588 if [1] is to be believed. TLDR for those not familiar: It seems the rk356x series (and possibly the rk3588) were built without any outer coherent cache. This means (unless Rockchip wants to clarify here) devices such as the ITS and PCIe cannot utilize cache snooping. This is based on the results of the email chain [2]. The new circumstances are as follows: The RPi CM4 Adventure Team as I've taken to calling them has been attempting to get a dGPU working with the very broken Broadcom controller in the RPi CM4. Recently they acquired a SoQuartz rk3566 module which is pin compatible with the CM4, and have taken to trying it out as well. This is how I got involved. It seems they found a trivial way to force the Radeon R600 driver to use Non-Cached memory for everything. This single line change, combined with using memset_io instead of memset, allows the ring tests to pass and the card probes successfully (minus the DMA limitations of the rk356x due to the 32 bit interconnect). I discovered using this method that we start having unaligned io memory access faults (bus errors) when running glmark2-drm (running glmark2 directly was impossible, as both X and Wayland crashed too early). I traced this to using what I thought at the time was an unsafe memcpy in the mesa stack. Rewriting this function to force aligned writes solved the problem and allows glmark2-drm to run to completion. 
With some extensive debugging, I found about half a dozen memcpy functions in mesa that if forced to be aligned would allow Wayland to start, but with hilarious display corruption (see [3]. [4]). The CM4 team is convinced this is an issue with memcpy in glibc, but I'm not convinced it's that simple. On my two hour drive in to work this morning, I got to thinking. If this was an memcpy fault, this would be universally broken on arm64 which is obviously not the case. So I started thinking, what is different here than with systems known to work: 1. No IOMMU for the PCIe controller. 2. The Outer Cache Issue. Robin: My questions for you, since you're the smartest person I know about arm64 memory management: Could cache snooping permit unaligned accesses to IO to be safe? Or Is it the lack of an IOMMU that's causing the alignment faults to become fatal? Or Am I insane here? Rockchip: Please update on the status for the Outer Cache errata for ITS services. Our SoC design team has double check with ARM GIC/ITS IP team for many times, and the GITS_CBASER of GIC600 IP does not support hardware bind or config to a fix value, so they insist this is an IP limitation instead of a SoC bug, software should take care of it :( I will check again if we can provide errata for this issue. Thanks. This is necessary as the mbi-alias provides an imperfect implementation of the ITS and causes certain PCIe cards (eg x520 Intel 10G NIC) to misbehave. Please provide an answer to the errata of the PCIe controller, in regard to cache snooping and buffering, for both the rk356x and the upcoming rk3588. Sorry, what is this? Part of the ITS bug is it expects to be cache coherent with the CPU cluster by design. Due to the rk356x being implemented without an outer accessible cache, the ITS and other devices that require cache coherency (PCIe for example) crash in fun ways. Then this is still the ITS issue, not PCIe issue. 
PCIe is a peripheral bus controller like USB and other device, the driver should maintain the "cache coherency" if there is any, and there is no requirement for hardware cache coherency between PCIe and CPU. Well then I suggest to re-read the PCIe specification. Cache coherency is defined as mandatory there. Non-cache coherency is an optional feature. See section 2.2.6.5 in the PCIe 2.0 specification for a good example. Regards, Christian. We didn't see any transfer error on rk356x PCIe till now, we can take a look if it's easy to reproduce. Thanks, - Kever This means that rk356x cannot implement a specification compliant ITS or PCIe. From the rk3588 source dump it appears it was produced without an outer accessible cache, which means if true it also will be unable to use any PCIe cards that implement cache coherency as part of their design. Thanks, - Kever [1]
Re: [PATCH v2 1/2] drm: Add GPU reset sysfs event
Am 17.03.22 um 18:31 schrieb Rob Clark: On Thu, Mar 17, 2022 at 10:27 AM Daniel Vetter wrote: [SNIP] (At some point, I'd like to use scheduler for the replay, and actually use drm_sched_stop()/etc.. but last time I looked there were still some sched bugs in that area which prevented me from deleting a bunch of code ;-)) Not sure about your hw, but at least on intel replaying tends to just result in follow-on fun. And that holds even more so the more complex a workload is. This is why vk just dies immediately and does not try to replay anything, offloading it to the app. Same with arb robusteness. Afaik it's really only media and classic gl which insist that the driver stack somehow recover. At least for us, each submit must be self-contained (ie. not rely on previous GPU hw state), so in practice replay works out pretty well. The worst case is subsequent submits from same process fail as well (if they depended on something that crashing submit failed to write back to memory.. but in that case they just crash as well and we move on to the next one.. the recent gens (a5xx+ at least) are pretty good about quickly detecting problems and giving us an error irq. Well I absolutely agree with Daniel. The whole replay thing AMD did in the scheduler is an absolutely mess and should probably be killed with fire. I strongly recommend not to do the same mistake in other drivers. If you want to have some replay feature then please make it driver specific and don't use anything from the infrastructure in the DRM scheduler. Thanks, Christian. BR, -R And recovering from a mess in userspace is a lot simpler than trying to pull of the same magic in the kernel. Plus it also helps with a few of the dma_fence rules, which is a nice bonus. -Daniel
Re: [PATCH v3] drm/amdgpu: add workarounds for VCN TMZ issue on CHIP_RAVEN
Dear Christian, Am 16.03.22 um 11:08 schrieb Christian König: Am 16.03.22 um 10:57 schrieb Paul Menzel: Am 16.03.22 um 10:41 schrieb Christian König: Am 16.03.22 um 07:21 schrieb Lang Yu: On 03/16/ , Paul Menzel wrote: Am 16.03.22 um 02:27 schrieb Lang Yu: On 03/15/ , Paul Menzel wrote: Am 14.03.22 um 03:45 schrieb Lang Yu: Thank you for your patch. A shorter commit message summary would be: drm/amdgpu: Work around VNC TMZ issue on CHIP_RAVEN It is a hardware issue that VCN can't handle a GTT backing stored TMZ buffer on CHIP_RAVEN series ASIC. Where is that documented, and how can this be reproduced? It is documented in AMD internal Confluence and JIRA. Secure playback with a low VRAM config(thus TMZ buffer will be allocted in GTT domain) may reproduce this issue. It’d be great if as much of the details from this non-publicly accessible information could be added to the commit message, and a way to reproduce this as there does not seem to be a test for this. (Also I guess a tag with a reference to the internal issue would be acceptable, so in case more question surface in the future.) Thanks. I will add an internal link. Lang, please don't! This isn't an information which is expected to be made public. Well, how are then even the AMD folks able to link a (upstream) commit to an issue? Well quite simply: We don't do that since it isn't necessary. What other ways do you (or future AMD developers) have then? (I would also use *helpful* or *useful*.) (In two years, when maybe nobody of the current AMD developers work at AMD anymore, and a user bisects a problems to this patch I could imagine it would help the future AMD developers to have this connection.) If it’s not possible, even more detailed information about the issue including how to reproduce it needs to be part of the commit message. No, why should we do that? It's an AMD internal hardware problem which we add a software workaround for here. 
The hardware details why and what are completely irrelevant to the public. All that we need to document is that VCN can't handle GTT on Raven, and that's exactly what the commit message is doing. That's perfectly enough to write a test case. Thank you for clarifying, but I am not interested in the hardware details, but how to reproduce and test the issue. And according to Lang this information is present in the issue. Seeing how complex the graphics driver are, a lot of documentation is not publicly available, a recipe to manually reproduce and test the issue is most helpful. Kind regards, Paul