[PATCH 13/13] drm/amd/display: move FPU related code from dcn316 to dml/dcn31 folder

2022-03-18 Thread Alex Hung
From: Melissa Wen 

Moves FPU-related structs and dcn316_update_bw_bounding_box from the dcn316
driver to dml/dcn31, which centralizes FPU operations for DCN 3.1x.

Signed-off-by: Melissa Wen 
Reviewed-by: Alex Hung 
---
 .../gpu/drm/amd/display/dc/dcn316/Makefile|  26 --
 .../amd/display/dc/dcn316/dcn316_resource.c   | 231 +-
 .../amd/display/dc/dcn316/dcn316_resource.h   |   3 +
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 229 +
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h  |   2 +
 5 files changed, 235 insertions(+), 256 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
index cd87b687c5e2..819d44a9439b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
@@ -25,32 +25,6 @@
 
 DCN316 = dcn316_resource.o
 
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -mhard-float
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += 
-mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -msse2
-endif
-endif
-
 AMD_DAL_DCN316 = $(addprefix $(AMDDALPATH)/dc/dcn316/,$(DCN316))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN316)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
index 8decc3ccf8ca..d73145dab173 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
@@ -66,6 +66,7 @@
 #include "virtual/virtual_stream_encoder.h"
 #include "dce110/dce110_resource.h"
 #include "dml/display_mode_vba.h"
+#include "dml/dcn31/dcn31_fpu.h"
 #include "dcn31/dcn31_dccg.h"
 #include "dcn10/dcn10_resource.h"
 #include "dcn31/dcn31_panel_cntl.h"
@@ -123,157 +124,10 @@
 
 #include "link_enc_cfg.h"
 
-#define DC_LOGGER_INIT(logger)
-
-#define DCN3_16_DEFAULT_DET_SIZE 192
 #define DCN3_16_MAX_DET_SIZE 384
 #define DCN3_16_MIN_COMPBUF_SIZE_KB 128
 #define DCN3_16_CRB_SEGMENT_SIZE_KB 64
 
-struct _vcs_dpi_ip_params_st dcn3_16_ip = {
-   .gpuvm_enable = 1,
-   .gpuvm_max_page_table_levels = 1,
-   .hostvm_enable = 1,
-   .hostvm_max_page_table_levels = 2,
-   .rob_buffer_size_kbytes = 64,
-   .det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE,
-   .config_return_buffer_size_in_kbytes = 1024,
-   .compressed_buffer_segment_size_in_kbytes = 64,
-   .meta_fifo_size_in_kentries = 32,
-   .zero_size_buffer_entries = 512,
-   .compbuf_reserved_space_64b = 256,
-   .compbuf_reserved_space_zs = 64,
-   .dpp_output_buffer_pixels = 2560,
-   .opp_output_buffer_lines = 1,
-   .pixel_chunk_size_kbytes = 8,
-   .meta_chunk_size_kbytes = 2,
-   .min_meta_chunk_size_bytes = 256,
-   .writeback_chunk_size_kbytes = 8,
-   .ptoi_supported = false,
-   .num_dsc = 3,
-   .maximum_dsc_bits_per_component = 10,
-   .dsc422_native_support = false,
-   .is_line_buffer_bpp_fixed = true,
-   .line_buffer_fixed_bpp = 48,
-   .line_buffer_size_bits = 789504,
-   .max_line_buffer_lines = 12,
-   .writeback_interface_buffer_size_kbytes = 90,
-   .max_num_dpp = 4,
-   .max_num_otg = 4,
-   .max_num_hdmi_frl_outputs = 1,
-   .max_num_wb = 1,
-   .max_dchub_pscl_bw_pix_per_clk = 4,
-   .max_pscl_lb_bw_pix_per_clk = 2,
-   .max_lb_vscl_bw_pix_per_clk = 4,
-   .max_vscl_hscl_bw_pix_per_clk = 4,
-   .max_hscl_ratio = 6,
-   .max_vscl_ratio = 6,
-   .max_hscl_taps = 8,
-   .max_vscl_taps = 8,
-   .dpte_buffer_size_in_pte_reqs_luma = 64,
-   .dpte_buffer_size_in_pte_reqs_chroma = 34,
-   .dispclk_ramp_margin_percent = 1,
-   .max_inter_dcn_tile_repeaters = 8,
-   .cursor_buffer_size = 16,
-   .cursor_chunk_size = 2,
-   .writeback_line_buffer_buffer_size = 0,
-   .writeback_min_hscl_ratio = 1,
-   .writeback_min_vscl_ratio = 1,
-   .writeback_max_hscl_ratio = 1,
-   .writeback_max_vscl_ratio = 1,
-   .writeback_max_hscl_taps = 1,
-   .writeback_max_vscl_taps = 1,
-   .dppclk_delay_subtotal = 46,
-   .dppclk_delay_scl = 50,
-   .dppclk_delay_scl_lb_only = 16,
-   .dppclk_delay_cnvc_formatter = 27,
-   .dppclk_delay_cnvc_cursor = 6,
-   .dispclk_delay_subtotal = 119,
-   .dynamic_metadata_vm_enabled = false,
-   .odm_combine_4to1_supported = false,
-   .dcc_supported = true,
-};
-
-struct _vcs_dpi_soc_bounding_box_st 

[PATCH 12/13] drm/amd/display: move FPU related code from dcn315 to dml/dcn31 folder

2022-03-18 Thread Alex Hung
From: Melissa Wen 

Moves related structs and dcn315_update_bw_bounding_box from dcn315
driver code to dml/dcn31_fpu that centralizes FPU code for DCN 3.1x.

Signed-off-by: Melissa Wen 
Reviewed-by: Alex Hung 
---
 .../gpu/drm/amd/display/dc/dcn315/Makefile|  26 --
 .../amd/display/dc/dcn315/dcn315_resource.c   | 232 +-
 .../amd/display/dc/dcn315/dcn315_resource.h   |   3 +
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 228 +
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h  |   3 +
 5 files changed, 235 insertions(+), 257 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
index c831ad46e81c..59381d24800b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
@@ -25,32 +25,6 @@
 
 DCN315 = dcn315_resource.o
 
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -mhard-float
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += 
-mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -msse2
-endif
-endif
-
 AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN315)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
index 06adb77c206b..fadb89326999 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
@@ -66,6 +66,7 @@
 #include "virtual/virtual_stream_encoder.h"
 #include "dce110/dce110_resource.h"
 #include "dml/display_mode_vba.h"
+#include "dml/dcn31/dcn31_fpu.h"
 #include "dcn31/dcn31_dccg.h"
 #include "dcn10/dcn10_resource.h"
 #include "dcn31/dcn31_panel_cntl.h"
@@ -133,158 +134,9 @@
 
 #include "link_enc_cfg.h"
 
-#define DC_LOGGER_INIT(logger)
-
-#define DCN3_15_DEFAULT_DET_SIZE 192
 #define DCN3_15_MAX_DET_SIZE 384
-#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
 #define DCN3_15_CRB_SEGMENT_SIZE_KB 64
 
-struct _vcs_dpi_ip_params_st dcn3_15_ip = {
-   .gpuvm_enable = 1,
-   .gpuvm_max_page_table_levels = 1,
-   .hostvm_enable = 1,
-   .hostvm_max_page_table_levels = 2,
-   .rob_buffer_size_kbytes = 64,
-   .det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE,
-   .min_comp_buffer_size_kbytes = DCN3_15_MIN_COMPBUF_SIZE_KB,
-   .config_return_buffer_size_in_kbytes = 1024,
-   .compressed_buffer_segment_size_in_kbytes = 64,
-   .meta_fifo_size_in_kentries = 32,
-   .zero_size_buffer_entries = 512,
-   .compbuf_reserved_space_64b = 256,
-   .compbuf_reserved_space_zs = 64,
-   .dpp_output_buffer_pixels = 2560,
-   .opp_output_buffer_lines = 1,
-   .pixel_chunk_size_kbytes = 8,
-   .meta_chunk_size_kbytes = 2,
-   .min_meta_chunk_size_bytes = 256,
-   .writeback_chunk_size_kbytes = 8,
-   .ptoi_supported = false,
-   .num_dsc = 3,
-   .maximum_dsc_bits_per_component = 10,
-   .dsc422_native_support = false,
-   .is_line_buffer_bpp_fixed = true,
-   .line_buffer_fixed_bpp = 49,
-   .line_buffer_size_bits = 789504,
-   .max_line_buffer_lines = 12,
-   .writeback_interface_buffer_size_kbytes = 90,
-   .max_num_dpp = 4,
-   .max_num_otg = 4,
-   .max_num_hdmi_frl_outputs = 1,
-   .max_num_wb = 1,
-   .max_dchub_pscl_bw_pix_per_clk = 4,
-   .max_pscl_lb_bw_pix_per_clk = 2,
-   .max_lb_vscl_bw_pix_per_clk = 4,
-   .max_vscl_hscl_bw_pix_per_clk = 4,
-   .max_hscl_ratio = 6,
-   .max_vscl_ratio = 6,
-   .max_hscl_taps = 8,
-   .max_vscl_taps = 8,
-   .dpte_buffer_size_in_pte_reqs_luma = 64,
-   .dpte_buffer_size_in_pte_reqs_chroma = 34,
-   .dispclk_ramp_margin_percent = 1,
-   .max_inter_dcn_tile_repeaters = 9,
-   .cursor_buffer_size = 16,
-   .cursor_chunk_size = 2,
-   .writeback_line_buffer_buffer_size = 0,
-   .writeback_min_hscl_ratio = 1,
-   .writeback_min_vscl_ratio = 1,
-   .writeback_max_hscl_ratio = 1,
-   .writeback_max_vscl_ratio = 1,
-   .writeback_max_hscl_taps = 1,
-   .writeback_max_vscl_taps = 1,
-   .dppclk_delay_subtotal = 46,
-   .dppclk_delay_scl = 50,
-   .dppclk_delay_scl_lb_only = 16,
-   .dppclk_delay_cnvc_formatter = 27,
-   .dppclk_delay_cnvc_cursor = 6,
-   .dispclk_delay_subtotal = 119,
-   .dynamic_metadata_vm_enabled = false,
-   .odm_combine_4to1_supported = false,
-   

[PATCH 11/13] drm/amd/display: move FPU related code from dcn31 to dml/dcn31 folder

2022-03-18 Thread Alex Hung
From: Melissa Wen 

Creates FPU files in dml/dcn31 folder to centralize FPU operations
from 3.1x drivers and moves all FPU-associated code from dcn31 driver
to there. It includes the struct _vcs_dpi_ip_params_st and
_vcs_dpi_soc_bounding_box_st and functions:

- dcn31_calculate_wm_and_dlg_fp()
- dcn31_update_bw_bounding_box()

adding dc_assert_fp_enabled to them and drop DC_FP_START/END inside
functions that was moved to dml folder, as required.

Signed-off-by: Melissa Wen 
Reviewed-by: Alex Hung 
---
 drivers/gpu/drm/amd/display/dc/dcn31/Makefile |  26 --
 .../drm/amd/display/dc/dcn31/dcn31_resource.c | 355 +--
 .../drm/amd/display/dc/dcn31/dcn31_resource.h |   4 +-
 drivers/gpu/drm/amd/display/dc/dml/Makefile   |   2 +
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 406 ++
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h  |  39 ++
 6 files changed, 451 insertions(+), 381 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
index d20e3b8ccc30..ec041e3cda30 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
@@ -15,32 +15,6 @@ DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_hwseq.o 
dcn31_init.o dcn31_hubp.o
dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \
dcn31_afmt.o dcn31_vpg.o
 
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mhard-float
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -msse2
-endif
-endif
-
 AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN31)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 338235bcef4a..bf130b2435ab 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -65,6 +65,7 @@
 #include "virtual/virtual_stream_encoder.h"
 #include "dce110/dce110_resource.h"
 #include "dml/display_mode_vba.h"
+#include "dml/dcn31/dcn31_fpu.h"
 #include "dcn31/dcn31_dccg.h"
 #include "dcn10/dcn10_resource.h"
 #include "dcn31_panel_cntl.h"
@@ -102,152 +103,6 @@
 
 #define DC_LOGGER_INIT(logger)
 
-#define DCN3_1_DEFAULT_DET_SIZE 384
-
-struct _vcs_dpi_ip_params_st dcn3_1_ip = {
-   .gpuvm_enable = 1,
-   .gpuvm_max_page_table_levels = 1,
-   .hostvm_enable = 1,
-   .hostvm_max_page_table_levels = 2,
-   .rob_buffer_size_kbytes = 64,
-   .det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE,
-   .config_return_buffer_size_in_kbytes = 1792,
-   .compressed_buffer_segment_size_in_kbytes = 64,
-   .meta_fifo_size_in_kentries = 32,
-   .zero_size_buffer_entries = 512,
-   .compbuf_reserved_space_64b = 256,
-   .compbuf_reserved_space_zs = 64,
-   .dpp_output_buffer_pixels = 2560,
-   .opp_output_buffer_lines = 1,
-   .pixel_chunk_size_kbytes = 8,
-   .meta_chunk_size_kbytes = 2,
-   .min_meta_chunk_size_bytes = 256,
-   .writeback_chunk_size_kbytes = 8,
-   .ptoi_supported = false,
-   .num_dsc = 3,
-   .maximum_dsc_bits_per_component = 10,
-   .dsc422_native_support = false,
-   .is_line_buffer_bpp_fixed = true,
-   .line_buffer_fixed_bpp = 48,
-   .line_buffer_size_bits = 789504,
-   .max_line_buffer_lines = 12,
-   .writeback_interface_buffer_size_kbytes = 90,
-   .max_num_dpp = 4,
-   .max_num_otg = 4,
-   .max_num_hdmi_frl_outputs = 1,
-   .max_num_wb = 1,
-   .max_dchub_pscl_bw_pix_per_clk = 4,
-   .max_pscl_lb_bw_pix_per_clk = 2,
-   .max_lb_vscl_bw_pix_per_clk = 4,
-   .max_vscl_hscl_bw_pix_per_clk = 4,
-   .max_hscl_ratio = 6,
-   .max_vscl_ratio = 6,
-   .max_hscl_taps = 8,
-   .max_vscl_taps = 8,
-   .dpte_buffer_size_in_pte_reqs_luma = 64,
-   .dpte_buffer_size_in_pte_reqs_chroma = 34,
-   .dispclk_ramp_margin_percent = 1,
-   .max_inter_dcn_tile_repeaters = 8,
-   .cursor_buffer_size = 16,
-   .cursor_chunk_size = 2,
-   .writeback_line_buffer_buffer_size = 0,
-   .writeback_min_hscl_ratio = 1,
-   .writeback_min_vscl_ratio = 1,
-   .writeback_max_hscl_ratio = 1,
-   .writeback_max_vscl_ratio = 1,

Re: [PATCH 0/1] Title: DC Patches March 18, 2022

2022-03-18 Thread Hung, Alex
[AMD Official Use Only]

My apology to send incorrect one. Please ignore this. An updated one will be 
sent.

From: Hung, Alex 
Sent: 18 March 2022 15:44
To: amd-gfx@lists.freedesktop.org 
Cc: Hung, Alex 
Subject: [PATCH 0/1] Title: DC Patches March 18, 2022

This DC patchset brings improvements in multiple areas. In summary, we 
highlight:

* HDCP SEND AKI INIT error
* fix audio format not updated after edid updated
* Reduce stack size
* FEC check in timing validation
* Add fSMC_MSG_SetDtbClk support
* Update VTEM Infopacket definition
* [FW Promotion] Release 0.0.109.0
* Add support for zstate during extended vblank
* remove destructive verify link for TMDS
* move FPU related code from dcn31 to dml/dcn31 folder
* move FPU related code from dcn315 to dml/dcn31 folder
* move FPU related code from dcn316 to dml/dcn31 folder


Aric Cyr (1):
  drm/amd/display: 3.2.178

 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--
2.35.1



[PATCH 10/13] drm/amd/display: remove destructive verify link for TMDS

2022-03-18 Thread Alex Hung
From: Charlene Liu 

[why and how]
TMDS does not need a destructive link verification.

Reviewed-by: Aric Cyr 
Acked-by: Alan Liu 
Acked-by: Alex Hung 
Signed-off-by: Charlene Liu 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index cb87dd643180..bbaa5abdf888 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -983,8 +983,7 @@ static bool 
should_verify_link_capability_destructively(struct dc_link *link,
destrictive = false;
}
}
-   } else if (dc_is_hdmi_signal(link->local_sink->sink_signal))
-   destrictive = true;
+   }
 
return destrictive;
 }
-- 
2.35.1



[PATCH 09/13] drm/amd/display: Add support for zstate during extended vblank

2022-03-18 Thread Alex Hung
From: Gabe Teeger 

[why]
When we enter FREESYNC_STATE_VIDEO, we want to use the extra vblank
portion to enter zstate if possible.

[how]
When we enter freesync, a full update is triggered and the new vtotal
with extra lines is passed to dml in a stream update. The time gained
from extra vblank lines is calculated in microseconds. We allow zstate
entry if the time gained is greater than 5 ms, which is the current
policy. Furthermore, an optimized value for min_dst_y_next_start is
calculated and written to its register. When exiting freesync, another
full update is triggered and default values are restored.

Reviewed-by: Nicholas Kazlauskas 
Acked-by: Alex Hung 
Signed-off-by: Gabe Teeger 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c  | 19 +++
 drivers/gpu/drm/amd/display/dc/dc.h   |  6 +-
 drivers/gpu/drm/amd/display/dc/dc_stream.h|  2 ++
 .../drm/amd/display/dc/dcn20/dcn20_hwseq.c| 12 
 .../gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c |  8 
 .../drm/amd/display/dc/dcn31/dcn31_resource.c |  1 +
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 18 +++---
 .../dc/dml/dcn31/display_rq_dlg_calc_31.c | 13 +
 .../amd/display/dc/dml/display_mode_structs.h |  2 ++
 drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h  |  3 +++
 10 files changed, 80 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 75f9c97bebb0..f2ad8f58e69c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2393,6 +2393,8 @@ static enum surface_update_type 
check_update_surfaces_for_stream(
 
if (stream_update->mst_bw_update)
su_flags->bits.mst_bw = 1;
+   if (stream_update->crtc_timing_adjust && 
dc_extended_blank_supported(dc))
+   su_flags->bits.crtc_timing_adjust = 1;
 
if (su_flags->raw != 0)
overall_type = UPDATE_TYPE_FULL;
@@ -2654,6 +2656,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->vrr_infopacket)
stream->vrr_infopacket = *update->vrr_infopacket;
 
+   if (update->crtc_timing_adjust)
+   stream->adjust = *update->crtc_timing_adjust;
+
if (update->dpms_off)
stream->dpms_off = *update->dpms_off;
 
@@ -4055,3 +4060,17 @@ void dc_notify_vsync_int_state(struct dc *dc, struct 
dc_stream_state *stream, bo
if (pipe->stream_res.abm && pipe->stream_res.abm->funcs->set_abm_pause)

pipe->stream_res.abm->funcs->set_abm_pause(pipe->stream_res.abm, !enable, i, 
pipe->stream_res.tg->inst);
 }
+/*
+ * dc_extended_blank_supported: Decide whether extended blank is supported
+ *
+ * Extended blank is a freesync optimization feature to be enabled in the 
future.
+ * During the extra vblank period gained from freesync, we have the ability to 
enter z9/z10.
+ *
+ * @param [in] dc: Current DC state
+ * @return: Indicate whether extended blank is supported (true or false)
+ */
+bool dc_extended_blank_supported(struct dc *dc)
+{
+   return dc->debug.extended_blank_optimization && !dc->debug.disable_z10
+   && dc->caps.zstate_support && dc->caps.is_apu;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 116967b96b01..ced40fe218ac 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -188,6 +188,7 @@ struct dc_caps {
bool psp_setup_panel_mode;
bool extended_aux_timeout_support;
bool dmcub_support;
+   bool zstate_support;
uint32_t num_of_internal_disp;
enum dp_protocol_version max_dp_protocol_version;
unsigned int mall_size_per_mem_channel;
@@ -703,13 +704,14 @@ struct dc_debug_options {
bool enable_driver_sequence_debug;
enum det_size crb_alloc_policy;
int crb_alloc_policy_min_disp_count;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
bool disable_z10;
+#if defined(CONFIG_DRM_AMD_DC_DCN)
bool enable_z9_disable_interface;
bool enable_sw_cntl_psr;
union dpia_debug_options dpia_debug;
 #endif
bool apply_vendor_specific_lttpr_wa;
+   bool extended_blank_optimization;
bool ignore_dpref_ss;
uint8_t psr_power_use_phy_fsm;
 };
@@ -1369,6 +1371,8 @@ struct dc_sink_init_data {
bool converter_disable_audio;
 };
 
+bool dc_extended_blank_supported(struct dc *dc);
+
 struct dc_sink *dc_sink_create(const struct dc_sink_init_data *init_params);
 
 /* Newer interfaces  */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h 
b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 99a750f561f8..c4168c11257c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -131,6 +131,7 @@ union stream_update_flags {
uint32_t wb_update:1;

[PATCH 08/13] drm/amd/display: 3.2.178

2022-03-18 Thread Alex Hung
From: Aric Cyr 

This version brings along following fixes:
- HDCP SEND AKI INIT error
- fix audio format not updated after edid updated
- Reduce stack size
- FEC check in timing validation
- Add fSMC_MSG_SetDtbClk support
- Update VTEM Infopacket definition
- [FW Promotion] Release 0.0.109.0

Acked-by: Alex Hung 
Signed-off-by: Aric Cyr 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 4ffab7bb1098..116967b96b01 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.177"
+#define DC_VER "3.2.178"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
2.35.1



[PATCH 07/13] drm/amd/display: [FW Promotion] Release 0.0.109.0

2022-03-18 Thread Alex Hung
From: Anthony Koo 

Reviewed-by: Aric Cyr 
Acked-by: Alex Hung 
Signed-off-by: Anthony Koo 
---
 drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h 
b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 71214c7a60fc..ce773b56a778 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -46,10 +46,10 @@
 
 /* Firmware versioning. */
 #ifdef DMUB_EXPOSE_VERSION
-#define DMUB_FW_VERSION_GIT_HASH 0x929554ba
+#define DMUB_FW_VERSION_GIT_HASH 0x51b95a35
 #define DMUB_FW_VERSION_MAJOR 0
 #define DMUB_FW_VERSION_MINOR 0
-#define DMUB_FW_VERSION_REVISION 108
+#define DMUB_FW_VERSION_REVISION 109
 #define DMUB_FW_VERSION_TEST 0
 #define DMUB_FW_VERSION_VBIOS 0
 #define DMUB_FW_VERSION_HOTFIX 0
-- 
2.35.1



[PATCH 06/13] drm/amd/display: Update VTEM Infopacket definition

2022-03-18 Thread Alex Hung
From: "Leo (Hanghong) Ma" 

[Why & How]
The latest HDMI SPEC has updated the VTEM packet structure,
so change the VTEM Infopacket defined in the driver side to align
with the SPEC.

Reviewed-by: Chris Park 
Acked-by: Alex Hung 
Signed-off-by: Leo (Hanghong) Ma 
---
 .../gpu/drm/amd/display/modules/info_packet/info_packet.c| 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c 
b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index b691aa45e84f..79bc207415bc 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -100,7 +100,8 @@ enum vsc_packet_revision {
 //PB7 = MD0
 #define MASK_VTEM_MD0__VRR_EN 0x01
 #define MASK_VTEM_MD0__M_CONST0x02
-#define MASK_VTEM_MD0__RESERVED2  0x0C
+#define MASK_VTEM_MD0__QMS_EN 0x04
+#define MASK_VTEM_MD0__RESERVED2  0x08
 #define MASK_VTEM_MD0__FVA_FACTOR_M1  0xF0
 
 //MD1
@@ -109,7 +110,7 @@ enum vsc_packet_revision {
 //MD2
 #define MASK_VTEM_MD2__BASE_REFRESH_RATE_98  0x03
 #define MASK_VTEM_MD2__RB0x04
-#define MASK_VTEM_MD2__RESERVED3 0xF8
+#define MASK_VTEM_MD2__NEXT_TFR  0xF8
 
 //MD3
 #define MASK_VTEM_MD3__BASE_REFRESH_RATE_07  0xFF
-- 
2.35.1



[PATCH 05/13] drm/amd/display: Add fSMC_MSG_SetDtbClk support

2022-03-18 Thread Alex Hung
From: Oliver Logush 

[why]
Needed to support dcn315

Reviewed-by: Charlene Liu 
Acked-by: Alex Hung 
Signed-off-by: Oliver Logush 
---
 .../display/dc/clk_mgr/dcn315/dcn315_smu.c| 19 +++
 .../display/dc/clk_mgr/dcn315/dcn315_smu.h|  4 +++-
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c
index 880ffea2afc6..2600313fea57 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c
@@ -80,8 +80,8 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x, 
0x0014, 0x0D
 #define VBIOSSMC_MSG_SetDppclkFreq0x06 ///< Set DPP clock 
frequency in MHZ
 #define VBIOSSMC_MSG_SetHardMinDcfclkByFreq   0x07 ///< Set DCF clock 
frequency hard min in MHZ
 #define VBIOSSMC_MSG_SetMinDeepSleepDcfclk0x08 ///< Set DCF clock 
minimum frequency in deep sleep in MHZ
-#define VBIOSSMC_MSG_SetPhyclkVoltageByFreq   0x09 ///< Set display phy 
clock frequency in MHZ in case VMIN does not support phy frequency
-#define VBIOSSMC_MSG_GetFclkFrequency 0x0A ///< Get FCLK 
frequency, return frequemcy in MHZ
+#define VBIOSSMC_MSG_GetDtbclkFreq0x09 ///< Get display dtb 
clock frequency in MHZ in case VMIN does not support phy frequency
+#define VBIOSSMC_MSG_SetDtbClk0x0A ///< Set dtb clock 
frequency, return frequemcy in MHZ
 #define VBIOSSMC_MSG_SetDisplayCount  0x0B ///< Inform PMFW of 
number of display connected
 #define VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown 0x0C ///< To ask PMFW turn 
off TMDP 48MHz refclk during display off to save power
 #define VBIOSSMC_MSG_UpdatePmeRestore 0x0D ///< To ask PMFW to 
write into Azalia for PME wake up event
@@ -324,15 +324,26 @@ int dcn315_smu_get_dpref_clk(struct clk_mgr_internal 
*clk_mgr)
return (dprefclk_get_mhz * 1000);
 }
 
-int dcn315_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr)
+int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr)
 {
int fclk_get_mhz = -1;
 
if (clk_mgr->smu_present) {
fclk_get_mhz = dcn315_smu_send_msg_with_param(
clk_mgr,
-   VBIOSSMC_MSG_GetFclkFrequency,
+   VBIOSSMC_MSG_GetDtbclkFreq,
0);
}
return (fclk_get_mhz * 1000);
 }
+
+void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+   if (!clk_mgr->smu_present)
+   return;
+
+   dcn315_smu_send_msg_with_param(
+   clk_mgr,
+   VBIOSSMC_MSG_SetDtbClk,
+   enable);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h
index 66fa42f8dd18..5aa3275ac7d8 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h
@@ -37,6 +37,7 @@
 #define NUM_SOC_VOLTAGE_LEVELS  4
 #define NUM_DF_PSTATE_LEVELS4
 
+
 typedef struct {
   uint16_t MinClock; // This is either DCFCLK or SOCCLK (in MHz)
   uint16_t MaxClock; // This is either DCFCLK or SOCCLK (in MHz)
@@ -124,5 +125,6 @@ void dcn315_smu_transfer_wm_table_dram_2_smu(struct 
clk_mgr_internal *clk_mgr);
 void dcn315_smu_request_voltage_via_phyclk(struct clk_mgr_internal *clk_mgr, 
int requested_phyclk_khz);
 void dcn315_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr);
 int dcn315_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr);
-int dcn315_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr);
+int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr);
+void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable);
 #endif /* DAL_DC_315_SMU_H_ */
-- 
2.35.1



[PATCH 04/13] drm/amd/display: FEC check in timing validation

2022-03-18 Thread Alex Hung
From: Chiawen Huang 

[Why]
A disable/enable cycle leads to a FEC mismatch between the HW and SW FEC state.

[How]
Check the FEC status when deciding whether fastboot can be kept on or off.

Reviewed-by: Anthony Koo 
Acked-by: Alex Hung 
Signed-off-by: Chiawen Huang 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index f6e19efea756..75f9c97bebb0 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1496,6 +1496,10 @@ bool dc_validate_boot_timing(const struct dc *dc,
if (!link->link_enc->funcs->is_dig_enabled(link->link_enc))
return false;
 
+   /* Check for FEC status*/
+   if (link->link_enc->funcs->fec_is_active(link->link_enc))
+   return false;
+
enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc);
 
if (enc_inst == ENGINE_ID_UNKNOWN)
-- 
2.35.1



[PATCH 03/13] drm/amd/display: Reduce stack size

2022-03-18 Thread Alex Hung
From: Rodrigo Siqueira 

Linux kernel enabled more compilation restrictions related to the stack
size, which caused compilation failures in our code. This commit reduces
the allocation size by allocating the required memory dynamically.

Reviewed-by: Harry Wentland 
Reviewed-by: Aric Cyr 
Acked-by: Alex Hung 
Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index c3e141c19a77..ad757b59e00e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -2056,7 +2056,7 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int 
group_size,
 {
struct dc_context *dc_ctx = dc->ctx;
int i, master = -1, embedded = -1;
-   struct dc_crtc_timing hw_crtc_timing[MAX_PIPES] = {0};
+   struct dc_crtc_timing *hw_crtc_timing;
uint64_t phase[MAX_PIPES];
uint64_t modulo[MAX_PIPES];
unsigned int pclk;
@@ -2067,6 +2067,10 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int 
group_size,
uint32_t dp_ref_clk_100hz =

dc->res_pool->dp_clock_source->ctx->dc->clk_mgr->dprefclk_khz*10;
 
+   hw_crtc_timing = kcalloc(MAX_PIPES, sizeof(*hw_crtc_timing), 
GFP_KERNEL);
+   if (!hw_crtc_timing)
+   return master;
+
if (dc->config.vblank_alignment_dto_params &&
dc->res_pool->dp_clock_source->funcs->override_dp_pix_clk) {
embedded_h_total =
@@ -2130,6 +2134,8 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int 
group_size,
}
 
}
+
+   kfree(hw_crtc_timing);
return master;
 }
 
-- 
2.35.1



[PATCH 02/13] drm/amd/display: fix audio format not updated after edid updated

2022-03-18 Thread Alex Hung
From: Charlene Liu 

[why]
For the case where an EDID change only changed the audio format, the
driver still needs to update the stream.

Reviewed-by: Alvin Lee 
Reviewed-by: Aric Cyr 
Acked-by: Alex Hung 
Signed-off-by: Charlene Liu 
---
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 7af153434e9e..d251c3f3a714 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1685,8 +1685,8 @@ bool dc_is_stream_unchanged(
if (old_stream->ignore_msa_timing_param != 
stream->ignore_msa_timing_param)
return false;
 
-   // Only Have Audio left to check whether it is same or not. This is a 
corner case for Tiled sinks
-   if (old_stream->audio_info.mode_count != stream->audio_info.mode_count)
+   /*compare audio info*/
+   if (memcmp(_stream->audio_info, >audio_info, 
sizeof(stream->audio_info)) != 0)
return false;
 
return true;
-- 
2.35.1



[PATCH 01/13] drm/amd/display: HDCP SEND AKI INIT error

2022-03-18 Thread Alex Hung
From: Ahmad Othman 

[why]
HDCP sends AKI INIT error in case of multiple display on dock

[how]
Added new checks and method to handle display adjustment
for multiple display cases

Reviewed-by: Wenjing Liu 
Acked-by: Alex Hung 
Signed-off-by: Ahmad Othman 
---
 .../gpu/drm/amd/display/modules/hdcp/hdcp.c   | 38 ++-
 .../gpu/drm/amd/display/modules/hdcp/hdcp.h   |  8 
 .../drm/amd/display/modules/inc/mod_hdcp.h|  2 +-
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c 
b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c
index 3e81850a7ffe..5e01c6e24cbc 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c
@@ -251,6 +251,33 @@ static enum mod_hdcp_status reset_connection(struct 
mod_hdcp *hdcp,
return status;
 }
 
+static enum mod_hdcp_status update_display_adjustments(struct mod_hdcp *hdcp,
+   struct mod_hdcp_display *display,
+   struct mod_hdcp_display_adjustment *adj)
+{
+   enum mod_hdcp_status status = MOD_HDCP_STATUS_NOT_IMPLEMENTED;
+
+   if (is_in_authenticated_states(hdcp) &&
+   is_dp_mst_hdcp(hdcp) &&
+   display->adjust.disable == true &&
+   adj->disable == false) {
+   display->adjust.disable = false;
+   if (is_hdcp1(hdcp))
+   status = 
mod_hdcp_hdcp1_enable_dp_stream_encryption(hdcp);
+   else if (is_hdcp2(hdcp))
+   status = 
mod_hdcp_hdcp2_enable_dp_stream_encryption(hdcp);
+
+   if (status != MOD_HDCP_STATUS_SUCCESS)
+   display->adjust.disable = true;
+   }
+
+   if (status == MOD_HDCP_STATUS_SUCCESS &&
+   memcmp(adj, >adjust,
+   sizeof(struct mod_hdcp_display_adjustment)) != 0)
+   status = MOD_HDCP_STATUS_NOT_IMPLEMENTED;
+
+   return status;
+}
 /*
  * Implementation of functions in mod_hdcp.h
  */
@@ -391,7 +418,7 @@ enum mod_hdcp_status mod_hdcp_remove_display(struct 
mod_hdcp *hdcp,
return status;
 }
 
-enum mod_hdcp_status mod_hdcp_update_authentication(struct mod_hdcp *hdcp,
+enum mod_hdcp_status mod_hdcp_update_display(struct mod_hdcp *hdcp,
uint8_t index,
struct mod_hdcp_link_adjustment *link_adjust,
struct mod_hdcp_display_adjustment *display_adjust,
@@ -419,6 +446,15 @@ enum mod_hdcp_status mod_hdcp_update_authentication(struct 
mod_hdcp *hdcp,
goto out;
}
 
+   if (memcmp(link_adjust, >connection.link.adjust,
+   sizeof(struct mod_hdcp_link_adjustment)) == 0 &&
+   memcmp(display_adjust, >adjust,
+   sizeof(struct 
mod_hdcp_display_adjustment)) != 0) {
+   status = update_display_adjustments(hdcp, display, 
display_adjust);
+   if (status != MOD_HDCP_STATUS_NOT_IMPLEMENTED)
+   goto out;
+   }
+
/* stop current authentication */
status = reset_authentication(hdcp, output);
if (status != MOD_HDCP_STATUS_SUCCESS)
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h 
b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h
index 399fbca8947b..6b195207de90 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h
@@ -445,6 +445,14 @@ static inline uint8_t is_in_hdcp2_dp_states(struct 
mod_hdcp *hdcp)
current_state(hdcp) <= HDCP2_DP_STATE_END);
 }
 
+static inline uint8_t is_in_authenticated_states(struct mod_hdcp *hdcp)
+{
+   return (current_state(hdcp) == D1_A4_AUTHENTICATED ||
+   current_state(hdcp) == H1_A45_AUTHENTICATED ||
+   current_state(hdcp) == D2_A5_AUTHENTICATED ||
+   current_state(hdcp) == H2_A5_AUTHENTICATED);
+}
+
 static inline uint8_t is_hdcp1(struct mod_hdcp *hdcp)
 {
return (is_in_hdcp1_states(hdcp) || is_in_hdcp1_dp_states(hdcp));
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h 
b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
index f7420c3f5672..3348bb97ef81 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
@@ -294,7 +294,7 @@ enum mod_hdcp_status mod_hdcp_remove_display(struct 
mod_hdcp *hdcp,
uint8_t index, struct mod_hdcp_output *output);
 
 /* called per display to apply new authentication adjustment */
-enum mod_hdcp_status mod_hdcp_update_authentication(struct mod_hdcp *hdcp,
+enum mod_hdcp_status mod_hdcp_update_display(struct mod_hdcp *hdcp,
uint8_t index,
struct mod_hdcp_link_adjustment *link_adjust,
struct mod_hdcp_display_adjustment *display_adjust,
-- 
2.35.1



[PATCH 00/13] Title: DC Patches March 18, 2022

2022-03-18 Thread Alex Hung
This DC patchset brings improvements in multiple areas. In summary, we 
highlight:

* HDCP SEND AKI INIT error
* fix audio format not updated after edid updated
* Reduce stack size
* FEC check in timing validation
* Add fSMC_MSG_SetDtbClk support
* Update VTEM Infopacket definition
* [FW Promotion] Release 0.0.109.0
* Add support for zstate during extended vblank
* remove destructive verify link for TMDS
* move FPU related code from dcn31 to dml/dcn31 folder
* move FPU related code from dcn315 to dml/dcn31 folder
* move FPU related code from dcn316 to dml/dcn31 folder

Ahmad Othman (1):
  drm/amd/display: HDCP SEND AKI INIT error

Anthony Koo (1):
  drm/amd/display: [FW Promotion] Release 0.0.109.0

Aric Cyr (1):
  drm/amd/display: 3.2.178

Charlene Liu (2):
  drm/amd/display: fix audio format not updated after edid updated
  drm/amd/display: remove destructive verify link for TMDS

Chiawen Huang (1):
  drm/amd/display: FEC check in timing validation

Gabe Teeger (1):
  drm/amd/display: Add support for zstate during extended vblank

Leo (Hanghong) Ma (1):
  drm/amd/display: Update VTEM Infopacket definition

Melissa Wen (3):
  drm/amd/dicplay: move FPU related code from dcn31 to dml/dcn31 folder
  drm/amd/display: move FPU related code from dcn315 to dml/dcn31 folder
  drm/amd/display: move FPU related code from dcn316 to dml/dcn31 folder

Oliver Logush (1):
  drm/amd/display: Add fSMC_MSG_SetDtbClk support

Rodrigo Siqueira (1):
  drm/amd/display: Reduce stack size

 .../display/dc/clk_mgr/dcn315/dcn315_smu.c|  19 +-
 .../display/dc/clk_mgr/dcn315/dcn315_smu.h|   4 +-
 drivers/gpu/drm/amd/display/dc/core/dc.c  |  23 +
 drivers/gpu/drm/amd/display/dc/core/dc_link.c |   3 +-
 .../gpu/drm/amd/display/dc/core/dc_resource.c |   4 +-
 drivers/gpu/drm/amd/display/dc/dc.h   |   8 +-
 drivers/gpu/drm/amd/display/dc/dc_stream.h|   2 +
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.c |   8 +-
 .../drm/amd/display/dc/dcn20/dcn20_hwseq.c|  12 +
 drivers/gpu/drm/amd/display/dc/dcn31/Makefile |  26 -
 .../gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c |   8 +
 .../drm/amd/display/dc/dcn31/dcn31_resource.c | 356 +---
 .../drm/amd/display/dc/dcn31/dcn31_resource.h |   4 +-
 .../gpu/drm/amd/display/dc/dcn315/Makefile|  26 -
 .../amd/display/dc/dcn315/dcn315_resource.c   | 232 +
 .../amd/display/dc/dcn315/dcn315_resource.h   |   3 +
 .../gpu/drm/amd/display/dc/dcn316/Makefile|  26 -
 .../amd/display/dc/dcn316/dcn316_resource.c   | 231 +
 .../amd/display/dc/dcn316/dcn316_resource.h   |   3 +
 drivers/gpu/drm/amd/display/dc/dml/Makefile   |   2 +
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  |  18 +-
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 863 ++
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h  |  44 +
 .../dc/dml/dcn31/display_rq_dlg_calc_31.c |  13 +
 .../amd/display/dc/dml/display_mode_structs.h |   2 +
 drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h  |   3 +
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |   4 +-
 .../gpu/drm/amd/display/modules/hdcp/hdcp.c   |  38 +-
 .../gpu/drm/amd/display/modules/hdcp/hdcp.h   |   8 +
 .../drm/amd/display/modules/inc/mod_hdcp.h|   2 +-
 .../display/modules/info_packet/info_packet.c |   5 +-
 31 files changed, 1085 insertions(+), 915 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h

-- 
2.35.1



[PATCH 0/1] Title: DC Patches March 18, 2022

2022-03-18 Thread Alex Hung
This DC patchset brings improvements in multiple areas. In summary, we 
highlight:

* HDCP SEND AKI INIT error
* fix audio format not updated after edid updated
* Reduce stack size
* FEC check in timing validation
* Add fSMC_MSG_SetDtbClk support
* Update VTEM Infopacket definition
* [FW Promotion] Release 0.0.109.0
* Add support for zstate during extended vblank
* remove destructive verify link for TMDS
* move FPU related code from dcn31 to dml/dcn31 folder
* move FPU related code from dcn315 to dml/dcn31 folder
* move FPU related code from dcn316 to dml/dcn31 folder


Aric Cyr (1):
  drm/amd/display: 3.2.178

 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

-- 
2.35.1



[pull] amdgpu, amdkfd drm-next-5.18

2022-03-18 Thread Alex Deucher
Hi Dave, Daniel,

Fixes for 5.18.

The following changes since commit f6d790e5a7fe42706756c7fa1686d08d230610fc:

  Merge tag 'drm-intel-next-fixes-2022-03-10' of 
git://anongit.freedesktop.org/drm/drm-intel into drm-next (2022-03-11 13:27:00 
+1000)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-next-5.18-2022-03-18

for you to fetch changes up to 426c89aa203bcec9d9cf6eea36735eafa1b1f099:

  drm/amdgpu: Use drm_mode_copy() (2022-03-15 15:01:12 -0400)


amd-drm-next-5.18-2022-03-18:

amdgpu:
- Aldebaran fixes
- SMU 13.0.5 fixes
- DCN 3.1.5 fixes
- DCN 3.1.6 fixes
- Pipe split fixes
- More display FP cleanup
- DP 2.0 UHBR fix
- DC GPU reset fix
- DC deep color ratio fix
- SMU robustness fixes
- Runtime PM fix for APUs
- IGT reload fixes
- SR-IOV fix
- Misc fixes and cleanups

amdkfd:
- CRIU fixes
- SVM fixes

UAPI:
- Properly handle SDMA transfers with CRIU
  Proposed user mode change: 
https://github.com/checkpoint-restore/criu/pull/1709


Alex Deucher (2):
  drm/amdgpu/display: enable scatter/gather display for DCN 3.1.6
  drm/amdgpu: only check for _PR3 on dGPUs

Anthony Koo (2):
  drm/amd/display: [FW Promotion] Release 0.0.107.0
  drm/amd/display: [FW Promotion] Release 0.0.108.0

Aric Cyr (2):
  drm/amd/display: 3.2.176
  drm/amd/display: 3.2.177

Becle Lee (1):
  drm/amd/display: Wait for hubp read line for Pollock

Charlene Liu (5):
  drm/amd/display: add debug option to bypass ssinfo from bios for dcn315
  drm/amd/display: fix the clock source contruct for dcn315
  drm/amd/display: merge two duplicated clock_source_create
  drm/amd/display: enable dcn315/316 s0i2 support
  drm/amd/display: Add save/restore PANEL_PWRSEQ_REF_DIV2

Chris Park (1):
  drm/amd/display: Add NULL check

Dale Zhao (1):
  drm/amd/display: Add new enum for EDID status

Dan Carpenter (1):
  drm/amd/pm: fix indenting in __smu_cmn_reg_print_error()

David Yat Sin (3):
  drm/amdkfd: CRIU remove sync and TLB flush on restore
  drm/amdkfd: CRIU Refactor restore BO function
  drm/amdkfd: CRIU export dmabuf handles for GTT BOs

Dillon Varone (1):
  drm/amd/display: Add minimal pipe split transition state

Eric Yang (1):
  drm/amd/display: Block zstate when more than one plane enabled

George Shen (1):
  drm/amd/display: Clean up fixed VS PHY test w/a function

Hansen Dsouza (1):
  drm/amd/display: fix deep color ratio

Hawking Zhang (1):
  drm/amdgpu: drop xmgi23 error query/reset support

Jasdeep Dhillon (1):
  drm/amd/display: move FPU associated DCN303 code to DML folder

JinZe.Xu (1):
  drm/amd/display: Add I2C escape to support query device exist.

Jing Zhou (2):
  drm/amd/display: Update engine ddc
  drm/amd/display: Add null pointer filter

Jingwen Zhu (1):
  drm/amd/display: add gamut coefficient set A and B

Jonathan Kim (1):
  drm/amdgpu: fix aldebaran xgmi topology for vf

Julia Lawall (3):
  drm/amd/pm: fix typos in comments
  drm/amdgpu: fix typos in comments
  drm/amdgpu/dc: fix typos in comments

Lang Yu (1):
  drm/amdgpu: only allow secure submission on rings which support that

Leo (Hanghong) Ma (2):
  drm/amd/display: Add link dp trace support
  drm/amd/display: Add function to get the pipe from the stream context

Leo Li (1):
  drm/amd/display: Fix compile error from TO_CLK_MGR_INTERNAL

Leung, Martin (1):
  drm/amd/display: cleaning up smu_if to add future flexibility

Lijo Lazar (2):
  drm/amdgpu: Disable baco dummy mode
  drm/amd/pm: Send message when resp status is 0xFC

Melissa Wen (3):
  drm/amd/display: move FPU-related code from dcn20 to dml folder
  drm/amd/display: move FPU operations from dcn21 to dml/dcn20 folder
  drm/amd/display: move FPU code from dcn10 to dml/dcn10 folder

Nicholas Kazlauskas (2):
  drm/amd/display: Fix double free during GPU reset on DC streams
  drm/amd/display: Add pstate verification and recovery for DCN31

Paul Menzel (1):
  drm/amdgpu: Use ternary operator in `vcn_v1_0_start()`

Philip Yang (2):
  drm/amdgpu: Move reset domain init before calling RREG32
  drm/amdkfd: evict svm bo worker handle error

Stanley.Yang (3):
  drm/amd/pm: add send bad channel info function
  drm/amdgpu: message smu to update bad channel info
  drm/amd/pm: use pm mutex to protect ecc info table

Sung Joon Kim (1):
  drm/amd/display: disable HPD SW timer for passive dongle type 1 only

Tianci Yin (1):
  drm/amdgpu/vcn: fix vcn ring test failure in igt reload test

Tianci.Yin (2):
  drm/amd/display: fix dp kvm can't light up
  drm/amd: fix gfx hang on renoir in IGT reload test

Ville Syrjälä (3):
  drm/amdgpu: Remove pointless on stack mode copies
  drm/radeon: Use drm_mode_copy()
  drm/amdgpu: 

[PATCH] drm/amd/display: Fix p-state allow debug index on dcn31

2022-03-18 Thread Nicholas Kazlauskas
[Why]
It changed since dcn30 but the hubbub31 constructor hasn't been
modified to reflect this.

[How]
Update the value in the constructor to 0x6 so we're checking the right
bits for p-state allow.

It worked before by accident, but can falsely assert 0 depending on HW
state transitions. The most frequent of which appears to be when
all pipes turn off during IGT tests.

Cc: Harry Wentland 
Fixes: d158560fc0e1 ("drm/amd/display: Add pstate verification and recovery for 
DCN31")
Signed-off-by: Nicholas Kazlauskas 
Reviewed-by: Eric Yang 
---
 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
index 3e6d6ebd199e..51c5f3685470 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
@@ -1042,5 +1042,7 @@ void hubbub31_construct(struct dcn20_hubbub *hubbub31,
hubbub31->detile_buf_size = det_size_kb * 1024;
hubbub31->pixel_chunk_size = pixel_chunk_size_kb * 1024;
hubbub31->crb_size_segs = config_return_buffer_size_kb / 
DCN31_CRB_SEGMENT_SIZE_KB;
+
+   hubbub31->debug_test_index_pstate = 0x6;
 }
 
-- 
2.25.1



Re: [PATCH 4/7] drm/amdgpu: rework TLB flushing

2022-03-18 Thread philip yang

  


On 2022-03-17 9:50 a.m., Christian
  König wrote:


  Instead of tracking the VM updates through the dependencies just use a
sequence counter for page table updates which indicates the need to
flush the TLB.

This reduces the need to flush the TLB drastically.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  8 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c  |  6 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 20 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h |  2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 56 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   | 15 +++
 6 files changed, 75 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e8a3078a85cc..2d4a89fb264e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -810,7 +810,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_sync_vm_fence(>job->sync, fpriv->prt_va->last_pt_update);
+	r = amdgpu_sync_fence(>job->sync, fpriv->prt_va->last_pt_update);
 	if (r)
 		return r;
 
@@ -821,7 +821,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (r)
 			return r;
 
-		r = amdgpu_sync_vm_fence(>job->sync, bo_va->last_pt_update);
+		r = amdgpu_sync_fence(>job->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -840,7 +840,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (r)
 			return r;
 
-		r = amdgpu_sync_vm_fence(>job->sync, bo_va->last_pt_update);
+		r = amdgpu_sync_fence(>job->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -853,7 +853,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_sync_vm_fence(>job->sync, vm->last_update);
+	r = amdgpu_sync_fence(>job->sync, vm->last_update);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index b05c5fcb168d..93be290fc327 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -277,7 +277,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	unsigned vmhub = ring->funcs->vmhub;
 	uint64_t fence_context = adev->fence_context + ring->idx;
 	bool needs_flush = vm->use_cpu_for_update;
-	uint64_t updates = sync->last_vm_update;
+	uint64_t updates = amdgpu_vm_tlb_seq(vm);
 	int r;
 
 	*id = vm->reserved_vmid[vmhub];
@@ -338,7 +338,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
 	unsigned vmhub = ring->funcs->vmhub;
 	struct amdgpu_vmid_mgr *id_mgr = >vm_manager.id_mgr[vmhub];
 	uint64_t fence_context = adev->fence_context + ring->idx;
-	uint64_t updates = sync->last_vm_update;
+	uint64_t updates = amdgpu_vm_tlb_seq(vm);
 	int r;
 
 	job->vm_needs_flush = vm->use_cpu_for_update;
@@ -426,7 +426,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 			if (r)
 goto error;
 
-			id->flushed_updates = sync->last_vm_update;
+			id->flushed_updates = amdgpu_vm_tlb_seq(vm);
 			job->vm_needs_flush = true;
 		}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index bc5ab44c5830..ff9229819b79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -51,7 +51,6 @@ static struct kmem_cache *amdgpu_sync_slab;
 void amdgpu_sync_create(struct amdgpu_sync *sync)
 {
 	hash_init(sync->fences);
-	sync->last_vm_update = 0;
 }
 
 /**
@@ -171,23 +170,6 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
 	return 0;
 }
 
-/**
- * amdgpu_sync_vm_fence - remember to sync to this VM fence
- *
- * @sync: sync object to add fence to
- * @fence: the VM fence to add
- *
- * Add the fence to the sync object and remember it as VM update.
- */
-int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
-{
-	if (!fence)
-		return 0;
-
-	sync->last_vm_update = max(sync->last_vm_update, fence->seqno);
-	return amdgpu_sync_fence(sync, fence);
-}
-
 /* Determine based on the owner and mode if we should sync to a fence or not */
 static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
    enum amdgpu_sync_mode mode,
@@ -377,8 +359,6 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
 		}
 	}
 
-	clone->last_vm_update = source->last_vm_update;
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index 876c1ee8869c..2d5c613cda10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -43,12 +43,10 @@ enum amdgpu_sync_mode {
  */
 struct amdgpu_sync {
 	DECLARE_HASHTABLE(fences, 4);
-	uint64_t	last_vm_update;
 };
 
 void amdgpu_sync_create(struct amdgpu_sync *sync);
 int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);

Re: [REGRESSION] Too-low frequency limit for AMD GPU PCI-passed-through to Windows VM

2022-03-18 Thread Alex Williamson
On Fri, 18 Mar 2022 11:06:00 -0400
Alex Deucher  wrote:

> On Fri, Mar 18, 2022 at 10:46 AM Alex Williamson
>  wrote:
> >
> > On Fri, 18 Mar 2022 08:01:31 +0100
> > Thorsten Leemhuis  wrote:
> >  
> > > On 18.03.22 06:43, Paul Menzel wrote:  
> > > >
> > > > Am 17.03.22 um 13:54 schrieb Thorsten Leemhuis:  
> > > >> On 13.03.22 19:33, James Turner wrote:  
> > > >>>  
> > >  My understanding at this point is that the root problem is probably
> > >  not in the Linux kernel but rather something else (e.g. the machine
> > >  firmware or AMD Windows driver) and that the change in f9b7f3703ff9
> > >  ("drm/amdgpu/acpi: make ATPX/ATCS structures global (v2)") simply
> > >  exposed the underlying problem.  
> > > >>
> > > >> FWIW: that in the end is irrelevant when it comes to the Linux kernel's
> > > >> 'no regressions' rule. For details see:
> > > >>
> > > >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/admin-guide/reporting-regressions.rst
> > > >>
> > > >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/process/handling-regressions.rst
> > > >>
> > > >>
> > > >> That being said: sometimes for the greater good it's better to not
> > > >> insist on that. And I guess that might be the case here.  
> > > >
> > > > But who decides that?  
> > >
> > > In the end afaics: Linus. But he can't watch each and every discussion,
> > > so it partly falls down to people discussing a regression, as they can
> > > always decide to get him involved in case they are unhappy with how a
> > > regression is handled. That obviously includes me in this case. I simply
> > > use my best judgement in such situations. I'm still undecided if that
> > > path is appropriate here, that's why I wrote above to see what James
> > > would say, as he afaics was the only one that reported this regression.
> > >  
> > > > Running stuff in a virtual machine is not that uncommon.  
> > >
> > > No, it's about passing through a GPU to a VM, which is a lot less common
> > > -- and afaics an area where blacklisting GPUs on the host to pass them
> > > through is not uncommon (a quick internet search confirmed that, but I
> > > might be wrong there).  
> >
> > Right, interference from host drivers and pre-boot environments is
> > always a concern with GPU assignment in particular.  AMD GPUs have a
> > long history of poor behavior relative to things like PCI secondary bus
> > resets which we use to try to get devices to clean, reusable states for
> > assignment.  Here a device is being bound to a host driver that
> > initiates some sort of power control, unbound from that driver and
> > exposed to new drivers far beyond the scope of the kernel's regression
> > policy.  Perhaps it's possible to undo such power control when
> > unbinding the device, but it's not necessarily a given that such a
> > thing is possible for this device without a cold reset.
> >
> > IMO, it's not fair to restrict the kernel from such advancements.  If
> > the use case is within a VM, don't bind host drivers.  It's difficult
> > to make promises when dynamically switching between host and userspace
> > drivers for devices that don't have functional reset mechanisms.
> > Thanks,  
> 
> Additionally, operating the isolated device in a VM on a constrained
> environment like a laptop may have other adverse side effects.  The
> driver in the guest would ideally know that this is a laptop and needs
> to properly interact with ACPI to handle power management on the
> device.  If that is not the case, the driver in the guest may end up
> running the device out of spec with what the platform supports.  It's
> also likely to break suspend and resume, especially on systems which
> use S0ix since the firmware will generally only turn off certain power
> rails if all of the devices on the rails have been put into the proper
> state.  That state may vary depending on the platform requirements.

Good point, devices with platform dependencies to manage thermal
budgets, etc. should be considered "use at your own risk" relative to
device assignment currently.  Thanks,

Alex



Re: [PATCH v2 1/2] drm: Add GPU reset sysfs event

2022-03-18 Thread Rob Clark
On Fri, Mar 18, 2022 at 12:42 AM Christian König
 wrote:
>
> Am 17.03.22 um 18:31 schrieb Rob Clark:
> > On Thu, Mar 17, 2022 at 10:27 AM Daniel Vetter  wrote:
> >> [SNIP]
> >>> (At some point, I'd like to use scheduler for the replay, and actually
> >>> use drm_sched_stop()/etc.. but last time I looked there were still
> >>> some sched bugs in that area which prevented me from deleting a bunch
> >>> of code ;-))
> >> Not sure about your hw, but at least on intel replaying tends to just
> >> result in follow-on fun. And that holds even more so the more complex a
> >> workload is. This is why vk just dies immediately and does not try to
> >> replay anything, offloading it to the app. Same with arb robusteness.
> >> Afaik it's really only media and classic gl which insist that the driver
> >> stack somehow recover.
> > At least for us, each submit must be self-contained (ie. not rely on
> > previous GPU hw state), so in practice replay works out pretty well.
> > The worst case is subsequent submits from same process fail as well
> > (if they depended on something that crashing submit failed to write
> > back to memory.. but in that case they just crash as well and we move
> > on to the next one.. the recent gens (a5xx+ at least) are pretty good
> > about quickly detecting problems and giving us an error irq.
>
> Well I absolutely agree with Daniel.
>
> The whole replay thing AMD did in the scheduler is an absolutely mess
> and should probably be killed with fire.
>
> I strongly recommend not to do the same mistake in other drivers.
>
> If you want to have some replay feature then please make it driver
> specific and don't use anything from the infrastructure in the DRM
> scheduler.

hmm, perhaps I was not clear, but I'm only talking about re-emitting
jobs *following* the faulting one (which could be from other contexts,
etc).. not trying to restart the faulting job.

You *absolutely* need to replay jobs following the faulting one, they
could be from unrelated contexts/processes.  You can't just drop them
on the floor.

Currently it is all driver specific, but I wanted to delete a lot of
code and move to using scheduler to handle faults/timeouts (but
blocked on that until [1] is resolved)

[1] 
https://patchwork.kernel.org/project/dri-devel/patch/1630457207-13107-2-git-send-email-monk@amd.com/

BR,
-R

> Thanks,
> Christian.
>
> >
> > BR,
> > -R
> >
> >> And recovering from a mess in userspace is a lot simpler than trying to
> >> pull of the same magic in the kernel. Plus it also helps with a few of the
> >> dma_fence rules, which is a nice bonus.
> >> -Daniel
> >>
>


Re: [REGRESSION] Too-low frequency limit for AMD GPU PCI-passed-through to Windows VM

2022-03-18 Thread Alex Deucher
On Fri, Mar 18, 2022 at 10:46 AM Alex Williamson
 wrote:
>
> On Fri, 18 Mar 2022 08:01:31 +0100
> Thorsten Leemhuis  wrote:
>
> > On 18.03.22 06:43, Paul Menzel wrote:
> > >
> > > Am 17.03.22 um 13:54 schrieb Thorsten Leemhuis:
> > >> On 13.03.22 19:33, James Turner wrote:
> > >>>
> >  My understanding at this point is that the root problem is probably
> >  not in the Linux kernel but rather something else (e.g. the machine
> >  firmware or AMD Windows driver) and that the change in f9b7f3703ff9
> >  ("drm/amdgpu/acpi: make ATPX/ATCS structures global (v2)") simply
> >  exposed the underlying problem.
> > >>
> > >> FWIW: that in the end is irrelevant when it comes to the Linux kernel's
> > >> 'no regressions' rule. For details see:
> > >>
> > >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/admin-guide/reporting-regressions.rst
> > >>
> > >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/process/handling-regressions.rst
> > >>
> > >>
> > >> That being said: sometimes for the greater good it's better to not
> > >> insist on that. And I guess that might be the case here.
> > >
> > > But who decides that?
> >
> > In the end afaics: Linus. But he can't watch each and every discussion,
> > so it partly falls down to people discussing a regression, as they can
> > always decide to get him involved in case they are unhappy with how a
> > regression is handled. That obviously includes me in this case. I simply
> > use my best judgement in such situations. I'm still undecided if that
> > path is appropriate here, that's why I wrote above to see what James
> > would say, as he afaics was the only one that reported this regression.
> >
> > > Running stuff in a virtual machine is not that uncommon.
> >
> > No, it's about passing through a GPU to a VM, which is a lot less common
> > -- and afaics an area where blacklisting GPUs on the host to pass them
> > through is not uncommon (a quick internet search confirmed that, but I
> > might be wrong there).
>
> Right, interference from host drivers and pre-boot environments is
> always a concern with GPU assignment in particular.  AMD GPUs have a
> long history of poor behavior relative to things like PCI secondary bus
> resets which we use to try to get devices to clean, reusable states for
> assignment.  Here a device is being bound to a host driver that
> initiates some sort of power control, unbound from that driver and
> exposed to new drivers far beyond the scope of the kernel's regression
> policy.  Perhaps it's possible to undo such power control when
> unbinding the device, but it's not necessarily a given that such a
> thing is possible for this device without a cold reset.
>
> IMO, it's not fair to restrict the kernel from such advancements.  If
> the use case is within a VM, don't bind host drivers.  It's difficult
> to make promises when dynamically switching between host and userspace
> drivers for devices that don't have functional reset mechanisms.
> Thanks,

Additionally, operating the isolated device in a VM on a constrained
environment like a laptop may have other adverse side effects.  The
driver in the guest would ideally know that this is a laptop and needs
to properly interact with ACPI to handle power management on the
device.  If that is not the case, the driver in the guest may end up
running the device out of spec with what the platform supports.  It's
also likely to break suspend and resume, especially on systems which
use S0ix since the firmware will generally only turn off certain power
rails if all of the devices on the rails have been put into the proper
state.  That state may vary depending on the platform requirements.

Alex

>
> Alex
>


Re: [REGRESSION] Too-low frequency limit for AMD GPU PCI-passed-through to Windows VM

2022-03-18 Thread Alex Williamson
On Fri, 18 Mar 2022 08:01:31 +0100
Thorsten Leemhuis  wrote:

> On 18.03.22 06:43, Paul Menzel wrote:
> >
> > Am 17.03.22 um 13:54 schrieb Thorsten Leemhuis:  
> >> On 13.03.22 19:33, James Turner wrote:  
> >>>  
>  My understanding at this point is that the root problem is probably
>  not in the Linux kernel but rather something else (e.g. the machine
>  firmware or AMD Windows driver) and that the change in f9b7f3703ff9
>  ("drm/amdgpu/acpi: make ATPX/ATCS structures global (v2)") simply
>  exposed the underlying problem.  
> >>
> >> FWIW: that in the end is irrelevant when it comes to the Linux kernel's
> >> 'no regressions' rule. For details see:
> >>
> >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/admin-guide/reporting-regressions.rst
> >>
> >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/process/handling-regressions.rst
> >>
> >>
> >> That being said: sometimes for the greater good it's better to not
> >> insist on that. And I guess that might be the case here.  
> > 
> > But who decides that?  
> 
> In the end afaics: Linus. But he can't watch each and every discussion,
> so it partly falls down to people discussing a regression, as they can
> always decide to get him involved in case they are unhappy with how a
> regression is handled. That obviously includes me in this case. I simply
> use my best judgement in such situations. I'm still undecided if that
> path is appropriate here, that's why I wrote above to see what James
> would say, as he afaics was the only one that reported this regression.
> 
> > Running stuff in a virtual machine is not that uncommon.  
> 
> No, it's about passing through a GPU to a VM, which is a lot less common
> -- and afaics an area where blacklisting GPUs on the host to pass them
> through is not uncommon (a quick internet search confirmed that, but I
> might be wrong there).

Right, interference from host drivers and pre-boot environments is
always a concern with GPU assignment in particular.  AMD GPUs have a
long history of poor behavior relative to things like PCI secondary bus
resets which we use to try to get devices to clean, reusable states for
assignment.  Here a device is being bound to a host driver that
initiates some sort of power control, unbound from that driver and
exposed to new drivers far beyond the scope of the kernel's regression
policy.  Perhaps it's possible to undo such power control when
unbinding the device, but it's not necessarily a given that such a
thing is possible for this device without a cold reset.

IMO, it's not fair to restrict the kernel from such advancements.  If
the use case is within a VM, don't bind host drivers.  It's difficult
to make promises when dynamically switching between host and userspace
drivers for devices that don't have functional reset mechanisms.
Thanks,

Alex



Re: [RFC PATCH 1/4] drm/amdkfd: Improve amdgpu_vm_handle_moved

2022-03-18 Thread Felix Kuehling

Am 2022-03-18 um 08:38 schrieb Christian König:

Am 17.03.22 um 20:11 schrieb Felix Kuehling:


Am 2022-03-17 um 04:21 schrieb Christian König:

Am 17.03.22 um 01:20 schrieb Felix Kuehling:
Let amdgpu_vm_handle_moved update all BO VA mappings of BOs 
reserved by

the caller. This will be useful for handling extra BO VA mappings in
KFD VMs that are managed through the render node API.


Yes, that change is on my TODO list for quite a while as well.


TODO: This may also allow simplification of amdgpu_cs_vm_handling. See
the TODO comment in the code.


No, that won't work just yet.

We need to change the TLB flush detection for that, but I'm already 
working on those as well.


Your TLB flushing patch series looks good to me.

There is one other issue, though. amdgpu_vm_handle_moved doesn't 
update the sync object, so I couldn't figure out I can wait for all 
the page table updates to finish.


Yes, and inside the CS we still need to go over all the BOs and gather 
the VM updates to wait for.


Not sure if you can do that in the KFD code as well. How exactly do 
you want to use it?


Before resuming user mode queues after an eviction, KFD currently 
updates all the BOs and their mappings that it knows about. But it 
doesn't know about the mappings made using the render node API. So my 
plan was to use amdgpu_vm_handle_moved for that. But I don't get any 
fences for the page table operations queues by amdgpu_vm_handle_moved. I 
think amdgpu_cs has the same problem. So how do I reliably wait for 
those to finish before I resume user mode queues?


If amdgpu_vm_handle_moved were able to update the sync object, then I 
also wouldn't need explicit amdgpu_vm_bo_update calls any more, similar 
to what I suggested in the TODO comment in amdgpu_cs_vm_handling.


Regards,
  Felix




Regards,
Christian.



Regards,
  Felix





Signed-off-by: Felix Kuehling 


Please update the TODO, with that done: Reviewed-by: Christian König 




---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 18 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++-
  4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index d162243d8e78..10941f0d8dde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -826,6 +826,10 @@ static int amdgpu_cs_vm_handling(struct 
amdgpu_cs_parser *p)

  return r;
  }
  +    /* TODO: Is this loop still needed, or could this be handled by
+ * amdgpu_vm_handle_moved, now that it can handle all BOs that 
are

+ * reserved under p->ticket?
+ */
  amdgpu_bo_list_for_each_entry(e, p->bo_list) {
  /* ignore duplicates */
  bo = ttm_to_amdgpu_bo(e->tv.bo);
@@ -845,7 +849,7 @@ static int amdgpu_cs_vm_handling(struct 
amdgpu_cs_parser *p)

  return r;
  }
  -    r = amdgpu_vm_handle_moved(adev, vm);
+    r = amdgpu_vm_handle_moved(adev, vm, &p->ticket);
  if (r)
  return r;
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

index 579adfafe4d0..50805613c38c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -414,7 +414,7 @@ amdgpu_dma_buf_move_notify(struct 
dma_buf_attachment *attach)

    r = amdgpu_vm_clear_freed(adev, vm, NULL);
  if (!r)
-    r = amdgpu_vm_handle_moved(adev, vm);
+    r = amdgpu_vm_handle_moved(adev, vm, ticket);
    if (r && r != -EBUSY)
  DRM_ERROR("Failed to invalidate VM page tables (%d))\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index fc4563cf2828..726b42c6d606 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2190,11 +2190,12 @@ int amdgpu_vm_clear_freed(struct 
amdgpu_device *adev,

   * PTs have to be reserved!
   */
  int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
-   struct amdgpu_vm *vm)
+   struct amdgpu_vm *vm,
+   struct ww_acquire_ctx *ticket)
  {
  struct amdgpu_bo_va *bo_va, *tmp;
  struct dma_resv *resv;
-    bool clear;
+    bool clear, unlock;
  int r;
  list_for_each_entry_safe(bo_va, tmp, &vm->moved, 
base.vm_status) {
@@ -2212,17 +2213,24 @@ int amdgpu_vm_handle_moved(struct 
amdgpu_device *adev,

  spin_unlock(&vm->invalidated_lock);
    /* Try to reserve the BO to avoid clearing its ptes */
-    if (!amdgpu_vm_debug && dma_resv_trylock(resv))
+    if (!amdgpu_vm_debug && dma_resv_trylock(resv)) {
  clear = false;
+    unlock = true;
+    /* The caller is already holding the reservation lock */
+    } else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
+ 

Re: radeon ring 0 test failed on arm64

2022-03-18 Thread Peter Geis
On Fri, Mar 18, 2022 at 4:35 AM Christian König
 wrote:
>
>
>
> Am 18.03.22 um 08:51 schrieb Kever Yang:
>
>
> On 2022/3/17 20:19, Peter Geis wrote:
>
> On Wed, Mar 16, 2022 at 11:08 PM Kever Yang  wrote:
>
> Hi Peter,
>
> On 2022/3/17 08:14, Peter Geis wrote:
>
> Good Evening,
>
> I apologize for raising this email chain from the dead, but there have
> been some developments that have introduced even more questions.
> I've looped the Rockchip mailing list into this too, as this affects
> rk356x, and likely the upcoming rk3588 if [1] is to be believed.
>
> TLDR for those not familiar: It seems the rk356x series (and possibly
> the rk3588) were built without any outer coherent cache.
> This means (unless Rockchip wants to clarify here) devices such as the
> ITS and PCIe cannot utilize cache snooping.
> This is based on the results of the email chain [2].
>
> The new circumstances are as follows:
> The RPi CM4 Adventure Team as I've taken to calling them has been
> attempting to get a dGPU working with the very broken Broadcom
> controller in the RPi CM4.
> Recently they acquired a SoQuartz rk3566 module which is pin
> compatible with the CM4, and have taken to trying it out as well.
>
> This is how I got involved.
> It seems they found a trivial way to force the Radeon R600 driver to
> use Non-Cached memory for everything.
> This single line change, combined with using memset_io instead of
> memset, allows the ring tests to pass and the card probes successfully
> (minus the DMA limitations of the rk356x due to the 32 bit
> interconnect).
> I discovered using this method that we start having unaligned io
> memory access faults (bus errors) when running glmark2-drm (running
> glmark2 directly was impossible, as both X and Wayland crashed too
> early).
> I traced this to using what I thought at the time was an unsafe memcpy
> in the mesa stack.
> Rewriting this function to force aligned writes solved the problem and
> allows glmark2-drm to run to completion.
> With some extensive debugging, I found about half a dozen memcpy
> functions in mesa that if forced to be aligned would allow Wayland to
> start, but with hilarious display corruption (see [3]. [4]).
> The CM4 team is convinced this is an issue with memcpy in glibc, but
> I'm not convinced it's that simple.
>
> On my two hour drive in to work this morning, I got to thinking.
> If this was an memcpy fault, this would be universally broken on arm64
> which is obviously not the case.
> So I started thinking, what is different here than with systems known to work:
> 1. No IOMMU for the PCIe controller.
> 2. The Outer Cache Issue.
>
> Robin:
> My questions for you, since you're the smartest person I know about
> arm64 memory management:
> Could cache snooping permit unaligned accesses to IO to be safe?
> Or
> Is it the lack of an IOMMU that's causing the alignment faults to become 
> fatal?
> Or
> Am I insane here?
>
> Rockchip:
> Please update on the status for the Outer Cache errata for ITS services.
>
> Our SoC design team has double check with ARM GIC/ITS IP team for many
> times, and the GITS_CBASER
> of GIC600 IP does not support hardware bind or config to a fix value, so
> they insist this is an IP
> limitation instead of a SoC bug, software should take  care of it :(
> I will check again if we can provide errata for this issue.
>
> Thanks. This is necessary as the mbi-alias provides an imperfect
> implementation of the ITS and causes certain PCIe cards (eg x520 Intel
> 10G NIC) to misbehave.
>
> Please provide an answer to the errata of the PCIe controller, in
> regard to cache snooping and buffering, for both the rk356x and the
> upcoming rk3588.
>
>
> Sorry, what is this?
>
> Part of the ITS bug is it expects to be cache coherent with the CPU
> cluster by design.
> Due to the rk356x being implemented without an outer accessible cache,
> the ITS and other devices that require cache coherency (PCIe for
> example) crash in fun ways.
>
> Then this is still the ITS issue, not PCIe issue.
> PCIe is a peripheral bus controller like USB and other device, the driver 
> should maintain the "cache coherency" if there is any, and there is no 
> requirement for hardware cache coherency between PCIe and CPU.

Kever,

These issues are one and the same.
Certain hardware blocks *require* cache coherency as part of their design.
All of the *interesting* things PCIe can do stem from it.

When I saw you bumped the available window to the PCIe controller to
1GB I was really excited, because that meant we could finally support
devices that used these interesting features.
However, without cache coherency, having more than a 256MB window is a
waste, as any card that can take advantage of it *requires* coherency.
The same thing goes for a resizable BAR.
EP mode is the same, having the ability to connect one CPU to another
CPU over a PCIe bus loses the advantages when you don't have
coherency.
At that point, you might as well toss in a 2.5GB ethernet port and
just use 

[PATCH v9 3/4] drm/msm: init panel orientation property

2022-03-18 Thread Hsin-Yi Wang
Init panel orientation property after connector is initialized. Let the
panel driver decides the orientation value later.

Signed-off-by: Hsin-Yi Wang 
---
 drivers/gpu/drm/msm/dsi/dsi_manager.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c 
b/drivers/gpu/drm/msm/dsi/dsi_manager.c
index 0c1b7dde377c..b5dc86ebcab9 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_manager.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c
@@ -627,6 +627,10 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id)
connector->interlace_allowed = 0;
connector->doublescan_allowed = 0;
 
+   ret = drm_connector_init_panel_orientation_property(connector);
+   if (ret)
+   goto fail;
+
drm_connector_attach_encoder(connector, msm_dsi->encoder);
 
ret = msm_dsi_manager_panel_init(connector, id);
-- 
2.35.1.894.gb6a874cedc-goog



Re: [Intel-gfx] [PATCH v8 1/3] gpu: drm: separate panel orientation property creating and value setting

2022-03-18 Thread Hsin-Yi Wang
On Fri, Feb 18, 2022 at 11:57 PM Harry Wentland  wrote:
>
> On 2022-02-18 07:12, Simon Ser wrote:
> > On Friday, February 18th, 2022 at 12:54, Hans de Goede 
> >  wrote:
> >
> >> On 2/18/22 12:39, Simon Ser wrote:
> >>> On Friday, February 18th, 2022 at 11:38, Hans de Goede 
> >>>  wrote:
> >>>
>  What I'm reading in the above is that it is being considered to allow
>  changing the panel-orientation value after the connector has been made
>  available to userspace; and let userspace know about this through a 
>  uevent.
> 
>  I believe that this is a bad idea, it is important to keep in mind here
>  what userspace (e.g. plymouth) uses this prorty for. This property is
>  used to rotate the image being rendered / shown on the framebuffer to
>  adjust for the panel orientation.
> 
>  So now lets assume we apply the correct upside-down orientation later
>  on a device with an upside-down mounted LCD panel. Then on boot the
>  following could happen:
> 
>  1. amdgpu exports a connector for the LCD panel to userspace without
>  setting panel-orient=upside-down
>  2. plymouth sees this and renders its splash normally, but since the
>  panel is upside-down it will now actually show upside-down
> >>>
> >>> At this point amdgpu hasn't probed the connector yet. So the connector
> >>> will be marked as disconnected, and plymouth shouldn't render anything.
> >>
> >> If before the initial probe of the connector there is a /dev/dri/card0
> >> which plymouth can access, then plymouth may at this point decide
> >> to disable any seemingly unused crtcs, which will make the screen go 
> >> black...
> >>
> >> I'm not sure if plymouth will actually do this, but AFAICT this would
> >> not be invalid behavior for a userspace kms consumer to do and I
> >> believe it is likely that mutter will disable unused crtcs.
> >>
> >> IMHO it is just a bad idea to register /dev/dri/card0 with userspace
> >> before the initial connector probe is done. Nothing good can come
> >> of that.
> >>
> >> If all the exposed connectors initially are going to show up as
> >> disconnected anyways what is the value in registering /dev/dri/card0
> >> with userspace early ?
> >
> > OK. I'm still unsure how I feel about this, but I think I agree with
> > you. That said, the amdgpu architecture is quite involved with multiple
> > abstraction levels, so I don't think I'm equipped to write a patch to
> > fix this...
> >
>
> amdgpu_dm's connector registration already triggers a detection. See the
> calls to dc_link_detect and amdgpu_dm_update_connector_after_detect in
> amdgpu_dm_initialize_drm_device.
>
> dc_link_detect is supposed to read the edid via
> dm_helpers_read_local_edid and amdgpu_dm_update_connector_after_detect
> will update the EDID on the connector via a
> drm_connector_update_edid_property call.
>
> This all happens at driver load.
>
> I don't know why you're seeing the embedded connector as disconnected
> unless the DP-MIPI bridge for some reason doesn't indicate that the panel
> is connected at driver load.
>
> Harry
>
> > cc Daniel Vetter: can you confirm probing all connectors is a good thing
> > to do on driver module load?
> >
>  I guess the initial modeline is inherited from the video-bios, but
>  what about the physical size? Note that you cannot just change the
>  physical size later either, that gets used to determine the hidpi
>  scaling factor in the bootsplash, and changing that after the initial
>  bootsplash dislay will also look ugly
> 
>  b) Why you need the edid for the panel-orientation property at all,
>  typically the edid prom is part of the panel and the panel does not
>  know that it is mounted e.g. upside down at all, that is a property
>  of the system as a whole not of the panel as a standalone unit so
>  in my experience getting panel-orient info is something which comes
>  from the firmware /video-bios not from edid ?
> >>>
> >>> This is an internal DRM thing. The orientation quirks logic uses the
> >>> mode size advertised by the EDID.
> >>
> >> The DMI based quirking does, yes. But e.g. the quirk code directly
> >> reading this from the Intel VBT does not rely on the mode.
> >>
> >> But if you are planning on using a DMI based quirk for the steamdeck
> >> then yes that needs the mode.
> >>
> >> Thee mode check is there for 2 reasons:
> >>
> >> 1. To avoid also applying the quirk to external displays, but
> >> I think that that is also solved in most drivers by only checking for
> >> a quirk at all on the eDP connector
> >>
> >> 2. Some laptop models ship with different panels in different badges
> >> some of these are portrait (so need a panel-orient) setting and others
> >> are landscape.
> >
> > That makes sense. So yeah the EDID mode based matching logic needs to
> > stay to accomodate for these cases.
> >
> >>> I agree that at least in the Steam
> >>> Deck case it may not make a lot of 

Re: radeon ring 0 test failed on arm64

2022-03-18 Thread Peter Geis
On Fri, Mar 18, 2022 at 8:31 AM Christian König
 wrote:
>
> Am 18.03.22 um 12:24 schrieb Peter Geis:
> > On Fri, Mar 18, 2022 at 4:35 AM Christian König
> >  wrote:
> >>
> >>
> >> Am 18.03.22 um 08:51 schrieb Kever Yang:
> >>
> >>
> >> On 2022/3/17 20:19, Peter Geis wrote:
> >>
> >> On Wed, Mar 16, 2022 at 11:08 PM Kever Yang  
> >> wrote:
> >>
> >> Hi Peter,
> >>
> >> On 2022/3/17 08:14, Peter Geis wrote:
> >>
> >> Good Evening,
> >>
> >> I apologize for raising this email chain from the dead, but there have
> >> been some developments that have introduced even more questions.
> >> I've looped the Rockchip mailing list into this too, as this affects
> >> rk356x, and likely the upcoming rk3588 if [1] is to be believed.
> >>
> >> TLDR for those not familiar: It seems the rk356x series (and possibly
> >> the rk3588) were built without any outer coherent cache.
> >> This means (unless Rockchip wants to clarify here) devices such as the
> >> ITS and PCIe cannot utilize cache snooping.
> >> This is based on the results of the email chain [2].
> >>
> >> The new circumstances are as follows:
> >> The RPi CM4 Adventure Team as I've taken to calling them has been
> >> attempting to get a dGPU working with the very broken Broadcom
> >> controller in the RPi CM4.
> >> Recently they acquired a SoQuartz rk3566 module which is pin
> >> compatible with the CM4, and have taken to trying it out as well.
> >>
> >> This is how I got involved.
> >> It seems they found a trivial way to force the Radeon R600 driver to
> >> use Non-Cached memory for everything.
> >> This single line change, combined with using memset_io instead of
> >> memset, allows the ring tests to pass and the card probes successfully
> >> (minus the DMA limitations of the rk356x due to the 32 bit
> >> interconnect).
> >> I discovered using this method that we start having unaligned io
> >> memory access faults (bus errors) when running glmark2-drm (running
> >> glmark2 directly was impossible, as both X and Wayland crashed too
> >> early).
> >> I traced this to using what I thought at the time was an unsafe memcpy
> >> in the mesa stack.
> >> Rewriting this function to force aligned writes solved the problem and
> >> allows glmark2-drm to run to completion.
> >> With some extensive debugging, I found about half a dozen memcpy
> >> functions in mesa that if forced to be aligned would allow Wayland to
> >> start, but with hilarious display corruption (see [3]. [4]).
> >> The CM4 team is convinced this is an issue with memcpy in glibc, but
> >> I'm not convinced it's that simple.
> >>
> >> On my two hour drive in to work this morning, I got to thinking.
> >> If this was an memcpy fault, this would be universally broken on arm64
> >> which is obviously not the case.
> >> So I started thinking, what is different here than with systems known to 
> >> work:
> >> 1. No IOMMU for the PCIe controller.
> >> 2. The Outer Cache Issue.
> >>
> >> Robin:
> >> My questions for you, since you're the smartest person I know about
> >> arm64 memory management:
> >> Could cache snooping permit unaligned accesses to IO to be safe?
> >> Or
> >> Is it the lack of an IOMMU that's causing the alignment faults to become 
> >> fatal?
> >> Or
> >> Am I insane here?
> >>
> >> Rockchip:
> >> Please update on the status for the Outer Cache errata for ITS services.
> >>
> >> Our SoC design team has double check with ARM GIC/ITS IP team for many
> >> times, and the GITS_CBASER
> >> of GIC600 IP does not support hardware bind or config to a fix value, so
> >> they insist this is an IP
> >> limitation instead of a SoC bug, software should take  care of it :(
> >> I will check again if we can provide errata for this issue.
> >>
> >> Thanks. This is necessary as the mbi-alias provides an imperfect
> >> implementation of the ITS and causes certain PCIe cards (eg x520 Intel
> >> 10G NIC) to misbehave.
> >>
> >> Please provide an answer to the errata of the PCIe controller, in
> >> regard to cache snooping and buffering, for both the rk356x and the
> >> upcoming rk3588.
> >>
> >>
> >> Sorry, what is this?
> >>
> >> Part of the ITS bug is it expects to be cache coherent with the CPU
> >> cluster by design.
> >> Due to the rk356x being implemented without an outer accessible cache,
> >> the ITS and other devices that require cache coherency (PCIe for
> >> example) crash in fun ways.
> >>
> >> Then this is still the ITS issue, not PCIe issue.
> >> PCIe is a peripheral bus controller like USB and other device, the driver 
> >> should maintain the "cache coherency" if there is any, and there is no 
> >> requirement for hardware cache coherency between PCIe and CPU.
> > Kever,
> >
> > These issues are one and the same.
>
> Well, that's not correct. You are still mixing two things up here:
>
> 1. The memory accesses from the device to the system memory must be
> coherent with the CPU cache. E.g. we root complex must snoop the CPU cache.
>  That's a requirement of the PCIe spec. If you don't get that 

[PATCH v9 4/4] arm64: dts: mt8183: Add panel rotation

2022-03-18 Thread Hsin-Yi Wang
krane, kakadu, and kodama boards have a default panel rotation.

Signed-off-by: Hsin-Yi Wang 
Reviewed-by: Enric Balletbo i Serra 
Tested-by: Enric Balletbo i Serra 
---
 arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi 
b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
index 0f9480f91261..c7c6be106e2e 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
@@ -276,6 +276,7 @@ panel: panel@0 {
avee-supply = <_lcd>;
pp1800-supply = <_lcd>;
backlight = <_lcd0>;
+   rotation = <270>;
port {
panel_in: endpoint {
remote-endpoint = <_out>;
-- 
2.35.1.894.gb6a874cedc-goog



[PATCH v9 2/4] drm/mediatek: init panel orientation property

2022-03-18 Thread Hsin-Yi Wang
Init panel orientation property after connector is initialized. Let the
panel driver decides the orientation value later.

Signed-off-by: Hsin-Yi Wang 
Acked-by: Chun-Kuang Hu 
---
 drivers/gpu/drm/mediatek/mtk_dsi.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c 
b/drivers/gpu/drm/mediatek/mtk_dsi.c
index ccb0511b9cd5..0376b33e9651 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -810,6 +810,13 @@ static int mtk_dsi_encoder_init(struct drm_device *drm, 
struct mtk_dsi *dsi)
ret = PTR_ERR(dsi->connector);
goto err_cleanup_encoder;
}
+
+   ret = drm_connector_init_panel_orientation_property(dsi->connector);
+   if (ret) {
+   DRM_ERROR("Unable to init panel orientation\n");
+   goto err_cleanup_encoder;
+   }
+
drm_connector_attach_encoder(dsi->connector, >encoder);
 
return 0;
-- 
2.35.1.894.gb6a874cedc-goog



[PATCH v9 0/4] Separate panel orientation property creating and value setting

2022-03-18 Thread Hsin-Yi Wang
Some drivers, eg. mtk_drm and msm_drm, rely on the panel to set the
orientation. Panel calls drm_connector_set_panel_orientation() to create
orientation property and sets the value. However, connector properties
can't be created after drm_dev_register() is called. The goal is to
separate the orientation property creation, so drm drivers can create it
earlier before drm_dev_register().

After this series, drm_connector_set_panel_orientation() works like
before, so it won't affect other drm drivers. The only difference is that
some drm drivers can call drm_connector_init_panel_orientation_property()
earlier.

Hsin-Yi Wang (4):
  gpu: drm: separate panel orientation property creating and value
setting
  drm/mediatek: init panel orientation property
  drm/msm: init panel orientation property
  arm64: dts: mt8183: Add panel rotation

 .../arm64/boot/dts/mediatek/mt8183-kukui.dtsi |  1 +
 drivers/gpu/drm/drm_connector.c   | 58 ++-
 drivers/gpu/drm/mediatek/mtk_dsi.c|  7 +++
 drivers/gpu/drm/msm/dsi/dsi_manager.c |  4 ++
 include/drm/drm_connector.h   |  2 +
 5 files changed, 59 insertions(+), 13 deletions(-)

-- 
2.35.1.894.gb6a874cedc-goog



Re: radeon ring 0 test failed on arm64

2022-03-18 Thread Kever Yang



On 2022/3/17 20:19, Peter Geis wrote:

On Wed, Mar 16, 2022 at 11:08 PM Kever Yang  wrote:

Hi Peter,

On 2022/3/17 08:14, Peter Geis wrote:

Good Evening,

I apologize for raising this email chain from the dead, but there have
been some developments that have introduced even more questions.
I've looped the Rockchip mailing list into this too, as this affects
rk356x, and likely the upcoming rk3588 if [1] is to be believed.

TLDR for those not familiar: It seems the rk356x series (and possibly
the rk3588) were built without any outer coherent cache.
This means (unless Rockchip wants to clarify here) devices such as the
ITS and PCIe cannot utilize cache snooping.
This is based on the results of the email chain [2].

The new circumstances are as follows:
The RPi CM4 Adventure Team as I've taken to calling them has been
attempting to get a dGPU working with the very broken Broadcom
controller in the RPi CM4.
Recently they acquired a SoQuartz rk3566 module which is pin
compatible with the CM4, and have taken to trying it out as well.

This is how I got involved.
It seems they found a trivial way to force the Radeon R600 driver to
use Non-Cached memory for everything.
This single line change, combined with using memset_io instead of
memset, allows the ring tests to pass and the card probes successfully
(minus the DMA limitations of the rk356x due to the 32 bit
interconnect).
I discovered using this method that we start having unaligned io
memory access faults (bus errors) when running glmark2-drm (running
glmark2 directly was impossible, as both X and Wayland crashed too
early).
I traced this to using what I thought at the time was an unsafe memcpy
in the mesa stack.
Rewriting this function to force aligned writes solved the problem and
allows glmark2-drm to run to completion.
With some extensive debugging, I found about half a dozen memcpy
functions in mesa that if forced to be aligned would allow Wayland to
start, but with hilarious display corruption (see [3]. [4]).
The CM4 team is convinced this is an issue with memcpy in glibc, but
I'm not convinced it's that simple.

On my two hour drive in to work this morning, I got to thinking.
If this was an memcpy fault, this would be universally broken on arm64
which is obviously not the case.
So I started thinking, what is different here than with systems known to work:
1. No IOMMU for the PCIe controller.
2. The Outer Cache Issue.

Robin:
My questions for you, since you're the smartest person I know about
arm64 memory management:
Could cache snooping permit unaligned accesses to IO to be safe?
Or
Is it the lack of an IOMMU that's causing the alignment faults to become fatal?
Or
Am I insane here?

Rockchip:
Please update on the status for the Outer Cache errata for ITS services.

Our SoC design team has double check with ARM GIC/ITS IP team for many
times, and the GITS_CBASER
of GIC600 IP does not support hardware bind or config to a fix value, so
they insist this is an IP
limitation instead of a SoC bug, software should take  care of it :(
I will check again if we can provide errata for this issue.

Thanks. This is necessary as the mbi-alias provides an imperfect
implementation of the ITS and causes certain PCIe cards (eg x520 Intel
10G NIC) to misbehave.


Please provide an answer to the errata of the PCIe controller, in
regard to cache snooping and buffering, for both the rk356x and the
upcoming rk3588.


Sorry, what is this?

Part of the ITS bug is it expects to be cache coherent with the CPU
cluster by design.
Due to the rk356x being implemented without an outer accessible cache,
the ITS and other devices that require cache coherency (PCIe for
example) crash in fun ways.

Then this is still the ITS issue, not PCIe issue.
PCIe is a peripheral bus controller like USB and other device, the 
driver should maintain the "cache coherency" if there is any, and there 
is no requirement for hardware cache coherency between PCIe and CPU.
We didn't see any transfer error on rk356x PCIe till now, we can take a 
look if it's easy to reproduce.


Thanks,
- Kever



This means that rk356x cannot implement a specification compliant ITS or PCIe.
From the rk3588 source dump it appears it was produced without an
outer accessible cache, which means if true it also will be unable to
use any PCIe cards that implement cache coherency as part of their
design.



Thanks,
- Kever

[1] 
https://github.com/JeffyCN/mirrors/commit/0b985f29304dcb9d644174edacb67298e8049d4f
[2] https://lore.kernel.org/lkml/871rbdt4tu.wl-...@kernel.org/T/
[3] 
https://cdn.discordapp.com/attachments/926487797844541510/953414755970850816/unknown.png
[4] 
https://cdn.discordapp.com/attachments/926487797844541510/953424952042852422/unknown.png

Thank you everyone for your time.

Very Respectfully,
Peter Geis

On Wed, May 26, 2021 at 7:21 AM Christian König
 wrote:

Hi Robin,

Am 26.05.21 um 12:59 schrieb Robin Murphy:

On 2021-05-26 10:42, Christian König wrote:

Hi Robin,

Am 25.05.21 um 22:09 schrieb Robin 

Re: [REGRESSION] Too-low frequency limit for AMD GPU PCI-passed-through to Windows VM

2022-03-18 Thread Thorsten Leemhuis
On 18.03.22 06:43, Paul Menzel wrote:
>
> Am 17.03.22 um 13:54 schrieb Thorsten Leemhuis:
>> On 13.03.22 19:33, James Turner wrote:
>>>
 My understanding at this point is that the root problem is probably
 not in the Linux kernel but rather something else (e.g. the machine
 firmware or AMD Windows driver) and that the change in f9b7f3703ff9
 ("drm/amdgpu/acpi: make ATPX/ATCS structures global (v2)") simply
 exposed the underlying problem.
>>
>> FWIW: that in the end is irrelevant when it comes to the Linux kernel's
>> 'no regressions' rule. For details see:
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/admin-guide/reporting-regressions.rst
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/process/handling-regressions.rst
>>
>>
>> That being said: sometimes for the greater good it's better to not
>> insist on that. And I guess that might be the case here.
> 
> But who decides that?

In the end afaics: Linus. But he can't watch each and every discussion,
so it partly falls down to people discussing a regression, as they can
always decide to get him involved in case they are unhappy with how a
regression is handled. That obviously includes me in this case. I simply
use my best judgement in such situations. I'm still undecided if that
path is appropriate here, that's why I wrote above to see what James
would say, as he afaics was the only one that reported this regression.

> Running stuff in a virtual machine is not that uncommon.

No, it's about passing through a GPU to a VM, which is a lot less common
-- and afaics an area where blacklisting GPUs on the host to pass them
through is not uncommon (a quick internet search confirmed that, but I
might be wrong there).

> Should the commit be reverted, and re-added with a more elaborate commit
> message documenting the downsides?
> 
> Could the user be notified somehow? Can PCI passthrough and a loaded
> amdgpu driver be detected, so Linux warns about this?
>
> Also, should this be documented in the code?
>
>>> I'm not sure where to go from here. This issue isn't much of a concern
>>> for me anymore, since blacklisting `amdgpu` works for my machine. At
>>> this point, my understanding is that the root problem needs to be fixed
>>> in AMD's Windows GPU driver or Dell's firmware, not the Linux kernel. If
>>> any of the AMD developers on this thread would like to forward it to the
>>> AMD Windows driver team, I'd be happy to work with AMD to fix the issue
>>> properly.
> 
> (Thorsten, your mailer mangled the quote somehow 

Kinda, but it IIRC was more me doing something stupid with my mailer.
Sorry about that.

> – I reformatted it –,

thx!

> which is too bad, as this message is shown when clicking on the link
> *marked invalid* in the regzbot Web page [1]. (The link is a very nice
> feature.)
> 
>> In that case I'll drop it from the list of regressions, unless what I
>> wrote above makes you change your mind.
>>
>> #regzbot invalid: firmware issue exposed by kernel change, user seems to
>> be happy with a workaround
>>
>> Thx everyone who participated in handling this.
> 
> Should the regression issue be re-opened until the questions above are
> answered, and a more user friendly solution is found?

I'll for now will just continue to watch this discussion and see what
happens.

> [1]: https://linux-regtracking.leemhuis.info/regzbot/resolved/

Ciao, Thorsten


[PATCH v9 1/4] gpu: drm: separate panel orientation property creating and value setting

2022-03-18 Thread Hsin-Yi Wang
drm_dev_register() sets connector->registration_state to
DRM_CONNECTOR_REGISTERED and dev->registered to true. If
drm_connector_set_panel_orientation() is first called after
drm_dev_register(), it will fail several checks and results in following
warning.

Add a function to create panel orientation property and set default value
to UNKNOWN, so drivers can call this function to init the property earlier,
and let the panel set the real value later.

[4.480976] [ cut here ]
[4.485603] WARNING: CPU: 5 PID: 369 at drivers/gpu/drm/drm_mode_object.c:45 
__drm_mode_object_add+0xb4/0xbc

[4.609772] Call trace:
[4.612208]  __drm_mode_object_add+0xb4/0xbc
[4.616466]  drm_mode_object_add+0x20/0x2c
[4.620552]  drm_property_create+0xdc/0x174
[4.624723]  drm_property_create_enum+0x34/0x98
[4.629241]  drm_connector_set_panel_orientation+0x64/0xa0
[4.634716]  boe_panel_get_modes+0x88/0xd8
[4.638802]  drm_panel_get_modes+0x2c/0x48
[4.642887]  panel_bridge_get_modes+0x1c/0x28
[4.647233]  drm_bridge_connector_get_modes+0xa0/0xd4
[4.652273]  drm_helper_probe_single_connector_modes+0x218/0x700
[4.658266]  drm_mode_getconnector+0x1b4/0x45c
[4.662699]  drm_ioctl_kernel+0xac/0x128
[4.11]  drm_ioctl+0x268/0x410
[4.670002]  drm_compat_ioctl+0xdc/0xf0
[4.673829]  __arm64_compat_sys_ioctl+0xc8/0x100
[4.678436]  el0_svc_common+0xf4/0x1c0
[4.682174]  do_el0_svc_compat+0x28/0x3c
[4.686088]  el0_svc_compat+0x10/0x1c
[4.689738]  el0_sync_compat_handler+0xa8/0xcc
[4.694171]  el0_sync_compat+0x178/0x180
[4.698082] ---[ end trace b4f2db9d9c88610b ]---
[4.702721] [ cut here ]
[4.707329] WARNING: CPU: 5 PID: 369 at 
drivers/gpu/drm/drm_mode_object.c:243 drm_object_attach_property+0x48/0xb8

[4.833830] Call trace:
[4.836266]  drm_object_attach_property+0x48/0xb8
[4.840958]  drm_connector_set_panel_orientation+0x84/0xa0
[4.846432]  boe_panel_get_modes+0x88/0xd8
[4.850516]  drm_panel_get_modes+0x2c/0x48
[4.854600]  panel_bridge_get_modes+0x1c/0x28
[4.858946]  drm_bridge_connector_get_modes+0xa0/0xd4
[4.863984]  drm_helper_probe_single_connector_modes+0x218/0x700
[4.869978]  drm_mode_getconnector+0x1b4/0x45c
[4.874410]  drm_ioctl_kernel+0xac/0x128
[4.878320]  drm_ioctl+0x268/0x410
[4.881711]  drm_compat_ioctl+0xdc/0xf0
[4.885536]  __arm64_compat_sys_ioctl+0xc8/0x100
[4.890142]  el0_svc_common+0xf4/0x1c0
[4.893879]  do_el0_svc_compat+0x28/0x3c
[4.897791]  el0_svc_compat+0x10/0x1c
[4.901441]  el0_sync_compat_handler+0xa8/0xcc
[4.905873]  el0_sync_compat+0x178/0x180
[4.909783] ---[ end trace b4f2db9d9c88610c ]---

Signed-off-by: Hsin-Yi Wang 
Reviewed-by: Sean Paul 
---
 drivers/gpu/drm/drm_connector.c | 58 +
 include/drm/drm_connector.h |  2 ++
 2 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 76a8c707c34b..149709e05622 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -1252,7 +1252,7 @@ static const struct drm_prop_enum_list dp_colorspaces[] = 
{
  * INPUT_PROP_DIRECT) will still map 1:1 to the actual LCD panel
  * coordinates, so if userspace rotates the picture to adjust for
  * the orientation it must also apply the same transformation to the
- * touchscreen input coordinates. This property is initialized by calling
+ * touchscreen input coordinates. This property value is set by calling
  * drm_connector_set_panel_orientation() or
  * drm_connector_set_panel_orientation_with_quirk()
  *
@@ -2344,8 +2344,8 @@ EXPORT_SYMBOL(drm_connector_set_vrr_capable_property);
  * @connector: connector for which to set the panel-orientation property.
  * @panel_orientation: drm_panel_orientation value to set
  *
- * This function sets the connector's panel_orientation and attaches
- * a "panel orientation" property to the connector.
+ * This function sets the connector's panel_orientation value. If the property
+ * doesn't exist, it will try to create one.
  *
  * Calling this function on a connector where the panel_orientation has
  * already been set is a no-op (e.g. the orientation has been overridden with
@@ -2377,18 +2377,13 @@ int drm_connector_set_panel_orientation(
 
prop = dev->mode_config.panel_orientation_property;
if (!prop) {
-   prop = drm_property_create_enum(dev, DRM_MODE_PROP_IMMUTABLE,
-   "panel orientation",
-   drm_panel_orientation_enum_list,
-   ARRAY_SIZE(drm_panel_orientation_enum_list));
-   if (!prop)
+   if (drm_connector_init_panel_orientation_property(connector) < 
0)
return -ENOMEM;
-
-   dev->mode_config.panel_orientation_property = prop;
+   

Re: [RFC PATCH 1/4] drm/amdkfd: Improve amdgpu_vm_handle_moved

2022-03-18 Thread Christian König

Am 17.03.22 um 20:11 schrieb Felix Kuehling:


Am 2022-03-17 um 04:21 schrieb Christian König:

Am 17.03.22 um 01:20 schrieb Felix Kuehling:

Let amdgpu_vm_handle_moved update all BO VA mappings of BOs reserved by
the caller. This will be useful for handling extra BO VA mappings in
KFD VMs that are managed through the render node API.


Yes, that change is on my TODO list for quite a while as well.


TODO: This may also allow simplification of amdgpu_cs_vm_handling. See
the TODO comment in the code.


No, that won't work just yet.

We need to change the TLB flush detection for that, but I'm already 
working on those as well.


Your TLB flushing patch series looks good to me.

There is one other issue, though. amdgpu_vm_handle_moved doesn't 
update the sync object, so I couldn't figure out how I can wait for all 
the page table updates to finish.


Yes, and inside the CS we still need to go over all the BOs and gather 
the VM updates to wait for.


Not sure if you can do that in the KFD code as well. How exactly do you 
want to use it?


Regards,
Christian.



Regards,
  Felix





Signed-off-by: Felix Kuehling 


Please update the TODO, with that done: Reviewed-by: Christian König 




---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 18 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++-
  4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index d162243d8e78..10941f0d8dde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -826,6 +826,10 @@ static int amdgpu_cs_vm_handling(struct 
amdgpu_cs_parser *p)

  return r;
  }
  +    /* TODO: Is this loop still needed, or could this be handled by
+ * amdgpu_vm_handle_moved, now that it can handle all BOs that are
+ * reserved under p->ticket?
+ */
  amdgpu_bo_list_for_each_entry(e, p->bo_list) {
  /* ignore duplicates */
  bo = ttm_to_amdgpu_bo(e->tv.bo);
@@ -845,7 +849,7 @@ static int amdgpu_cs_vm_handling(struct 
amdgpu_cs_parser *p)

  return r;
  }
  -    r = amdgpu_vm_handle_moved(adev, vm);
+    r = amdgpu_vm_handle_moved(adev, vm, >ticket);
  if (r)
  return r;
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

index 579adfafe4d0..50805613c38c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -414,7 +414,7 @@ amdgpu_dma_buf_move_notify(struct 
dma_buf_attachment *attach)

    r = amdgpu_vm_clear_freed(adev, vm, NULL);
  if (!r)
-    r = amdgpu_vm_handle_moved(adev, vm);
+    r = amdgpu_vm_handle_moved(adev, vm, ticket);
    if (r && r != -EBUSY)
  DRM_ERROR("Failed to invalidate VM page tables (%d))\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index fc4563cf2828..726b42c6d606 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2190,11 +2190,12 @@ int amdgpu_vm_clear_freed(struct 
amdgpu_device *adev,

   * PTs have to be reserved!
   */
  int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
-   struct amdgpu_vm *vm)
+   struct amdgpu_vm *vm,
+   struct ww_acquire_ctx *ticket)
  {
  struct amdgpu_bo_va *bo_va, *tmp;
  struct dma_resv *resv;
-    bool clear;
+    bool clear, unlock;
  int r;
    list_for_each_entry_safe(bo_va, tmp, >moved, 
base.vm_status) {
@@ -2212,17 +2213,24 @@ int amdgpu_vm_handle_moved(struct 
amdgpu_device *adev,

  spin_unlock(>invalidated_lock);
    /* Try to reserve the BO to avoid clearing its ptes */
-    if (!amdgpu_vm_debug && dma_resv_trylock(resv))
+    if (!amdgpu_vm_debug && dma_resv_trylock(resv)) {
  clear = false;
+    unlock = true;
+    /* The caller is already holding the reservation lock */
+    } else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
+    clear = false;
+    unlock = false;
  /* Somebody else is using the BO right now */
-    else
+    } else {
  clear = true;
+    unlock = false;
+    }
    r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL);
  if (r)
  return r;
  -    if (!clear)
+    if (unlock)
  dma_resv_unlock(resv);
  spin_lock(>invalidated_lock);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

index a40a6a993bb0..120a76aaae75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -396,7 +396,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device 
*adev,

   

Re: radeon ring 0 test failed on arm64

2022-03-18 Thread Christian König

Am 18.03.22 um 12:24 schrieb Peter Geis:

On Fri, Mar 18, 2022 at 4:35 AM Christian König
 wrote:



Am 18.03.22 um 08:51 schrieb Kever Yang:


On 2022/3/17 20:19, Peter Geis wrote:

On Wed, Mar 16, 2022 at 11:08 PM Kever Yang  wrote:

Hi Peter,

On 2022/3/17 08:14, Peter Geis wrote:

Good Evening,

I apologize for raising this email chain from the dead, but there have
been some developments that have introduced even more questions.
I've looped the Rockchip mailing list into this too, as this affects
rk356x, and likely the upcoming rk3588 if [1] is to be believed.

TLDR for those not familiar: It seems the rk356x series (and possibly
the rk3588) were built without any outer coherent cache.
This means (unless Rockchip wants to clarify here) devices such as the
ITS and PCIe cannot utilize cache snooping.
This is based on the results of the email chain [2].

The new circumstances are as follows:
The RPi CM4 Adventure Team as I've taken to calling them has been
attempting to get a dGPU working with the very broken Broadcom
controller in the RPi CM4.
Recently they acquired a SoQuartz rk3566 module which is pin
compatible with the CM4, and have taken to trying it out as well.

This is how I got involved.
It seems they found a trivial way to force the Radeon R600 driver to
use Non-Cached memory for everything.
This single line change, combined with using memset_io instead of
memset, allows the ring tests to pass and the card probes successfully
(minus the DMA limitations of the rk356x due to the 32 bit
interconnect).
I discovered using this method that we start having unaligned io
memory access faults (bus errors) when running glmark2-drm (running
glmark2 directly was impossible, as both X and Wayland crashed too
early).
I traced this to using what I thought at the time was an unsafe memcpy
in the mesa stack.
Rewriting this function to force aligned writes solved the problem and
allows glmark2-drm to run to completion.
With some extensive debugging, I found about half a dozen memcpy
functions in mesa that if forced to be aligned would allow Wayland to
start, but with hilarious display corruption (see [3]. [4]).
The CM4 team is convinced this is an issue with memcpy in glibc, but
I'm not convinced it's that simple.

On my two hour drive in to work this morning, I got to thinking.
If this was an memcpy fault, this would be universally broken on arm64
which is obviously not the case.
So I started thinking, what is different here than with systems known to work:
1. No IOMMU for the PCIe controller.
2. The Outer Cache Issue.

Robin:
My questions for you, since you're the smartest person I know about
arm64 memory management:
Could cache snooping permit unaligned accesses to IO to be safe?
Or
Is it the lack of an IOMMU that's causing the alignment faults to become fatal?
Or
Am I insane here?

Rockchip:
Please update on the status for the Outer Cache errata for ITS services.

Our SoC design team has double-checked with the ARM GIC/ITS IP team many
times, and the GITS_CBASER
of the GIC600 IP does not support hardware binding or configuration to a fixed value, so
they insist this is an IP
limitation instead of a SoC bug; software should take care of it :(
I will check again if we can provide an erratum for this issue.

Thanks. This is necessary as the mbi-alias provides an imperfect
implementation of the ITS and causes certain PCIe cards (eg x520 Intel
10G NIC) to misbehave.

Please provide an answer to the errata of the PCIe controller, in
regard to cache snooping and buffering, for both the rk356x and the
upcoming rk3588.


Sorry, what is this?

Part of the ITS bug is it expects to be cache coherent with the CPU
cluster by design.
Due to the rk356x being implemented without an outer accessible cache,
the ITS and other devices that require cache coherency (PCIe for
example) crash in fun ways.

Then this is still the ITS issue, not PCIe issue.
PCIe is a peripheral bus controller like USB and other device, the driver should maintain 
the "cache coherency" if there is any, and there is no requirement for hardware 
cache coherency between PCIe and CPU.

Kever,

These issues are one and the same.


Well, that's not correct. You are still mixing two things up here:

1. The memory accesses from the device to the system memory must be 
coherent with the CPU cache. E.g. we root complex must snoop the CPU cache.
    That's a requirement of the PCIe spec. If you don't get that right 
a whole bunch of PCIe devices won't work correctly.


2. The memory accesses from the CPU to the devices PCIe BAR can be 
unaligned. E.g. a 64bit read can be aligned on a 32bit address.
    That is a requirement of the graphics stack. Other devices still 
might work fine without that.


Regards,
Christian.


Certain hardware blocks *require* cache coherency as part of their design.
All of the *interesting* things PCIe can do stem from it.

When I saw you bumped the available window to the PCIe controller to
1GB I was really excited, because that meant we could finally 

Re: radeon ring 0 test failed on arm64

2022-03-18 Thread Christian König



Am 18.03.22 um 08:51 schrieb Kever Yang:


On 2022/3/17 20:19, Peter Geis wrote:
On Wed, Mar 16, 2022 at 11:08 PM Kever Yang 
 wrote:

Hi Peter,

On 2022/3/17 08:14, Peter Geis wrote:

Good Evening,

I apologize for raising this email chain from the dead, but there have
been some developments that have introduced even more questions.
I've looped the Rockchip mailing list into this too, as this affects
rk356x, and likely the upcoming rk3588 if [1] is to be believed.

TLDR for those not familiar: It seems the rk356x series (and possibly
the rk3588) were built without any outer coherent cache.
This means (unless Rockchip wants to clarify here) devices such as the
ITS and PCIe cannot utilize cache snooping.
This is based on the results of the email chain [2].

The new circumstances are as follows:
The RPi CM4 Adventure Team as I've taken to calling them has been
attempting to get a dGPU working with the very broken Broadcom
controller in the RPi CM4.
Recently they acquired a SoQuartz rk3566 module which is pin
compatible with the CM4, and have taken to trying it out as well.

This is how I got involved.
It seems they found a trivial way to force the Radeon R600 driver to
use Non-Cached memory for everything.
This single line change, combined with using memset_io instead of
memset, allows the ring tests to pass and the card probes successfully
(minus the DMA limitations of the rk356x due to the 32 bit
interconnect).
I discovered using this method that we start having unaligned io
memory access faults (bus errors) when running glmark2-drm (running
glmark2 directly was impossible, as both X and Wayland crashed too
early).
I traced this to using what I thought at the time was an unsafe memcpy
in the mesa stack.
Rewriting this function to force aligned writes solved the problem and
allows glmark2-drm to run to completion.
With some extensive debugging, I found about half a dozen memcpy
functions in mesa that if forced to be aligned would allow Wayland to
start, but with hilarious display corruption (see [3]. [4]).
The CM4 team is convinced this is an issue with memcpy in glibc, but
I'm not convinced it's that simple.

On my two hour drive in to work this morning, I got to thinking.
If this was an memcpy fault, this would be universally broken on arm64
which is obviously not the case.
So I started thinking, what is different here than with systems 
known to work:

1. No IOMMU for the PCIe controller.
2. The Outer Cache Issue.

Robin:
My questions for you, since you're the smartest person I know about
arm64 memory management:
Could cache snooping permit unaligned accesses to IO to be safe?
Or
Is it the lack of an IOMMU that's causing the alignment faults to 
become fatal?

Or
Am I insane here?

Rockchip:
Please update on the status for the Outer Cache errata for ITS 
services.

Our SoC design team has double-checked with the ARM GIC/ITS IP team many
times, and the GITS_CBASER
of the GIC600 IP does not support hardware binding or configuration to a fixed 
value, so

they insist this is an IP
limitation instead of a SoC bug; software should take care of it :(
I will check again if we can provide an erratum for this issue.

Thanks. This is necessary as the mbi-alias provides an imperfect
implementation of the ITS and causes certain PCIe cards (eg x520 Intel
10G NIC) to misbehave.


Please provide an answer to the errata of the PCIe controller, in
regard to cache snooping and buffering, for both the rk356x and the
upcoming rk3588.


Sorry, what is this?

Part of the ITS bug is it expects to be cache coherent with the CPU
cluster by design.
Due to the rk356x being implemented without an outer accessible cache,
the ITS and other devices that require cache coherency (PCIe for
example) crash in fun ways.

Then this is still the ITS issue, not PCIe issue.
PCIe is a peripheral bus controller like USB and other device, the 
driver should maintain the "cache coherency" if there is any, and 
there is no requirement for hardware cache coherency between PCIe and CPU.


Well then I suggest to re-read the PCIe specification.

Cache coherency is defined as mandatory there. Non-cache coherency is an 
optional feature.


See section 2.2.6.5 in the PCIe 2.0 specification for a good example.

Regards,
Christian.



We didn't see any transfer error on rk356x PCIe till now, we can take 
a look if it's easy to reproduce.


Thanks,
- Kever


This means that rk356x cannot implement a specification compliant ITS 
or PCIe.

From the rk3588 source dump it appears it was produced without an
outer accessible cache, which means if true it also will be unable to
use any PCIe cards that implement cache coherency as part of their
design.



Thanks,
- Kever
[1] 

Re: [PATCH v2 1/2] drm: Add GPU reset sysfs event

2022-03-18 Thread Christian König

Am 17.03.22 um 18:31 schrieb Rob Clark:

On Thu, Mar 17, 2022 at 10:27 AM Daniel Vetter  wrote:

[SNIP]

(At some point, I'd like to use scheduler for the replay, and actually
use drm_sched_stop()/etc.. but last time I looked there were still
some sched bugs in that area which prevented me from deleting a bunch
of code ;-))

Not sure about your hw, but at least on intel replaying tends to just
result in follow-on fun. And that holds even more so the more complex a
workload is. This is why vk just dies immediately and does not try to
replay anything, offloading it to the app. Same with ARB robustness.
Afaik it's really only media and classic gl which insist that the driver
stack somehow recover.

At least for us, each submit must be self-contained (ie. not rely on
previous GPU hw state), so in practice replay works out pretty well.
The worst case is subsequent submits from same process fail as well
(if they depended on something that crashing submit failed to write
back to memory.. but in that case they just crash as well and we move
on to the next one.. the recent gens (a5xx+ at least) are pretty good
about quickly detecting problems and giving us an error irq.


Well I absolutely agree with Daniel.

The whole replay thing AMD did in the scheduler is an absolute mess 
and should probably be killed with fire.


I strongly recommend not to do the same mistake in other drivers.

If you want to have some replay feature then please make it driver 
specific and don't use anything from the infrastructure in the DRM 
scheduler.


Thanks,
Christian.



BR,
-R


And recovering from a mess in userspace is a lot simpler than trying to
pull of the same magic in the kernel. Plus it also helps with a few of the
dma_fence rules, which is a nice bonus.
-Daniel





Re: [PATCH v3] drm/amdgpu: add workarounds for VCN TMZ issue on CHIP_RAVEN

2022-03-18 Thread Paul Menzel

Dear Christian,


Am 16.03.22 um 11:08 schrieb Christian König:


Am 16.03.22 um 10:57 schrieb Paul Menzel:



Am 16.03.22 um 10:41 schrieb Christian König:

Am 16.03.22 um 07:21 schrieb Lang Yu:

On 03/16/ , Paul Menzel wrote:



Am 16.03.22 um 02:27 schrieb Lang Yu:

On 03/15/ , Paul Menzel wrote:

Am 14.03.22 um 03:45 schrieb Lang Yu:

Thank you for your patch. A shorter commit message summary would be:


drm/amdgpu: Work around VCN TMZ issue on CHIP_RAVEN
It is a hardware issue that VCN can't handle a GTT
backing stored TMZ buffer on CHIP_RAVEN series ASIC.

Where is that documented, and how can this be reproduced?

It is documented in AMD internal Confluence and JIRA.
Secure playback with a low VRAM config(thus TMZ buffer
will be allocated in the GTT domain) may reproduce this issue.

It’d be great if as much of the details from this non-publicly accessible
information could be added to the commit message, and a way to reproduce
this as there does not seem to be a test for this. (Also I guess a tag with
a reference to the internal issue would be acceptable, so in case more
question surface in the future.)

Thanks. I will add an internal link.


Lang, please don't!

This isn't an information which is expected to be made public.


Well, how are then even the AMD folks able to link a (upstream) commit 
to an issue?


Well quite simply: We don't do that since it isn't necessary.


What other ways do you (or future AMD developers) have then? (I would 
also use *helpful* or *useful*.)


(In two years, when maybe nobody of the current AMD developers work at 
AMD anymore, and a user bisects a problems to this patch I could imagine 
it would help the future AMD developers to have this connection.)


If it’s not possible, even more detailed information about the issue 
including how to reproduce it needs to be part of the commit message.


No, why should we do that? It's an AMD internal hardware problem which 
we add a software workaround for here. The hardware details why and what 
are completely irrelevant to the public.


All that we need to document is that VCN can't handle GTT on Raven, and 
that's exactly what the commit message is doing. That's perfectly enough 
to write a test case.


Thank you for clarifying, but I am not interested in the hardware 
details, but how to reproduce and test the issue. And according to Lang 
this information is present in the issue. Seeing how complex the 
graphics driver are, a lot of documentation is not publicly available, a 
recipe to manually reproduce and test the issue is most helpful.



Kind regards,

Paul