[PATCH 3/4] drm/amd/display: add doc entries for MPC blending configuration
Describe structs and enums used to set blend mode properties to MPC blocks. Some pieces of information are already available as code comments, and were just formatted. Others were collected and summarised from discussions on AMD issue tracker[1][2]. [1] https://gitlab.freedesktop.org/drm/amd/-/issues/1734 [2] https://gitlab.freedesktop.org/drm/amd/-/issues/1769 Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h | 91 + 1 file changed, 77 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 5097037e3962..cf28b841c42d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -22,6 +22,16 @@ * */ +/** + * DOC: mpc-overview + * + * Multiple Pipe/Plane Combined (MPC) is a component in the hardware pipeline + * that performs blending of multiple planes, using global and per-pixel alpha. + * It also performs post-blending color correction operations according to the + * hardware capabilities, such as color transformation matrix and gamma 1D and + * 3D LUT. 
+ */ + #ifndef __DC_MPCC_H__ #define __DC_MPCC_H__ @@ -48,14 +58,39 @@ enum mpcc_blend_mode { MPCC_BLEND_MODE_TOP_BOT_BLENDING }; +/** + * enum mpcc_alpha_blend_mode - define the alpha blend mode regarding pixel + * alpha and plane alpha values + */ enum mpcc_alpha_blend_mode { + /** +* @MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA: per pixel alpha using DPP +* alpha value +*/ MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA, + /** +* @MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN: per +* pixel alpha using DPP alpha value multiplied by a global gain (plane +* alpha) +*/ MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN, + /** +* @MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA: global alpha value, ignores +* pixel alpha and consider only plane alpha +*/ MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA }; -/* - * MPCC blending configuration +/** + * struct mpcc_blnd_cfg - MPCC blending configuration + * + * @black_color: background color + * @alpha_mode: alpha blend mode (MPCC_ALPHA_BLND_MODE) + * @pre_multiplied_alpha: whether pixel color values were pre-multiplied by the + * alpha channel (MPCC_ALPHA_MULTIPLIED_MODE) + * @global_gain: used when blend mode considers both pixel alpha and plane + * alpha value and assumes the global alpha value. + * @global_alpha: plane alpha value */ struct mpcc_blnd_cfg { struct tg_color black_color;/* background color */ @@ -107,8 +142,15 @@ struct mpc_dwb_flow_control { int flow_ctrl_cnt1; }; -/* - * MPCC connection and blending configuration for a single MPCC instance. +/** + * struct mpcc - MPCC connection and blending configuration for a single MPCC instance. + * @mpcc_id: MPCC physical instance + * @dpp_id: DPP input to this MPCC + * @mpcc_bot: pointer to bottom layer MPCC. NULL when not connected. + * @blnd_cfg: the blending configuration for this MPCC + * @sm_cfg: stereo mix setting for this MPCC + * @shared_bottom: if MPCC output to both OPP and DWB endpoints, true. Othewise, false. + * * This struct is used as a node in an MPC tree. 
*/ struct mpcc { @@ -120,8 +162,12 @@ struct mpcc { bool shared_bottom; /* TRUE if MPCC output to both OPP and DWB endpoints, else FALSE */ }; -/* - * MPC tree represents all MPCC connections for a pipe. +/** + * struct mpc_tree - MPC tree represents all MPCC connections for a pipe. + * + * @opp_id: the OPP instance that owns this MPC tree + * @opp_list: the top MPCC layer of the MPC tree that outputs to OPP endpoint + * */ struct mpc_tree { int opp_id; /* The OPP instance that owns this MPC tree */ @@ -149,13 +195,18 @@ struct mpcc_state { uint32_t busy; }; +/** + * struct mpc_funcs - funcs + */ struct mpc_funcs { void (*read_mpcc_state)( struct mpc *mpc, int mpcc_inst, struct mpcc_state *s); - /* + /** +* @insert_plane: +* * Insert DPP into MPC tree based on specified blending position. * Only used for planes that are part of blending chain for OPP output * @@ -180,7 +231,9 @@ struct mpc_funcs { int dpp_id, int mpcc_id); - /* + /** +* @remove_mpcc: +* * Remove a specified MPCC from the MPC tree. * * Parameters: @@ -195,7 +248,9 @@ struct mpc_funcs { struct mpc_tree *tree, struct mpcc *mpcc); - /* + /** +* @mpc_init: +* * Reset the MPCC HW status by disconnecting all muxes. * * Parameters: @@ -208,7 +263,9 @@ struct mpc_funcs {
[PATCH 4/4] Documentation/gpu/amdgpu/amdgpu_dm: add DM docs for pixel blend mode
AMD GPU display manager (DM) maps DRM pixel blend modes (None, Pre-multiplied, Coverage) to MPC hw blocks through blend configuration options. Describe relevant elements and how to set and test them to get the expected DRM blend mode on DCN hw. Signed-off-by: Melissa Wen --- .../gpu/amdgpu/display/display-manager.rst| 98 +++ Documentation/gpu/drm-kms.rst | 2 + 2 files changed, 100 insertions(+) diff --git a/Documentation/gpu/amdgpu/display/display-manager.rst b/Documentation/gpu/amdgpu/display/display-manager.rst index 8960a5f1fa66..7a495ed1f69e 100644 --- a/Documentation/gpu/amdgpu/display/display-manager.rst +++ b/Documentation/gpu/amdgpu/display/display-manager.rst @@ -84,3 +84,101 @@ families below. **DCN 3.0 family color caps and mapping** .. kernel-figure:: dcn3_cm_drm_current.svg + +Blend Mode Properties += + +Pixel blend mode is a DRM plane composition property of :c:type:`drm_plane` used to +describes how pixels from a foreground plane (fg) are composited with the +background plane (bg). Here, we present main concepts of DRM blend mode to help +to understand how this property is mapped to AMD DC interface. See more about +this DRM property and the alpha blending equations in :ref:`DRM Plane +Composition Properties `. + +Basically, a blend mode sets the alpha blending equation for plane +composition that fits the mode in which the alpha channel affects the state of +pixel color values and, therefore, the resulted pixel color. For +example, consider the following elements of the alpha blending equation: + +- *fg.rgb*: Each of the RGB component values from the foreground's pixel. +- *fg.alpha*: Alpha component value from the foreground's pixel. +- *bg.rgb*: Each of the RGB component values from the background. +- *plane_alpha*: Plane alpha value set by the **plane "alpha" property**, see + more in `DRM Plane Composition Properties `. 
+ +in the basic alpha blending equation:: + + out.rgb = alpha * fg.rgb + (1 - alpha) * bg.rgb + +the alpha channel value of each pixel in a plane is ignored and only the plane +alpha affects the resulted pixel color values. + +DRM has three blend mode to define the blend formula in the plane composition: + +* **None**: Blend formula that ignores the pixel alpha. + +* **Pre-multiplied**: Blend formula that assumes the pixel color values in a + plane was already pre-multiplied by its own alpha channel before storage. + +* **Coverage**: Blend formula that assumes the pixel color values were not + pre-multiplied with the alpha channel values. + +and pre-multiplied is the default pixel blend mode, that means, when no blend +mode property is created or defined, DRM considers the plane's pixels has +pre-multiplied color values. On IGT GPU tools, the kms_plane_alpha_blend test +provides a set of subtests to verify plane alpha and blend mode properties. + +The DRM blend mode and its elements are then mapped by AMDGPU display manager +(DM) to program the blending configuration of the Multiple Pipe/Plane Combined +(MPC), as follows: + +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h + :doc: mpc-overview + +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h + :functions: mpcc_blnd_cfg + +Therefore, the blending configuration for a single MPCC instance on the MPC +tree is defined by :c:type:`mpcc_blnd_cfg`, where +:c:type:`pre_multiplied_alpha` is the alpha pre-multiplied mode flag used to +set :c:type:`MPCC_ALPHA_MULTIPLIED_MODE`. It controls whether alpha is +multiplied (true/false), being only true for DRM pre-multiplied blend mode. +:c:type:`mpcc_alpha_blend_mode` defines the alpha blend mode regarding pixel +alpha and plane alpha values. It sets one of the three modes for +:c:type:`MPCC_ALPHA_BLND_MODE`, as described below. + +.. 
kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h + :functions: mpcc_alpha_blend_mode + +DM then maps the elements of `enum mpcc_alpha_blend_mode` to those in the DRM +blend formula, as follows: + +* *MPC pixel alpha* matches *DRM fg.alpha* as the alpha component value + from the plane's pixel +* *MPC global alpha* matches *DRM plane_alpha* when the pixel alpha should + be ignored and, therefore, pixel values are not pre-multiplied +* *MPC global gain* assumes *MPC global alpha* value when both *DRM + fg.alpha* and *DRM plane_alpha* participate in the blend equation + +In short, *fg.alpha* is ignored by selecting +:c:type:`MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA`. On the other hand, (plane_alpha * +fg.alpha) component becomes available by selecting +:c:type:`MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN`. And the +:c:type:`MPCC_ALPHA_MULTIPLIED_MODE` defines if the pixel color values are +pre-multiplied by alpha or not. + +Blend configuration flow + + +The alpha blending equation is configured from DRM to DC interface by the +following path: + +1. When updating a :c:type:`drm_plane_state `, DM calls +
[PATCH 2/4] Documentation/amdgpu/display: add DC color caps info
Add details about color correction capabilities and explain a bit about differences between DC hw generations and also how they are mapped between DRM and DC interface. Two schemas for DCN 2.0 and 3.0 (converted to svg from the original png) is included to illustrate it. They were obtained from a discussion[1] in the amd-gfx mailing list. [1] https://lore.kernel.org/amd-gfx/20220422142811.dm6vtk6v64jcw...@mail.igalia.com/ v2: - remove redundant comments (Harry) - fix typo (Harry) Signed-off-by: Melissa Wen --- .../amdgpu/display/dcn2_cm_drm_current.svg| 1370 +++ .../amdgpu/display/dcn3_cm_drm_current.svg| 1529 + .../gpu/amdgpu/display/display-manager.rst| 35 + drivers/gpu/drm/amd/display/dc/dc.h | 74 +- 4 files changed, 2995 insertions(+), 13 deletions(-) create mode 100644 Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg create mode 100644 Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg diff --git a/Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg b/Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg new file mode 100644 index ..315ffc5a1a4b --- /dev/null +++ b/Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg @@ -0,0 +1,1370 @@ + + + +http://www.inkscape.org/namespaces/inkscape; + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd; + xmlns="http://www.w3.org/2000/svg; + xmlns:svg="http://www.w3.org/2000/svg;> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Matrix +1D LUT +3D LUT +Unpacking +Other +drm_framebuffer +format +drm_plane +drm_crtc +Stream +MPC +DPP + +Blender +Degamma +CTM +Gamma +format +bias_and_scale +color space matrix +input_csc_color_matrix +in_transfer_func +hdr_mult +gamut_remap_matrix +in_shaper_func +lut3d_func +blend_tf +Blender +gamut_remap_matrix +func_shaper +lut3d_func +out_transfer_func +csc_color_matrix +bit_depth_param +clamping +output_color_space +Plane 
+Legend +DCN 2.0 +DC Interface +DRM Interface + +CNVC +Input CSC +DeGammaRAM and ROM(sRGB, BT2020 +HDR Multiply +Gamut Remap +Shaper LUTRAM +3D LUTRAM +Blend Gamma +Blender +GammaRAM +OCSC + + +color_encoding + +pixel_blend_mode + +color_range + + + + + + + + + + + + + + diff --git a/Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg b/Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg new file mode 100644 index ..7299ee9b6d64 --- /dev/null +++ b/Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg @@ -0,0 +1,1529 @@ + + + +http://www.inkscape.org/namespaces/inkscape; + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd; + xmlns="http://www.w3.org/2000/svg; + xmlns:svg="http://www.w3.org/2000/svg;> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Matrix +1D LUT +3D LUT +Unpacking +Other +drm_framebuffer +format +drm_plane +drm_crtc +Stream +MPC +DPP + +Blender +Degamma +CTM +Gamma +format +bias_and_scale +color space matrix +input_csc_color_matrix +in_transfer_func +hdr_mult +gamut_remap_matrix +in_shaper_func +lut3d_func +blend_tf +Blender +gamut_remap_matrix +func_shaper +lut3d_func +out_transfer_func +csc_color_matrix +bit_depth_param +clamping +output_color_space +Plane +Legend +DCN 3.0 +DC Interface +DRM Interface + +CNVC +Input CSC +DeGammaROM(sRGB, BT2020, Gamma 2.2,PQ, HLG) +Post CSC +Gamma Correction +HDR Multiply +Gamut Remap +Shaper LUTRAM +3D LUTRAM +Blend Gamma +Blender +Gamut Remap +Shaper LUTRAM +3D LUTRAM +GammaRAM +OCSC + + +color_encoding + +
[PATCH 1/4] Documentation/amdgpu_dm: Add DM color correction documentation
AMDGPU DM maps DRM color management properties (degamma, ctm and gamma) to DC color correction entities. Part of this mapping is already documented as code comments and can be converted as kernel docs. v2: - rebase to amd-staging-drm-next Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen --- .../gpu/amdgpu/display/display-manager.rst| 9 ++ .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 121 +- 2 files changed, 98 insertions(+), 32 deletions(-) diff --git a/Documentation/gpu/amdgpu/display/display-manager.rst b/Documentation/gpu/amdgpu/display/display-manager.rst index 7ce31f89d9a0..b1b0f11aed83 100644 --- a/Documentation/gpu/amdgpu/display/display-manager.rst +++ b/Documentation/gpu/amdgpu/display/display-manager.rst @@ -40,3 +40,12 @@ Atomic Implementation .. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c :functions: amdgpu_dm_atomic_check amdgpu_dm_atomic_commit_tail + +Color Management Properties +=== + +.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c + :doc: overview + +.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c + :internal: diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index a71177305bcd..93c813089bff 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -29,7 +29,9 @@ #include "modules/color/color_gamma.h" #include "basics/conversion.h" -/* +/** + * DOC: overview + * * The DC interface to HW gives us the following color management blocks * per pipe (surface): * @@ -71,8 +73,8 @@ #define MAX_DRM_LUT_VALUE 0x -/* - * Initialize the color module. +/** + * amdgpu_dm_init_color_mod - Initialize the color module. * * We're not using the full color module, only certain components. * Only call setup functions for components that we need. 
@@ -82,7 +84,14 @@ void amdgpu_dm_init_color_mod(void) setup_x_points_distribution(); } -/* Extracts the DRM lut and lut size from a blob. */ +/** + * __extract_blob_lut - Extracts the DRM lut and lut size from a blob. + * @blob: DRM color mgmt property blob + * @size: lut size + * + * Returns: + * DRM LUT or NULL + */ static const struct drm_color_lut * __extract_blob_lut(const struct drm_property_blob *blob, uint32_t *size) { @@ -90,13 +99,18 @@ __extract_blob_lut(const struct drm_property_blob *blob, uint32_t *size) return blob ? (struct drm_color_lut *)blob->data : NULL; } -/* - * Return true if the given lut is a linear mapping of values, i.e. it acts - * like a bypass LUT. +/** + * __is_lut_linear - check if the given lut is a linear mapping of values + * @lut: given lut to check values + * @size: lut size * * It is considered linear if the lut represents: - * f(a) = (0xFF00/MAX_COLOR_LUT_ENTRIES-1)a; for integer a in - * [0, MAX_COLOR_LUT_ENTRIES) + * f(a) = (0xFF00/MAX_COLOR_LUT_ENTRIES-1)a; for integer a in [0, + * MAX_COLOR_LUT_ENTRIES) + * + * Returns: + * True if the given lut is a linear mapping of values, i.e. it acts like a + * bypass LUT. Otherwise, false. */ static bool __is_lut_linear(const struct drm_color_lut *lut, uint32_t size) { @@ -119,9 +133,13 @@ static bool __is_lut_linear(const struct drm_color_lut *lut, uint32_t size) return true; } -/* - * Convert the drm_color_lut to dc_gamma. The conversion depends on the size - * of the lut - whether or not it's legacy. +/** + * __drm_lut_to_dc_gamma - convert the drm_color_lut to dc_gamma. + * @lut: DRM lookup table for color conversion + * @gamma: DC gamma to set entries + * @is_legacy: legacy or atomic gamma + * + * The conversion depends on the size of the lut - whether or not it's legacy. 
*/ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut, struct dc_gamma *gamma, bool is_legacy) @@ -154,8 +172,11 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut, } } -/* - * Converts a DRM CTM to a DC CSC float matrix. +/** + * __drm_ctm_to_dc_matrix - converts a DRM CTM to a DC CSC float matrix + * @ctm: DRM color transformation matrix + * @matrix: DC CSC float matrix + * * The matrix needs to be a 3x4 (12 entry) matrix. */ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, @@ -189,7 +210,18 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, } } -/* Calculates the legacy transfer function - only for sRGB input space. */ +/** + * __set_legacy_tf - Calculates the legacy transfer function + * @func: transfer function + * @lut: lookup table that defines the color space + * @lut_size: size of respective lut + * @has_rom: if ROM can be used for hardcoded curve + * + * Only for sRGB input space + * + * Returns: + * 0 in case of sucess, -ENOMEM if fails + */ static int
[PATCH 0/4] Documentation/amdgpu/display: describe color and blend mode properties mapping
Patches 1 and 2 describe DM mapping of DRM color correction properties to DC interface and were detached from 3D LUT RFC series [1]. Patches 3 and 4 describe MPC block programming that matches the three DRM blend modes and came from previous work [2][3] and discussions on AMD issue tracker. Let me know any misleading information. [1] https://lore.kernel.org/amd-gfx/20220619223104.667413-1-m...@igalia.com/ [2] https://lore.kernel.org/amd-gfx/20220329201835.2393141-1-m...@igalia.com/ [3] https://lore.kernel.org/amd-gfx/7a95d6a4-bc2f-b0e8-83f8-8cc5b7559...@amd.com/ Melissa Wen (4): Documentation/amdgpu_dm: Add DM color correction documentation Documentation/amdgpu/display: add DC color caps info drm/amd/display: add doc entries for MPC blending configuration Documentation/gpu/amdgpu/amdgpu_dm: add DM docs for pixel blend mode .../amdgpu/display/dcn2_cm_drm_current.svg| 1370 +++ .../amdgpu/display/dcn3_cm_drm_current.svg| 1529 + .../gpu/amdgpu/display/display-manager.rst| 142 ++ Documentation/gpu/drm-kms.rst |2 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 121 +- drivers/gpu/drm/amd/display/dc/dc.h | 74 +- drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h | 91 +- 7 files changed, 3270 insertions(+), 59 deletions(-) create mode 100644 Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg create mode 100644 Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg -- 2.35.1
[PATCH] drm/amd/display: move dcn31_update_soc_for_wm_a func to dml fpu folder
Although dcn31_update_soc_for_wm_a() is only called in dml/dcn31/dcn31_fpu by dc->res_pool->funcs->update_soc_for_wm_a(dc, context), it's declared in dcn31_resource that is not FPU protected. Move this function to dcn31_fpu file as part of the work to isolate FPU code. Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 9 - drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h | 1 - drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 9 + drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h | 2 ++ 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 16bbccc69fdc..17c776e88514 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1716,15 +1716,6 @@ int dcn31_populate_dml_pipes_from_context( return pipe_cnt; } -void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) -{ - if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us; - } -} - void dcn31_calculate_wm_and_dlg( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h index 393458015d6a..41f8ec99da6b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h @@ -59,7 +59,6 @@ dcn31_set_mcif_arb_params(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt); -void 
dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); struct resource_pool *dcn31_create_resource_pool( const struct dc_init_data *init_data, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index 7be3476989ce..facac3daeaca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -435,6 +435,15 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; +void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) +{ + if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us; + } +} + void dcn31_calculate_wm_and_dlg_fp( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h index 24ac19c83687..0a10de80c1a4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h @@ -31,6 +31,8 @@ #define DCN3_15_MIN_COMPBUF_SIZE_KB 128 #define DCN3_16_DEFAULT_DET_SIZE 192 +void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); + void dcn31_calculate_wm_and_dlg_fp( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, -- 2.35.1
Re: [PATCH] drm/amd/pm: enable mode1 reset for smu_v13_0_7
[AMD Official Use Only - General] Reviewed-by: Yang Wang Best Regards, Kevin From: amd-gfx on behalf of Kenneth Feng Sent: Saturday, July 16, 2022 12:43 PM To: amd-gfx@lists.freedesktop.org Cc: Feng, Kenneth Subject: [PATCH] drm/amd/pm: enable mode1 reset for smu_v13_0_7 enable mode1 reset for smu_v13_0_7 since it's missing. Signed-off-by: Kenneth Feng --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 765c3543ad18..00e9b7089feb 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -320,6 +320,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 7): return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): return AMD_RESET_METHOD_MODE2; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 6259a85bc818..6f0548714566 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -118,6 +118,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0), MSG_MAP(AllowGfxOff,PPSMC_MSG_AllowGfxOff, 0), MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 0), + MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), MSG_MAP(PrepareMp1ForUnload,PPSMC_MSG_PrepareMp1ForUnload, 0), }; -- 2.25.1
Re: [PATCH v2] drm/amdgpu: add HW_IP_VCN_UNIFIED type
Am 15.07.22 um 17:25 schrieb Dong, Ruijing: [AMD Official Use Only - General] Why exactly do we need a new define for this? Essentially the encode queue is extended with new functionality, isn't it? So I think we should just stick to AMDGPU_HW_IP_VCN_ENC and not add an alias for it. Yes, it extended the encode queue to include new functionality, and that looks little confused when send decoding jobs to the encoding queue. Then I assume this bias can reduce the confusion. Does this change make sense in this regard? certainly we can stick to AMDGPU_HW_IP_VCN_ENC. I'm a bit on the edge with that. On the one hand I agree with you that using AMDGPU_HW_IP_VCN_ENC for decoding is then a bit confusing, but on the other hand adding another enum with the same value as AMDGPU_HW_IP_VCN_ENC might be even more confusing. I think the best middle way would be to at least add a comment explaining what's going on. Regards, Christian. Thanks, Ruijing -Original Message- From: Koenig, Christian Sent: Friday, July 15, 2022 11:18 AM To: Dong, Ruijing ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Liu, Leo Subject: Re: [PATCH v2] drm/amdgpu: add HW_IP_VCN_UNIFIED type Am 15.07.22 um 16:44 schrieb Ruijing Dong: Define HW_IP_VCN_UNIFIED type the same as HW_IP_VCN_ENC. VCN4 support for libdrm needs a new definition for the unified queue, so that it can align to the kernel. link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits Signed-off-by: Ruijing Dong --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..fe33db8441bc 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -560,6 +560,7 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_UVD_ENC 5 #define AMDGPU_HW_IP_VCN_DEC 6 #define AMDGPU_HW_IP_VCN_ENC 7 +#define AMDGPU_HW_IP_VCN_UNIFIED AMDGPU_HW_IP_VCN_ENC Why exactly do we need a new define for this? 
Essentially the encode queue is extended with new functionality, isn't it? So I think we should just stick to AMDGPU_HW_IP_VCN_ENC and not add an alias for it. Regards, Christian. #define AMDGPU_HW_IP_VCN_JPEG 8 #define AMDGPU_HW_IP_NUM 9
[PATCH v4] drm/amdgpu: add HW_IP_VCN_UNIFIED type
From VCN4, AMDGPU_HW_IP_VCN_UNIFIED is used to support both encoding and decoding jobs, it re-uses the same queue number of AMDGPU_HW_IP_VCN_ENC. link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits Signed-off-by: Ruijing Dong --- include/uapi/drm/amdgpu_drm.h | 6 ++ 1 file changed, 6 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..e268cd3cdb12 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -560,6 +560,12 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_UVD_ENC 5 #define AMDGPU_HW_IP_VCN_DEC 6 #define AMDGPU_HW_IP_VCN_ENC 7 +/** + * From VCN4, AMDGPU_HW_IP_VCN_UNIFIED is used to support + * both encoding and decoding jobs, it re-uses the same + * queue number of AMDGPU_HW_IP_VCN_ENC. + */ +#define AMDGPU_HW_IP_VCN_UNIFIED AMDGPU_HW_IP_VCN_ENC #define AMDGPU_HW_IP_VCN_JPEG 8 #define AMDGPU_HW_IP_NUM 9 -- 2.25.1
Re: Linux 5.19-rc6
Hi Russell, On Fri, Jul 15, 2022 at 12:34 AM Russell Currey wrote: > > Hi Linus, > > On Wed, 2022-07-13 at 14:32 -0700, Linus Torvalds wrote: > > On Wed, Jul 13, 2022 at 2:01 PM Alex Deucher > > wrote: > > > > > > If you want to apply Guenter's patch original patch: > > > https://patchwork.freedesktop.org/patch/490184/ > > > That's fine with me. > > > > Honestly, by this time I feel that it's too little, too late. > > > > The ppc people apparently didn't care at all about the fact that this > > driver didn't compile. > > > > At least Michael Ellerman and Daniel Axtens were cc'd on that thread > > with the proposed fix originally. > > > > I don't see any replies from ppc people as to why it happened, even > > though apparently a bog-standard "make allmodconfig" just doesn't > > build. > > I believe Michael Ellerman has been on holiday for some time, and > Daniel Axtens no longer works on powerpc (and wasn't the one that > submitted the patch, it was submitted by Paul Mackerras, who wasn't on > CC). > > The proposed fix didn't get sent to linuxppc-dev either, so it's > unlikely many ppc people knew about it. > > We certainly should have noticed allmodconfig was broken, and should > have more than just Michael keeping an eye on all his automated builds. Not sure if I have added the correct people in my other mail, but that's also ppc allmodconfig with gcc-12. https://lore.kernel.org/lkml/Ys%2FaDKZNhhsENH9S@debian/ -- Regards Sudip
[PATCH v9 00/14] Add MEMORY_DEVICE_COHERENT for coherent device memory mapping
This is our MEMORY_DEVICE_COHERENT patch series rebased and updated for current 5.19.0-rc6 Changes since the last version: - Fixed problems with migration during long-term pinning in get_user_pages - Open coded vm_normal_lru_pages as suggested in previous code review - Update hmm_gup_test with more get_user_pages calls, include hmm_cow_in_device in hmm-test. This patch series introduces MEMORY_DEVICE_COHERENT, a type of memory owned by a device that can be mapped into CPU page tables like MEMORY_DEVICE_GENERIC and can also be migrated like MEMORY_DEVICE_PRIVATE. This patch series is mostly self-contained except for a few places where it needs to update other subsystems to handle the new memory type. System stability and performance are not affected according to our ongoing testing, including xfstests. How it works: The system BIOS advertises the GPU device memory (aka VRAM) as SPM (special purpose memory) in the UEFI system address map. The amdgpu driver registers the memory with devmap as MEMORY_DEVICE_COHERENT using devm_memremap_pages. The initial user for this hardware page migration capability is the Frontier supercomputer project. This functionality is not AMD-specific. We expect other GPU vendors to find this functionality useful, and possibly other hardware types in the future. Our test nodes in the lab are similar to the Frontier configuration, with .5 TB of system memory plus 256 GB of device memory split across 4 GPUs, all in a single coherent address space. Page migration is expected to improve application efficiency significantly. We will report empirical results as they become available. Coherent device type pages at gup are now migrated back to system memory if they are being pinned long-term (FOLL_LONGTERM). The reason is, that long-term pinning would interfere with the device memory manager owning the device-coherent pages (e.g. evictions in TTM). These series incorporate Alistair Popple patches to do this migration from pin_user_pages() calls. 
hmm_gup_test has been added to hmm-test to test different get user pages calls. This series includes handling of device-managed anonymous pages returned by vm_normal_pages. Although they behave like normal pages for purposes of mapping in CPU page tables and for COW, they do not support LRU lists, NUMA migration or THP. We also introduced a FOLL_LRU flag that adds the same behaviour to follow_page and related APIs, to allow callers to specify that they expect to put pages on an LRU list. v2: - Rebase to latest 5.18-rc7. - Drop patch "mm: add device coherent checker to remove migration pte" and modify try_to_migrate_one, to let DEVICE_COHERENT pages fall through to normal page path. Based on Alistair Popple's comment. - Fix comment formatting. - Reword comment in vm_normal_page about pte_devmap(). - Merge "drm/amdkfd: coherent type as sys mem on migration to ram" to "drm/amdkfd: add SPM support for SVM". v3: - Rebase to latest 5.18.0. - Patch "mm: handling Non-LRU pages returned by vm_normal_pages" reordered. - Add WARN_ON_ONCE for thp device coherent case. v4: - Rebase to latest 5.18.0 - Fix consitency between pages with FOLL_LRU flag set and pte_devmap at follow_page_pte. v5: - Remove unused zone_device_type from lib/test_hmm and selftest/vm/hmm-test.c. v6: - Rebase to 5.19.0-rc4 - Rename is_pinnable_page to is_longterm_pinnable_page and add a coherent device checker. - Add a new gup test to hmm-test to cover fast pinnable case with FOLL_LONGTERM. v7: - Reorder patch series. - Remove FOLL_LRU and check on each caller for LRU pages handling instead. v8: - Add "mm: move page zone helpers into new header-specific file" patch. The intention is to centralize all page zone helpers and keep them independent from mm.h and memremap.h. v9: - Rebase to 5.19.0-rc6 - Include latest Alistair's patch "mm/gup: migrate device coherent pages when pinning instead of failing" with changes based on David Hildenbrand comments. 
- Replace moving page zone helpers into new header-specific file. Instead, those were moved to mmzone.h. Patch "mm: move page zone helpers from mm.h to mmzone.h" Alex Sierra (13): mm: rename is_pinnable_pages to is_longterm_pinnable_pages mm: move page zone helpers from mm.h to mmzone.h mm: add zone device coherent type memory support mm: handling Non-LRU pages returned by vm_normal_pages mm: add device coherent vma selection for memory migration drm/amdkfd: add SPM support for SVM lib: test_hmm add ioctl to get zone device type lib: test_hmm add module param for zone device type lib: add support for device coherent type in test_hmm tools: update hmm-test to support device coherent type tools: update test_hmm script to support SP config tools: add hmm gup tests for device coherent type tools: add selftests to hmm for COW in device memory Alistair Popple (1): mm/gup: migrate device coherent pages when pinning instead of failing drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 34 ++- fs/proc/task_mmu.c
Re: [PATCH] drm/amdgpu: align between libdrm and drm api
Hi Ruijing, ok in this case please prepare a kernel patch and send it to the mailing list with full description why we do this change. Thanks, Christian. Am 15.07.22 um 15:33 schrieb Dong, Ruijing: [AMD Official Use Only - General] Hi Christian, You are right, when process the libdrm code review (not committed yet), we realized the corresponding file needs to align to the kernel. So we will need to have this header file changed first, then to process libdrm code again. Thanks, Ruijing -Original Message- From: Christian König Sent: Friday, July 15, 2022 4:41 AM To: Dong, Ruijing ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Liu, Leo ; Koenig, Christian Subject: Re: [PATCH] drm/amdgpu: align between libdrm and drm api Am 14.07.22 um 23:22 schrieb Ruijing Dong: define HW_IP_VCN_UNIFIED the same as HW_IP_VCN_ENC Usually that should be the other way around, libdrm aligns to the kernel. Why was that modification committed to libdrm first? There are usually plenty of warnings before we can do that. Regards, Christian. Signed-off-by: Ruijing Dong --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..fe33db8441bc 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -560,6 +560,7 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_UVD_ENC 5 #define AMDGPU_HW_IP_VCN_DEC 6 #define AMDGPU_HW_IP_VCN_ENC 7 +#define AMDGPU_HW_IP_VCN_UNIFIED AMDGPU_HW_IP_VCN_ENC #define AMDGPU_HW_IP_VCN_JPEG 8 #define AMDGPU_HW_IP_NUM 9
Re: [PATCH] drm/amdgpu: Fix for drm buddy memory corruption
Am 14.07.22 um 12:12 schrieb Arunpravin Paneer Selvam: User reported gpu page fault when running graphics applications and in some cases garbaged graphics are observed as soon as X starts. This patch fixes all the issues. Fixed the typecast issue for fpfn and lpfn variables, thus preventing the overflow problem which resolves the memory corruption. Signed-off-by: Arunpravin Paneer Selvam Reported-by: Mike Lothian Tested-by: Mike Lothian Reviewed-by: Christian König I've re-applied the patches to drm-misc-next, solved the conflict in drm-tip and then pushed this to drm-misc-next-fixes. With a little bit of luck everything should now be in place, but fingers crossed. Regards, Christian. --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 16 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 49e4092f447f..34d789054ec8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -366,11 +366,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, unsigned long pages_per_block; int r; - lpfn = place->lpfn << PAGE_SHIFT; + lpfn = (u64)place->lpfn << PAGE_SHIFT; if (!lpfn) lpfn = man->size; - fpfn = place->fpfn << PAGE_SHIFT; + fpfn = (u64)place->fpfn << PAGE_SHIFT; max_bytes = adev->gmc.mc_vram_size; if (tbo->type != ttm_bo_type_kernel) @@ -410,12 +410,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; - remaining_size = vres->base.num_pages << PAGE_SHIFT; + remaining_size = (u64)vres->base.num_pages << PAGE_SHIFT; mutex_lock(>lock); while (remaining_size) { if (tbo->page_alignment) - min_block_size = tbo->page_alignment << PAGE_SHIFT; + min_block_size = (u64)tbo->page_alignment << PAGE_SHIFT; else min_block_size = mgr->default_page_size; @@ -424,12 
+424,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Limit maximum size to 2GiB due to SG table limitations */ size = min(remaining_size, 2ULL << 30); - if (size >= pages_per_block << PAGE_SHIFT) - min_block_size = pages_per_block << PAGE_SHIFT; + if (size >= (u64)pages_per_block << PAGE_SHIFT) + min_block_size = (u64)pages_per_block << PAGE_SHIFT; cur_size = size; - if (fpfn + size != place->lpfn << PAGE_SHIFT) { + if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) { /* * Except for actual range allocation, modify the size and * min_block_size conforming to continuous flag enablement @@ -469,7 +469,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, LIST_HEAD(temp); trim_list = >blocks; - original_size = vres->base.num_pages << PAGE_SHIFT; + original_size = (u64)vres->base.num_pages << PAGE_SHIFT; /* * If size value is rounded up to min_block_size, trim the last diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index 9a2db87186c7..bef0f561ba60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -50,7 +50,7 @@ static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block) static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) { - return PAGE_SIZE << drm_buddy_block_order(block); + return (u64)PAGE_SIZE << drm_buddy_block_order(block); } static inline struct drm_buddy_block *
[PATCH] drm/amd/pm: enable mode1 reset for smu_v13_0_7
enable mode1 reset for smu_v13_0_7 since it's missing. Signed-off-by: Kenneth Feng --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 765c3543ad18..00e9b7089feb 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -320,6 +320,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 7): return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): return AMD_RESET_METHOD_MODE2; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 6259a85bc818..6f0548714566 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -118,6 +118,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0), MSG_MAP(AllowGfxOff,PPSMC_MSG_AllowGfxOff, 0), MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 0), + MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), MSG_MAP(PrepareMp1ForUnload,PPSMC_MSG_PrepareMp1ForUnload, 0), }; -- 2.25.1
RE: [PATCH v4] drm/amdgpu: add HW_IP_VCN_UNIFIED type
[AMD Official Use Only - General] Reviewed-by: Leo Liu -Original Message- From: Dong, Ruijing Sent: July 15, 2022 4:04 PM To: Koenig, Christian ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Liu, Leo ; Dong, Ruijing Subject: [PATCH v4] drm/amdgpu: add HW_IP_VCN_UNIFIED type From VCN4, AMDGPU_HW_IP_VCN_UNIFIED is used to support both encoding and decoding jobs, it re-uses the same queue number of AMDGPU_HW_IP_VCN_ENC. link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits Signed-off-by: Ruijing Dong --- include/uapi/drm/amdgpu_drm.h | 6 ++ 1 file changed, 6 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..e268cd3cdb12 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -560,6 +560,12 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_UVD_ENC 5 #define AMDGPU_HW_IP_VCN_DEC 6 #define AMDGPU_HW_IP_VCN_ENC 7 +/** + * From VCN4, AMDGPU_HW_IP_VCN_UNIFIED is used to support + * both encoding and decoding jobs, it re-uses the same + * queue number of AMDGPU_HW_IP_VCN_ENC. + */ +#define AMDGPU_HW_IP_VCN_UNIFIED AMDGPU_HW_IP_VCN_ENC #define AMDGPU_HW_IP_VCN_JPEG 8 #define AMDGPU_HW_IP_NUM 9 -- 2.25.1
[PATCH 22/31] drm/amd/display: Move phantom stream to FPU code
This commit moves the phantom stream FPU code to the dcn32_fpu file. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 89 +-- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 84 + .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 8 ++ 3 files changed, 94 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 1c124231b00a..a1bf24ad0787 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1831,93 +1831,6 @@ static void dcn32_enable_phantom_plane(struct dc *dc, } } -/** - * *** - * dcn32_set_phantom_stream_timing: Set timing params for the phantom stream - * - * Set timing params of the phantom stream based on calculated output from DML. - * This function first gets the DML pipe index using the DC pipe index, then - * calls into DML (get_subviewport_lines_needed_in_mall) to get the number of - * lines required for SubVP MCLK switching and assigns to the phantom stream - * accordingly. - * - * - The number of SubVP lines calculated in DML does not take into account - * FW processing delays and required pstate allow width, so we must include - * that separately. - * - * - Set phantom backporch = vstartup of main pipe - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] ref_pipe: Main pipe for the phantom stream - * @param [in] pipes: DML pipe params - * @param [in] pipe_cnt: number of DML pipes - * @param [in] dc_pipe_idx: DC pipe index for the main pipe (i.e. 
ref_pipe) - * - * @return: void - * - * *** - */ -static void dcn32_set_phantom_stream_timing(struct dc *dc, - struct dc_state *context, - struct pipe_ctx *ref_pipe, - struct dc_stream_state *phantom_stream, - display_e2e_pipe_params_st *pipes, - unsigned int pipe_cnt, - unsigned int dc_pipe_idx) -{ - unsigned int i, pipe_idx; - struct pipe_ctx *pipe; - uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; - unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel; - unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel]; - - // Find DML pipe index (pipe_idx) using dc_pipe_idx - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - pipe = >res_ctx.pipe_ctx[i]; - - if (!pipe->stream) - continue; - - if (i == dc_pipe_idx) - break; - - pipe_idx++; - } - - // Calculate lines required for pstate allow width and FW processing delays - pstate_width_fw_delay_lines = ((double)(dc->caps.subvp_fw_processing_delay_us + - dc->caps.subvp_pstate_allow_width_us) / 100) * - (ref_pipe->stream->timing.pix_clk_100hz * 100) / - (double)ref_pipe->stream->timing.h_total; - - // Update clks_cfg for calling into recalculate - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = socclk; - - // DML calculation for MALL region doesn't take into account FW delay - // and required pstate allow width for multi-display cases - phantom_vactive = get_subviewport_lines_needed_in_mall(>bw_ctx.dml, pipes, pipe_cnt, pipe_idx) + - pstate_width_fw_delay_lines; - - // For backporch of phantom pipe, use vstartup of the main pipe - phantom_bp = get_vstartup(>bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - phantom_stream->dst.y = 0; - phantom_stream->dst.height = phantom_vactive; - phantom_stream->src.y = 0; - phantom_stream->src.height = phantom_vactive; - - phantom_stream->timing.v_addressable = phantom_vactive; 
- phantom_stream->timing.v_front_porch = 1; - phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + - phantom_stream->timing.v_front_porch + - phantom_stream->timing.v_sync_width + - phantom_bp; -} - static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -1939,7 +1852,9 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc,
[PATCH 05/31] drm/amd/display: fix trigger_hotplug to support mst case
From: Wayne Lin [Why & How] Correct few problems below to have debugfs trigger_hotplug entry supports mst case * Adjust the place for acquiring the hpd_lock. We'll also access dc_link when simulate unplug * When detect the connector is a mst root, call reset_cur_dp_mst_topology() to simulate unplug * Don't support hotplug caused by CSN message since we can't change mst topology info directly. We can't simulate that * Clean up redundant code Reviewed-by: Hersen Wu Acked-by: Alan Liu Signed-off-by: Wayne Lin --- .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 17 ++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index e0646db6fdbf..b764198eca5c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1273,14 +1273,22 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, return -EINVAL; } + kfree(wr_buf); + if (param_nums <= 0) { DRM_DEBUG_DRIVER("user data not be read\n"); - kfree(wr_buf); + return -EINVAL; + } + + mutex_lock(>hpd_lock); + + /* Don't support for mst end device*/ + if (aconnector->mst_port) { + mutex_unlock(>hpd_lock); return -EINVAL; } if (param[0] == 1) { - mutex_lock(>hpd_lock); if (!dc_link_detect_sink(aconnector->dc_link, _connection_type) && new_connection_type != dc_connection_none) @@ -1317,6 +1325,10 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, amdgpu_dm_update_connector_after_detect(aconnector); + /* If the aconnector is the root node in mst topology */ + if (aconnector->mst_mgr.mst_state == true) + reset_cur_dp_mst_topology(link); + drm_modeset_lock_all(dev); dm_restore_drm_connector_state(dev, connector); drm_modeset_unlock_all(dev); @@ -1327,7 +1339,6 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, unlock: mutex_unlock(>hpd_lock); - kfree(wr_buf); 
return size; } -- 2.37.0
Re: [PATCH 2/3] drm/amdkfd: track unified memory reservation with xnack off
On 2022-07-11 21:56, Alex Sierra wrote: [WHY] Unified memory with xnack off should be tracked, as userptr mappings and legacy allocations do. To avoid oversuscribe system memory when xnack off. [How] Exposing functions reserve_mem_limit and unreserve_mem_limit to SVM API and call them on every prange creation and free. One question and two nit-picks inline. Otherwise this looks good to me. Signed-off-by: Alex Sierra --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 4 ++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 25 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 60 +-- 3 files changed, 60 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 73bf8b5f2aa9..83d955f0c52f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -305,6 +305,10 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem * void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev); +int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 alloc_flag); +void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 alloc_flag); #if IS_ENABLED(CONFIG_HSA_AMD) void amdgpu_amdkfd_gpuvm_init_mem_limits(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 2bc36ff0aa0f..7480e7333e5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -129,7 +129,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * * Return: returns -ENOMEM in case of error, ZERO otherwise */ -static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, +int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { uint64_t 
reserved_for_pt = @@ -169,7 +169,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) || (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || - (adev->kfd.vram_used + vram_needed > + (adev && adev->kfd.vram_used + vram_needed > adev->gmc.real_vram_size - atomic64_read(>vram_pin_size) - reserved_for_pt)) { @@ -180,7 +180,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, /* Update memory accounting by decreasing available system * memory, TTM memory and GPU memory as computed above */ - adev->kfd.vram_used += vram_needed; + WARN_ONCE(vram_needed && !adev, + "adev reference can't be null when vram is used"); + if (adev) + adev->kfd.vram_used += vram_needed; kfd_mem_limit.system_mem_used += system_mem_needed; kfd_mem_limit.ttm_mem_used += ttm_mem_needed; @@ -189,7 +192,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, return ret; } -static void unreserve_mem_limit(struct amdgpu_device *adev, +void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { spin_lock(_mem_limit.mem_limit_lock); @@ -198,7 +201,10 @@ static void unreserve_mem_limit(struct amdgpu_device *adev, kfd_mem_limit.system_mem_used -= size; kfd_mem_limit.ttm_mem_used -= size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); + WARN_ONCE(!adev, + "adev reference can't be null when alloc mem flags vram is set"); + if (adev) + adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { kfd_mem_limit.system_mem_used -= size; } else if (!(alloc_flag & @@ -207,11 +213,8 @@ static void unreserve_mem_limit(struct amdgpu_device *adev, pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); goto release; } - - WARN_ONCE(adev->kfd.vram_used < 0, + WARN_ONCE(adev && adev->kfd.vram_used < 
0, "KFD VRAM memory accounting unbalanced"); - WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, - "KFD TTM memory accounting unbalanced"); This looks like an unrelated change. Why are you removing this warning? WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "KFD system memory accounting unbalanced"); @@ -225,7 +228,7 @@ void
[PATCH v9 08/14] lib: test_hmm add ioctl to get zone device type
new ioctl cmd added to query zone device type. This will be used once the test_hmm adds zone device coherent type. Signed-off-by: Alex Sierra Acked-by: Felix Kuehling Reviewed-by: Alistair Popple Signed-off-by: Christoph Hellwig --- lib/test_hmm.c | 11 +-- lib/test_hmm_uapi.h | 14 ++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/lib/test_hmm.c b/lib/test_hmm.c index cfe632047839..915ef6b5b0d4 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -87,6 +87,7 @@ struct dmirror_chunk { struct dmirror_device { struct cdev cdevice; struct hmm_devmem *devmem; + unsigned intzone_device_type; unsigned intdevmem_capacity; unsigned intdevmem_count; @@ -1260,14 +1261,20 @@ static void dmirror_device_remove(struct dmirror_device *mdevice) static int __init hmm_dmirror_init(void) { int ret; - int id; + int id = 0; + int ndevices = 0; ret = alloc_chrdev_region(_dev, 0, DMIRROR_NDEVICES, "HMM_DMIRROR"); if (ret) goto err_unreg; - for (id = 0; id < DMIRROR_NDEVICES; id++) { + memset(dmirror_devices, 0, DMIRROR_NDEVICES * sizeof(dmirror_devices[0])); + dmirror_devices[ndevices++].zone_device_type = + HMM_DMIRROR_MEMORY_DEVICE_PRIVATE; + dmirror_devices[ndevices++].zone_device_type = + HMM_DMIRROR_MEMORY_DEVICE_PRIVATE; + for (id = 0; id < ndevices; id++) { ret = dmirror_device_init(dmirror_devices + id, id); if (ret) goto err_chrdev; diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h index f14dea5dcd06..0511af7464ee 100644 --- a/lib/test_hmm_uapi.h +++ b/lib/test_hmm_uapi.h @@ -31,10 +31,11 @@ struct hmm_dmirror_cmd { /* Expose the address space of the calling process through hmm device file */ #define HMM_DMIRROR_READ _IOWR('H', 0x00, struct hmm_dmirror_cmd) #define HMM_DMIRROR_WRITE _IOWR('H', 0x01, struct hmm_dmirror_cmd) -#define HMM_DMIRROR_MIGRATE_IOWR('H', 0x02, struct hmm_dmirror_cmd) -#define HMM_DMIRROR_SNAPSHOT _IOWR('H', 0x03, struct hmm_dmirror_cmd) -#define HMM_DMIRROR_EXCLUSIVE _IOWR('H', 0x04, struct hmm_dmirror_cmd) -#define 
HMM_DMIRROR_CHECK_EXCLUSIVE_IOWR('H', 0x05, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_MIGRATE_TO_DEV _IOWR('H', 0x02, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_MIGRATE_TO_SYS _IOWR('H', 0x03, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_SNAPSHOT _IOWR('H', 0x04, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_EXCLUSIVE _IOWR('H', 0x05, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_CHECK_EXCLUSIVE_IOWR('H', 0x06, struct hmm_dmirror_cmd) /* * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT. @@ -62,4 +63,9 @@ enum { HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE = 0x30, }; +enum { + /* 0 is reserved to catch uninitialized type fields */ + HMM_DMIRROR_MEMORY_DEVICE_PRIVATE = 1, +}; + #endif /* _LIB_TEST_HMM_UAPI_H */ -- 2.32.0
[PATCH] drm/amdkfd: track unified memory reservation with xnack off
[WHY] Unified memory with xnack off should be tracked, as userptr mappings and legacy allocations are, to avoid oversubscribing system memory when xnack is off. [How] Expose functions reserve_mem_limit and unreserve_mem_limit to the SVM API and call them on every prange creation and free. Signed-off-by: Alex Sierra --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 4 ++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 23 --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 60 +-- 3 files changed, 60 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 73bf8b5f2aa9..83d955f0c52f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -305,6 +305,10 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem * void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev); +int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 alloc_flag); +void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 alloc_flag); #if IS_ENABLED(CONFIG_HSA_AMD) void amdgpu_amdkfd_gpuvm_init_mem_limits(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 2bc36ff0aa0f..39d589394160 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -129,7 +129,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * * Return: returns -ENOMEM in case of error, ZERO otherwise */ -static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, +int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { uint64_t reserved_for_pt = @@ -169,7 +169,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, 
kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) || (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || - (adev->kfd.vram_used + vram_needed > + (adev && adev->kfd.vram_used + vram_needed > adev->gmc.real_vram_size - atomic64_read(>vram_pin_size) - reserved_for_pt)) { @@ -180,7 +180,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, /* Update memory accounting by decreasing available system * memory, TTM memory and GPU memory as computed above */ - adev->kfd.vram_used += vram_needed; + WARN_ONCE(vram_needed && !adev, + "adev reference can't be null when vram is used"); + if (adev) + adev->kfd.vram_used += vram_needed; kfd_mem_limit.system_mem_used += system_mem_needed; kfd_mem_limit.ttm_mem_used += ttm_mem_needed; @@ -189,7 +192,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, return ret; } -static void unreserve_mem_limit(struct amdgpu_device *adev, +void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { spin_lock(_mem_limit.mem_limit_lock); @@ -198,7 +201,10 @@ static void unreserve_mem_limit(struct amdgpu_device *adev, kfd_mem_limit.system_mem_used -= size; kfd_mem_limit.ttm_mem_used -= size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); + WARN_ONCE(!adev, + "adev reference can't be null when alloc mem flags vram is set"); + if (adev) + adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { kfd_mem_limit.system_mem_used -= size; } else if (!(alloc_flag & @@ -207,8 +213,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev, pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); goto release; } - - WARN_ONCE(adev->kfd.vram_used < 0, + WARN_ONCE(adev && adev->kfd.vram_used < 0, "KFD VRAM memory accounting unbalanced"); WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, "KFD TTM memory 
accounting unbalanced"); @@ -225,7 +230,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) u32 alloc_flags = bo->kfd_bo->alloc_flags; u64 size = amdgpu_bo_size(bo); - unreserve_mem_limit(adev, size, alloc_flags); + amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags); kfree(bo->kfd_bo); } @@ -1788,7 +1793,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
[PATCH 11/31] drm/amd/display: Fix hard hang if DSC is disabled
We want to calculate the DTB clock values when DSC is enabled; however, this is not the current behavior implemented in DCN32. Right now, DML is trying to calculate DSC values even if DSC is disabled; as a result, we can have a hard hang due to wrong clock calculation. This commit fixes this issue by moving the calculation after the DSC check. Signed-off-by: Rodrigo Siqueira --- .../dc/dml/dcn32/display_mode_vba_util_32.c | 19 --- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 5a701d9df0f7..febaff7d7343 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -1686,17 +1686,22 @@ double dml32_RequiredDTBCLK( unsigned int AudioRate, unsigned int AudioLayout) { - double PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); - double HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * - dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); - double HCBlank = 64 + 32 * - dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); - double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; - double HActiveTribyteRate = PixelWordRate * HCActive / HActive; + double PixelWordRate; + double HCActive; + double HCBlank; + double AverageTribyteRate; + double HActiveTribyteRate; if (DSCEnable != true) return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); + PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); + HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * + dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); + HCBlank = 64 + 32 * + dml_ceil(AudioRate * (AudioLayout == 1 ? 
1 : 0.25) * HTotal / (PixelClock * 1000), 1); + AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; + HActiveTribyteRate = PixelWordRate * HCActive / HActive; return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; } -- 2.37.0
[PATCH 16/31] drm/amd/display: Update Cursor Attribute MALL cache
From: Chris Park [Why] Cursor size can update without MALL cache update. Update the register on cursor attribute as well. [How] Update cursor MALL cache on cursor attribute update. Reviewed-by: Alvin Lee Acked-by: Alan Liu Signed-off-by: Chris Park --- .../gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c | 40 ++- .../gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h | 3 ++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c index 0a7d64306481..3176b04a7740 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c @@ -94,6 +94,44 @@ void hubp32_phantom_hubp_post_enable(struct hubp *hubp) } } +void hubp32_cursor_set_attributes( + struct hubp *hubp, + const struct dc_cursor_attributes *attr) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + enum cursor_pitch hw_pitch = hubp1_get_cursor_pitch(attr->pitch); + enum cursor_lines_per_chunk lpc = hubp2_get_lines_per_chunk( + attr->width, attr->color_format); + + hubp->curs_attr = *attr; + + REG_UPDATE(CURSOR_SURFACE_ADDRESS_HIGH, + CURSOR_SURFACE_ADDRESS_HIGH, attr->address.high_part); + REG_UPDATE(CURSOR_SURFACE_ADDRESS, + CURSOR_SURFACE_ADDRESS, attr->address.low_part); + + REG_UPDATE_2(CURSOR_SIZE, + CURSOR_WIDTH, attr->width, + CURSOR_HEIGHT, attr->height); + + REG_UPDATE_4(CURSOR_CONTROL, + CURSOR_MODE, attr->color_format, + CURSOR_2X_MAGNIFY, attr->attribute_flags.bits.ENABLE_MAGNIFICATION, + CURSOR_PITCH, hw_pitch, + CURSOR_LINES_PER_CHUNK, lpc); + + REG_SET_2(CURSOR_SETTINGS, 0, + /* no shift of the cursor HDL schedule */ + CURSOR0_DST_Y_OFFSET, 0, +/* used to shift the cursor chunk request deadline */ + CURSOR0_CHUNK_HDL_ADJUST, 3); + + if (attr->width * attr->height * 4 > 16384) + REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, true); + else + REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, false); +} + static struct hubp_funcs dcn32_hubp_funcs = 
{ .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, @@ -106,7 +144,7 @@ static struct hubp_funcs dcn32_hubp_funcs = { .set_blank = hubp2_set_blank, .dcc_control = hubp3_dcc_control, .mem_program_viewport = min_set_viewport, - .set_cursor_attributes = hubp2_cursor_set_attributes, + .set_cursor_attributes = hubp32_cursor_set_attributes, .set_cursor_position= hubp2_cursor_set_position, .hubp_clk_cntl = hubp2_clk_cntl, .hubp_vtg_sel = hubp2_vtg_sel, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h index 00b4211389c2..c4315d50fbb0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h @@ -58,6 +58,9 @@ void hubp32_prepare_subvp_buffering(struct hubp *hubp, bool enable); void hubp32_phantom_hubp_post_enable(struct hubp *hubp); +void hubp32_cursor_set_attributes(struct hubp *hubp, + const struct dc_cursor_attributes *attr); + bool hubp32_construct( struct dcn20_hubp *hubp2, struct dc_context *ctx, -- 2.37.0
[PATCH v9 12/14] tools: update test_hmm script to support SP config
Add two more parameters to set spm_addr_dev0 & spm_addr_dev1 addresses. These two parameters configure the start SP addresses for each device in test_hmm driver. Consequently, this configures zone device type as coherent. Signed-off-by: Alex Sierra Acked-by: Felix Kuehling Reviewed-by: Alistair Popple Signed-off-by: Christoph Hellwig --- tools/testing/selftests/vm/test_hmm.sh | 24 +--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh index 0647b525a625..539c9371e592 100755 --- a/tools/testing/selftests/vm/test_hmm.sh +++ b/tools/testing/selftests/vm/test_hmm.sh @@ -40,11 +40,26 @@ check_test_requirements() load_driver() { - modprobe $DRIVER > /dev/null 2>&1 + if [ $# -eq 0 ]; then + modprobe $DRIVER > /dev/null 2>&1 + else + if [ $# -eq 2 ]; then + modprobe $DRIVER spm_addr_dev0=$1 spm_addr_dev1=$2 + > /dev/null 2>&1 + else + echo "Missing module parameters. Make sure pass"\ + "spm_addr_dev0 and spm_addr_dev1" + usage + fi + fi if [ $? == 0 ]; then major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices) mknod /dev/hmm_dmirror0 c $major 0 mknod /dev/hmm_dmirror1 c $major 1 + if [ $# -eq 2 ]; then + mknod /dev/hmm_dmirror2 c $major 2 + mknod /dev/hmm_dmirror3 c $major 3 + fi fi } @@ -58,7 +73,7 @@ run_smoke() { echo "Running smoke test. Note, this test provides basic coverage." - load_driver + load_driver $1 $2 $(dirname "${BASH_SOURCE[0]}")/hmm-tests unload_driver } @@ -75,6 +90,9 @@ usage() echo "# Smoke testing" echo "./${TEST_NAME}.sh smoke" echo + echo "# Smoke testing with SPM enabled" + echo "./${TEST_NAME}.sh smoke " + echo exit 0 } @@ -84,7 +102,7 @@ function run_test() usage else if [ "$1" = "smoke" ]; then - run_smoke + run_smoke $2 $3 else usage fi -- 2.32.0
[PATCH 10/31] drm/amd/display: remove number of DSC slices override in DML
From: Wenjing Liu [why] Number of DSC slices is an input to DML with high dependency on display specific capability. This isn't something DML can decide on its own. DML has to use the original number of DSC slices input to DML during validation without modification. Otherwise the computed DSC delay will not reflect the current configuration and therefore causes validation failures. [how] Remove DML override for number of DSC slices parameter. Reviewed-by: Alvin Lee Acked-by: Alan Liu Signed-off-by: Wenjing Liu --- .../dc/dml/dcn32/display_mode_vba_32.c| 20 --- 1 file changed, 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index c6c3a9e6731a..1712843dafaa 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1897,26 +1897,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->MaximumSwathWidthInLineBufferChroma); } - /*Number Of DSC Slices*/ - for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { - if (mode_lib->vba.BlendingAndTiming[k] == k) { - if (mode_lib->vba.PixelClockBackEnd[k] > 4800) { - mode_lib->vba.NumberOfDSCSlices[k] = dml_ceil(mode_lib->vba.PixelClockBackEnd[k] / 600, - 4); - } else if (mode_lib->vba.PixelClockBackEnd[k] > 2400) { - mode_lib->vba.NumberOfDSCSlices[k] = 8; - } else if (mode_lib->vba.PixelClockBackEnd[k] > 1200) { - mode_lib->vba.NumberOfDSCSlices[k] = 4; - } else if (mode_lib->vba.PixelClockBackEnd[k] > 340) { - mode_lib->vba.NumberOfDSCSlices[k] = 2; - } else { - mode_lib->vba.NumberOfDSCSlices[k] = 1; - } - } else { - mode_lib->vba.NumberOfDSCSlices[k] = 0; - } - } - dml32_CalculateSwathAndDETConfiguration( mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, -- 2.37.0
[PATCH v9 07/14] drm/amdkfd: add SPM support for SVM
When CPU is connected through XGMI, it has coherent access to VRAM resource. In this case that resource is taken from a table in the device gmc aperture base. This resource is used along with the device type, which could be DEVICE_PRIVATE or DEVICE_COHERENT to create the device page map region. Also, MIGRATE_VMA_SELECT_DEVICE_COHERENT flag is selected for coherent type case during migration to device. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Christoph Hellwig --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 34 +++- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index e44376c2ecdc..f73e3e340413 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -671,13 +671,15 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, migrate.vma = vma; migrate.start = start; migrate.end = end; - migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev); + if (adev->gmc.xgmi.connected_to_cpu) + migrate.flags = MIGRATE_VMA_SELECT_DEVICE_COHERENT; + else + migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t), GFP_KERNEL); - if (!buf) goto out; @@ -947,7 +949,7 @@ int svm_migrate_init(struct amdgpu_device *adev) { struct kfd_dev *kfddev = adev->kfd.dev; struct dev_pagemap *pgmap; - struct resource *res; + struct resource *res = NULL; unsigned long size; void *r; @@ -962,28 +964,34 @@ int svm_migrate_init(struct amdgpu_device *adev) * should remove reserved size */ size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20); - res = devm_request_free_mem_region(adev->dev, _resource, size); - if (IS_ERR(res)) - return -ENOMEM; + if (adev->gmc.xgmi.connected_to_cpu) { + pgmap->range.start = adev->gmc.aper_base; + pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size - 
1; + pgmap->type = MEMORY_DEVICE_COHERENT; + } else { + res = devm_request_free_mem_region(adev->dev, _resource, size); + if (IS_ERR(res)) + return -ENOMEM; + pgmap->range.start = res->start; + pgmap->range.end = res->end; + pgmap->type = MEMORY_DEVICE_PRIVATE; + } - pgmap->type = MEMORY_DEVICE_PRIVATE; pgmap->nr_range = 1; - pgmap->range.start = res->start; - pgmap->range.end = res->end; pgmap->ops = _migrate_pgmap_ops; pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev); - pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; - + pgmap->flags = 0; /* Device manager releases device-specific resources, memory region and * pgmap when driver disconnects from device. */ r = devm_memremap_pages(adev->dev, pgmap); if (IS_ERR(r)) { pr_err("failed to register HMM device memory\n"); - /* Disable SVM support capability */ pgmap->type = 0; - devm_release_mem_region(adev->dev, res->start, resource_size(res)); + if (pgmap->type == MEMORY_DEVICE_PRIVATE) + devm_release_mem_region(adev->dev, res->start, + res->end - res->start + 1); return PTR_ERR(r); } -- 2.32.0
[PATCH v9 04/14] mm: handling Non-LRU pages returned by vm_normal_pages
With DEVICE_COHERENT, we'll soon have vm_normal_pages() return device-managed anonymous pages that are not LRU pages. Although they behave like normal pages for purposes of mapping in CPU page tables and for COW, they do not support LRU lists, NUMA migration or THP. Callers to follow_page() currently don't expect ZONE_DEVICE pages, however, with DEVICE_COHERENT we might now return ZONE_DEVICE. Check for ZONE_DEVICE pages in applicable users of follow_page() as well. Signed-off-by: Alex Sierra Acked-by: Felix Kuehling (v2) Reviewed-by: Alistair Popple (v6) --- fs/proc/task_mmu.c | 2 +- mm/huge_memory.c | 2 +- mm/khugepaged.c| 9 ++--- mm/ksm.c | 6 +++--- mm/madvise.c | 4 ++-- mm/memory.c| 10 +- mm/mempolicy.c | 2 +- mm/migrate.c | 4 ++-- mm/mlock.c | 2 +- mm/mprotect.c | 2 +- 10 files changed, 27 insertions(+), 16 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2d04e3470d4c..2dd8c8a66924 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1792,7 +1792,7 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, return NULL; page = vm_normal_page(vma, addr, pte); - if (!page) + if (!page || is_zone_device_page(page)) return NULL; if (PageReserved(page)) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 834f288b3769..c47e95b02244 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2910,7 +2910,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start, if (IS_ERR(page)) continue; - if (!page) + if (!page || is_zone_device_page(page)) continue; if (!is_transparent_hugepage(page)) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 16be62d493cd..671ac7800e53 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -618,7 +618,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, goto out; } page = vm_normal_page(vma, address, pteval); - if (unlikely(!page)) { + if (unlikely(!page) || unlikely(is_zone_device_page(page))) { result = SCAN_PAGE_NULL; goto out; } @@ -1267,7 +1267,7 @@ 
static int khugepaged_scan_pmd(struct mm_struct *mm, writable = true; page = vm_normal_page(vma, _address, pteval); - if (unlikely(!page)) { + if (unlikely(!page) || unlikely(is_zone_device_page(page))) { result = SCAN_PAGE_NULL; goto out_unmap; } @@ -1479,7 +1479,8 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) goto abort; page = vm_normal_page(vma, addr, *pte); - + if (WARN_ON_ONCE(page && is_zone_device_page(page))) + page = NULL; /* * Note that uprobe, debugger, or MAP_PRIVATE may change the * page table, but the new page will not be a subpage of hpage. @@ -1497,6 +1498,8 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) if (pte_none(*pte)) continue; page = vm_normal_page(vma, addr, *pte); + if (WARN_ON_ONCE(page && is_zone_device_page(page))) + goto abort; page_remove_rmap(page, vma, false); } diff --git a/mm/ksm.c b/mm/ksm.c index 54f78c9eecae..831b18a7a50b 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -475,7 +475,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) cond_resched(); page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE); - if (IS_ERR_OR_NULL(page)) + if (IS_ERR_OR_NULL(page) || is_zone_device_page(page)) break; if (PageKsm(page)) ret = handle_mm_fault(vma, addr, @@ -560,7 +560,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item) goto out; page = follow_page(vma, addr, FOLL_GET); - if (IS_ERR_OR_NULL(page)) + if (IS_ERR_OR_NULL(page) || is_zone_device_page(page)) goto out; if (PageAnon(page)) { flush_anon_page(vma, page, addr); @@ -2308,7 +2308,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) if (ksm_test_exit(mm)) break; *page = follow_page(vma, ksm_scan.address, FOLL_GET); - if (IS_ERR_OR_NULL(*page)) { + if (IS_ERR_OR_NULL(*page) ||
[PATCH v9 14/14] tools: add selftests to hmm for COW in device memory
The objective is to test device migration mechanism in pages marked as COW, for private and coherent device type. In case of writing to COW private page(s), a page fault will migrate pages back to system memory first. Then, these pages will be duplicated. In case of COW device coherent type, pages are duplicated directly from device memory. Signed-off-by: Alex Sierra Acked-by: Felix Kuehling --- tools/testing/selftests/vm/hmm-tests.c | 80 ++ 1 file changed, 80 insertions(+) diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index bb38b9777610..716b62c05e3d 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -1874,4 +1874,84 @@ TEST_F(hmm, hmm_gup_test) close(gup_fd); hmm_buffer_free(buffer); } + +/* + * Test copy-on-write in device pages. + * In case of writing to COW private page(s), a page fault will migrate pages + * back to system memory first. Then, these pages will be duplicated. In case + * of COW device coherent type, pages are duplicated directly from device + * memory. + */ +TEST_F(hmm, hmm_cow_in_device) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + unsigned char *m; + pid_t pid; + int status; + + npages = 4; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. 
*/ + + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + pid = fork(); + if (pid == -1) + ASSERT_EQ(pid, 0); + if (!pid) { + /* Child process waitd for SIGTERM from the parent. */ + while (1) { + } + perror("Should not reach this\n"); + exit(0); + } + /* Parent process writes to COW pages(s) and gets a +* new copy in system. In case of device private pages, +* this write causes a migration to system mem first. +*/ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Terminate child and wait */ + EXPECT_EQ(0, kill(pid, SIGTERM)); + EXPECT_EQ(pid, waitpid(pid, , 0)); + EXPECT_NE(0, WIFSIGNALED(status)); + EXPECT_EQ(SIGTERM, WTERMSIG(status)); + + /* Take snapshot to CPU pagetables */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + m = buffer->mirror; + for (i = 0; i < npages; i++) + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[i]); + + hmm_buffer_free(buffer); +} TEST_HARNESS_MAIN -- 2.32.0
[PATCH v9 06/14] mm/gup: migrate device coherent pages when pinning instead of failing
From: Alistair Popple Currently any attempts to pin a device coherent page will fail. This is because device coherent pages need to be managed by a device driver, and pinning them would prevent a driver from migrating them off the device. However this is no reason to fail pinning of these pages. These are coherent and accessible from the CPU so can be migrated just like pinning ZONE_MOVABLE pages. So instead of failing all attempts to pin them first try migrating them out of ZONE_DEVICE. [hch: rebased to the split device memory checks, moved migrate_device_page to migrate_device.c] Signed-off-by: Alistair Popple Acked-by: Felix Kuehling Signed-off-by: Christoph Hellwig --- mm/gup.c| 50 +-- mm/internal.h | 1 + mm/migrate_device.c | 52 + 3 files changed, 96 insertions(+), 7 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index b65fe8bf5af4..22b97ab61cd9 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1881,7 +1881,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, unsigned long isolation_error_count = 0, i; struct folio *prev_folio = NULL; LIST_HEAD(movable_page_list); - bool drain_allow = true; + bool drain_allow = true, coherent_pages = false; int ret = 0; for (i = 0; i < nr_pages; i++) { @@ -1891,9 +1891,38 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, continue; prev_folio = folio; - if (folio_is_longterm_pinnable(folio)) + /* +* Device coherent pages are managed by a driver and should not +* be pinned indefinitely as it prevents the driver moving the +* page. So when trying to pin with FOLL_LONGTERM instead try +* to migrate the page out of device memory. +*/ + if (folio_is_device_coherent(folio)) { + /* +* We always want a new GUP lookup with device coherent +* pages. +*/ + pages[i] = 0; + coherent_pages = true; + + /* +* Migration will fail if the page is pinned, so convert +* the pin on the source page to a normal reference. 
+*/ + if (gup_flags & FOLL_PIN) { + get_page(>page); + unpin_user_page(>page); + } + + ret = migrate_device_coherent_page(>page); + if (ret) + goto unpin_pages; + continue; + } + if (folio_is_longterm_pinnable(folio)) + continue; /* * Try to move out any movable page before pinning the range. */ @@ -1919,7 +1948,8 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, folio_nr_pages(folio)); } - if (!list_empty(_page_list) || isolation_error_count) + if (!list_empty(_page_list) || isolation_error_count + || coherent_pages) goto unpin_pages; /* @@ -1929,10 +1959,16 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, return nr_pages; unpin_pages: - if (gup_flags & FOLL_PIN) { - unpin_user_pages(pages, nr_pages); - } else { - for (i = 0; i < nr_pages; i++) + /* +* pages[i] might be NULL if any device coherent pages were found. +*/ + for (i = 0; i < nr_pages; i++) { + if (!pages[i]) + continue; + + if (gup_flags & FOLL_PIN) + unpin_user_page(pages[i]); + else put_page(pages[i]); } diff --git a/mm/internal.h b/mm/internal.h index c0f8fbe0445b..899dab512c5a 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -853,6 +853,7 @@ int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, unsigned long addr, int page_nid, int *flags); void free_zone_device_page(struct page *page); +int migrate_device_coherent_page(struct page *page); /* * mm/gup.c diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 18bc6483f63a..7feeb447e3b9 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -686,6 +686,12 @@ void migrate_vma_pages(struct migrate_vma *migrate) } if (!page) { + /* +* The only time there is no vma is when called from +* migrate_device_coherent_page(). However this isn't +* called if
[PATCH 13/31] drm/amd/display: Update de-tile override to anticipate pipe splitting
From: Taimur Hassan [Why] For certain MPO configurations, DML will split a pipe after DET buffer has already been allocated by driver, resulting in allocation of more DET segments than the configurable return buffer has, causing underflow. [How] Determine during DET override calculation whether or not a pipe will be split later on by DML, and distribute DET segments based on expected number of pipes. Reviewed-by: Dmytro Laktyushkin Acked-by: Alan Liu Signed-off-by: Taimur Hassan --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 22 ++-- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 6 +- .../display/dc/dcn32/dcn32_resource_helpers.c | 112 +- 3 files changed, 69 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 92d87745d933..631876832dfa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2984,7 +2984,7 @@ int dcn32_populate_dml_pipes_from_context( int i, pipe_cnt; struct resource_context *res_ctx = >res_ctx; struct pipe_ctx *pipe; - bool subvp_in_use = false; + bool subvp_in_use = false, is_pipe_split_expected[MAX_PIPES]; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); @@ -3046,6 +3046,9 @@ int dcn32_populate_dml_pipes_from_context( if (dc->debug.enable_single_display_2to1_odm_policy) pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; } + + is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, pipes[i].pipe, i); + pipe_cnt++; } @@ -3053,8 +3056,7 @@ int dcn32_populate_dml_pipes_from_context( * the DET available for each pipe). Use the DET override input to maintain our driver * policy. 
*/ - switch (pipe_cnt) { - case 1: + if (pipe_cnt == 1 && !is_pipe_split_expected[0]) { pipes[0].pipe.src.det_size_override = DCN3_2_MAX_DET_SIZE; if (pipe->plane_state && !dc->debug.disable_z9_mpc) { if (!is_dual_plane(pipe->plane_state->format)) { @@ -3065,18 +3067,8 @@ int dcn32_populate_dml_pipes_from_context( pipes[0].pipe.src.det_size_override = 320; // 5K or higher } } - break; - case 2: - case 3: - case 4: - // For 2 and 3 pipes, use (MAX_DET_SIZE / pipe_cnt), for 4 pipes use default size for each pipe - for (i = 0; i < pipe_cnt; i++) { - pipes[i].pipe.src.det_size_override = (pipe_cnt < 4) ? (DCN3_2_MAX_DET_SIZE / pipe_cnt) : DCN3_2_DEFAULT_DET_SIZE; - } - break; - } - - dcn32_update_det_override_for_mpo(dc, context, pipes); + } else + dcn32_determine_det_override(context, pipes, is_pipe_split_expected, pipe_cnt); // In general cases we want to keep the dram clock change requirement // (prefer configs that support MCLK switch). Only override to false diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index db4546317cb5..10254ab7e9d9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -100,7 +100,9 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, bool dcn32_subvp_in_use(struct dc *dc, struct dc_state *context); -void dcn32_update_det_override_for_mpo(struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes); +bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index); + +void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, + bool *is_pipe_split_expected, int pipe_cnt); #endif /* _DCN32_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index e001f6d1f6c3..a6ef1dba01fe 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -26,6 +26,8 @@ // header file of functions being implemented #include "dcn32_resource.h" #include "dcn20/dcn20_resource.h" +#include "dml/dcn32/display_mode_vba_util_32.h" + /** * * dcn32_helper_populate_phantom_dlg_params: Get DLG params for phantom pipes and populate pipe_ctx @@ -195,66 +197,68 @@ bool dcn32_subvp_in_use(struct dc *dc, return false; } -/* For MPO we adjust the DET allocation to ensure we have enough
[PATCH 14/31] drm/amd/display: Disable GPUVM in IP resource configuration
From: Vladimir Stempen [Why] VM enabled in IP configuration causes UCLK not reaching DPM0. The expectation for VM enable should be that KMD will indicate to DAL when VM is enabled, then DAL will set the bit accordingly [How] Set gpuvm_enable to zero in DCN3_20 and DCN3_21 resource. Reviewed-by: Martin Leung Acked-by: Alan Liu Signed-off-by: Vladimir Stempen --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 631876832dfa..0cb44ea9753b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -120,7 +120,7 @@ static const struct IP_BASE DCN_BASE = { { { { 0x0012, 0x00C0, 0x34C #define DCN3_2_MIN_COMPBUF_SIZE_KB 128 struct _vcs_dpi_ip_params_st dcn3_2_ip = { - .gpuvm_enable = 1, + .gpuvm_enable = 0, .gpuvm_max_page_table_levels = 4, .hostvm_enable = 0, .rob_buffer_size_kbytes = 128, diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index ebbeebf972dc..d218c6dd71aa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -123,7 +123,7 @@ static const struct IP_BASE DCN_BASE = { { { { 0x0012, 0x00C0, 0x34C #define DCN3_2_DEFAULT_DET_SIZE 256 struct _vcs_dpi_ip_params_st dcn3_21_ip = { - .gpuvm_enable = 1, + .gpuvm_enable = 0, .gpuvm_max_page_table_levels = 4, .hostvm_enable = 0, .rob_buffer_size_kbytes = 128, -- 2.37.0
[PATCH 03/31] drm/amd/display: Update in dml
From: Alvin Lee Update DML to configure drr_display in vba struct. Reviewed-by: Dmytro Laktyushkin Acked-by: Alan Liu Signed-off-by: Alvin Lee --- drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h | 1 + drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 87c9b9f9976e..e8b094006d95 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -499,6 +499,7 @@ struct _vcs_dpi_display_pipe_dest_params_st { unsigned int refresh_rate; bool synchronize_timings; unsigned int odm_combine_policy; + bool drr_display; }; struct _vcs_dpi_display_pipe_params_st { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 39f93072b5e0..083f89e276d6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -697,6 +697,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.PixelClock[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz; mode_lib->vba.PixelClockBackEnd[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz; mode_lib->vba.DPPCLK[mode_lib->vba.NumberOfActivePlanes] = clks->dppclk_mhz; + mode_lib->vba.DRRDisplay[mode_lib->vba.NumberOfActiveSurfaces] = dst->drr_display; if (ip->is_line_buffer_bpp_fixed) mode_lib->vba.LBBitPerPixel[mode_lib->vba.NumberOfActivePlanes] = ip->line_buffer_fixed_bpp; -- 2.37.0
[PATCH v9 13/14] tools: add hmm gup tests for device coherent type
The intention is to test hmm device coherent type under different get user pages paths. Also, test gup with FOLL_LONGTERM flag set in device coherent pages. These pages should get migrated back to system memory. Signed-off-by: Alex Sierra Reviewed-by: Alistair Popple --- tools/testing/selftests/vm/hmm-tests.c | 110 + 1 file changed, 110 insertions(+) diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 4b547188ec40..bb38b9777610 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -36,6 +36,7 @@ * in the usual include/uapi/... directory. */ #include "../../../../lib/test_hmm_uapi.h" +#include "../../../../mm/gup_test.h" struct hmm_buffer { void*ptr; @@ -59,6 +60,9 @@ enum { #define NTIMES 10 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) +/* Just the flags we need, copied from mm.h: */ +#define FOLL_WRITE 0x01/* check pte is writable */ +#define FOLL_LONGTERM 0x1 /* mapping lifetime is indefinite */ FIXTURE(hmm) { @@ -1764,4 +1768,110 @@ TEST_F(hmm, exclusive_cow) hmm_buffer_free(buffer); } +static int gup_test_exec(int gup_fd, unsigned long addr, int cmd, +int npages, int size, int flags) +{ + struct gup_test gup = { + .nr_pages_per_call = npages, + .addr = addr, + .gup_flags = FOLL_WRITE | flags, + .size = size, + }; + + if (ioctl(gup_fd, cmd, )) { + perror("ioctl on error\n"); + return errno; + } + + return 0; +} + +/* + * Test get user device pages through gup_test. Setting PIN_LONGTERM flag. + * This should trigger a migration back to system memory for both, private + * and coherent type pages. + * This test makes use of gup_test module. Make sure GUP_TEST_CONFIG is added + * to your configuration before you run it. 
+ */ +TEST_F(hmm, hmm_gup_test) +{ + struct hmm_buffer *buffer; + int gup_fd; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + unsigned char *m; + + gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); + if (gup_fd == -1) + SKIP(return, "Skipping test, could not find gup_test driver"); + + npages = 4; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + /* Check what the device read. 
*/ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + ASSERT_EQ(gup_test_exec(gup_fd, + (unsigned long)buffer->ptr, + GUP_BASIC_TEST, 1, self->page_size, 0), 0); + ASSERT_EQ(gup_test_exec(gup_fd, + (unsigned long)buffer->ptr + 1 * self->page_size, + GUP_FAST_BENCHMARK, 1, self->page_size, 0), 0); + ASSERT_EQ(gup_test_exec(gup_fd, + (unsigned long)buffer->ptr + 2 * self->page_size, + PIN_FAST_BENCHMARK, 1, self->page_size, FOLL_LONGTERM), 0); + ASSERT_EQ(gup_test_exec(gup_fd, + (unsigned long)buffer->ptr + 3 * self->page_size, + PIN_LONGTERM_BENCHMARK, 1, self->page_size, 0), 0); + + /* Take snapshot to CPU pagetables */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + m = buffer->mirror; + if (hmm_is_coherent_type(variant->device_number)) { + ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[0]); + ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[1]); + } else { + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[0]); + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[1]); + } + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[2]); + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[3]); + /* +* Check again the content
[PATCH 26/31] drm/amd/display: Move ntuple to insert entry
Move get_optimal_ntuple to the FPU code and call it inside insert_entry_into_table_sorted. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 28 --- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 25 + 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 32edb3e5715a..adcc83e6ea55 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1956,29 +1956,6 @@ void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, DC_FP_END(); } -static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) -{ - if (entry->dcfclk_mhz > 0) { - float bw_on_sdp = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); - - entry->fabricclk_mhz = bw_on_sdp / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_sdp / (dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->fabricclk_mhz > 0) { - float bw_on_fabric = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); - - entry->dcfclk_mhz = bw_on_fabric / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_fabric / (dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->dram_speed_mts > 0) { - float bw_on_dram = entry->dram_speed_mts * dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only 
/ 100); - - entry->fabricclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dcfclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); - } -} - static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, unsigned int index) { @@ -2062,7 +2039,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; - get_optimal_ntuple(); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, ); DC_FP_END(); @@ -2073,7 +2049,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; - get_optimal_ntuple(); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, ); DC_FP_END(); @@ -2084,7 +2059,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; - get_optimal_ntuple(); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, ); DC_FP_END(); @@ -2097,7 +2071,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; entry.dram_speed_mts = 0; - get_optimal_ntuple(); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, ); DC_FP_END(); @@ -2109,7 +2082,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = max_fclk_mhz; entry.dram_speed_mts = 0; - get_optimal_ntuple(); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, ); DC_FP_END(); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 66102db87265..7c60a954737b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -353,6 +353,29 @@ static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st * return
[PATCH 19/31] drm/amd/display: Move populate phantom function to dml
The function dcn32_helper_populate_phantom_dlg_params uses FPU operations. For this reason, this commit moves this function to the dcn32_fpu file, and we ensure that we only invoke it under the kernel_fpu protection. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 7 +++ .../display/dc/dcn32/dcn32_resource_helpers.c | 44 --- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 43 ++ .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 5 +++ 4 files changed, 55 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 39214a0dcdf2..411ce13847c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -88,6 +88,7 @@ #include "dml/dcn30/display_mode_vba_30.h" #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" +#include "dml/dcn32/dcn32_fpu.h" #define DCN_BASE__INST0_SEG1 0x00C0 #define DCN_BASE__INST0_SEG2 0x34C0 @@ -312,6 +313,7 @@ enum dcn32_clk_src_array_id { .reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ regBIF_BX0_ ## reg_name +#undef CTX #define CTX ctx #define REG(reg_name) \ (DCN_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) @@ -2667,6 +2669,11 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, memset(merge, 0, MAX_PIPES * sizeof(bool)); *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); + // Most populate phantom DLG params before programming hardware / timing for phantom pipe + DC_FP_START(); + dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt); + DC_FP_END(); + // Note: We can't apply the phantom pipes to hardware at this time. We have to wait // until driver has acquired the DMCUB lock to do it safely. 
} diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index a6ef1dba01fe..633d3ee18cfa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -28,50 +28,6 @@ #include "dcn20/dcn20_resource.h" #include "dml/dcn32/display_mode_vba_util_32.h" -/** - * - * dcn32_helper_populate_phantom_dlg_params: Get DLG params for phantom pipes and populate pipe_ctx - * with those params. - * - * This function must be called AFTER the phantom pipes are added to context and run through DML - * (so that the DLG params for the phantom pipes can be populated), and BEFORE we program the - * timing for the phantom pipes. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] pipes: DML pipe params array - * @param [in] pipe_cnt: DML pipe count - * - * @return: void - * - * - */ -void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt) -{ - uint32_t i, pipe_idx; - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = >res_ctx.pipe_ctx[i]; - if (!pipe->stream) - continue; - - if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(>bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(>bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(>bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(>bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipe->pipe_dlg_param = pipes[pipe_idx].pipe.dest; - } - pipe_idx++; - } -} - /** * * dcn32_helper_calculate_num_ways_for_subvp: Calculate number of ways needed for SubVP diff --git
[PATCH 12/31] drm/amd/display: Don't set dram clock change requirement for SubVP
From: Alvin Lee [Description] In general cases we want to keep the dram clock change requirement (we prefer configs that support MCLK switch). Only override to false for SubVP. Acked-by: Alan Liu Signed-off-by: Alvin Lee --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index f913daabcca5..92d87745d933 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2984,6 +2984,7 @@ int dcn32_populate_dml_pipes_from_context( int i, pipe_cnt; struct resource_context *res_ctx = >res_ctx; struct pipe_ctx *pipe; + bool subvp_in_use = false; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); @@ -3006,6 +3007,7 @@ int dcn32_populate_dml_pipes_from_context( switch (pipe->stream->mall_stream_config.type) { case SUBVP_MAIN: pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; + subvp_in_use = true; break; case SUBVP_PHANTOM: pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe; @@ -3076,6 +3078,14 @@ int dcn32_populate_dml_pipes_from_context( dcn32_update_det_override_for_mpo(dc, context, pipes); + // In general cases we want to keep the dram clock change requirement + // (prefer configs that support MCLK switch). Only override to false + // for SubVP + if (subvp_in_use) + context->bw_ctx.dml.soc.dram_clock_change_requirement_final = false; + else + context->bw_ctx.dml.soc.dram_clock_change_requirement_final = true; + return pipe_cnt; } -- 2.37.0
RE: [PATCH v3] drm/amdgpu: add comments to HW_IP_VCN_ENC
[AMD Official Use Only - General] Reviewed-by: Leo Liu -Original Message- From: Dong, Ruijing Sent: July 15, 2022 12:09 PM To: Koenig, Christian ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Liu, Leo ; Dong, Ruijing Subject: [PATCH v3] drm/amdgpu: add comments to HW_IP_VCN_ENC From VCN4, HW_IP_VCN_ENC will be used as a unified queue and supports both encoding and decoding jobs; HW_IP_VCN_DEC is retired from VCN4. link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits Signed-off-by: Ruijing Dong --- include/uapi/drm/amdgpu_drm.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..29e4a1ece2ce 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -559,7 +559,14 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_VCE 4 #define AMDGPU_HW_IP_UVD_ENC 5 #define AMDGPU_HW_IP_VCN_DEC 6 + +/** + * From VCN4, AMDGPU_HW_IP_VCN_ENC will be used as unified queue + * and support both encoding and decoding jobs, AMDGPU_HW_IP_VCN_DEC + * is retired from VCN4. + */ #define AMDGPU_HW_IP_VCN_ENC 7 + #define AMDGPU_HW_IP_VCN_JPEG 8 #define AMDGPU_HW_IP_NUM 9 -- 2.25.1
[PATCH 07/31] drm/amd/display: Add tags for indicating mst progress status
From: Wayne Lin [Why & How] In order to leverage igt tool to maintain mst feature, expose new debugfs entry "mst_progress_status". In our dm flow, record down the result of each phase of mst and user can examine the mst result by checking whether each phase get completed successfully. Reviewed-by: Hersen Wu Acked-by: Alan Liu Signed-off-by: Wayne Lin --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 20 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 46 ++- .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 18 +++- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 13 ++ 4 files changed, 94 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 33d66d4897dc..cdfd32c4128c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -571,6 +571,14 @@ struct dsc_preferred_settings { bool dsc_force_disable_passthrough; }; +enum mst_progress_status { + MST_STATUS_DEFAULT = 0, + MST_PROBE = BIT(0), + MST_REMOTE_EDID = BIT(1), + MST_ALLOCATE_NEW_PAYLOAD = BIT(2), + MST_CLEAR_ALLOCATED_PAYLOAD = BIT(3), +}; + struct amdgpu_dm_connector { struct drm_connector base; @@ -623,8 +631,20 @@ struct amdgpu_dm_connector { struct drm_display_mode freesync_vid_base; int psr_skip_count; + + /* Record progress status of mst*/ + uint8_t mst_status; }; +static inline void amdgpu_dm_set_mst_status(uint8_t *status, + uint8_t flags, bool set) +{ + if (set) + *status |= flags; + else + *status &= ~flags; +} + #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) extern const struct amdgpu_ip_block_version dm_ip_block; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 991e58a3a78c..cd8db385eda0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ 
-49,6 +49,13 @@ struct dmub_debugfs_trace_entry { uint32_t param1; }; +static const char *const mst_progress_status[] = { + "probe", + "remote_edid", + "allocate_new_payload", + "clear_allocated_payload", +}; + static inline const char *yesno(bool v) { return v ? "yes" : "no"; @@ -2607,6 +2614,41 @@ static int dp_is_mst_connector_show(struct seq_file *m, void *unused) return 0; } +/* + * function description: Read out the mst progress status + * + * This function helps to determine the mst progress status of + * a mst connector. + * + * Access it with the following command: + * + * cat /sys/kernel/debug/dri/0/DP-X/mst_progress_status + * + */ +static int dp_mst_progress_status_show(struct seq_file *m, void *unused) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_device *adev = drm_to_adev(connector->dev); + int i; + + mutex_lock(>hpd_lock); + mutex_lock(>dm.dc_lock); + + if (aconnector->mst_status == MST_STATUS_DEFAULT) { + seq_puts(m, "disabled\n"); + } else { + for (i = 0; i < sizeof(mst_progress_status)/sizeof(char *); i++) + seq_printf(m, "%s:%s\n", + mst_progress_status[i], + aconnector->mst_status & BIT(i) ? 
"done" : "not_done"); + } + + mutex_unlock(>dm.dc_lock); + mutex_unlock(>hpd_lock); + + return 0; +} DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); @@ -2619,6 +2661,7 @@ DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability); DEFINE_SHOW_ATTRIBUTE(internal_display); DEFINE_SHOW_ATTRIBUTE(psr_capability); DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); +DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2762,7 +2805,8 @@ static const struct { {"dp_dsc_fec_support", _dsc_fec_support_fops}, {"max_bpc", _max_bpc_debugfs_fops}, {"dsc_disable_passthrough", _dsc_disable_passthrough_debugfs_fops}, - {"is_mst_connector", _is_mst_connector_fops} + {"is_mst_connector", _is_mst_connector_fops}, + {"mst_progress_status", _mst_progress_status_fops} }; #ifdef CONFIG_DRM_AMD_DC_HDCP diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 137645d40b72..d66e3cd64ebd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -312,6 +312,8 @@ bool
[PATCH 18/31] drm/amd/display: Drop FPU flags from dcn32_clk_mgr
We are working to isolate FPU operations inside the DML folder, and the file dcn32_clk_mgr has some of these operations. This commit moves the FPU operations out of the clock manager and creates the dcn32_fpu file to aggregate those operations. Note that there is no functional change here, just moving code from one part to another. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira --- .../gpu/drm/amd/display/dc/clk_mgr/Makefile | 25 .../display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 81 + drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 + .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 113 ++ .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 34 ++ 5 files changed, 153 insertions(+), 102 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index 053084121db2..a48453612d10 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -188,31 +188,6 @@ CLK_MGR_DCN32 = dcn32_clk_mgr.o dcn32_clk_mgr_smu_msg.o AMD_DAL_CLK_MGR_DCN32 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn32/,$(CLK_MGR_DCN32)) -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -msse2 -endif -endif - AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32) endif diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 08f07f31fe73..10726571007d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -42,6 +42,7 @@ #include "dcn/dcn_3_2_0_sh_mask.h" #include "dcn32/dcn32_clk_mgr.h" +#include "dml/dcn32/dcn32_fpu.h" #define DCN_BASE__INST0_SEG1 0x00C0 @@ -146,83 +147,9 @@ static void dcn32_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e cl static void dcn32_build_wm_range_table(struct clk_mgr_internal *clk_mgr) { - /* defaults */ - double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us; - double fclk_change_latency_us = clk_mgr->base.ctx->dc->dml.soc.fclk_change_latency_us; - double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us; - double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us; - /* For min clocks use as reported by PM FW and report those as min */ - uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz; - uint16_t min_dcfclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; - uint16_t setb_min_uclk_mhz = min_uclk_mhz; - uint16_t dcfclk_mhz_for_the_second_state = clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz; - - /* For Set B ranges use min clocks state 2 when available, and report those to PM FW */ - if (dcfclk_mhz_for_the_second_state) - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_mhz_for_the_second_state; - else - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = 
clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; - - if (clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz) - setb_min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz; - - /* Set A - Normal - default values */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; -
[PATCH 31/31] drm/amd/display: 3.2.195
From: Aric Cyr This version brings along following fixes: - Isolate FPU operation for DCN32/321 under the DML folder - Create a specific file for CRTC and plane based on amdgpu_dm - Fix DSC issues - Update DML logic Acked-by: Alan Liu Signed-off-by: Aric Cyr --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index be41f9fcf1dd..d05bbe193bfa 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.194" +#define DC_VER "3.2.195" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- 2.37.0
[PATCH 00/31] DC Patches July 15, 2022
This DC patchset brings improvements in multiple areas. In summary, we highlight: - Isolate FPU operation for DCN32/321 under the DML folder - Create a specific file for CRTC and plane based on amdgpu_dm - Fix DSC issues - Updates to DML logic Cc: Daniel Wheeler Thanks Siqueira Alvin Lee (2): drm/amd/display: Update in dml drm/amd/display: Don't set dram clock change requirement for SubVP Aric Cyr (1): drm/amd/display: 3.2.195 Chris Park (1): drm/amd/display: Update Cursor Attribute MALL cache Jun Lei (2): drm/amd/display: Remove unused variable drm/amd/display: Update DML logic for unbounded req handling Rodrigo Siqueira (16): drm/amd/display: Create a file dedicated to planes drm/amd/display: Create a file dedicated for CRTC drm/amd/display: Fix hard hang if DSC is disabled drm/amd/display: Drop FPU flags from dcn32_clk_mgr drm/amd/display: Move populate phantom function to dml drm/amd/display: Move predict pipe to dml fpu folder drm/amd/display: Move insert entry table to the FPU code drm/amd/display: Move phantom stream to FPU code drm/amd/display: Move SubVP functions to dcn32_fpu drm/amd/display: Move wm and dlg calculation to FPU code drm/amd/display: Move dlg params calculation drm/amd/display: Move ntuple to insert entry drm/amd/display: Move bounding box to FPU folder drm/amd/display: Drop FPU flags from dcn32 Makefile drm/amd/display: Create dcn321_fpu file drm/amd/display: Drop FPU code from dcn321 resource Taimur Hassan (2): drm/amd/display: Update de-tile override to anticipate pipe splitting drm/amd/display: Loop through all pipes for DET allocation Vladimir Stempen (1): drm/amd/display: Disable GPUVM in IP resource configuration Wayne Lin (5): drm/amd/display: Support vertical interrupt 0 for all dcn ASIC drm/amd/display: Expose function reset_cur_dp_mst_topology drm/amd/display: fix trigger_hotplug to support mst case drm/amd/display: Add is_mst_connector debugfs entry drm/amd/display: Add tags for indicating mst progress status Wenjing Liu (1): 
drm/amd/display: remove number of DSC slices override in DML .../gpu/drm/amd/display/amdgpu_dm/Makefile|8 +- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2557 +++-- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 20 + .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c| 463 +++ .../amd/display/amdgpu_dm/amdgpu_dm_crtc.h| 51 + .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 114 +- .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 18 +- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 13 + .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 1637 +++ .../amd/display/amdgpu_dm/amdgpu_dm_plane.h | 73 + .../gpu/drm/amd/display/dc/clk_mgr/Makefile | 25 - .../display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 81 +- drivers/gpu/drm/amd/display/dc/core/dc_link.c |2 +- drivers/gpu/drm/amd/display/dc/dc.h |3 +- drivers/gpu/drm/amd/display/dc/dc_link.h |3 + drivers/gpu/drm/amd/display/dc/dcn32/Makefile | 28 - .../gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c | 40 +- .../gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h |3 + .../drm/amd/display/dc/dcn32/dcn32_resource.c | 2039 + .../drm/amd/display/dc/dcn32/dcn32_resource.h | 15 +- .../display/dc/dcn32/dcn32_resource_helpers.c | 130 +- .../gpu/drm/amd/display/dc/dcn321/Makefile| 25 - .../amd/display/dc/dcn321/dcn321_resource.c | 649 + .../amd/display/dc/dcn321/dcn321_resource.h |3 + drivers/gpu/drm/amd/display/dc/dml/Makefile |4 + .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2244 +++ .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 74 + .../dc/dml/dcn32/display_mode_vba_32.c| 64 +- .../dc/dml/dcn32/display_mode_vba_util_32.c | 70 +- .../dc/dml/dcn32/display_mode_vba_util_32.h | 10 +- .../amd/display/dc/dml/dcn321/dcn321_fpu.c| 684 + .../amd/display/dc/dml/dcn321/dcn321_fpu.h| 38 + .../amd/display/dc/dml/display_mode_structs.h |1 + .../drm/amd/display/dc/dml/display_mode_vba.c |2 + .../display/dc/irq/dcn30/irq_service_dcn30.c | 14 +- .../dc/irq/dcn303/irq_service_dcn303.c| 19 + 36 files changed, 6035 insertions(+), 5189 deletions(-) create mode 100644 
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h -- 2.37.0
[PATCH 06/31] drm/amd/display: Add is_mst_connector debugfs entry
From: Wayne Lin [Why & How] Add "is_mst_connector" debugfs entry to help distinguish whether a connector is in a mst topology or not. Access it with the following command: cat /sys/kernel/debug/dri/0/DP-X/is_mst_connector Result: - "root" stands for the root connector of the topology - "branch" stands for branch device of the topology - "end" stands for leaf node connector of the topology - "no" stands for the connector is not a device of a mst topology Reviewed-by: Hersen Wu Acked-by: Alan Liu Signed-off-by: Wayne Lin --- .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 53 +++ 1 file changed, 53 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index b764198eca5c..991e58a3a78c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -2557,6 +2557,57 @@ static int target_backlight_show(struct seq_file *m, void *unused) return 0; } +/* + * function description: Determine if the connector is mst connector + * + * This function helps to determine whether a connector is a mst connector. 
+ * - "root" stands for the root connector of the topology + * - "branch" stands for branch device of the topology + * - "end" stands for leaf node connector of the topology + * - "no" stands for the connector is not a device of a mst topology + * Access it with the following command: + * + * cat /sys/kernel/debug/dri/0/DP-X/is_mst_connector + * + */ +static int dp_is_mst_connector_show(struct seq_file *m, void *unused) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct drm_dp_mst_topology_mgr *mgr = NULL; + struct drm_dp_mst_port *port = NULL; + char *role = NULL; + + mutex_lock(>hpd_lock); + + if (aconnector->mst_mgr.mst_state) { + role = "root"; + } else if (aconnector->mst_port && + aconnector->mst_port->mst_mgr.mst_state) { + + role = "end"; + + mgr = >mst_port->mst_mgr; + port = aconnector->port; + + drm_modeset_lock(>base.lock, NULL); + if (port->pdt == DP_PEER_DEVICE_MST_BRANCHING && + port->mcs) + role = "branch"; + drm_modeset_unlock(>base.lock); + + } else { + role = "no"; + } + + seq_printf(m, "%s\n", role); + + mutex_unlock(>hpd_lock); + + return 0; +} + + DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer); @@ -2567,6 +2618,7 @@ DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability); #endif DEFINE_SHOW_ATTRIBUTE(internal_display); DEFINE_SHOW_ATTRIBUTE(psr_capability); +DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2710,6 +2762,7 @@ static const struct { {"dp_dsc_fec_support", _dsc_fec_support_fops}, {"max_bpc", _max_bpc_debugfs_fops}, {"dsc_disable_passthrough", _dsc_disable_passthrough_debugfs_fops}, + {"is_mst_connector", _is_mst_connector_fops} }; #ifdef CONFIG_DRM_AMD_DC_HDCP -- 2.37.0
[PATCH 24/31] drm/amd/display: Move wm and dlg calculation to FPU code
Move dcn32_calculate_wm_and_dlg from dcn32 resources to the FPU code. Additionally, this commit adds an interface to it. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 196 +- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 185 + .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 5 + 3 files changed, 195 insertions(+), 191 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index d508909ff7a9..45768eff9315 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2281,187 +2281,6 @@ int dcn32_populate_dml_pipes_from_context( return pipe_cnt; } -void dcn32_calculate_wm_and_dlg_fp( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - int i, pipe_idx, vlevel_temp = 0; - double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz; - double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != - dm_dram_clock_change_unsupported; - - // Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK - if (!pstate_en && dcn32_subvp_in_use(dc, context)) { - context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; - pstate_en = true; - } - - /* Set B: -* For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present, -* otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark -* calculations to cover bootup clocks. 
-* DCFCLK: soc.clock_limits[2] when available -* UCLK: soc.clock_limits[2] when available -*/ - if (dcn3_2_soc.num_states > 2) { - vlevel_temp = 2; - dcfclk = dcn3_2_soc.clock_limits[2].dcfclk_mhz; - } else - dcfclk = 615; //DCFCLK Vmin_lv - - pipes[0].clks_cfg.voltage = vlevel_temp; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(>bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(>bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(>bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(>bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(>bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(>bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(>bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = 
get_urgent_latency(>bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(>bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_usr_retraining_watermark(>bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Set D: -* All clocks min. -* DCFCLK: Min, as reported by PM FW when available -* UCLK : Min, as reported by PM FW when available -* sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later,
[PATCH 30/31] drm/amd/display: Drop FPU code from dcn321 resource
This commit fully moves the missing FPU operations from dcn321 resource to dcn321 fpu. It also removes those FPU flags from the Makefile. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira --- .../gpu/drm/amd/display/dc/dcn321/Makefile| 25 - .../amd/display/dc/dcn321/dcn321_resource.c | 452 +- .../amd/display/dc/dml/dcn321/dcn321_fpu.c| 446 + .../amd/display/dc/dml/dcn321/dcn321_fpu.h| 2 + 4 files changed, 450 insertions(+), 475 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile index e554fd6c16f2..0a199c83bb5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile @@ -12,31 +12,6 @@ DCN321 = dcn321_resource.o dcn321_dio_link_encoder.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o += -msse2 -endif -endif - AMD_DAL_DCN321 = $(addprefix $(AMDDALPATH)/dc/dcn321/,$(DCN321)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN321) diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 6619bcb30de7..9ac0fcf79bed 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -1570,459 +1570,11 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; - -static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, - unsigned int *optimal_dcfclk, - unsigned int *optimal_fclk) -{ - double bw_from_dram, bw_from_dram1, bw_from_dram2; - - bw_from_dram1 = uclk_mts * dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_dram_bw_use_normal_percent / 100); - bw_from_dram2 = uclk_mts * dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100); - - bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? 
bw_from_dram1 : bw_from_dram2; - - if (optimal_fclk) - *optimal_fclk = bw_from_dram / - (dcn3_21_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); - - if (optimal_dcfclk) - *optimal_dcfclk = bw_from_dram / - (dcn3_21_soc.return_bus_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); -} - -static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - unsigned int index) -{ - int i; - - if (*num_entries == 0) - return; - - for (i = index; i < *num_entries - 1; i++) { - table[i] = table[i + 1]; - } - memset([--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); -} - -static int build_synthetic_soc_states(struct clk_bw_params *bw_params, - struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - int i, j; - struct _vcs_dpi_voltage_scaling_st entry = {0}; - - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, - max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; - - unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; - - static const unsigned int num_dcfclk_stas = 5; - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; - - unsigned int num_uclk_dpms = 0; - unsigned int num_fclk_dpms = 0; - unsigned int num_dcfclk_dpms = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) - max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) - max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; - if
[PATCH v9 09/14] lib: test_hmm add module param for zone device type
In order to configure the device coherent type in test_hmm, two module parameters should be passed, spm_addr_dev0 and spm_addr_dev1, which correspond to the SP (special purpose) memory start address of each of the two devices. If no parameters are passed, the private device type is configured. Signed-off-by: Alex Sierra Acked-by: Felix Kuehling Reviewed-by: Alistair Poppple Signed-off-by: Christoph Hellwig --- lib/test_hmm.c | 73 - lib/test_hmm_uapi.h | 1 + 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 915ef6b5b0d4..afb30af9f3ff 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -37,6 +37,16 @@ #define DEVMEM_CHUNK_SIZE (256 * 1024 * 1024U) #define DEVMEM_CHUNKS_RESERVE 16 +static unsigned long spm_addr_dev0; +module_param(spm_addr_dev0, long, 0644); +MODULE_PARM_DESC(spm_addr_dev0, + "Specify start address for SPM (special purpose memory) used for device 0. By setting this Coherent device type will be used. Make sure spm_addr_dev1 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE."); + +static unsigned long spm_addr_dev1; +module_param(spm_addr_dev1, long, 0644); +MODULE_PARM_DESC(spm_addr_dev1, + "Specify start address for SPM (special purpose memory) used for device 1. By setting this Coherent device type will be used. Make sure spm_addr_dev0 is set too. 
Minimum SPM size should be DEVMEM_CHUNK_SIZE."); + static const struct dev_pagemap_ops dmirror_devmem_ops; static const struct mmu_interval_notifier_ops dmirror_min_ops; static dev_t dmirror_dev; @@ -455,28 +465,44 @@ static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd) return ret; } -static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, +static int dmirror_allocate_chunk(struct dmirror_device *mdevice, struct page **ppage) { struct dmirror_chunk *devmem; - struct resource *res; + struct resource *res = NULL; unsigned long pfn; unsigned long pfn_first; unsigned long pfn_last; void *ptr; + int ret = -ENOMEM; devmem = kzalloc(sizeof(*devmem), GFP_KERNEL); if (!devmem) - return false; + return ret; - res = request_free_mem_region(_resource, DEVMEM_CHUNK_SIZE, - "hmm_dmirror"); - if (IS_ERR(res)) + switch (mdevice->zone_device_type) { + case HMM_DMIRROR_MEMORY_DEVICE_PRIVATE: + res = request_free_mem_region(_resource, DEVMEM_CHUNK_SIZE, + "hmm_dmirror"); + if (IS_ERR_OR_NULL(res)) + goto err_devmem; + devmem->pagemap.range.start = res->start; + devmem->pagemap.range.end = res->end; + devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; + break; + case HMM_DMIRROR_MEMORY_DEVICE_COHERENT: + devmem->pagemap.range.start = (MINOR(mdevice->cdevice.dev) - 2) ? 
+ spm_addr_dev0 : + spm_addr_dev1; + devmem->pagemap.range.end = devmem->pagemap.range.start + + DEVMEM_CHUNK_SIZE - 1; + devmem->pagemap.type = MEMORY_DEVICE_COHERENT; + break; + default: + ret = -EINVAL; goto err_devmem; + } - devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; - devmem->pagemap.range.start = res->start; - devmem->pagemap.range.end = res->end; devmem->pagemap.nr_range = 1; devmem->pagemap.ops = _devmem_ops; devmem->pagemap.owner = mdevice; @@ -497,10 +523,14 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, mdevice->devmem_capacity = new_capacity; mdevice->devmem_chunks = new_chunks; } - ptr = memremap_pages(>pagemap, numa_node_id()); - if (IS_ERR(ptr)) + if (IS_ERR_OR_NULL(ptr)) { + if (ptr) + ret = PTR_ERR(ptr); + else + ret = -EFAULT; goto err_release; + } devmem->mdevice = mdevice; pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT; @@ -529,15 +559,17 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, } spin_unlock(>lock); - return true; + return 0; err_release: mutex_unlock(>devmem_lock); - release_mem_region(devmem->pagemap.range.start, range_len(>pagemap.range)); + if (res && devmem->pagemap.type == MEMORY_DEVICE_PRIVATE) + release_mem_region(devmem->pagemap.range.start, + range_len(>pagemap.range)); err_devmem: kfree(devmem); - return false; +
[PATCH 23/31] drm/amd/display: Move SubVP functions to dcn32_fpu
It looks like much of the code related to SubVP uses FPU operations, and we have many static functions that are part of this feature. This commit is a little bit large, but it only moves SubVP operations from one file to another, and I had to do it in a single change due to dependencies between functions. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 597 +- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 2 + .../display/dc/dcn32/dcn32_resource_helpers.c | 11 + .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 597 ++ .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 8 + 5 files changed, 620 insertions(+), 595 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index a1bf24ad0787..d508909ff7a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1625,171 +1625,6 @@ bool dcn32_release_post_bldn_3dlut( return ret; } -/** - - * dcn32_get_num_free_pipes: Calculate number of free pipes - * - * This function assumes that a "used" pipe is a pipe that has - * both a stream and a plane assigned to it. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * - * @return: Number of free pipes available in the context - * - - */ -static unsigned int dcn32_get_num_free_pipes(struct dc *dc, struct dc_state *context) -{ - unsigned int i; - unsigned int free_pipes = 0; - unsigned int num_pipes = 0; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = >res_ctx.pipe_ctx[i]; - - if (pipe->stream && !pipe->top_pipe) { - while (pipe) { - num_pipes++; - pipe = pipe->bottom_pipe; - } - } - } - - free_pipes = dc->res_pool->pipe_count - num_pipes; - return free_pipes; -} - -/** - - * dcn32_assign_subvp_pipe: Function to decide which pipe will use Sub-VP. 
- * - * We enter this function if we are Sub-VP capable (i.e. enough pipes available) - * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if - * we are forcing SubVP P-State switching on the current config. - * - * The number of pipes used for the chosen surface must be less than or equal to the - * number of free pipes available. - * - * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK). - * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own - * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't - * support MCLK switching naturally [i.e. ACTIVE or VBLANK]). - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [out] index: dc pipe index for the pipe chosen to have phantom pipes assigned - * - * @return: True if a valid pipe assignment was found for Sub-VP. Otherwise false. - * - - */ - -static bool dcn32_assign_subvp_pipe(struct dc *dc, - struct dc_state *context, - unsigned int *index) -{ - unsigned int i, pipe_idx; - unsigned int max_frame_time = 0; - bool valid_assignment_found = false; - unsigned int free_pipes = dcn32_get_num_free_pipes(dc, context); - bool current_assignment_freesync = false; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = >res_ctx.pipe_ctx[i]; - unsigned int num_pipes = 0; - - if (!pipe->stream) - continue; - - if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { - while (pipe) { - num_pipes++; - pipe = pipe->bottom_pipe; - } - - pipe = >res_ctx.pipe_ctx[i]; - if (num_pipes <= free_pipes) { - struct dc_stream_state *stream = pipe->stream; - unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total / - (double)(stream->timing.pix_clk_100hz * 100)) * 100; - if (frame_us > max_frame_time &&
[PATCH 27/31] drm/amd/display: Move bounding box to FPU folder
The final part of the DCN32 code that uses FPU is the bounding box code, and this commit moves it to dcn32_fpu. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 460 + .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 470 ++ .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 2 + 3 files changed, 474 insertions(+), 458 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index adcc83e6ea55..b2e7d59e743f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1923,29 +1923,6 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; - -static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, - unsigned int *optimal_dcfclk, - unsigned int *optimal_fclk) -{ - double bw_from_dram, bw_from_dram1, bw_from_dram2; - - bw_from_dram1 = uclk_mts * dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_dram_bw_use_normal_percent / 100); - bw_from_dram2 = uclk_mts * dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100); - - bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? 
bw_from_dram1 : bw_from_dram2; - - if (optimal_fclk) - *optimal_fclk = bw_from_dram / - (dcn3_2_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); - - if (optimal_dcfclk) - *optimal_dcfclk = bw_from_dram / - (dcn3_2_soc.return_bus_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); -} - void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, @@ -1956,444 +1933,11 @@ void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, DC_FP_END(); } -static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - unsigned int index) -{ - int i; - - if (*num_entries == 0) - return; - - for (i = index; i < *num_entries - 1; i++) { - table[i] = table[i + 1]; - } - memset([--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); -} - -static int build_synthetic_soc_states(struct clk_bw_params *bw_params, - struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - int i, j; - struct _vcs_dpi_voltage_scaling_st entry = {0}; - - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, - max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; - - unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; - - static const unsigned int num_dcfclk_stas = 5; - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; - - unsigned int num_uclk_dpms = 0; - unsigned int num_fclk_dpms = 0; - unsigned int num_dcfclk_dpms = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) - max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - if 
(bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) - max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) - max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - - if (bw_params->clk_table.entries[i].memclk_mhz > 0) - num_uclk_dpms++; - if (bw_params->clk_table.entries[i].fclk_mhz > 0) - num_fclk_dpms++; - if
[PATCH 29/31] drm/amd/display: Create dcn321_fpu file
The file dcn321_resource has a lot of FPU operations that should be inside the dml folder. This commit introduces the dcn321_fpu file and moves some of the FPU operation functions to this new file. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira --- .../amd/display/dc/dcn321/dcn321_resource.c | 225 ++--- .../amd/display/dc/dcn321/dcn321_resource.h | 3 + drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 + .../amd/display/dc/dml/dcn321/dcn321_fpu.c| 238 ++ .../amd/display/dc/dml/dcn321/dcn321_fpu.h| 36 +++ 5 files changed, 296 insertions(+), 208 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index d218c6dd71aa..6619bcb30de7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -37,6 +37,8 @@ #include "dcn20/dcn20_resource.h" #include "dcn30/dcn30_resource.h" +#include "dml/dcn321/dcn321_fpu.h" + #include "dcn10/dcn10_ipp.h" #include "dcn30/dcn30_hubbub.h" #include "dcn31/dcn31_hubbub.h" @@ -120,134 +122,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x0012, 0x00C0, 0x34C #define fixed16_to_double(x) (((double)x) / ((double) (1 << 16))) #define fixed16_to_double_to_cpu(x) fixed16_to_double(le32_to_cpu(x)) -#define DCN3_2_DEFAULT_DET_SIZE 256 - -struct _vcs_dpi_ip_params_st dcn3_21_ip = { - .gpuvm_enable = 0, - .gpuvm_max_page_table_levels = 4, - .hostvm_enable = 0, - .rob_buffer_size_kbytes = 128, - .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1280, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 22, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels 
= 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .alpha_pixel_chunk_size_kbytes = 4, // not appearing in spreadsheet, match c code from hw team - .min_pixel_chunk_size_bytes = 1024, - .dcc_meta_buffer_size_bytes = 6272, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 4, - .maximum_dsc_bits_per_component = 12, - .maximum_pixels_per_line_per_dsc_unit = 6016, - .dsc422_native_support = true, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 57, - .line_buffer_size_bits = 1171920, //DPP doc, DCN3_2_DisplayMode_73.xlsm still shows as 986880 bits with 48 bpp - .max_line_buffer_lines = 32, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 47, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 28, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 125, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, - .max_num_dp2p0_outputs = 2, - .max_num_dp2p0_streams = 4, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { - .clock_limits = { - { - 
.state = 0, - .dcfclk_mhz = 1564.0, - .fabricclk_mhz = 400.0, - .dispclk_mhz = 2150.0, - .dppclk_mhz = 2150.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .phyclk_d32_mhz = 625.0, - .socclk_mhz = 1200.0, - .dscclk_mhz = 716.667, - .dram_speed_mts =
[PATCH 20/31] drm/amd/display: Move predict pipe to dml fpu folder
The function dcn32_predict_pipe_split uses FPU operations. This commit moves this function to the dcn32_fpu file, and we ensure that we only invoke it under the kernel_fpu protection. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 2 + .../drm/amd/display/dc/dcn32/dcn32_resource.h | 2 - .../display/dc/dcn32/dcn32_resource_helpers.c | 33 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 39 ++- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 4 ++ 5 files changed, 44 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 411ce13847c2..a56d87140eba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -3054,7 +3054,9 @@ int dcn32_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; } + DC_FP_START(); is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, pipes[i].pipe, i); + DC_FP_END(); pipe_cnt++; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 10254ab7e9d9..901aa7e13bd2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -100,8 +100,6 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, bool dcn32_subvp_in_use(struct dc *dc, struct dc_state *context); -bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index); - void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, bool *is_pipe_split_expected, int pipe_cnt); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 633d3ee18cfa..796e3d966a76 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -153,39 +153,6 @@ bool dcn32_subvp_in_use(struct dc *dc, return false; } -bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index) -{ - double pscl_throughput, pscl_throughput_chroma, dpp_clk_single_dpp, clock, - clk_frequency = 0.0, vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz; - - dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe.scale_ratio_depth.hscl_ratio, - pipe.scale_ratio_depth.hscl_ratio_c, - pipe.scale_ratio_depth.vscl_ratio, - pipe.scale_ratio_depth.vscl_ratio_c, - context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk, - context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk, - pipe.dest.pixel_rate_mhz, - pipe.src.source_format, - pipe.scale_taps.htaps, - pipe.scale_taps.htaps_c, - pipe.scale_taps.vtaps, - pipe.scale_taps.vtaps_c, - - /* Output */ - _throughput, _throughput_chroma, - _clk_single_dpp); - - clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100); - - if (clock > 0) - clk_frequency = vco_speed * 4.0 / ((int) (vco_speed * 4.0)); - - if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[index].dppclk_mhz) - return true; - else - return false; -} - void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, bool *is_pipe_split_expected, int pipe_cnt) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 253ff9659b0d..1b9e34f1232a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -24,7 +24,7 @@ * */ #include "dcn32_fpu.h" - +#include "display_mode_vba_util_32.h" // We need this includes for WATERMARKS_* defines #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" @@ -154,3 +154,40 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc 
*dc, } } +bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index) +{ + double pscl_throughput; + double pscl_throughput_chroma; + double dpp_clk_single_dpp, clock; + double clk_frequency = 0.0; + double vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz; +
[PATCH v9 10/14] lib: add support for device coherent type in test_hmm
Device Coherent type uses device memory that is coherently accessible by the CPU. This could be shown as an SP (special purpose) memory range in the BIOS-e820 memory enumeration. If no SP memory is supported in the system, this could be faked by setting CONFIG_EFI_FAKE_MEMMAP. Currently, test_hmm only supports two different SP ranges of at least 256MB size. This could be specified in the kernel parameter variable efi_fake_mem. Ex. Two SP ranges of 1GB starting at 0x100000000 & 0x140000000 physical address. Ex. efi_fake_mem=1G@0x100000000:0x40000,1G@0x140000000:0x40000 Private and coherent device mirror instances can be created in the same probe. This is done by passing the module parameters spm_addr_dev0 & spm_addr_dev1. In this case, it will create four instances of device_mirror. The first two correspond to private device type, the last two to coherent type. Then, they can be easily accessed from user space through /dev/hmm_mirror<num_device>. Usually num_device 0 and 1 are for private, and 2 and 3 for coherent types. If no module parameters are passed, only two instances of private type device_mirror will be created. Signed-off-by: Alex Sierra Acked-by: Felix Kuehling Reviewed-by: Alistair Poppple --- lib/test_hmm.c | 253 +--- lib/test_hmm_uapi.h | 4 + 2 files changed, 196 insertions(+), 61 deletions(-) diff --git a/lib/test_hmm.c b/lib/test_hmm.c index afb30af9f3ff..7930853e7fc5 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -32,11 +32,22 @@ #include "test_hmm_uapi.h" -#define DMIRROR_NDEVICES 2 +#define DMIRROR_NDEVICES 4 #define DMIRROR_RANGE_FAULT_TIMEOUT1000 #define DEVMEM_CHUNK_SIZE (256 * 1024 * 1024U) #define DEVMEM_CHUNKS_RESERVE 16 +/* + * For device_private pages, dpage is just a dummy struct page + * representing a piece of device memory. dmirror_devmem_alloc_page + * allocates a real system memory page as backing storage to fake a + * real device. zone_device_data points to that backing page. 
But + * for device_coherent memory, the struct page represents real + * physical CPU-accessible memory that we can use directly. + */ +#define BACKING_PAGE(page) (is_device_private_page((page)) ? \ + (page)->zone_device_data : (page)) + static unsigned long spm_addr_dev0; module_param(spm_addr_dev0, long, 0644); MODULE_PARM_DESC(spm_addr_dev0, @@ -125,6 +136,21 @@ static int dmirror_bounce_init(struct dmirror_bounce *bounce, return 0; } +static bool dmirror_is_private_zone(struct dmirror_device *mdevice) +{ + return (mdevice->zone_device_type == + HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false; +} + +static enum migrate_vma_direction +dmirror_select_device(struct dmirror *dmirror) +{ + return (dmirror->mdevice->zone_device_type == + HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? + MIGRATE_VMA_SELECT_DEVICE_PRIVATE : + MIGRATE_VMA_SELECT_DEVICE_COHERENT; +} + static void dmirror_bounce_fini(struct dmirror_bounce *bounce) { vfree(bounce->ptr); @@ -575,16 +601,19 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice, static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice) { struct page *dpage = NULL; - struct page *rpage; + struct page *rpage = NULL; /* -* This is a fake device so we alloc real system memory to store -* our device memory. +* For ZONE_DEVICE private type, this is a fake device so we allocate +* real system memory to store our device memory. +* For ZONE_DEVICE coherent type we use the actual dpage to store the +* data and ignore rpage. 
*/ - rpage = alloc_page(GFP_HIGHUSER); - if (!rpage) - return NULL; - + if (dmirror_is_private_zone(mdevice)) { + rpage = alloc_page(GFP_HIGHUSER); + if (!rpage) + return NULL; + } spin_lock(>lock); if (mdevice->free_pages) { @@ -603,7 +632,8 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice) return dpage; error: - __free_page(rpage); + if (rpage) + __free_page(rpage); return NULL; } @@ -629,12 +659,16 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, * unallocated pte_none() or read-only zero page. */ spage = migrate_pfn_to_page(*src); + if (WARN(spage && is_zone_device_page(spage), +"page already in device spage pfn: 0x%lx\n", +page_to_pfn(spage))) + continue; dpage = dmirror_devmem_alloc_page(mdevice); if (!dpage) continue; - rpage = dpage->zone_device_data; + rpage = BACKING_PAGE(dpage);
[PATCH 09/31] drm/amd/display: Create a file dedicated for CRTC
[Why] The amdgpu_dm file contains most of the code that works as an interface between DRM API and DC. As a result, this file becomes very large since it comprises multiple abstractions such as CRTC manipulation. [How] This commit extracts the CRTC code to its specific file named amdgpu_dm_crtc. This change does not change anything inside the functions; the only exception is converting some static functions to a global function. Reviewed-by: Harry Wentland Acked-by: Alan Liu Signed-off-by: Rodrigo Siqueira --- .../gpu/drm/amd/display/amdgpu_dm/Makefile| 1 + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 434 +--- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c| 463 ++ .../amd/display/amdgpu_dm/amdgpu_dm_crtc.h| 51 ++ 4 files changed, 516 insertions(+), 433 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index ec559ea902a3..90fb0f3cdb6f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -28,6 +28,7 @@ AMDGPUDM = \ amdgpu_dm.o \ amdgpu_dm_plane.o \ + amdgpu_dm_crtc.o \ amdgpu_dm_irq.o \ amdgpu_dm_mst_types.o \ amdgpu_dm_color.o diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ceac70e93ece..bf01ed340ec3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -47,6 +47,7 @@ #include "atom.h" #include "amdgpu_dm.h" #include "amdgpu_dm_plane.h" +#include "amdgpu_dm_crtc.h" #ifdef CONFIG_DRM_AMD_DC_HDCP #include "amdgpu_dm_hdcp.h" #include @@ -204,9 +205,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev); /* removes and deallocates the drm structures, created by the above function */ static void amdgpu_dm_destroy_drm_device(struct 
amdgpu_display_manager *dm); -static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - uint32_t link_index); static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *amdgpu_dm_connector, uint32_t link_index, @@ -335,20 +333,6 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev, return NULL; } -static inline bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc) -{ - return acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_VARIABLE || - acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_FIXED; -} - -static inline bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state) -{ - return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE || - dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; -} - static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state, struct dm_crtc_state *new_state) { @@ -464,26 +448,6 @@ static void dm_pflip_high_irq(void *interrupt_params) vrr_active, (int) !e); } -static void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc) -{ - struct drm_crtc *crtc = >base; - struct drm_device *dev = crtc->dev; - unsigned long flags; - - drm_crtc_handle_vblank(crtc); - - spin_lock_irqsave(>event_lock, flags); - - /* Send completion event for cursor-only commits */ - if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { - drm_crtc_send_vblank_event(crtc, acrtc->event); - drm_crtc_vblank_put(crtc); - acrtc->event = NULL; - } - - spin_unlock_irqrestore(>event_lock, flags); -} - static void dm_vupdate_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; @@ -1261,52 +1225,6 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ } -static void vblank_control_worker(struct work_struct *work) -{ - struct vblank_control_work *vblank_work = - container_of(work, struct vblank_control_work, work); - struct amdgpu_display_manager *dm = 
vblank_work->dm; - - mutex_lock(>dc_lock); - - if (vblank_work->enable) - dm->active_vblank_irq_count++; - else if(dm->active_vblank_irq_count) - dm->active_vblank_irq_count--; - - dc_allow_idle_optimizations(dm->dc, dm->active_vblank_irq_count == 0); - - DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); - - /* -* Control PSR based on vblank
[PATCH 17/31] drm/amd/display: Update DML logic for unbounded req handling
From: Jun Lei [why] Unbounded request logic in resource/DML has some issues where unbounded request is being enabled incorrectly. SW today enables unbounded request unconditionally in hardware, on the assumption that HW can always support it in single pipe scenarios. This worked until now because the same assumption is made in DML. A new DML update is needed to fix a bug, where there are single pipe scenarios where unbounded cannot be enabled, and this change in DML needs to be ported in, and dcn32 resource logic fixed. [how] First, dcn32_resource should program unbounded req in HW according to unbounded req enablement output from DML, as opposed to DML input. Second, port in DML update which disables unbounded req in some scenarios to fix an issue with poor stutter performance Signed-off-by: Jun Lei Reviewed-by: Rodrigo Siqueira --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 11 +++- .../dc/dml/dcn32/display_mode_vba_32.c| 44 +--- .../dc/dml/dcn32/display_mode_vba_util_32.c | 51 --- .../dc/dml/dcn32/display_mode_vba_util_32.h | 10 +++- .../drm/amd/display/dc/dml/display_mode_vba.c | 1 + 5 files changed, 103 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 32da47e24839..39214a0dcdf2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -3322,6 +3322,7 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display { int i, pipe_idx; bool usr_retraining_support = false; + bool unbounded_req_enabled = false; /* Writeback MCIF_WB arbitration parameters */ dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); @@ -3357,6 +3358,14 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) context->bw_ctx.bw.dcn.clk.dispclk_khz = 
dc->debug.min_disp_clk_khz; + unbounded_req_enabled = get_unbounded_request_enabled(>bw_ctx.dml, pipes, pipe_cnt); + + if (unbounded_req_enabled && pipe_cnt > 1) { + // Unbounded requesting should not ever be used when more than 1 pipe is enabled. + ASSERT(false); + unbounded_req_enabled = false; + } + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; @@ -3375,7 +3384,7 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display } else { context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(>bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode; + context->res_ctx.pipe_ctx[i].unbounded_req = unbounded_req_enabled; } if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 1712843dafaa..092782b6e341 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -226,6 +226,9 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.nomDETInKByte, mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + mode_lib->vba.ip.pixel_chunk_size_kbytes, + mode_lib->vba.ip.rob_buffer_size_kbytes, mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal, v->dummy_vars .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation @@ -287,6 +290,10 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman mode_lib->vba.DETBufferSizeC, >UnboundedRequestEnabled, 
>CompressedBufferSizeInkByte, + >CompBufReservedSpaceKBytes, + >dummy_vars +
[PATCH 02/31] drm/amd/display: Remove unused variable
From: Jun Lei Remove an unused variable "remove_disconnect_edp" which was a workaround bit. Acked-by: Alan Liu Signed-off-by: Jun Lei --- drivers/gpu/drm/amd/display/dc/dc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7c42377f0aae..be41f9fcf1dd 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -683,7 +683,6 @@ struct dc_debug_options { bool hdmi20_disable; bool skip_detection_link_training; uint32_t edid_read_retry_times; - bool remove_disconnect_edp; unsigned int force_odm_combine; //bit vector based on otg inst unsigned int seamless_boot_odm_combine; unsigned int force_odm_combine_4to1; //bit vector based on otg inst -- 2.37.0
[PATCH 15/31] drm/amd/display: Loop through all pipes for DET allocation
From: Taimur Hassan [Why & How] There are cases where the pipes populated are not all at the top of the pipes list under context. Loop through all pipes for DET allocation instead of just the number of populated ones, even if some unpopulated pipes are iterated through unnecessarily. Reviewed-by: Alvin Lee Acked-by: Alan Liu Signed-off-by: Taimur Hassan --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 0cb44ea9753b..32da47e24839 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -3068,7 +3068,7 @@ int dcn32_populate_dml_pipes_from_context( } } } else - dcn32_determine_det_override(context, pipes, is_pipe_split_expected, pipe_cnt); + dcn32_determine_det_override(context, pipes, is_pipe_split_expected, dc->res_pool->pipe_count); // In general cases we want to keep the dram clock change requirement // (prefer configs that support MCLK switch). Only override to false -- 2.37.0
[PATCH 28/31] drm/amd/display: Drop FPU flags from dcn32 Makefile
This is the final commit from the FPU isolation for DCN32 and for this reason we can finally remove flags related to FPU. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira --- drivers/gpu/drm/amd/display/dc/dcn32/Makefile | 28 --- 1 file changed, 28 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile index 932d85fa4262..e943b643ab6b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile @@ -15,34 +15,6 @@ DCN32 = dcn32_resource.o dcn32_hubbub.o dcn32_hwseq.o dcn32_init.o \ dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_hpo_dp_link_encoder.o \ dcn32_resource_helpers.o dcn32_mpc.o -ifdef CONFIG_X86 -dcn32_ccflags := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -dcn32_ccflags := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -dcn32_ccflags += -mpreferred-stack-boundary=4 -else -dcn32_ccflags += -msse2 -endif -endif - -CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource_helpers.o := $(dcn32_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource.o := $(dcn32_ccflags) - AMD_DAL_DCN32 = $(addprefix $(AMDDALPATH)/dc/dcn32/,$(DCN32)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN32) -- 2.37.0
[PATCH v9 03/14] mm: add zone device coherent type memory support
Device memory that is cache coherent from device and CPU point of view. This is used on platforms that have an advanced system bus (like CAPI or CXL). Any page of a process can be migrated to such memory. However, no one should be allowed to pin such memory so that it can always be evicted. Signed-off-by: Alex Sierra Acked-by: Felix Kuehling Reviewed-by: Alistair Popple [hch: rebased ontop of the refcount changes, removed is_dev_private_or_coherent_page] Signed-off-by: Christoph Hellwig Acked-by: David Hildenbrand --- include/linux/memremap.h | 19 +++ include/linux/mm.h | 5 - mm/memcontrol.c | 7 --- mm/memory-failure.c | 8 ++-- mm/memremap.c| 10 ++ mm/migrate_device.c | 16 +++- mm/rmap.c| 5 +++-- 7 files changed, 53 insertions(+), 17 deletions(-) diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 77229165c914..f27b142fd3d0 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -41,6 +41,13 @@ struct vmem_altmap { * A more complete discussion of unaddressable memory may be found in * include/linux/hmm.h and Documentation/vm/hmm.rst. * + * MEMORY_DEVICE_COHERENT: + * Device memory that is cache coherent from device and CPU point of view. This + * is used on platforms that have an advanced system bus (like CAPI or CXL). A + * driver can hotplug the device memory using ZONE_DEVICE and with that memory + * type. Any page of a process can be migrated to such memory. However no one + * should be allowed to pin such memory so that it can always be evicted. + * * MEMORY_DEVICE_FS_DAX: * Host memory that has similar access semantics as System RAM i.e. DMA * coherent and supports page pinning. 
In support of coordinating page @@ -61,6 +68,7 @@ struct vmem_altmap { enum memory_type { /* 0 is reserved to catch uninitialized type fields */ MEMORY_DEVICE_PRIVATE = 1, + MEMORY_DEVICE_COHERENT, MEMORY_DEVICE_FS_DAX, MEMORY_DEVICE_GENERIC, MEMORY_DEVICE_PCI_P2PDMA, @@ -150,6 +158,17 @@ static inline bool is_pci_p2pdma_page(const struct page *page) page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; } +static inline bool is_device_coherent_page(const struct page *page) +{ + return is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_COHERENT; +} + +static inline bool folio_is_device_coherent(const struct folio *folio) +{ + return is_device_coherent_page(>page); +} + #ifdef CONFIG_ZONE_DEVICE void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); diff --git a/include/linux/mm.h b/include/linux/mm.h index 2df8c2b98d36..3ed101dfbfab 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -28,6 +28,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -1522,7 +1523,9 @@ static inline bool is_longterm_pinnable_page(struct page *page) if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE) return false; #endif - return !is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page)); + return !(is_device_coherent_page(page) || +is_zone_movable_page(page) || +is_zero_pfn(page_to_pfn(page))); } #else static inline bool is_longterm_pinnable_page(struct page *page) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 618c366a2f07..5d37a85c67da 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5665,8 +5665,8 @@ static int mem_cgroup_move_account(struct page *page, * 2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a * target for charge migration. if @target is not NULL, the entry is stored * in target->ent. - * 3(MC_TARGET_DEVICE): like MC_TARGET_PAGE but page is MEMORY_DEVICE_PRIVATE - * (so ZONE_DEVICE page and thus not on the lru). 
+ * 3(MC_TARGET_DEVICE): like MC_TARGET_PAGE but page is device memory and + * thus not on the lru. * For now we such page is charge like a regular page would be as for all * intent and purposes it is just special memory taking the place of a * regular page. @@ -5704,7 +5704,8 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, */ if (page_memcg(page) == mc.from) { ret = MC_TARGET_PAGE; - if (is_device_private_page(page)) + if (is_device_private_page(page) || + is_device_coherent_page(page)) ret = MC_TARGET_DEVICE; if (target) target->page = page; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index da39ec8afca8..79f175eeb190 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1685,12 +1685,16 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int
[PATCH 04/31] drm/amd/display: Expose function reset_cur_dp_mst_topology
From: Wayne Lin [Why & How] This function needs to be called from outside dc_link.c, so make it public. Reviewed-by: Hersen Wu Acked-by: Alan Liu Signed-off-by: Wayne Lin --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +- drivers/gpu/drm/amd/display/dc/dc_link.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 858ee51f930a..ef54b96affa8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -845,7 +845,7 @@ static bool discover_dp_mst_topology(struct dc_link *link, enum dc_detect_reason return link->type == dc_connection_mst_branch; } -static bool reset_cur_dp_mst_topology(struct dc_link *link) +bool reset_cur_dp_mst_topology(struct dc_link *link) { bool result = false; DC_LOGGER_INIT(link->ctx->logger); diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 023774b94da3..a0af0f6afeef 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -514,4 +514,7 @@ bool dc_dp_trace_is_logged(struct dc_link *link, struct dp_trace_lt_counts *dc_dp_trace_get_lt_counts(struct dc_link *link, bool in_detection); unsigned int dc_dp_trace_get_link_loss_count(struct dc_link *link); + +/* Destruct the mst topology of the link and reset the allocated payload table */ +bool reset_cur_dp_mst_topology(struct dc_link *link); #endif /* DC_LINK_H_ */ -- 2.37.0
[PATCH 25/31] drm/amd/display: Move dlg params calculation
Move dlg params calculation to the FPU folder and make it static. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 485 + .../drm/amd/display/dc/dcn32/dcn32_resource.h | 6 - .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 506 +- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 13 +- 4 files changed, 513 insertions(+), 497 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 45768eff9315..32edb3e5715a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1753,368 +1753,6 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, } } -static bool dcn32_split_stream_for_mpc_or_odm( - const struct dc *dc, - struct resource_context *res_ctx, - struct pipe_ctx *pri_pipe, - struct pipe_ctx *sec_pipe, - bool odm) -{ - int pipe_idx = sec_pipe->pipe_idx; - const struct resource_pool *pool = dc->res_pool; - - if (pri_pipe->plane_state) { - /* ODM + window MPO, where MPO window is on left half only */ - if (pri_pipe->plane_state->clip_rect.x + pri_pipe->plane_state->clip_rect.width <= - pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) - return true; - - /* ODM + window MPO, where MPO window is on right half only */ - if (pri_pipe->plane_state->clip_rect.x >= pri_pipe->stream->src.width/2) - return true; - } - - *sec_pipe = *pri_pipe; - - sec_pipe->pipe_idx = pipe_idx; - sec_pipe->plane_res.mi = pool->mis[pipe_idx]; - sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; - sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; - sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; - sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; - sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; - sec_pipe->stream_res.dsc = NULL; - if (odm) { - if (pri_pipe->next_odm_pipe) { - ASSERT(pri_pipe->next_odm_pipe != sec_pipe); - 
sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; - sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; - } - if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { - pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; - } - if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { - pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; - } - pri_pipe->next_odm_pipe = sec_pipe; - sec_pipe->prev_odm_pipe = pri_pipe; - ASSERT(sec_pipe->top_pipe == NULL); - - if (!sec_pipe->top_pipe) - sec_pipe->stream_res.opp = pool->opps[pipe_idx]; - else - sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; - if (sec_pipe->stream->timing.flags.DSC == 1) { - dcn20_acquire_dsc(dc, res_ctx, _pipe->stream_res.dsc, pipe_idx); - ASSERT(sec_pipe->stream_res.dsc); - if (sec_pipe->stream_res.dsc == NULL) - return false; - } - } else { - if (pri_pipe->bottom_pipe) { - ASSERT(pri_pipe->bottom_pipe != sec_pipe); - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe; - sec_pipe->bottom_pipe->top_pipe = sec_pipe; - } - pri_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe; - - ASSERT(pri_pipe->plane_state); - } - - return true; -} - -static struct pipe_ctx *dcn32_find_split_pipe( - struct dc *dc, - struct dc_state *context, - int old_index) -{ - struct pipe_ctx *pipe = NULL; - int i; - - if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { - pipe = >res_ctx.pipe_ctx[old_index]; - pipe->pipe_idx = old_index; - } - - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL - && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { - if
[PATCH 08/31] drm/amd/display: Create a file dedicated to planes
[Why] The amdgpu_dm file contains most of the code that works as an interface between DRM API and DC. As a result, this file becomes very large since it comprises multiple abstractions such as plane manipulation. [How] This commit extracts the plane code to its specific file named amdgpu_dm_plane. This change does not change anything inside the functions; the only exception is converting some static functions to a global function. Reviewed-by: Harry Wentland Acked-by: Alan Liu Signed-off-by: Rodrigo Siqueira --- .../gpu/drm/amd/display/amdgpu_dm/Makefile|7 +- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2135 +++-- .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 1637 + .../amd/display/amdgpu_dm/amdgpu_dm_plane.h | 73 + 4 files changed, 2057 insertions(+), 1795 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index 718e123a3230..ec559ea902a3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -25,7 +25,12 @@ -AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o amdgpu_dm_color.o +AMDGPUDM = \ + amdgpu_dm.o \ + amdgpu_dm_plane.o \ + amdgpu_dm_irq.o \ + amdgpu_dm_mst_types.o \ + amdgpu_dm_color.o ifdef CONFIG_DRM_AMD_DC_DCN AMDGPUDM += dc_fpu.o diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index dae998e014b0..ceac70e93ece 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -46,6 +46,7 @@ #include "amdgpu_ucode.h" #include "atom.h" #include "amdgpu_dm.h" +#include "amdgpu_dm_plane.h" #ifdef CONFIG_DRM_AMD_DC_HDCP #include "amdgpu_dm_hdcp.h" #include @@ -203,10 +204,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev); /* 
removes and deallocates the drm structures, created by the above function */ static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm); -static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - unsigned long possible_crtcs, - const struct dc_plane_cap *plane_cap); static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, struct drm_plane *plane, uint32_t link_index); @@ -225,12 +222,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state); static int amdgpu_dm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state); -static void handle_cursor_update(struct drm_plane *plane, -struct drm_plane_state *old_plane_state); - -static const struct drm_format_info * -amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd); - static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector); static void handle_hpd_rx_irq(void *param); @@ -4315,11 +4306,11 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case IP_VERSION(3, 0, 0): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): - case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): case IP_VERSION(3, 2, 0): case IP_VERSION(3, 2, 1): + case IP_VERSION(3, 1, 4): case IP_VERSION(2, 1, 0): if (register_outbox_irq_handlers(dm->adev)) { DRM_ERROR("DM: Failed to initialize IRQ\n"); @@ -4707,1104 +4698,222 @@ static const struct drm_encoder_funcs amdgpu_dm_encoder_funcs = { .destroy = amdgpu_dm_encoder_destroy, }; - -static void get_min_max_dc_plane_scaling(struct drm_device *dev, -struct drm_framebuffer *fb, -int *min_downscale, int *max_upscale) -{ - struct amdgpu_device *adev = drm_to_adev(dev); - struct dc *dc = adev->dm.dc; - /* Caps for all supported planes are the same on DCE and DCN 1 - 3 */ - struct dc_plane_cap *plane_cap = >caps.planes[0]; - - switch (fb->format->format) { - case DRM_FORMAT_P010: - case DRM_FORMAT_NV12: - case DRM_FORMAT_NV21: - *max_upscale = 
plane_cap->max_upscale_factor.nv12; - *min_downscale = plane_cap->max_downscale_factor.nv12; - break; - - case DRM_FORMAT_XRGB16161616F: - case DRM_FORMAT_ARGB16161616F: - case DRM_FORMAT_XBGR16161616F: - case DRM_FORMAT_ABGR16161616F: - *max_upscale = plane_cap->max_upscale_factor.fp16; - *min_downscale =
[PATCH v3] drm/amdgpu: add comments to HW_IP_VCN_ENC
From VCN4, HW_IP_VCN_ENC will be used as a unified queue and will support both encoding and decoding jobs; HW_IP_VCN_DEC is retired from VCN4. Link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits Signed-off-by: Ruijing Dong --- include/uapi/drm/amdgpu_drm.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..29e4a1ece2ce 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -559,7 +559,14 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_VCE 4 #define AMDGPU_HW_IP_UVD_ENC 5 #define AMDGPU_HW_IP_VCN_DEC 6 + +/** + * From VCN4, AMDGPU_HW_IP_VCN_ENC will be used as unified queue + * and support both encoding and decoding jobs, AMDGPU_HW_IP_VCN_DEC + * is retired from VCN4. + */ #define AMDGPU_HW_IP_VCN_ENC 7 + #define AMDGPU_HW_IP_VCN_JPEG 8 #define AMDGPU_HW_IP_NUM 9 -- 2.25.1
Re: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails
On 2022-07-15 05:28, Zhu, Jiadong wrote: [AMD Official Use Only - General] Updated some comments -Original Message- From: Zhu, Jiadong Sent: Friday, July 15, 2022 5:13 PM To: Christian König ; amd-gfx@lists.freedesktop.org; Grodzovsky, Andrey Cc: Huang, Ray ; Liu, Aaron Subject: RE: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails Hi Christian, The resubmitted job in function amdgpu_ib_preempt_job_recovery returns the same hw fence because of this commit: static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched) { struct drm_sched_job *s_job; struct dma_fence *fence; spin_lock(>job_list_lock); list_for_each_entry(s_job, >pending_list, list) { fence = sched->ops->run_job(s_job); //fence returned has the same address with swapped fences dma_fence_put(fence); } spin_unlock(>job_list_lock); } commit c530b02f39850a639b72d01ebbf7e5d745c60831 Author: Jack Zhang Date: Wed May 12 15:06:35 2021 +0800 drm/amd/amdgpu embed hw_fence into amdgpu_job Why: Previously hw fence is alloced separately with job. It caused historical lifetime issues and corner cases. The ideal situation is to take fence to manage both job and fence's lifetime, and simplify the design of gpu-scheduler. How: We propose to embed hw_fence into amdgpu_job. 1. We cover the normal job submission by this method. 2. For ib_test, and submit without a parent job keep the legacy way to create a hw fence separately. v2: use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is embedded in a job. v3: remove redundant variable ring in amdgpu_job v4: add tdr sequence support for this feature. Add a job_run_counter to indicate whether this job is a resubmit job. v5 add missing handling in amdgpu_fence_enable_signaling Signed-off-by: Jingwen Chen Signed-off-by: Jack Zhang Reviewed-by: Andrey Grodzovsky Reviewed by: Monk Liu Signed-off-by: Alex Deucher Thus the fence we swapped out is signaled and put twice in the following 2 functions and we get " refcount_t: underflow; use-after-free. 
" errors latter. /* wait for jobs finished */ amdgpu_fence_wait_empty(ring); //wait on the resubmitted fence which is signaled and put somewhere else. The refcount decreased by 1 after amdgpu_fence_wait_empty. /* signal the old fences */ amdgpu_ib_preempt_signal_fences(fences, length); //signal and put the previous swapped fence, signal would return -22. Thanks, Jiadong Did you have 'drm/amdgpu: Follow up change to previous drm scheduler change.' this commit in your branch while you encountered this problem ? I don't see an underflow issue for the preempted job when inspecting the code with this commit in mind - amdgpu_fence_emit dma_fence_init 1 dma_fence_get(fence) 2 rcu_assign_pointer(*ptr, dma_fence_get(fence) 3 drm_sched_main s_fence->parent = dma_fence_get(fence); 4 dma_fence_put(fence); 3 amdgpu_ib_preempt_job_recovery amdgpu_fence_emit if (job && job->job_run_counter) -> dma_fence_get(fence); 4 rcu_assign_pointer(*ptr, dma_fence_get(fence)); 5 dma_fence_put(fence); 4 amdgpu_fence_wait_empty dma_fence_get_rcu(fence) 5 dma_fence_put(fence) 4 amdgpu_process_fence (EOP interrupt for re-submission of preempted job) dma_fence_put 3 amdgpu_ib_preempt_signal_fences dma_fence_put 2 amdgpu_job_free_cb dma_fence_put(>hw_fence) 1 drm_sched_fence_release_scheduled dma_fence_put(fence->parent); 0 Also take a look here for reference - https://drive.google.com/file/d/1yEoeW6OQC9WnwmzFW6NBLhFP_jD0xcHm/view Andrey Andrey -Original Message- From: Christian König Sent: Friday, July 15, 2022 4:48 PM To: Zhu, Jiadong ; amd-gfx@lists.freedesktop.org; Grodzovsky, Andrey Cc: Huang, Ray ; Liu, Aaron Subject: Re: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails [CAUTION: External Email] Am 15.07.22 um 10:43 schrieb jiadong@amd.com: From: "Jiadong.Zhu" Dma_fence_signal returning non-zero indicates that the fence is signaled and put somewhere else. Skip dma_fence_put to make the fence refcount correct. Well quite a big NAK on this. 
Reference counting should be completely independent where a fence signals. Andrey can you take a look at this as well? Thanks, Christian. Signed-off-by: Jiadong.Zhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f4ed0785d523..93c1a5e83835 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1500,8 +1500,8 @@ static void
[PATCH 01/31] drm/amd/display: Support vertical interrupt 0 for all dcn ASIC
From: Wayne Lin [Why] When CONFIG_DRM_AMD_SECURE_DISPLAY is enabled, it will try to register vertical interrupt 0 for a specific task. Currently, only dcn10 has defined the relevant info for vertical interrupt 0. If we enable CONFIG_DRM_AMD_SECURE_DISPLAY for other dcn ASICs, we will get DC_IRQ_SOURCE_INVALID while calling dc_interrupt_to_irq_source(), which causes pointer errors. [How] Add support for vertical interrupt 0 for all dcn ASICs. Acked-by: Alan Liu Signed-off-by: Wayne Lin --- .../display/dc/irq/dcn30/irq_service_dcn30.c | 14 +++--- .../dc/irq/dcn303/irq_service_dcn303.c| 19 +++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c index 146cd1819912..2aa74ee1502a 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c @@ -289,6 +289,13 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = _irq_info_funcs\ } +#define dmub_trace_int_entry()\ + [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\ + IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX0_READY_INT_EN,\ + DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\ + .funcs = _trace_irq_info_funcs\ + } + #define vline0_int_entry(reg_num)\ [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\ IRQ_REG_ENTRY(OTG, reg_num,\ @@ -297,13 +304,6 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = _irq_info_funcs\ } -#define dmub_trace_int_entry()\ - [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\ - IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX0_READY_INT_EN,\ - DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\ - .funcs = _trace_irq_info_funcs\ - } - #define dummy_irq_entry() \ {\ .funcs = _irq_info_funcs\ diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c index 66e60762388e..1d149d290147 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c @@ -24,6 +24,10 @@ static enum dc_irq_source to_dal_irq_source_dcn303(struct irq_service *irq_servi return DC_IRQ_SOURCE_VBLANK1; case DCN_1_0__SRCID__DC_D2_OTG_VSTARTUP: return DC_IRQ_SOURCE_VBLANK2; + case DCN_1_0__SRCID__OTG1_VERTICAL_INTERRUPT0_CONTROL: + return DC_IRQ_SOURCE_DC1_VLINE0; + case DCN_1_0__SRCID__OTG2_VERTICAL_INTERRUPT0_CONTROL: + return DC_IRQ_SOURCE_DC2_VLINE0; case DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT: return DC_IRQ_SOURCE_PFLIP1; case DCN_1_0__SRCID__HUBP1_FLIP_INTERRUPT: @@ -96,6 +100,11 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .ack = NULL }; +static const struct irq_source_info_funcs vline0_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg @@ -164,6 +173,14 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .funcs = _irq_info_funcs\ } +#define vline0_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\ + .funcs = _irq_info_funcs\ + } + #define dummy_irq_entry() { .funcs = _irq_info_funcs } #define i2c_int_entry(reg_num) \ @@ -236,6 +253,8 @@ static const struct irq_source_info irq_source_info_dcn303[DAL_IRQ_SOURCES_NUMBE vupdate_no_lock_int_entry(1), vblank_int_entry(0), vblank_int_entry(1), + vline0_int_entry(0), + vline0_int_entry(1), }; static const struct irq_service_funcs irq_service_funcs_dcn303 = { -- 2.37.0
[PATCH 21/31] drm/amd/display: Move insert entry table to the FPU code
The insert_entry_into_table_sorted function uses FPU operation and calls other static functions support. This commit moves the insert entry function with all the required struct and static functions to the FPU file. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 188 +- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 3 + .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 186 + .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 9 + 4 files changed, 208 insertions(+), 178 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index a56d87140eba..1c124231b00a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -115,137 +115,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x0012, 0x00C0, 0x34C #define DC_LOGGER_INIT(logger) -#define DCN3_2_DEFAULT_DET_SIZE 256 -#define DCN3_2_MAX_DET_SIZE 1152 -#define DCN3_2_MIN_DET_SIZE 128 -#define DCN3_2_MIN_COMPBUF_SIZE_KB 128 - -struct _vcs_dpi_ip_params_st dcn3_2_ip = { - .gpuvm_enable = 0, - .gpuvm_max_page_table_levels = 4, - .hostvm_enable = 0, - .rob_buffer_size_kbytes = 128, - .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1280, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 22, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .alpha_pixel_chunk_size_kbytes = 4, // not appearing in spreadsheet, match c code from hw team - .min_pixel_chunk_size_bytes = 1024, - .dcc_meta_buffer_size_bytes = 6272, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 4, - 
.maximum_dsc_bits_per_component = 12, - .maximum_pixels_per_line_per_dsc_unit = 6016, - .dsc422_native_support = true, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 57, - .line_buffer_size_bits = 1171920, //DPP doc, DCN3_2_DisplayMode_73.xlsm still shows as 986880 bits with 48 bpp - .max_line_buffer_lines = 32, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 47, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 28, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 125, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, - .max_num_dp2p0_outputs = 2, - .max_num_dp2p0_streams = 4, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { - .clock_limits = { - { - .state = 0, - .dcfclk_mhz = 1564.0, - .fabricclk_mhz = 400.0, - .dispclk_mhz = 2150.0, - .dppclk_mhz = 2150.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .phyclk_d32_mhz = 625.0, - .socclk_mhz = 1200.0, - .dscclk_mhz = 716.667, - .dram_speed_mts = 1600.0, - .dtbclk_mhz = 1564.0, - }, - }, - .num_states = 1, - .sr_exit_time_us = 5.20, - .sr_enter_plus_exit_time_us = 9.60, - 
.sr_exit_z8_time_us = 285.0, - .sr_enter_plus_exit_z8_time_us = 320, - .writeback_latency_us = 12.0, - .round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 4.0, -
[PATCH v9 11/14] tools: update hmm-test to support device coherent type
Test cases such as migrate_fault and migrate_multiple were modified to explicitly migrate from device to sys memory, without the need for page faults, when using the device coherent type. The snapshot test case was updated to read the memory device type first and, based on that, get the proper returned results. A migrate_ping_pong test case was added to test explicit migration from device to sys memory for both private and coherent zone types. Helpers to migrate from device to sys memory and vice versa were also added. Signed-off-by: Alex Sierra Acked-by: Felix Kuehling Reviewed-by: Alistair Popple Signed-off-by: Christoph Hellwig --- tools/testing/selftests/vm/hmm-tests.c | 121 - 1 file changed, 100 insertions(+), 21 deletions(-) diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 203323967b50..4b547188ec40 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -46,6 +46,13 @@ struct hmm_buffer { uint64_tfaults; }; +enum { + HMM_PRIVATE_DEVICE_ONE, + HMM_PRIVATE_DEVICE_TWO, + HMM_COHERENCE_DEVICE_ONE, + HMM_COHERENCE_DEVICE_TWO, +}; + #define TWOMEG (1 << 21) #define HMM_BUFFER_SIZE (1024 << 12) #define HMM_PATH_MAX64 @@ -60,6 +67,21 @@ FIXTURE(hmm) unsigned intpage_shift; }; +FIXTURE_VARIANT(hmm) +{ + int device_number; +}; + +FIXTURE_VARIANT_ADD(hmm, hmm_device_private) +{ + .device_number = HMM_PRIVATE_DEVICE_ONE, +}; + +FIXTURE_VARIANT_ADD(hmm, hmm_device_coherent) +{ + .device_number = HMM_COHERENCE_DEVICE_ONE, +}; + FIXTURE(hmm2) { int fd0; @@ -68,6 +90,24 @@ FIXTURE(hmm2) unsigned intpage_shift; }; +FIXTURE_VARIANT(hmm2) +{ + int device_number0; + int device_number1; +}; + +FIXTURE_VARIANT_ADD(hmm2, hmm2_device_private) +{ + .device_number0 = HMM_PRIVATE_DEVICE_ONE, + .device_number1 = HMM_PRIVATE_DEVICE_TWO, +}; + +FIXTURE_VARIANT_ADD(hmm2, hmm2_device_coherent) +{ + .device_number0 = HMM_COHERENCE_DEVICE_ONE, + .device_number1 = HMM_COHERENCE_DEVICE_TWO, +}; + static int hmm_open(int unit) {
char pathname[HMM_PATH_MAX]; @@ -81,12 +121,19 @@ static int hmm_open(int unit) return fd; } +static bool hmm_is_coherent_type(int dev_num) +{ + return (dev_num >= HMM_COHERENCE_DEVICE_ONE); +} + FIXTURE_SETUP(hmm) { self->page_size = sysconf(_SC_PAGE_SIZE); self->page_shift = ffs(self->page_size) - 1; - self->fd = hmm_open(0); + self->fd = hmm_open(variant->device_number); + if (self->fd < 0 && hmm_is_coherent_type(variant->device_number)) + SKIP(exit(0), "DEVICE_COHERENT not available"); ASSERT_GE(self->fd, 0); } @@ -95,9 +142,11 @@ FIXTURE_SETUP(hmm2) self->page_size = sysconf(_SC_PAGE_SIZE); self->page_shift = ffs(self->page_size) - 1; - self->fd0 = hmm_open(0); + self->fd0 = hmm_open(variant->device_number0); + if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0)) + SKIP(exit(0), "DEVICE_COHERENT not available"); ASSERT_GE(self->fd0, 0); - self->fd1 = hmm_open(1); + self->fd1 = hmm_open(variant->device_number1); ASSERT_GE(self->fd1, 0); } @@ -211,6 +260,20 @@ static void hmm_nanosleep(unsigned int n) nanosleep(, NULL); } +static int hmm_migrate_sys_to_dev(int fd, + struct hmm_buffer *buffer, + unsigned long npages) +{ + return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages); +} + +static int hmm_migrate_dev_to_sys(int fd, + struct hmm_buffer *buffer, + unsigned long npages) +{ + return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages); +} + /* * Simple NULL test of device open/close. */ @@ -875,7 +938,7 @@ TEST_F(hmm, migrate) ptr[i] = i; /* Migrate memory to device. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); ASSERT_EQ(ret, 0); ASSERT_EQ(buffer->cpages, npages); @@ -923,7 +986,7 @@ TEST_F(hmm, migrate_fault) ptr[i] = i; /* Migrate memory to device. 
*/ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); ASSERT_EQ(ret, 0); ASSERT_EQ(buffer->cpages, npages); @@ -936,7 +999,7 @@ TEST_F(hmm, migrate_fault) ASSERT_EQ(ptr[i], i); /* Migrate memory to the device again. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); ASSERT_EQ(ret, 0); ASSERT_EQ(buffer->cpages,
Re: [PATCH v2] drm/amdgpu: add HW_IP_VCN_UNIFIED type
Am 15.07.22 um 16:44 schrieb Ruijing Dong: Define HW_IP_VCN_UNIFIED type the same as HW_IP_VCN_ENC. VCN4 support for libdrm needs a new definition for the unified queue, so that it can align to the kernel. link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits Signed-off-by: Ruijing Dong --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..fe33db8441bc 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -560,6 +560,7 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_UVD_ENC 5 #define AMDGPU_HW_IP_VCN_DEC 6 #define AMDGPU_HW_IP_VCN_ENC 7 +#define AMDGPU_HW_IP_VCN_UNIFIED AMDGPU_HW_IP_VCN_ENC Why exactly do we need a new define for this? Essentially the encode queue is extended with new functionality, isn't it? So I think we should just stick to AMDGPU_HW_IP_VCN_ENC and not add an alias for it. Regards, Christian. #define AMDGPU_HW_IP_VCN_JPEG 8 #define AMDGPU_HW_IP_NUM 9
RE: [PATCH v2] drm/amdgpu: add HW_IP_VCN_UNIFIED type
[AMD Official Use Only - General] >> Why exactly do we need a new define for this? Essentially the encode queue >> is extended with new functionality, isn't it? >> So I think we should just stick to AMDGPU_HW_IP_VCN_ENC and not add an alias >> for it. Yes, it extends the encode queue to include new functionality, and it looks a little confusing when sending decoding jobs to the encoding queue. So I assumed this alias could reduce the confusion. Does this change make sense in this regard? Certainly, we can stick to AMDGPU_HW_IP_VCN_ENC. Thanks, Ruijing -Original Message- From: Koenig, Christian Sent: Friday, July 15, 2022 11:18 AM To: Dong, Ruijing ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Liu, Leo Subject: Re: [PATCH v2] drm/amdgpu: add HW_IP_VCN_UNIFIED type Am 15.07.22 um 16:44 schrieb Ruijing Dong: > Define HW_IP_VCN_UNIFIED type the same as HW_IP_VCN_ENC. > > VCN4 support for libdrm needs a new definition for the unified queue, > so that it can align to the kernel. > > link: > https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits > > Signed-off-by: Ruijing Dong > --- > include/uapi/drm/amdgpu_drm.h | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/include/uapi/drm/amdgpu_drm.h > b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..fe33db8441bc > 100644 > --- a/include/uapi/drm/amdgpu_drm.h > +++ b/include/uapi/drm/amdgpu_drm.h > @@ -560,6 +560,7 @@ struct drm_amdgpu_gem_va { > #define AMDGPU_HW_IP_UVD_ENC 5 > #define AMDGPU_HW_IP_VCN_DEC 6 > #define AMDGPU_HW_IP_VCN_ENC 7 > +#define AMDGPU_HW_IP_VCN_UNIFIED AMDGPU_HW_IP_VCN_ENC Why exactly do we need a new define for this? Essentially the encode queue is extended with new functionality, isn't it? So I think we should just stick to AMDGPU_HW_IP_VCN_ENC and not add an alias for it. Regards, Christian. > #define AMDGPU_HW_IP_VCN_JPEG 8 > #define AMDGPU_HW_IP_NUM 9 >
RE: [PATCH] drm/amdgpu: align between libdrm and drm api
[AMD Official Use Only - General] Hi Christian, You are right. While processing the libdrm code review (not committed yet), we realized the corresponding file needs to align with the kernel. So we will need to have this header file changed first, and then process the libdrm code again. Thanks, Ruijing -Original Message- From: Christian König Sent: Friday, July 15, 2022 4:41 AM To: Dong, Ruijing ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Liu, Leo ; Koenig, Christian Subject: Re: [PATCH] drm/amdgpu: align between libdrm and drm api Am 14.07.22 um 23:22 schrieb Ruijing Dong: > define HW_IP_VCN_UNIFIED the same as HW_IP_VCN_ENC Usually that should be the other way around, libdrm aligns to the kernel. Why was that modification committed to libdrm first? There are usually plenty of warnings before we can do that. Regards, Christian. > > Signed-off-by: Ruijing Dong > --- > include/uapi/drm/amdgpu_drm.h | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/include/uapi/drm/amdgpu_drm.h > b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..fe33db8441bc > 100644 > --- a/include/uapi/drm/amdgpu_drm.h > +++ b/include/uapi/drm/amdgpu_drm.h > @@ -560,6 +560,7 @@ struct drm_amdgpu_gem_va { > #define AMDGPU_HW_IP_UVD_ENC 5 > #define AMDGPU_HW_IP_VCN_DEC 6 > #define AMDGPU_HW_IP_VCN_ENC 7 > +#define AMDGPU_HW_IP_VCN_UNIFIED AMDGPU_HW_IP_VCN_ENC > #define AMDGPU_HW_IP_VCN_JPEG 8 > #define AMDGPU_HW_IP_NUM 9 >
[PATCH v9 01/14] mm: rename is_pinnable_pages to is_longterm_pinnable_pages
is_pinnable_page() and folio_is_pinnable() were renamed to is_longterm_pinnable_page() and folio_is_longterm_pinnable() respectively. These functions are used in the FOLL_LONGTERM flag context. Signed-off-by: Alex Sierra Reviewed-by: David Hildenbrand --- include/linux/mm.h | 8 mm/gup.c | 4 ++-- mm/gup_test.c | 2 +- mm/hugetlb.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index cf3d0d673f6b..3b31b33bd5be 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1592,7 +1592,7 @@ static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ #ifdef CONFIG_MIGRATION -static inline bool is_pinnable_page(struct page *page) +static inline bool is_longterm_pinnable_page(struct page *page) { #ifdef CONFIG_CMA int mt = get_pageblock_migratetype(page); @@ -1603,15 +1603,15 @@ static inline bool is_pinnable_page(struct page *page) return !is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page)); } #else -static inline bool is_pinnable_page(struct page *page) +static inline bool is_longterm_pinnable_page(struct page *page) { return true; } #endif -static inline bool folio_is_pinnable(struct folio *folio) +static inline bool folio_is_longterm_pinnable(struct folio *folio) { - return is_pinnable_page(>page); + return is_longterm_pinnable_page(>page); } static inline void set_page_zone(struct page *page, enum zone_type zone) diff --git a/mm/gup.c b/mm/gup.c index 551264407624..b65fe8bf5af4 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -133,7 +133,7 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) * path. 
*/ if (unlikely((flags & FOLL_LONGTERM) && -!is_pinnable_page(page))) +!is_longterm_pinnable_page(page))) return NULL; /* @@ -1891,7 +1891,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, continue; prev_folio = folio; - if (folio_is_pinnable(folio)) + if (folio_is_longterm_pinnable(folio)) continue; /* diff --git a/mm/gup_test.c b/mm/gup_test.c index d974dec19e1c..12b0a91767d3 100644 --- a/mm/gup_test.c +++ b/mm/gup_test.c @@ -53,7 +53,7 @@ static void verify_dma_pinned(unsigned int cmd, struct page **pages, dump_page(page, "gup_test failure"); break; } else if (cmd == PIN_LONGTERM_BENCHMARK && - WARN(!is_pinnable_page(page), + WARN(!is_longterm_pinnable_page(page), "pages[%lu] is NOT pinnable but pinned\n", i)) { dump_page(page, "gup_test failure"); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a57e1be41401..368fd33787b0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1135,7 +1135,7 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) lockdep_assert_held(_lock); list_for_each_entry(page, >hugepage_freelists[nid], lru) { - if (pin && !is_pinnable_page(page)) + if (pin && !is_longterm_pinnable_page(page)) continue; if (PageHWPoison(page)) -- 2.32.0
[PATCH v9 02/14] mm: move page zone helpers from mm.h to mmzone.h
[WHY] It makes more sense to have these helpers in zone specific header file, rather than the generic mm.h Signed-off-by: Alex Sierra --- include/linux/memremap.h | 2 +- include/linux/mm.h | 78 --- include/linux/mmzone.h | 80 3 files changed, 81 insertions(+), 79 deletions(-) diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 8af304f6b504..77229165c914 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -2,7 +2,7 @@ #ifndef _LINUX_MEMREMAP_H_ #define _LINUX_MEMREMAP_H_ -#include +#include #include #include #include diff --git a/include/linux/mm.h b/include/linux/mm.h index 3b31b33bd5be..2df8c2b98d36 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1049,84 +1049,6 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); * back into memory. */ -/* - * The zone field is never updated after free_area_init_core() - * sets it, so none of the operations on it need to be atomic. - */ - -/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ -#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) -#define NODES_PGOFF(SECTIONS_PGOFF - NODES_WIDTH) -#define ZONES_PGOFF(NODES_PGOFF - ZONES_WIDTH) -#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) -#define KASAN_TAG_PGOFF(LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) - -/* - * Define the bit shifts to access each section. For non-existent - * sections we define the shift as 0; that plus a 0 mask ensures - * the compiler will optimise away reference to them. 
- */ -#define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) -#define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) -#define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) -#define LAST_CPUPID_PGSHIFT(LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0)) -#define KASAN_TAG_PGSHIFT (KASAN_TAG_PGOFF * (KASAN_TAG_WIDTH != 0)) - -/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ -#ifdef NODE_NOT_IN_PAGE_FLAGS -#define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) -#define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF)? \ - SECTIONS_PGOFF : ZONES_PGOFF) -#else -#define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT) -#define ZONEID_PGOFF ((NODES_PGOFF < ZONES_PGOFF)? \ - NODES_PGOFF : ZONES_PGOFF) -#endif - -#define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) - -#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) -#define NODES_MASK ((1UL << NODES_WIDTH) - 1) -#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) -#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1) -#define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1) -#define ZONEID_MASK((1UL << ZONEID_SHIFT) - 1) - -static inline enum zone_type page_zonenum(const struct page *page) -{ - ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); - return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; -} - -static inline enum zone_type folio_zonenum(const struct folio *folio) -{ - return page_zonenum(>page); -} - -#ifdef CONFIG_ZONE_DEVICE -static inline bool is_zone_device_page(const struct page *page) -{ - return page_zonenum(page) == ZONE_DEVICE; -} -extern void memmap_init_zone_device(struct zone *, unsigned long, - unsigned long, struct dev_pagemap *); -#else -static inline bool is_zone_device_page(const struct page *page) -{ - return false; -} -#endif - -static inline bool folio_is_zone_device(const struct folio *folio) -{ - return is_zone_device_page(>page); -} - -static inline bool is_zone_movable_page(const struct page *page) -{ - return page_zonenum(page) == ZONE_MOVABLE; -} 
- #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) DECLARE_STATIC_KEY_FALSE(devmap_managed_key); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index aab70355d64f..47fc41f43c48 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -730,6 +730,86 @@ static inline bool zone_is_empty(struct zone *zone) return zone->spanned_pages == 0; } +#ifndef BUILD_VDSO32_64 +/* + * The zone field is never updated after free_area_init_core() + * sets it, so none of the operations on it need to be atomic. + */ + +/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ +#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) +#define NODES_PGOFF(SECTIONS_PGOFF - NODES_WIDTH) +#define ZONES_PGOFF(NODES_PGOFF - ZONES_WIDTH) +#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) +#define KASAN_TAG_PGOFF(LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) + +/* + * Define the bit shifts to access
[PATCH v9 05/14] mm: add device coherent vma selection for memory migration
This case is used to migrate pages from device memory, back to system memory. Device coherent type memory is cache coherent from device and CPU point of view. Signed-off-by: Alex Sierra Acked-by: Felix Kuehling Reviewed-by: Alistair Popple Signed-off-by: Christoph Hellwig Reviewed-by: David Hildenbrand --- include/linux/migrate.h | 1 + mm/migrate_device.c | 12 +--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 069a89e847f3..b84908debe5c 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -148,6 +148,7 @@ static inline unsigned long migrate_pfn(unsigned long pfn) enum migrate_vma_direction { MIGRATE_VMA_SELECT_SYSTEM = 1 << 0, MIGRATE_VMA_SELECT_DEVICE_PRIVATE = 1 << 1, + MIGRATE_VMA_SELECT_DEVICE_COHERENT = 1 << 2, }; struct migrate_vma { diff --git a/mm/migrate_device.c b/mm/migrate_device.c index a4847ad65da3..18bc6483f63a 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -148,15 +148,21 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, if (is_writable_device_private_entry(entry)) mpfn |= MIGRATE_PFN_WRITE; } else { - if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) - goto next; pfn = pte_pfn(pte); - if (is_zero_pfn(pfn)) { + if (is_zero_pfn(pfn) && + (migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) { mpfn = MIGRATE_PFN_MIGRATE; migrate->cpages++; goto next; } page = vm_normal_page(migrate->vma, addr, pte); + if (page && !is_zone_device_page(page) && + !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) + goto next; + else if (page && is_device_coherent_page(page) && + (!(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_COHERENT) || +page->pgmap->owner != migrate->pgmap_owner)) + goto next; mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE; mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0; } -- 2.32.0
[pull] amdgpu drm-fixes-5.19
Hi Dave, Daniel, One more stable fix for 5.19. The following changes since commit 3283c83eb6fcfbda8ea03d7149d8e42e71c5d45e: drm/amd/display: Ensure valid event timestamp for cursor-only commits (2022-07-13 12:20:37 -0400) are available in the Git repository at: https://gitlab.freedesktop.org/agd5f/linux.git tags/amd-drm-fixes-5.19-2022-07-15 for you to fetch changes up to 2d4bd81fea1ad6ebba543bd6da3ef5179d130e6a: drm/amd/display: Fix new dmub notification enabling in DM (2022-07-15 10:04:59 -0400) amd-drm-fixes-5.19-2022-07-15: amdgpu: - DMUB display fix Stylon Wang (1): drm/amd/display: Fix new dmub notification enabling in DM drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 27 --- 1 file changed, 19 insertions(+), 8 deletions(-)
Re: [PATCH v1 1/6] dma-buf: Add _unlocked postfix to function names
On 7/15/22 10:19, Christian König wrote: >> -struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment >> *attach, >> - enum dma_data_direction direction) >> +struct sg_table * >> +dma_buf_map_attachment_unlocked(struct dma_buf_attachment *attach, >> + enum dma_data_direction direction) > > The locking state of mapping and unmapping operations depend on if the > attachment is dynamic or not. > > So this here is not a good idea at all since it suggests that the > function is always called without holding the lock. I had the same thought while I was working on this patch and initially was thinking about adding an "unlocked" alias to dma_buf_map_attachment(). In the end I decided that it would create even more confusion and it's simpler just to rename this func here since there are only two drivers using the dynamic mapping. Do you have suggestions on how to improve it? -- Best regards, Dmitry
Re: [PATCH v1 4/6] dma-buf: Acquire wait-wound context on attachment
On 7/15/22 09:50, Christian König wrote: > Am 15.07.22 um 02:52 schrieb Dmitry Osipenko: >> Intel i915 GPU driver uses wait-wound mutex to lock multiple GEMs on the >> attachment to the i915 dma-buf. In order to let all drivers utilize >> shared >> wait-wound context during attachment in a general way, make dma-buf >> core to >> acquire the ww context internally for the attachment operation and update >> i915 driver to use the importer's ww context instead of the internal one. >> >> From now on all dma-buf exporters shall use the importer's ww context >> for >> the attachment operation. >> >> Signed-off-by: Dmitry Osipenko >> --- >> drivers/dma-buf/dma-buf.c | 8 +- >> drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 2 +- >> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- >> drivers/gpu/drm/i915/gem/i915_gem_object.h | 6 ++--- >> drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- >> drivers/gpu/drm/i915/i915_gem_ww.c | 26 +++ >> drivers/gpu/drm/i915/i915_gem_ww.h | 15 +-- >> 7 files changed, 47 insertions(+), 14 deletions(-) >> >> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c >> index 0ee588276534..37545ecb845a 100644 >> --- a/drivers/dma-buf/dma-buf.c >> +++ b/drivers/dma-buf/dma-buf.c >> @@ -807,6 +807,8 @@ static struct sg_table * __map_dma_buf(struct >> dma_buf_attachment *attach, >> * Optionally this calls _buf_ops.attach to allow >> device-specific attach >> * functionality. >> * >> + * Exporters shall use ww_ctx acquired by this function. 
>> + * >> * Returns: >> * >> * A pointer to newly created _buf_attachment on success, or a >> negative >> @@ -822,6 +824,7 @@ >> dma_buf_dynamic_attach_unlocked(struct dma_buf >> *dmabuf, struct device *dev, >> void *importer_priv) >> { >> struct dma_buf_attachment *attach; >> + struct ww_acquire_ctx ww_ctx; >> int ret; >> if (WARN_ON(!dmabuf || !dev)) >> @@ -841,7 +844,8 @@ >> dma_buf_dynamic_attach_unlocked(struct dma_buf >> *dmabuf, struct device *dev, >> attach->importer_ops = importer_ops; >> attach->importer_priv = importer_priv; >> - dma_resv_lock(dmabuf->resv, NULL); >> + ww_acquire_init(_ctx, _ww_class); >> + dma_resv_lock(dmabuf->resv, _ctx); > > That won't work like this. The core property of a WW context is that you > need to unwind all the locks and re-acquire them with the contended one > first. > > When you statically lock the imported one here you can't do that any more. You're right. I felt that something was missing here, but couldn't pinpoint it. I'll think more about this and enable CONFIG_DEBUG_WW_MUTEX_SLOWPATH. Thank you! -- Best regards, Dmitry
[PATCH] mm/gup: migrate device coherent pages when pinning instead of failing
Currently any attempts to pin a device coherent page will fail. This is because device coherent pages need to be managed by a device driver, and pinning them would prevent a driver from migrating them off the device. However this is no reason to fail pinning of these pages. These are coherent and accessible from the CPU so can be migrated just like pinning ZONE_MOVABLE pages. So instead of failing all attempts to pin them first try migrating them out of ZONE_DEVICE. [hch: rebased to the split device memory checks, moved migrate_device_page to migrate_device.c] Signed-off-by: Alistair Popple Acked-by: Felix Kuehling Signed-off-by: Christoph Hellwig --- This patch hopefully addresses all of David's comments. It replaces both my "mm: remove the vma check in migrate_vma_setup()" and "mm/gup: migrate device coherent pages when pinning instead of failing" patches. I'm not sure what the best way of including this is, perhaps Alex can respin the series with this patch instead? - Alistair mm/gup.c| 50 +-- mm/internal.h | 1 + mm/migrate_device.c | 52 + 3 files changed, 96 insertions(+), 7 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index b65fe8bf5af4..22b97ab61cd9 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1881,7 +1881,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, unsigned long isolation_error_count = 0, i; struct folio *prev_folio = NULL; LIST_HEAD(movable_page_list); - bool drain_allow = true; + bool drain_allow = true, coherent_pages = false; int ret = 0; for (i = 0; i < nr_pages; i++) { @@ -1891,9 +1891,38 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, continue; prev_folio = folio; - if (folio_is_longterm_pinnable(folio)) + /* +* Device coherent pages are managed by a driver and should not +* be pinned indefinitely as it prevents the driver moving the +* page. So when trying to pin with FOLL_LONGTERM instead try +* to migrate the page out of device memory. 
+*/ + if (folio_is_device_coherent(folio)) { + /* +* We always want a new GUP lookup with device coherent +* pages. +*/ + pages[i] = 0; + coherent_pages = true; + + /* +* Migration will fail if the page is pinned, so convert +* the pin on the source page to a normal reference. +*/ + if (gup_flags & FOLL_PIN) { + get_page(>page); + unpin_user_page(>page); + } + + ret = migrate_device_coherent_page(>page); + if (ret) + goto unpin_pages; + continue; + } + if (folio_is_longterm_pinnable(folio)) + continue; /* * Try to move out any movable page before pinning the range. */ @@ -1919,7 +1948,8 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, folio_nr_pages(folio)); } - if (!list_empty(_page_list) || isolation_error_count) + if (!list_empty(_page_list) || isolation_error_count + || coherent_pages) goto unpin_pages; /* @@ -1929,10 +1959,16 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, return nr_pages; unpin_pages: - if (gup_flags & FOLL_PIN) { - unpin_user_pages(pages, nr_pages); - } else { - for (i = 0; i < nr_pages; i++) + /* +* pages[i] might be NULL if any device coherent pages were found. +*/ + for (i = 0; i < nr_pages; i++) { + if (!pages[i]) + continue; + + if (gup_flags & FOLL_PIN) + unpin_user_page(pages[i]); + else put_page(pages[i]); } diff --git a/mm/internal.h b/mm/internal.h index c0f8fbe0445b..899dab512c5a 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -853,6 +853,7 @@ int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, unsigned long addr, int page_nid, int *flags); void free_zone_device_page(struct page *page); +int migrate_device_coherent_page(struct page *page); /* * mm/gup.c diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 18bc6483f63a..7feeb447e3b9 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -686,6 +686,12 @@ void
[PATCH v1 2/6] drm/gem: Take reservation lock for vmap/vunmap operations
The new common dma-buf locking convention will require buffer importers to hold the reservation lock around mapping operations. Make DRM GEM core to take the lock around the vmapping operations and update QXL and i915 drivers to use the locked functions for the case where DRM core now holds the lock. This patch prepares DRM core and drivers to transition to the common dma-buf locking convention where vmapping of exported GEMs will be done under the held reservation lock. Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/drm_client.c | 4 +-- drivers/gpu/drm/drm_gem.c| 28 drivers/gpu/drm/drm_gem_framebuffer_helper.c | 6 ++--- drivers/gpu/drm/drm_prime.c | 4 +-- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 2 +- drivers/gpu/drm/qxl/qxl_object.c | 17 ++-- drivers/gpu/drm/qxl/qxl_prime.c | 4 +-- include/drm/drm_gem.h| 3 +++ 8 files changed, 50 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index 2b230b4d6942..fbcb1e995384 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -323,7 +323,7 @@ drm_client_buffer_vmap(struct drm_client_buffer *buffer, * fd_install step out of the driver backend hooks, to make that * final step optional for internal users. 
*/ - ret = drm_gem_vmap(buffer->gem, map); + ret = drm_gem_vmap_unlocked(buffer->gem, map); if (ret) return ret; @@ -345,7 +345,7 @@ void drm_client_buffer_vunmap(struct drm_client_buffer *buffer) { struct iosys_map *map = >map; - drm_gem_vunmap(buffer->gem, map); + drm_gem_vunmap_unlocked(buffer->gem, map); } EXPORT_SYMBOL(drm_client_buffer_vunmap); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index eb0c2d041f13..9769c33cad99 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1155,6 +1155,8 @@ void drm_gem_print_info(struct drm_printer *p, unsigned int indent, int drm_gem_pin(struct drm_gem_object *obj) { + dma_resv_assert_held(obj->resv); + if (obj->funcs->pin) return obj->funcs->pin(obj); else @@ -1163,6 +1165,8 @@ int drm_gem_pin(struct drm_gem_object *obj) void drm_gem_unpin(struct drm_gem_object *obj) { + dma_resv_assert_held(obj->resv); + if (obj->funcs->unpin) obj->funcs->unpin(obj); } @@ -1171,6 +1175,8 @@ int drm_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) { int ret; + dma_resv_assert_held(obj->resv); + if (!obj->funcs->vmap) return -EOPNOTSUPP; @@ -1186,6 +1192,8 @@ EXPORT_SYMBOL(drm_gem_vmap); void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) { + dma_resv_assert_held(obj->resv); + if (iosys_map_is_null(map)) return; @@ -1197,6 +1205,26 @@ void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) } EXPORT_SYMBOL(drm_gem_vunmap); +int drm_gem_vmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map) +{ + int ret; + + dma_resv_lock(obj->resv, NULL); + ret = drm_gem_vmap(obj, map); + dma_resv_unlock(obj->resv); + + return ret; +} +EXPORT_SYMBOL(drm_gem_vmap_unlocked); + +void drm_gem_vunmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map) +{ + dma_resv_lock(obj->resv, NULL); + drm_gem_vunmap(obj, map); + dma_resv_unlock(obj->resv); +} +EXPORT_SYMBOL(drm_gem_vunmap_unlocked); + /** * drm_gem_lock_reservations - Sets up the ww context and acquires 
* the lock on an array of GEM objects. diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 880a4975507f..e35e224e6303 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -354,7 +354,7 @@ int drm_gem_fb_vmap(struct drm_framebuffer *fb, struct iosys_map *map, ret = -EINVAL; goto err_drm_gem_vunmap; } - ret = drm_gem_vmap(obj, [i]); + ret = drm_gem_vmap_unlocked(obj, [i]); if (ret) goto err_drm_gem_vunmap; } @@ -376,7 +376,7 @@ int drm_gem_fb_vmap(struct drm_framebuffer *fb, struct iosys_map *map, obj = drm_gem_fb_get_obj(fb, i); if (!obj) continue; - drm_gem_vunmap(obj, [i]); + drm_gem_vunmap_unlocked(obj, [i]); } return ret; } @@ -403,7 +403,7 @@ void drm_gem_fb_vunmap(struct drm_framebuffer *fb, struct iosys_map *map) continue; if (iosys_map_is_null([i])) continue; - drm_gem_vunmap(obj, [i]); + drm_gem_vunmap_unlocked(obj,
Re: Linux 5.19-rc6
On Thu, Jul 14, 2022 at 7:24 PM Guenter Roeck wrote: > On 7/14/22 09:48, Linus Torvalds wrote: > > And some look positively strange. Like that > > > >drivers/mfd/asic3.c: error: unused variable 'asic' > > [-Werror=unused-variable]: => 941:23 > > > > which is clearly used three lines later by > > > > iounmap(asic->tmio_cnf); > > > > and I can't find any case of 'iounmap()' having been defined to an > > empty macro or anything like that to explain it. The error in > > drivers/tty/serial/sh-sci.c looks to be exactly the same issue, just > > with ioremap() instead of iounmap(). > > > > It would be good to have some way to find which build/architecture it > > is, because right now it just looks bogus. > > > > Do you perhaps use some broken compiler that complains when the empty > > inline functions don't use their arguments? Because that's what those > > ioremap/iounmap() ones look like to me, but there might be some > > magical architecture / config that has issues that aren't obvious. > > > > IOW, I'd love to get those fixed, but I would also want a little bit more > > info. > > > Geert gave the necessary hint - it looks like sh-nommu used defines > for iomap() and iounmap(), which made the variable unused. According > to Geert that was fixed a couple of days ago. Yes, post-rc6 should be fine, as the fix went in... for the third time. Combine people that keep on switching back to macros without reading a file's history with unresponsive maintainers... Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
Re: [PATCH] mm/gup: migrate device coherent pages when pinning instead of failing
On 7/14/2022 9:11 PM, Alistair Popple wrote: Currently any attempts to pin a device coherent page will fail. This is because device coherent pages need to be managed by a device driver, and pinning them would prevent a driver from migrating them off the device. However this is no reason to fail pinning of these pages. These are coherent and accessible from the CPU so can be migrated just like pinning ZONE_MOVABLE pages. So instead of failing all attempts to pin them first try migrating them out of ZONE_DEVICE. [hch: rebased to the split device memory checks, moved migrate_device_page to migrate_device.c] Signed-off-by: Alistair Popple Acked-by: Felix Kuehling Signed-off-by: Christoph Hellwig --- This patch hopefully addresses all of David's comments. It replaces both my "mm: remove the vma check in migrate_vma_setup()" and "mm/gup: migrate device coherent pages when pinning instead of failing" patches. I'm not sure what the best way of including this is, perhaps Alex can respin the series with this patch instead? For sure Alistair. I'll include this in my next patch series version. Thanks, Alex Sierra - Alistair mm/gup.c| 50 +-- mm/internal.h | 1 + mm/migrate_device.c | 52 + 3 files changed, 96 insertions(+), 7 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index b65fe8bf5af4..22b97ab61cd9 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1881,7 +1881,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, unsigned long isolation_error_count = 0, i; struct folio *prev_folio = NULL; LIST_HEAD(movable_page_list); - bool drain_allow = true; + bool drain_allow = true, coherent_pages = false; int ret = 0; for (i = 0; i < nr_pages; i++) { @@ -1891,9 +1891,38 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, continue; prev_folio = folio; - if (folio_is_longterm_pinnable(folio)) + /* +* Device coherent pages are managed by a driver and should not +* be pinned indefinitely as it prevents the driver moving the +* page. 
So when trying to pin with FOLL_LONGTERM instead try +* to migrate the page out of device memory. +*/ + if (folio_is_device_coherent(folio)) { + /* +* We always want a new GUP lookup with device coherent +* pages. +*/ + pages[i] = 0; + coherent_pages = true; + + /* +* Migration will fail if the page is pinned, so convert +* the pin on the source page to a normal reference. +*/ + if (gup_flags & FOLL_PIN) { + get_page(>page); + unpin_user_page(>page); + } + + ret = migrate_device_coherent_page(>page); + if (ret) + goto unpin_pages; + continue; + } + if (folio_is_longterm_pinnable(folio)) + continue; /* * Try to move out any movable page before pinning the range. */ @@ -1919,7 +1948,8 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, folio_nr_pages(folio)); } - if (!list_empty(_page_list) || isolation_error_count) + if (!list_empty(_page_list) || isolation_error_count + || coherent_pages) goto unpin_pages; /* @@ -1929,10 +1959,16 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, return nr_pages; unpin_pages: - if (gup_flags & FOLL_PIN) { - unpin_user_pages(pages, nr_pages); - } else { - for (i = 0; i < nr_pages; i++) + /* +* pages[i] might be NULL if any device coherent pages were found. +*/ + for (i = 0; i < nr_pages; i++) { + if (!pages[i]) + continue; + + if (gup_flags & FOLL_PIN) + unpin_user_page(pages[i]); + else put_page(pages[i]); } diff --git a/mm/internal.h b/mm/internal.h index c0f8fbe0445b..899dab512c5a 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -853,6 +853,7 @@ int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, unsigned long addr, int page_nid, int *flags); void free_zone_device_page(struct page *page); +int migrate_device_coherent_page(struct page *page); /* * mm/gup.c diff --git a/mm/migrate_device.c
Re: [PATCH v2 16/29] ACPI: video: Add Nvidia WMI EC brightness control detection
Hi Daniel, On 7/12/22 22:13, Daniel Dadap wrote: > Thanks, Hans: > > On 7/12/22 14:38, Hans de Goede wrote: >> On some new laptop designs a new Nvidia specific WMI interface is present >> which gives info about panel brightness control and may allow controlling >> the brightness through this interface when the embedded controller is used >> for brightness control. >> >> When this WMI interface is present and indicates that the EC is used, >> then this interface should be used for brightness control. >> >> Signed-off-by: Hans de Goede >> --- >> drivers/acpi/Kconfig | 1 + >> drivers/acpi/video_detect.c | 35 ++ >> drivers/gpu/drm/gma500/Kconfig | 2 ++ >> drivers/gpu/drm/i915/Kconfig | 2 ++ >> include/acpi/video.h | 1 + >> 5 files changed, 41 insertions(+) >> >> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig >> index 1e34f846508f..c372385cfc3f 100644 >> --- a/drivers/acpi/Kconfig >> +++ b/drivers/acpi/Kconfig >> @@ -212,6 +212,7 @@ config ACPI_VIDEO >> tristate "Video" >> depends on X86 && BACKLIGHT_CLASS_DEVICE >> depends on INPUT >> + depends on ACPI_WMI >> select THERMAL >> help >> This driver implements the ACPI Extensions For Display Adapters >> diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c >> index 8c2863403040..7b89dc9a04a2 100644 >> --- a/drivers/acpi/video_detect.c >> +++ b/drivers/acpi/video_detect.c >> @@ -75,6 +75,35 @@ find_video(acpi_handle handle, u32 lvl, void *context, >> void **rv) >> return AE_OK; >> } >> +#define WMI_BRIGHTNESS_METHOD_SOURCE 2 >> +#define WMI_BRIGHTNESS_MODE_GET 0 >> +#define WMI_BRIGHTNESS_SOURCE_EC 2 >> + >> +struct wmi_brightness_args { >> + u32 mode; >> + u32 val; >> + u32 ret; >> + u32 ignored[3]; >> +}; >> + >> +static bool nvidia_wmi_ec_supported(void) >> +{ >> + struct wmi_brightness_args args = { >> + .mode = WMI_BRIGHTNESS_MODE_GET, >> + .val = 0, >> + .ret = 0, >> + }; >> + struct acpi_buffer buf = { (acpi_size)sizeof(args), }; >> + acpi_status status; >> + >> + status = 
wmi_evaluate_method("603E9613-EF25-4338-A3D0-C46177516DB7", 0, >> + WMI_BRIGHTNESS_METHOD_SOURCE, , ); >> + if (ACPI_FAILURE(status)) >> + return false; >> + >> + return args.ret == WMI_BRIGHTNESS_SOURCE_EC; >> +} >> + > > > The code duplication here with nvidia-wmi-ec-backlight.c is a little > unfortunate. Can we move the constants, struct definition, and WMI GUID from > that file to a header file that's used both by the EC backlight driver and > the ACPI video driver? Yes that is a good idea. I suggest using include/linux/platform_data/x86/nvidia-wmi-ec-backlight.h to move the shared definitions there. If you can submit 2 patches on top of this series: 1. Moving the definitions from drivers/platform/x86/nvidia-wmi-ec-backlight.c to include/linux/platform_data/x86/nvidia-wmi-ec-backlight.h 2. Switching the code from this patch over to using the new nvidia-wmi-ec-backlight.h Then for the next version I'll add patch 1. to the series and squash patch 2. into this one. > I was thinking it might be nice to add a wrapper around > wmi_brightness_notify() in nvidia-wmi-ec-backlight.c that does this source == > WMI_BRIGHTNESS_SOURCE_EC test, and then export it so that it can be called > both here and in the EC backlight driver's probe routine, but then I guess > that would make video.ko depend on nvidia-wmi-ec-backlight.ko, which seems > wrong. It also seems wrong to implement the WMI plumbing in the ACPI video > driver, and export it so that the EC backlight driver can use it, so I guess > I can live with the duplication of the relatively simple WMI stuff here, it > would just be nice to not have to define all of the API constants, structure, > and GUID twice. Agreed. 
> > >> /* Force to use vendor driver when the ACPI device is known to be >> * buggy */ >> static int video_detect_force_vendor(const struct dmi_system_id *d) >> @@ -518,6 +547,7 @@ static const struct dmi_system_id >> video_detect_dmi_table[] = { >> static enum acpi_backlight_type __acpi_video_get_backlight_type(bool >> native) >> { >> static DEFINE_MUTEX(init_mutex); >> + static bool nvidia_wmi_ec_present; >> static bool native_available; >> static bool init_done; >> static long video_caps; >> @@ -530,6 +560,7 @@ static enum acpi_backlight_type >> __acpi_video_get_backlight_type(bool native) >> acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, >> ACPI_UINT32_MAX, find_video, NULL, >> _caps, NULL); >> + nvidia_wmi_ec_present = nvidia_wmi_ec_supported(); >> init_done = true; >> } >> if (native) >> @@ -547,6 +578,10 @@ static enum acpi_backlight_type >> __acpi_video_get_backlight_type(bool native) >>
[PATCH v1 3/6] dma-buf: Move all dma-bufs to dynamic locking specification
This patch moves the non-dynamic dma-buf users over to the dynamic locking specification. From now on all dma-buf importers are responsible for holding dma-buf's reservation lock around operations performed over dma-bufs. This strict locking convention prevents deadlock situations for dma-buf importers and exporters. Previously the "unlocked" versions of the dma-buf API functions weren't taking the reservation lock and this patch makes them take the lock. Intel and AMD GPU drivers were already mapping imported dma-bufs under the held lock, hence the "locked" variants of the functions are added for them and the drivers are updated to use the "locked" versions. The Intel driver is also updated to not lock the exported buffer on attachment since the lock is now held by the importer. We also need to move the ww context acquisition from exporters (i915 driver) to importers, otherwise lockdep won't be happy. This will be done in the next patch since i915 is the only driver that uses ww context on attachment today and it's not critical to make this change separately for the i915 driver. 
Signed-off-by: Dmitry Osipenko --- drivers/dma-buf/dma-buf.c | 125 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 4 +- drivers/gpu/drm/drm_prime.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 12 +- include/linux/dma-buf.h| 6 + 5 files changed, 104 insertions(+), 47 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index d16237a6ffaa..0ee588276534 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -841,14 +841,14 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, struct device *dev, attach->importer_ops = importer_ops; attach->importer_priv = importer_priv; + dma_resv_lock(dmabuf->resv, NULL); + if (dmabuf->ops->attach) { ret = dmabuf->ops->attach(dmabuf, attach); if (ret) goto err_attach; } - dma_resv_lock(dmabuf->resv, NULL); list_add(>node, >attachments); - dma_resv_unlock(dmabuf->resv); /* When either the importer or the exporter can't handle dynamic * mappings we cache the mapping here to avoid issues with the @@ -859,7 +859,6 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, struct device *dev, struct sg_table *sgt; if (dma_buf_is_dynamic(attach->dmabuf)) { - dma_resv_lock(attach->dmabuf->resv, NULL); ret = dmabuf->ops->pin(attach); if (ret) goto err_unlock; @@ -872,15 +871,16 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, struct device *dev, ret = PTR_ERR(sgt); goto err_unpin; } - if (dma_buf_is_dynamic(attach->dmabuf)) - dma_resv_unlock(attach->dmabuf->resv); attach->sgt = sgt; attach->dir = DMA_BIDIRECTIONAL; } + dma_resv_unlock(dmabuf->resv); + return attach; err_attach: + dma_resv_unlock(attach->dmabuf->resv); kfree(attach); return ERR_PTR(ret); @@ -889,8 +889,7 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, struct device *dev, dmabuf->ops->unpin(attach); err_unlock: - if (dma_buf_is_dynamic(attach->dmabuf)) - dma_resv_unlock(attach->dmabuf->resv); + dma_resv_unlock(dmabuf->resv); dma_buf_detach_unlocked(dmabuf, attach); return ERR_PTR(ret); @@ -937,24 
+936,23 @@ void dma_buf_detach_unlocked(struct dma_buf *dmabuf, if (WARN_ON(!dmabuf || !attach)) return; - if (attach->sgt) { - if (dma_buf_is_dynamic(attach->dmabuf)) - dma_resv_lock(attach->dmabuf->resv, NULL); + if (WARN_ON(dmabuf != attach->dmabuf)) + return; + dma_resv_lock(dmabuf->resv, NULL); + + if (attach->sgt) { __unmap_dma_buf(attach, attach->sgt, attach->dir); - if (dma_buf_is_dynamic(attach->dmabuf)) { + if (dma_buf_is_dynamic(attach->dmabuf)) dmabuf->ops->unpin(attach); - dma_resv_unlock(attach->dmabuf->resv); - } } - - dma_resv_lock(dmabuf->resv, NULL); list_del(>node); - dma_resv_unlock(dmabuf->resv); + if (dmabuf->ops->detach) dmabuf->ops->detach(dmabuf, attach); + dma_resv_unlock(dmabuf->resv); kfree(attach); } EXPORT_SYMBOL_NS_GPL(dma_buf_detach_unlocked, DMA_BUF); @@ -1030,10 +1028,11 @@ EXPORT_SYMBOL_NS_GPL(dma_buf_unpin, DMA_BUF); * * Important: Dynamic importers must wait for the exclusive fence of the struct * dma_resv attached to the DMA-BUF first. + * + * Importer is responsible for
[PATCH v2] drm/amdgpu: add HW_IP_VCN_UNIFIED type
Define HW_IP_VCN_UNIFIED type the same as HW_IP_VCN_ENC. VCN4 support for libdrm needs a new definition for the unified queue, so that it can align to the kernel. link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits Signed-off-by: Ruijing Dong --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..fe33db8441bc 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -560,6 +560,7 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_UVD_ENC 5 #define AMDGPU_HW_IP_VCN_DEC 6 #define AMDGPU_HW_IP_VCN_ENC 7 +#define AMDGPU_HW_IP_VCN_UNIFIED AMDGPU_HW_IP_VCN_ENC #define AMDGPU_HW_IP_VCN_JPEG 8 #define AMDGPU_HW_IP_NUM 9 -- 2.25.1
[PATCH v1 6/6] dma-buf: Remove internal lock
The internal dma-buf lock isn't needed anymore because the updated locking specification claims that dma-buf reservation must be locked by importers, and thus, the internal data is already protected by the reservation lock. Remove the obsoleted internal lock. Signed-off-by: Dmitry Osipenko --- drivers/dma-buf/dma-buf.c | 5 - include/linux/dma-buf.h | 9 - 2 files changed, 14 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 37545ecb845a..4cc739537ebd 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -656,7 +656,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) dmabuf->file = file; - mutex_init(>lock); INIT_LIST_HEAD(>attachments); mutex_lock(_list.lock); @@ -1459,7 +1458,6 @@ int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map) return -EINVAL; dma_resv_lock(dmabuf->resv, NULL); - mutex_lock(>lock); if (dmabuf->vmapping_counter) { dmabuf->vmapping_counter++; BUG_ON(iosys_map_is_null(>vmap_ptr)); @@ -1479,7 +1477,6 @@ int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map) *map = dmabuf->vmap_ptr; out_unlock: - mutex_unlock(>lock); dma_resv_unlock(dmabuf->resv); return ret; } @@ -1500,13 +1497,11 @@ void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map) BUG_ON(!iosys_map_is_equal(>vmap_ptr, map)); dma_resv_lock(dmabuf->resv, NULL); - mutex_lock(>lock); if (--dmabuf->vmapping_counter == 0) { if (dmabuf->ops->vunmap) dmabuf->ops->vunmap(dmabuf, map); iosys_map_clear(>vmap_ptr); } - mutex_unlock(>lock); dma_resv_unlock(dmabuf->resv); } EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap_unlocked, DMA_BUF); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index da924a56d58f..abdd99042c77 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -326,15 +326,6 @@ struct dma_buf { /** @ops: dma_buf_ops associated with this buffer object. 
*/ const struct dma_buf_ops *ops; - /** -* @lock: -* -* Used internally to serialize list manipulation, attach/detach and -* vmap/unmap. Note that in many cases this is superseeded by -* dma_resv_lock() on @resv. -*/ - struct mutex lock; - /** * @vmapping_counter: * -- 2.36.1
[PATCH v1 0/6] Move all drivers to a common dma-buf locking convention
Hello, This series moves all drivers to a dynamic dma-buf locking specification. From now on all dma-buf importers are made responsible for holding the dma-buf's reservation lock around all operations performed over dma-bufs. This common locking convention allows us to utilize the reservation lock more broadly across the kernel without fear of potential deadlocks. This patchset passes all i915 selftests. It was also tested using VirtIO, Panfrost, Lima and Tegra drivers. I tested cases of display+GPU, display+V4L and GPU+V4L dma-buf sharing, which covers the majority of kernel drivers since the rest of the drivers share the same or similar code paths. This is a continuation of [1] where Christian König asked to factor out the dma-buf locking changes into a separate series. [1] https://lore.kernel.org/dri-devel/20220526235040.678984-1-dmitry.osipe...@collabora.com/ Dmitry Osipenko (6): dma-buf: Add _unlocked postfix to function names drm/gem: Take reservation lock for vmap/vunmap operations dma-buf: Move all dma-bufs to dynamic locking specification dma-buf: Acquire wait-wound context on attachment media: videobuf2: Stop using internal dma-buf lock dma-buf: Remove internal lock drivers/dma-buf/dma-buf.c | 198 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 +- drivers/gpu/drm/armada/armada_gem.c | 14 +- drivers/gpu/drm/drm_client.c | 4 +- drivers/gpu/drm/drm_gem.c | 28 +++ drivers/gpu/drm/drm_gem_cma_helper.c | 6 +- drivers/gpu/drm/drm_gem_framebuffer_helper.c | 6 +- drivers/gpu/drm/drm_gem_shmem_helper.c| 6 +- drivers/gpu/drm/drm_prime.c | 12 +- drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 6 +- drivers/gpu/drm/exynos/exynos_drm_gem.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c| 20 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 2 +- drivers/gpu/drm/i915/gem/i915_gem_object.h| 6 +- .../drm/i915/gem/selftests/i915_gem_dmabuf.c | 20 +- drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/i915_gem_ww.c| 26 ++- 
drivers/gpu/drm/i915/i915_gem_ww.h| 15 +- drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 8 +- drivers/gpu/drm/qxl/qxl_object.c | 17 +- drivers/gpu/drm/qxl/qxl_prime.c | 4 +- drivers/gpu/drm/tegra/gem.c | 27 +-- drivers/infiniband/core/umem_dmabuf.c | 11 +- .../common/videobuf2/videobuf2-dma-contig.c | 26 +-- .../media/common/videobuf2/videobuf2-dma-sg.c | 23 +- .../common/videobuf2/videobuf2-vmalloc.c | 17 +- .../platform/nvidia/tegra-vde/dmabuf-cache.c | 12 +- drivers/misc/fastrpc.c| 12 +- drivers/xen/gntdev-dmabuf.c | 14 +- include/drm/drm_gem.h | 3 + include/linux/dma-buf.h | 49 ++--- 32 files changed, 347 insertions(+), 257 deletions(-) -- 2.36.1
[PATCH v1 5/6] media: videobuf2: Stop using internal dma-buf lock
All drivers that use dma-bufs have been moved to the updated locking specification and now dma-buf reservation is guaranteed to be locked by importers during the mapping operations. There is no need to take the internal dma-buf lock anymore. Remove locking from the videobuf2 memory allocators. Signed-off-by: Dmitry Osipenko --- drivers/media/common/videobuf2/videobuf2-dma-contig.c | 11 +-- drivers/media/common/videobuf2/videobuf2-dma-sg.c | 11 +-- drivers/media/common/videobuf2/videobuf2-vmalloc.c| 11 +-- 3 files changed, 3 insertions(+), 30 deletions(-) diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c b/drivers/media/common/videobuf2/videobuf2-dma-contig.c index de762dbdaf78..2c69bf0470e7 100644 --- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c +++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c @@ -382,18 +382,12 @@ static struct sg_table *vb2_dc_dmabuf_ops_map( struct dma_buf_attachment *db_attach, enum dma_data_direction dma_dir) { struct vb2_dc_attachment *attach = db_attach->priv; - /* stealing dmabuf mutex to serialize map/unmap operations */ - struct mutex *lock = _attach->dmabuf->lock; struct sg_table *sgt; - mutex_lock(lock); - sgt = >sgt; /* return previously mapped sg table */ - if (attach->dma_dir == dma_dir) { - mutex_unlock(lock); + if (attach->dma_dir == dma_dir) return sgt; - } /* release any previous cache */ if (attach->dma_dir != DMA_NONE) { @@ -409,14 +403,11 @@ static struct sg_table *vb2_dc_dmabuf_ops_map( if (dma_map_sgtable(db_attach->dev, sgt, dma_dir, DMA_ATTR_SKIP_CPU_SYNC)) { pr_err("failed to map scatterlist\n"); - mutex_unlock(lock); return ERR_PTR(-EIO); } attach->dma_dir = dma_dir; - mutex_unlock(lock); - return sgt; } diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c b/drivers/media/common/videobuf2/videobuf2-dma-sg.c index 39e11600304a..e63e718c0bf7 100644 --- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c +++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c @@ -424,18 
+424,12 @@ static struct sg_table *vb2_dma_sg_dmabuf_ops_map( struct dma_buf_attachment *db_attach, enum dma_data_direction dma_dir) { struct vb2_dma_sg_attachment *attach = db_attach->priv; - /* stealing dmabuf mutex to serialize map/unmap operations */ - struct mutex *lock = _attach->dmabuf->lock; struct sg_table *sgt; - mutex_lock(lock); - sgt = >sgt; /* return previously mapped sg table */ - if (attach->dma_dir == dma_dir) { - mutex_unlock(lock); + if (attach->dma_dir == dma_dir) return sgt; - } /* release any previous cache */ if (attach->dma_dir != DMA_NONE) { @@ -446,14 +440,11 @@ static struct sg_table *vb2_dma_sg_dmabuf_ops_map( /* mapping to the client with new direction */ if (dma_map_sgtable(db_attach->dev, sgt, dma_dir, 0)) { pr_err("failed to map scatterlist\n"); - mutex_unlock(lock); return ERR_PTR(-EIO); } attach->dma_dir = dma_dir; - mutex_unlock(lock); - return sgt; } diff --git a/drivers/media/common/videobuf2/videobuf2-vmalloc.c b/drivers/media/common/videobuf2/videobuf2-vmalloc.c index 7831bf545874..41db707e43a4 100644 --- a/drivers/media/common/videobuf2/videobuf2-vmalloc.c +++ b/drivers/media/common/videobuf2/videobuf2-vmalloc.c @@ -267,18 +267,12 @@ static struct sg_table *vb2_vmalloc_dmabuf_ops_map( struct dma_buf_attachment *db_attach, enum dma_data_direction dma_dir) { struct vb2_vmalloc_attachment *attach = db_attach->priv; - /* stealing dmabuf mutex to serialize map/unmap operations */ - struct mutex *lock = _attach->dmabuf->lock; struct sg_table *sgt; - mutex_lock(lock); - sgt = >sgt; /* return previously mapped sg table */ - if (attach->dma_dir == dma_dir) { - mutex_unlock(lock); + if (attach->dma_dir == dma_dir) return sgt; - } /* release any previous cache */ if (attach->dma_dir != DMA_NONE) { @@ -289,14 +283,11 @@ static struct sg_table *vb2_vmalloc_dmabuf_ops_map( /* mapping to the client with new direction */ if (dma_map_sgtable(db_attach->dev, sgt, dma_dir, 0)) { pr_err("failed to map scatterlist\n"); - mutex_unlock(lock); 
return ERR_PTR(-EIO); } attach->dma_dir = dma_dir; - mutex_unlock(lock); - return sgt; } -- 2.36.1
Re: [PATCH v2 20/29] platform/x86: acer-wmi: Move backlight DMI quirks to acpi/video_detect.c
Hi, On 7/12/22 22:24, Daniel Dadap wrote: > I'll ask around to see if there's some DMI property we can match in order to > detect whether a system is expected to use the EC backlight driver: if so, > maybe we can avoid the WMI interactions in patch 16/29 of this series. > Although I suppose even if there were a DMI property, we'd still need to call > the WMI-wrapped ACPI method to check whether the system is currently > configured to drive the backlight through the EC, unless the system somehow > exports a different DMI table depending on the current backlight control > configuration, which I imagine to be unlikely. IMHO the duplication is fine, it is also important that the video_detect.c code and the actual backlight driver use the same detection mechanism where possible. Otherwise acpi_video_get_backlight_type() may return acpi_backlight_nvidia_wmi_ec while the EC backlight driver refuses to load... Regards, Hans > > This change looks fine to me, although I suppose somebody who maintains the > acer-wmi driver should comment. The bugzilla links are a nice touch. > > On 7/12/22 14:39, Hans de Goede wrote: >> Move the backlight DMI quirks to acpi/video_detect.c, so that >> the driver no longer needs to call acpi_video_set_dmi_backlight_type(). >> >> acpi_video_set_dmi_backlight_type() is troublesome because it may end up >> getting called after other backlight drivers have already called >> acpi_video_get_backlight_type() resulting in the other drivers >> already being registered even though they should not. >> >> Note that even though the DMI quirk table name was video_vendor_dmi_table, >> 5/6 quirks were actually quirks to use the GPU native backlight. >> >> These 5 quirks also had a callback in their dmi_system_id entry which >> disabled the acer-wmi vendor driver; and any DMI match resulted in: >> >> acpi_video_set_dmi_backlight_type(acpi_backlight_vendor); >> >> which disabled the acpi_video driver, so only the native driver was left. 
>> The new entries for these 5/6 devices correctly marks these as needing >> the native backlight driver. >> >> Also note that other changes in this series change the native backlight >> drivers to no longer unconditionally register their backlight. Instead >> these drivers now do this check: >> >> if (acpi_video_get_backlight_type(false) != acpi_backlight_native) >> return 0; /* bail */ >> >> which without this patch would have broken these 5/6 "special" quirks. >> >> Since I had to look at all the commits adding the quirks anyways, to make >> sure that I understood the code correctly, I've also added links to >> the various original bugzillas for these quirks to the new entries. >> >> Signed-off-by: Hans de Goede >> --- >> drivers/acpi/video_detect.c | 53 ++ >> drivers/platform/x86/acer-wmi.c | 66 - >> 2 files changed, 53 insertions(+), 66 deletions(-) >> >> diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c >> index a514adaec14d..cd51cb0d7821 100644 >> --- a/drivers/acpi/video_detect.c >> +++ b/drivers/acpi/video_detect.c >> @@ -147,6 +147,15 @@ static const struct dmi_system_id >> video_detect_dmi_table[] = { >> DMI_MATCH(DMI_BOARD_NAME, "X360"), >> }, >> }, >> + { >> + /* https://bugzilla.redhat.com/show_bug.cgi?id=1128309 */ >> + .callback = video_detect_force_vendor, >> + /* Acer KAV80 */ >> + .matches = { >> + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), >> + DMI_MATCH(DMI_PRODUCT_NAME, "KAV80"), >> + }, >> + }, >> { >> .callback = video_detect_force_vendor, >> /* Asus UL30VT */ >> @@ -427,6 +436,41 @@ static const struct dmi_system_id >> video_detect_dmi_table[] = { >> DMI_MATCH(DMI_BOARD_NAME, "JV50"), >> }, >> }, >> + { >> + /* https://bugzilla.redhat.com/show_bug.cgi?id=1012674 */ >> + .callback = video_detect_force_native, >> + /* Acer Aspire 5741 */ >> + .matches = { >> + DMI_MATCH(DMI_BOARD_VENDOR, "Acer"), >> + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5741"), >> + }, >> + }, >> + { >> + /* https://bugzilla.kernel.org/show_bug.cgi?id=42993 */ >> 
+ .callback = video_detect_force_native, >> + /* Acer Aspire 5750 */ >> + .matches = { >> + DMI_MATCH(DMI_BOARD_VENDOR, "Acer"), >> + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5750"), >> + }, >> + }, >> + { >> + /* https://bugzilla.kernel.org/show_bug.cgi?id=42833 */ >> + .callback = video_detect_force_native, >> + /* Acer Extensa 5235 */ >> + .matches = { >> + DMI_MATCH(DMI_BOARD_VENDOR, "Acer"), >> + DMI_MATCH(DMI_PRODUCT_NAME, "Extensa 5235"), >> + }, >> + }, >> + { >> + .callback = video_detect_force_native, >> + /* Acer TravelMate 4750 */ >> + .matches = { >> + DMI_MATCH(DMI_BOARD_VENDOR, "Acer"), >> + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate
[PATCH v1 4/6] dma-buf: Acquire wait-wound context on attachment
The Intel i915 GPU driver uses a wait-wound mutex to lock multiple GEMs on the attachment to the i915 dma-buf. In order to let all drivers utilize a shared wait-wound context during attachment in a general way, make the dma-buf core acquire the ww context internally for the attachment operation and update the i915 driver to use the importer's ww context instead of the internal one. From now on all dma-buf exporters shall use the importer's ww context for the attachment operation. Signed-off-by: Dmitry Osipenko --- drivers/dma-buf/dma-buf.c | 8 +- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c| 2 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 2 +- drivers/gpu/drm/i915/gem/i915_gem_object.h| 6 ++--- drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/i915_gem_ww.c| 26 +++ drivers/gpu/drm/i915/i915_gem_ww.h| 15 +-- 7 files changed, 47 insertions(+), 14 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 0ee588276534..37545ecb845a 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -807,6 +807,8 @@ static struct sg_table * __map_dma_buf(struct dma_buf_attachment *attach, * Optionally this calls _buf_ops.attach to allow device-specific attach * functionality. * + * Exporters shall use ww_ctx acquired by this function. 
+ * * Returns: * * A pointer to newly created _buf_attachment on success, or a negative @@ -822,6 +824,7 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, struct device *dev, void *importer_priv) { struct dma_buf_attachment *attach; + struct ww_acquire_ctx ww_ctx; int ret; if (WARN_ON(!dmabuf || !dev)) @@ -841,7 +844,8 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, struct device *dev, attach->importer_ops = importer_ops; attach->importer_priv = importer_priv; - dma_resv_lock(dmabuf->resv, NULL); + ww_acquire_init(_ctx, _ww_class); + dma_resv_lock(dmabuf->resv, _ctx); if (dmabuf->ops->attach) { ret = dmabuf->ops->attach(dmabuf, attach); @@ -876,11 +880,13 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, struct device *dev, } dma_resv_unlock(dmabuf->resv); + ww_acquire_fini(_ctx); return attach; err_attach: dma_resv_unlock(attach->dmabuf->resv); + ww_acquire_fini(_ctx); kfree(attach); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index c199bf71c373..9173f0232b16 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -173,7 +173,7 @@ static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf, if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) return -EOPNOTSUPP; - for_i915_gem_ww(, err, true) { + for_i915_dmabuf_ww(, dmabuf, err, true) { err = i915_gem_object_migrate(obj, , INTEL_REGION_SMEM); if (err) continue; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 30fe847c6664..ad7d602fc43a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3409,7 +3409,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } - ww_acquire_done(); + ww_acquire_done(eb.ww.ctx); eb_capture_stage(); out_fence = eb_requests_create(, in_fence, out_fence_fd); diff --git 
a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index e11d82a9f7c3..5ae38f94a5c7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -178,9 +178,9 @@ static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj, int ret; if (intr) - ret = dma_resv_lock_interruptible(obj->base.resv, ww ? >ctx : NULL); + ret = dma_resv_lock_interruptible(obj->base.resv, ww ? ww->ctx : NULL); else - ret = dma_resv_lock(obj->base.resv, ww ? >ctx : NULL); + ret = dma_resv_lock(obj->base.resv, ww ? ww->ctx : NULL); if (!ret && ww) { i915_gem_object_get(obj); @@ -216,7 +216,7 @@ static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj, if (!ww) return dma_resv_trylock(obj->base.resv); else - return ww_mutex_trylock(>base.resv->lock, >ctx); + return ww_mutex_trylock(>base.resv->lock, ww->ctx); } static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index
[PATCH v1 1/6] dma-buf: Add _unlocked postfix to function names
Add the _unlocked postfix to the dma-buf API function names in preparation for moving all non-dynamic dma-buf users over to the dynamic locking specification. This patch only renames API functions, preparing drivers for the common locking convention. Later on we will make the "unlocked" functions take the reservation lock. Suggested-by: Christian König Signed-off-by: Dmitry Osipenko --- drivers/dma-buf/dma-buf.c | 76 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 +- drivers/gpu/drm/armada/armada_gem.c | 14 ++-- drivers/gpu/drm/drm_gem_cma_helper.c | 6 +- drivers/gpu/drm/drm_gem_shmem_helper.c| 6 +- drivers/gpu/drm/drm_prime.c | 12 +-- drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 6 +- drivers/gpu/drm/exynos/exynos_drm_gem.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c| 12 +-- .../drm/i915/gem/selftests/i915_gem_dmabuf.c | 20 ++--- drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 8 +- drivers/gpu/drm/tegra/gem.c | 27 +++ drivers/infiniband/core/umem_dmabuf.c | 11 +-- .../common/videobuf2/videobuf2-dma-contig.c | 15 ++-- .../media/common/videobuf2/videobuf2-dma-sg.c | 12 +-- .../common/videobuf2/videobuf2-vmalloc.c | 6 +- .../platform/nvidia/tegra-vde/dmabuf-cache.c | 12 +-- drivers/misc/fastrpc.c| 12 +-- drivers/xen/gntdev-dmabuf.c | 14 ++-- include/linux/dma-buf.h | 34 + 21 files changed, 161 insertions(+), 152 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 44574fbe7482..d16237a6ffaa 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -795,7 +795,7 @@ static struct sg_table * __map_dma_buf(struct dma_buf_attachment *attach, } /** - * dma_buf_dynamic_attach - Add the device to dma_buf's attachments list + * dma_buf_dynamic_attach_unlocked - Add the device to dma_buf's attachments list * @dmabuf:[in]buffer to attach device to. * @dev: [in]device to be attached. 
* @importer_ops: [in]importer operations for the attachment @@ -817,9 +817,9 @@ static struct sg_table * __map_dma_buf(struct dma_buf_attachment *attach, * indicated with the error code -EBUSY. */ struct dma_buf_attachment * -dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev, - const struct dma_buf_attach_ops *importer_ops, - void *importer_priv) +dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, struct device *dev, + const struct dma_buf_attach_ops *importer_ops, + void *importer_priv) { struct dma_buf_attachment *attach; int ret; @@ -892,25 +892,25 @@ dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev, if (dma_buf_is_dynamic(attach->dmabuf)) dma_resv_unlock(attach->dmabuf->resv); - dma_buf_detach(dmabuf, attach); + dma_buf_detach_unlocked(dmabuf, attach); return ERR_PTR(ret); } -EXPORT_SYMBOL_NS_GPL(dma_buf_dynamic_attach, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_dynamic_attach_unlocked, DMA_BUF); /** - * dma_buf_attach - Wrapper for dma_buf_dynamic_attach + * dma_buf_attach_unlocked - Wrapper for dma_buf_dynamic_attach * @dmabuf:[in]buffer to attach device to. * @dev: [in]device to be attached. * - * Wrapper to call dma_buf_dynamic_attach() for drivers which still use a static - * mapping. + * Wrapper to call dma_buf_dynamic_attach_unlocked() for drivers which still + * use a static mapping. 
*/ -struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, - struct device *dev) +struct dma_buf_attachment *dma_buf_attach_unlocked(struct dma_buf *dmabuf, + struct device *dev) { - return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL); + return dma_buf_dynamic_attach_unlocked(dmabuf, dev, NULL, NULL); } -EXPORT_SYMBOL_NS_GPL(dma_buf_attach, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_attach_unlocked, DMA_BUF); static void __unmap_dma_buf(struct dma_buf_attachment *attach, struct sg_table *sg_table, @@ -923,7 +923,7 @@ static void __unmap_dma_buf(struct dma_buf_attachment *attach, } /** - * dma_buf_detach - Remove the given attachment from dmabuf's attachments list + * dma_buf_detach_unlocked - Remove the given attachment from dmabuf's attachments list * @dmabuf:[in]buffer to detach from. * @attach:[in]attachment to be detached; is free'd after this call. * @@ -931,7 +931,8 @@ static void __unmap_dma_buf(struct dma_buf_attachment *attach, * * Optionally this calls &dma_buf_ops.detach for device-specific detach. */ -void dma_buf_detach(struct
Re: [PATCH 12/12] drm/amd/display: Rewrite CalculateWriteBackDISPCLK function
Às 13:45 de 14/07/22, Maíra Canal escreveu: > Based on the dml30_CalculateWriteBackDISPCLK, it separates the > DISPCLK calculations on three variables, making no functional changes, in > order > to make it more readable and better express that three values are being > compared > on dml_max. > > Signed-off-by: Maíra Canal > --- > .../drm/amd/display/dc/dml/display_mode_vba.c | 31 --- > 1 file changed, 20 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c > b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c > index c5a0a3649e9a..5fc1d16a2e15 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c > +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c > @@ -1113,20 +1113,29 @@ double CalculateWriteBackDISPCLK( > unsigned int HTotal, > unsigned int WritebackChromaLineBufferWidth) > { > - double CalculateWriteBackDISPCLK = 1.01 * PixelClock * dml_max( > - dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio, > - dml_max((WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, > 1) * dml_ceil(WritebackDestinationWidth / 4.0, 1) > + > + double DISPCLK_H = 0, DISPCLK_V = 0, DISPCLK_HB = 0; > + double CalculateWriteBackDISPCLK = 0; > + Small nit: no need to initialize those variables to 0 here. They are getting initialized below anyway.
> + DISPCLK_H = dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio; > + DISPCLK_V = (WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1) * > dml_ceil(WritebackDestinationWidth / 4.0, 1) > + dml_ceil(WritebackDestinationWidth / 4.0, 1)) / > (double) HTotal + dml_ceil(1.0 / WritebackVRatio, 1) > - * (dml_ceil(WritebackLumaVTaps / 4.0, 1) + 4.0) / > (double) HTotal, > - dml_ceil(1.0 / WritebackVRatio, 1) * > WritebackDestinationWidth / (double) HTotal)); > + * (dml_ceil(WritebackLumaVTaps / 4.0, 1) + 4.0) / > (double) HTotal; > + DISPCLK_HB = dml_ceil(1.0 / WritebackVRatio, 1) * > WritebackDestinationWidth / (double) HTotal; > + > + CalculateWriteBackDISPCLK = 1.01 * PixelClock * dml_max3(DISPCLK_H, > DISPCLK_V, DISPCLK_HB); > + > if (WritebackPixelFormat != dm_444_32) { > - CalculateWriteBackDISPCLK = dml_max(CalculateWriteBackDISPCLK, > 1.01 * PixelClock * dml_max( > - dml_ceil(WritebackChromaHTaps / 2.0, 1) / (2 * > WritebackHRatio), > - dml_max((WritebackChromaVTaps * dml_ceil(1 / (2 * > WritebackVRatio), 1) * dml_ceil(WritebackDestinationWidth / 2.0 / 2.0, 1) > - + dml_ceil(WritebackDestinationWidth / 2.0 / > WritebackChromaLineBufferWidth, 1)) / HTotal > - + dml_ceil(1 / (2 * WritebackVRatio), 1) * > (dml_ceil(WritebackChromaVTaps / 4.0, 1) + 4) / HTotal, > - dml_ceil(1.0 / (2 * WritebackVRatio), 1) * > WritebackDestinationWidth / 2.0 / HTotal))); > + DISPCLK_H = dml_ceil(WritebackChromaHTaps / 2.0, 1) / (2 * > WritebackHRatio); > + DISPCLK_V = (WritebackChromaVTaps * dml_ceil(1 / (2 * > WritebackVRatio), 1) * > + dml_ceil(WritebackDestinationWidth / 4.0, 1) + > + dml_ceil(WritebackDestinationWidth / 2.0 / > WritebackChromaLineBufferWidth, 1)) / HTotal + > + dml_ceil(1 / (2 * WritebackVRatio), 1) > *(dml_ceil(WritebackChromaVTaps / 4.0, 1) + 4) / HTotal; > + DISPCLK_HB = dml_ceil(1.0 / (2 * WritebackVRatio), 1) * > WritebackDestinationWidth / 2.0 / HTotal; > + CalculateWriteBackDISPCLK = dml_max(CalculateWriteBackDISPCLK, > + 1.01 * PixelClock * 
dml_max3(DISPCLK_H, > DISPCLK_V, DISPCLK_HB)); > } > + > return CalculateWriteBackDISPCLK; > } >