[PATCH 3/4] drm/amd/display: add doc entries for MPC blending configuration

2022-07-16 Thread Melissa Wen
Describe structs and enums used to set blend mode properties to MPC
blocks. Some pieces of information are already available as code
comments, and were just formatted. Others were collected and summarised
from discussions on the AMD issue tracker[1][2].

[1] https://gitlab.freedesktop.org/drm/amd/-/issues/1734
[2] https://gitlab.freedesktop.org/drm/amd/-/issues/1769

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h | 91 +
 1 file changed, 77 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h 
b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
index 5097037e3962..cf28b841c42d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
@@ -22,6 +22,16 @@
  *
  */
 
+/**
+ * DOC: mpc-overview
+ *
+ * Multiple Pipe/Plane Combined (MPC) is a component in the hardware pipeline
+ * that performs blending of multiple planes, using global and per-pixel alpha.
+ * It also performs post-blending color correction operations according to the
+ * hardware capabilities, such as color transformation matrix and gamma 1D and
+ * 3D LUT.
+ */
+
 #ifndef __DC_MPCC_H__
 #define __DC_MPCC_H__
 
@@ -48,14 +58,39 @@ enum mpcc_blend_mode {
MPCC_BLEND_MODE_TOP_BOT_BLENDING
 };
 
+/**
+ * enum mpcc_alpha_blend_mode - define the alpha blend mode regarding pixel
+ * alpha and plane alpha values
+ */
 enum mpcc_alpha_blend_mode {
+   /**
+* @MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA: per pixel alpha using DPP
+* alpha value
+*/
MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA,
+   /**
+* @MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN: per
+* pixel alpha using DPP alpha value multiplied by a global gain (plane
+* alpha)
+*/
MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN,
+   /**
+* @MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA: global alpha value, ignores
+* pixel alpha and considers only plane alpha
+*/
MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA
 };
 
-/*
- * MPCC blending configuration
+/**
+ * struct mpcc_blnd_cfg - MPCC blending configuration
+ *
+ * @black_color: background color
+ * @alpha_mode: alpha blend mode (MPCC_ALPHA_BLND_MODE)
+ * @pre_multiplied_alpha: whether pixel color values were pre-multiplied by the
+ * alpha channel (MPCC_ALPHA_MULTIPLIED_MODE)
+ * @global_gain: used when the blend mode considers both pixel alpha and plane
+ * alpha values; in that case it carries the plane (global) alpha value
+ * @global_alpha: plane alpha value
  */
 struct mpcc_blnd_cfg {
struct tg_color black_color;/* background color */
@@ -107,8 +142,15 @@ struct mpc_dwb_flow_control {
int flow_ctrl_cnt1;
 };
 
-/*
- * MPCC connection and blending configuration for a single MPCC instance.
+/**
+ * struct mpcc - MPCC connection and blending configuration for a single MPCC instance.
+ * @mpcc_id: MPCC physical instance
+ * @dpp_id: DPP input to this MPCC
+ * @mpcc_bot: pointer to bottom layer MPCC. NULL when not connected.
+ * @blnd_cfg: the blending configuration for this MPCC
+ * @sm_cfg: stereo mix setting for this MPCC
+ * @shared_bottom: true if MPCC outputs to both OPP and DWB endpoints. Otherwise, false.
+ *
  * This struct is used as a node in an MPC tree.
  */
 struct mpcc {
@@ -120,8 +162,12 @@ struct mpcc {
bool shared_bottom; /* TRUE if MPCC output to both OPP and 
DWB endpoints, else FALSE */
 };
 
-/*
- * MPC tree represents all MPCC connections for a pipe.
+/**
+ * struct mpc_tree - MPC tree represents all MPCC connections for a pipe.
+ *
+ * @opp_id: the OPP instance that owns this MPC tree
+ * @opp_list: the top MPCC layer of the MPC tree that outputs to OPP endpoint
+ *
  */
 struct mpc_tree {
int opp_id; /* The OPP instance that owns this MPC 
tree */
@@ -149,13 +195,18 @@ struct mpcc_state {
uint32_t busy;
 };
 
+/**
+ * struct mpc_funcs - MPC component function table
+ */
 struct mpc_funcs {
void (*read_mpcc_state)(
struct mpc *mpc,
int mpcc_inst,
struct mpcc_state *s);
 
-   /*
+   /**
+* @insert_plane:
+*
 * Insert DPP into MPC tree based on specified blending position.
 * Only used for planes that are part of blending chain for OPP output
 *
@@ -180,7 +231,9 @@ struct mpc_funcs {
int dpp_id,
int mpcc_id);
 
-   /*
+   /**
+* @remove_mpcc:
+*
 * Remove a specified MPCC from the MPC tree.
 *
 * Parameters:
@@ -195,7 +248,9 @@ struct mpc_funcs {
struct mpc_tree *tree,
struct mpcc *mpcc);
 
-   /*
+   /**
+* @mpc_init:
+*
 * Reset the MPCC HW status by disconnecting all muxes.
 *
 * Parameters:
@@ -208,7 +263,9 @@ struct mpc_funcs {
 

[PATCH 4/4] Documentation/gpu/amdgpu/amdgpu_dm: add DM docs for pixel blend mode

2022-07-16 Thread Melissa Wen
AMD GPU display manager (DM) maps DRM pixel blend modes (None,
Pre-multiplied, Coverage) to MPC hw blocks through blend configuration
options. Describe relevant elements and how to set and test them to get
the expected DRM blend mode on DCN hw.

Signed-off-by: Melissa Wen 
---
 .../gpu/amdgpu/display/display-manager.rst| 98 +++
 Documentation/gpu/drm-kms.rst |  2 +
 2 files changed, 100 insertions(+)

diff --git a/Documentation/gpu/amdgpu/display/display-manager.rst 
b/Documentation/gpu/amdgpu/display/display-manager.rst
index 8960a5f1fa66..7a495ed1f69e 100644
--- a/Documentation/gpu/amdgpu/display/display-manager.rst
+++ b/Documentation/gpu/amdgpu/display/display-manager.rst
@@ -84,3 +84,101 @@ families below.
 **DCN 3.0 family color caps and mapping**
 
 .. kernel-figure:: dcn3_cm_drm_current.svg
+
+Blend Mode Properties
+=====================
+
+Pixel blend mode is a DRM plane composition property of :c:type:`drm_plane` used
+to describe how pixels from a foreground plane (fg) are composited with the
+background plane (bg). Here, we present the main concepts of DRM blend mode to
+help understand how this property is mapped to the AMD DC interface. See more
+about this DRM property and the alpha blending equations in :ref:`DRM Plane
+Composition Properties `.
+
+Basically, a blend mode sets the alpha blending equation for plane
+composition, i.e. how the alpha channel affects the pixel color values
+and, therefore, the resulting pixel color. For example, consider the
+following elements of the alpha blending equation:
+
+- *fg.rgb*: Each of the RGB component values from the foreground's pixel.
+- *fg.alpha*: Alpha component value from the foreground's pixel.
+- *bg.rgb*: Each of the RGB component values from the background.
+- *plane_alpha*: Plane alpha value set by the **plane "alpha" property**, see
+  more in `DRM Plane Composition Properties `.
+
+in the basic alpha blending equation::
+
+   out.rgb = alpha * fg.rgb + (1 - alpha) * bg.rgb
+
+the alpha channel value of each pixel in a plane is ignored and only the plane
+alpha affects the resulting pixel color values.
+
+DRM has three blend modes to define the blend formula in the plane composition:
+
+* **None**: Blend formula that ignores the pixel alpha.
+
+* **Pre-multiplied**: Blend formula that assumes the pixel color values in a
+  plane were already pre-multiplied by their own alpha channel before storage.
+
+* **Coverage**: Blend formula that assumes the pixel color values were not
+  pre-multiplied with the alpha channel values.
+
+Pre-multiplied is the default pixel blend mode; that means, when no blend
+mode property is created or defined, DRM considers the plane's pixels to have
+pre-multiplied color values. In IGT GPU Tools, the kms_plane_alpha_blend test
+provides a set of subtests to verify plane alpha and blend mode properties.
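
For illustration only, the three formulas can be written as small C helpers
operating on a single channel with values normalized to [0.0, 1.0] (a sketch
based on the equations in the DRM plane composition documentation, not code
from this patch)::

    /* None: fg.alpha is ignored, only the plane alpha weighs the blend. */
    static float blend_none(float fg, float bg, float plane_alpha)
    {
            return plane_alpha * fg + (1.0f - plane_alpha) * bg;
    }

    /* Pre-multiplied: fg already carries its alpha, so it is only scaled by
     * the plane alpha, while the background is weighed by both alphas. */
    static float blend_premultiplied(float fg, float fg_alpha, float bg,
                                     float plane_alpha)
    {
            return plane_alpha * fg + (1.0f - plane_alpha * fg_alpha) * bg;
    }

    /* Coverage: fg is not pre-multiplied, so both sides of the blend use the
     * combined (plane_alpha * fg.alpha) factor. */
    static float blend_coverage(float fg, float fg_alpha, float bg,
                                float plane_alpha)
    {
            float alpha = plane_alpha * fg_alpha;

            return alpha * fg + (1.0f - alpha) * bg;
    }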
+
+The DRM blend mode and its elements are then mapped by AMDGPU display manager
+(DM) to program the blending configuration of the Multiple Pipe/Plane Combined
+(MPC), as follows:
+
+.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+   :doc: mpc-overview
+
+.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+   :functions: mpcc_blnd_cfg
+
+Therefore, the blending configuration for a single MPCC instance on the MPC
+tree is defined by :c:type:`mpcc_blnd_cfg`, where
+:c:type:`pre_multiplied_alpha` is the alpha pre-multiplied mode flag used to
+set :c:type:`MPCC_ALPHA_MULTIPLIED_MODE`. It controls whether alpha is
+multiplied (true/false), and is true only for the DRM pre-multiplied blend mode.
+:c:type:`mpcc_alpha_blend_mode` defines the alpha blend mode regarding pixel
+alpha and plane alpha values. It sets one of the three modes for
+:c:type:`MPCC_ALPHA_BLND_MODE`, as described below.
+
+.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+   :functions: mpcc_alpha_blend_mode
+
+DM then maps the elements of `enum mpcc_alpha_blend_mode` to those in the DRM
+blend formula, as follows:
+
+* *MPC pixel alpha* matches *DRM fg.alpha* as the alpha component value
+  from the plane's pixel
+* *MPC global alpha* matches *DRM plane_alpha* when the pixel alpha should
+  be ignored and, therefore, pixel values are not pre-multiplied
+* *MPC global gain* assumes *MPC global alpha* value when both *DRM
+  fg.alpha* and *DRM plane_alpha* participate in the blend equation
+
+In short, *fg.alpha* is ignored by selecting
+:c:type:`MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA`. On the other hand, the
+(plane_alpha * fg.alpha) component becomes available by selecting
+:c:type:`MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN`. Finally,
+:c:type:`MPCC_ALPHA_MULTIPLIED_MODE` defines whether the pixel color values are
+pre-multiplied by alpha or not.
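
For illustration, a DM-side selection could look roughly like the sketch below
(simplified and hypothetical: the real driver helper, its exact conditions and
the plane-alpha scaling are not shown here; only the enum values and struct
fields come from this series)::

    struct mpcc_blnd_cfg blnd_cfg = { 0 };

    switch (plane_state->pixel_blend_mode) {
    case DRM_MODE_BLEND_PIXEL_NONE:
            /* fg.alpha ignored: only the plane alpha drives the blend */
            blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
            blnd_cfg.pre_multiplied_alpha = false;
            blnd_cfg.global_alpha = plane_state->alpha >> 8; /* 16 -> 8 bit */
            break;
    case DRM_MODE_BLEND_PREMULTI:
    case DRM_MODE_BLEND_COVERAGE:
            /* fg.alpha participates; plane alpha enters as the global gain */
            blnd_cfg.alpha_mode =
                    MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
            blnd_cfg.pre_multiplied_alpha =
                    (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI);
            blnd_cfg.global_gain = plane_state->alpha >> 8;
            break;
    }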
+
+Blend configuration flow
+------------------------
+
+The alpha blending equation is configured from the DRM to the DC interface by the
+following path:
+
+1. When updating a :c:type:`drm_plane_state `, DM calls
+   

[PATCH 2/4] Documentation/amdgpu/display: add DC color caps info

2022-07-16 Thread Melissa Wen
Add details about color correction capabilities and explain a bit about
differences between DC hw generations and also how they are mapped
between the DRM and DC interfaces. Two schemas for DCN 2.0 and 3.0 (converted
to SVG from the original PNG) are included to illustrate it. They were
obtained from a discussion[1] in the amd-gfx mailing list.

[1] 
https://lore.kernel.org/amd-gfx/20220422142811.dm6vtk6v64jcw...@mail.igalia.com/

v2:

- remove redundant comments (Harry)
- fix typo (Harry)

Signed-off-by: Melissa Wen 
---
 .../amdgpu/display/dcn2_cm_drm_current.svg| 1370 +++
 .../amdgpu/display/dcn3_cm_drm_current.svg| 1529 +
 .../gpu/amdgpu/display/display-manager.rst|   35 +
 drivers/gpu/drm/amd/display/dc/dc.h   |   74 +-
 4 files changed, 2995 insertions(+), 13 deletions(-)
 create mode 100644 Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg
 create mode 100644 Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg

diff --git a/Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg 
b/Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg
new file mode 100644
index ..315ffc5a1a4b
--- /dev/null
+++ b/Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg
@@ -0,0 +1,1370 @@
[SVG markup stripped by the mailing list archive. The recoverable labels show
the DCN 2.0 mapping between the DRM interface (drm_framebuffer format;
drm_plane properties color_encoding, color_range, pixel_blend_mode; drm_crtc
Degamma, CTM and Gamma) and the DC interface (plane/DPP: format,
bias_and_scale, input_csc_color_matrix, in_transfer_func, hdr_mult,
gamut_remap_matrix, in_shaper_func, lut3d_func, blend_tf; stream/MPC:
gamut_remap_matrix, func_shaper, lut3d_func, out_transfer_func,
csc_color_matrix, bit_depth_param, clamping, output_color_space), backed by
the hardware blocks CNVC, Input CSC, Degamma RAM/ROM (sRGB, BT2020), HDR
Multiply, Gamut Remap, Shaper LUT RAM, 3D LUT RAM, Blend Gamma, Blender,
Gamma RAM and OCSC. Legend: Matrix, 1D LUT, 3D LUT, Unpacking, Other.]
diff --git a/Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg 
b/Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg
new file mode 100644
index ..7299ee9b6d64
--- /dev/null
+++ b/Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg
@@ -0,0 +1,1529 @@
[SVG markup stripped by the mailing list archive. The recoverable labels show
the analogous DCN 3.0 mapping: the same DRM interface properties and DC
interface fields, with plane/DPP hardware blocks CNVC, Input CSC, Degamma ROM
(sRGB, BT2020, Gamma 2.2, PQ, HLG), Post CSC, Gamma Correction, HDR Multiply,
Gamut Remap, Shaper LUT RAM, 3D LUT RAM and Blend Gamma, and stream/MPC blocks
Blender, Gamut Remap, Shaper LUT RAM, 3D LUT RAM, Gamma RAM and OCSC.
Legend: Matrix, 1D LUT, 3D LUT, Unpacking, Other.]

[PATCH 1/4] Documentation/amdgpu_dm: Add DM color correction documentation

2022-07-16 Thread Melissa Wen
AMDGPU DM maps DRM color management properties (degamma, ctm and gamma)
to DC color correction entities. Part of this mapping is already
documented as code comments and can be converted into kernel docs.
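
For context, degamma, CTM and gamma reach DM through the standard DRM CRTC
properties DEGAMMA_LUT, CTM and GAMMA_LUT. A minimal user-space sketch that
builds a linear GAMMA_LUT blob and commits it atomically (not DM code; the
atomic-capable master fd, CRTC id and LUT size are assumed, and error
handling is abbreviated) could look like:

    #include <stdint.h>
    #include <string.h>
    #include <xf86drm.h>
    #include <xf86drmMode.h>

    /* Look up a named property on a CRTC (returns 0 if not found). */
    static uint32_t crtc_prop_id(int fd, uint32_t crtc_id, const char *name)
    {
            drmModeObjectProperties *props =
                    drmModeObjectGetProperties(fd, crtc_id, DRM_MODE_OBJECT_CRTC);
            uint32_t id = 0;

            for (uint32_t i = 0; props && i < props->count_props; i++) {
                    drmModePropertyRes *p = drmModeGetProperty(fd, props->props[i]);

                    if (p && !strcmp(p->name, name))
                            id = p->prop_id;
                    drmModeFreeProperty(p);
            }
            drmModeFreeObjectProperties(props);
            return id;
    }

    /* Program an identity (bypass) gamma LUT on one CRTC via the atomic API.
     * fd must be a DRM master with DRM_CLIENT_CAP_ATOMIC enabled. */
    static int set_linear_gamma(int fd, uint32_t crtc_id, uint32_t lut_size)
    {
            struct drm_color_lut lut[lut_size];
            drmModeAtomicReq *req;
            uint32_t blob_id;
            int ret;

            for (uint32_t i = 0; i < lut_size; i++) {
                    uint16_t v = (uint16_t)(0xffff * i / (lut_size - 1));

                    lut[i].red = lut[i].green = lut[i].blue = v;
                    lut[i].reserved = 0;
            }

            ret = drmModeCreatePropertyBlob(fd, lut, sizeof(lut), &blob_id);
            if (ret)
                    return ret;

            req = drmModeAtomicAlloc();
            drmModeAtomicAddProperty(req, crtc_id,
                                     crtc_prop_id(fd, crtc_id, "GAMMA_LUT"),
                                     blob_id);
            ret = drmModeAtomicCommit(fd, req, 0, NULL);
            drmModeAtomicFree(req);
            return ret;
    }

DEGAMMA_LUT is set the same way with a blob of struct drm_color_lut entries,
and CTM with a blob containing a struct drm_color_ctm.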

v2:
- rebase to amd-staging-drm-next

Reviewed-by: Harry Wentland 
Signed-off-by: Melissa Wen 
---
 .../gpu/amdgpu/display/display-manager.rst|   9 ++
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 121 +-
 2 files changed, 98 insertions(+), 32 deletions(-)

diff --git a/Documentation/gpu/amdgpu/display/display-manager.rst 
b/Documentation/gpu/amdgpu/display/display-manager.rst
index 7ce31f89d9a0..b1b0f11aed83 100644
--- a/Documentation/gpu/amdgpu/display/display-manager.rst
+++ b/Documentation/gpu/amdgpu/display/display-manager.rst
@@ -40,3 +40,12 @@ Atomic Implementation
 
 .. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
:functions: amdgpu_dm_atomic_check amdgpu_dm_atomic_commit_tail
+
+Color Management Properties
+===
+
+.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+   :doc: overview
+
+.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+   :internal:
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index a71177305bcd..93c813089bff 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -29,7 +29,9 @@
 #include "modules/color/color_gamma.h"
 #include "basics/conversion.h"
 
-/*
+/**
+ * DOC: overview
+ *
  * The DC interface to HW gives us the following color management blocks
  * per pipe (surface):
  *
@@ -71,8 +73,8 @@
 
 #define MAX_DRM_LUT_VALUE 0xFFFF
 
-/*
- * Initialize the color module.
+/**
+ * amdgpu_dm_init_color_mod - Initialize the color module.
  *
  * We're not using the full color module, only certain components.
  * Only call setup functions for components that we need.
@@ -82,7 +84,14 @@ void amdgpu_dm_init_color_mod(void)
setup_x_points_distribution();
 }
 
-/* Extracts the DRM lut and lut size from a blob. */
+/**
+ * __extract_blob_lut - Extracts the DRM lut and lut size from a blob.
+ * @blob: DRM color mgmt property blob
+ * @size: lut size
+ *
+ * Returns:
+ * DRM LUT or NULL
+ */
 static const struct drm_color_lut *
 __extract_blob_lut(const struct drm_property_blob *blob, uint32_t *size)
 {
@@ -90,13 +99,18 @@ __extract_blob_lut(const struct drm_property_blob *blob, 
uint32_t *size)
return blob ? (struct drm_color_lut *)blob->data : NULL;
 }
 
-/*
- * Return true if the given lut is a linear mapping of values, i.e. it acts
- * like a bypass LUT.
+/**
+ * __is_lut_linear - check if the given lut is a linear mapping of values
+ * @lut: given lut to check values
+ * @size: lut size
  *
  * It is considered linear if the lut represents:
- * f(a) = (0xFF00/MAX_COLOR_LUT_ENTRIES-1)a; for integer a in
- *   [0, MAX_COLOR_LUT_ENTRIES)
+ * f(a) = (0xFF00/MAX_COLOR_LUT_ENTRIES-1)a; for integer a in [0,
+ * MAX_COLOR_LUT_ENTRIES)
+ *
+ * Returns:
+ * True if the given lut is a linear mapping of values, i.e. it acts like a
+ * bypass LUT. Otherwise, false.
  */
 static bool __is_lut_linear(const struct drm_color_lut *lut, uint32_t size)
 {
@@ -119,9 +133,13 @@ static bool __is_lut_linear(const struct drm_color_lut 
*lut, uint32_t size)
return true;
 }
 
-/*
- * Convert the drm_color_lut to dc_gamma. The conversion depends on the size
- * of the lut - whether or not it's legacy.
+/**
+ * __drm_lut_to_dc_gamma - convert the drm_color_lut to dc_gamma.
+ * @lut: DRM lookup table for color conversion
+ * @gamma: DC gamma to set entries
+ * @is_legacy: legacy or atomic gamma
+ *
+ * The conversion depends on the size of the lut - whether or not it's legacy.
  */
 static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut,
  struct dc_gamma *gamma, bool is_legacy)
@@ -154,8 +172,11 @@ static void __drm_lut_to_dc_gamma(const struct 
drm_color_lut *lut,
}
 }
 
-/*
- * Converts a DRM CTM to a DC CSC float matrix.
+/**
+ * __drm_ctm_to_dc_matrix - converts a DRM CTM to a DC CSC float matrix
+ * @ctm: DRM color transformation matrix
+ * @matrix: DC CSC float matrix
+ *
  * The matrix needs to be a 3x4 (12 entry) matrix.
  */
 static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
@@ -189,7 +210,18 @@ static void __drm_ctm_to_dc_matrix(const struct 
drm_color_ctm *ctm,
}
 }
 
-/* Calculates the legacy transfer function - only for sRGB input space. */
+/**
+ * __set_legacy_tf - Calculates the legacy transfer function
+ * @func: transfer function
+ * @lut: lookup table that defines the color space
+ * @lut_size: size of respective lut
+ * @has_rom: if ROM can be used for hardcoded curve
+ *
+ * Only for sRGB input space
+ *
+ * Returns:
+ * 0 in case of success, -ENOMEM if it fails
+ */
 static int 

[PATCH 0/4] Documentation/amdgpu/display: describe color and blend mode properties mapping

2022-07-16 Thread Melissa Wen
Patches 1 and 2 describe the DM mapping of DRM color correction properties
to the DC interface and were detached from the 3D LUT RFC series [1]. Patches 3
and 4 describe the MPC block programming that matches the three DRM blend
modes and came from previous work [2][3] and discussions on the AMD issue
tracker. Let me know if you spot any misleading information.

[1] https://lore.kernel.org/amd-gfx/20220619223104.667413-1-m...@igalia.com/
[2] https://lore.kernel.org/amd-gfx/20220329201835.2393141-1-m...@igalia.com/
[3] 
https://lore.kernel.org/amd-gfx/7a95d6a4-bc2f-b0e8-83f8-8cc5b7559...@amd.com/

Melissa Wen (4):
  Documentation/amdgpu_dm: Add DM color correction documentation
  Documentation/amdgpu/display: add DC color caps info
  drm/amd/display: add doc entries for MPC blending configuration
  Documentation/gpu/amdgpu/amdgpu_dm: add DM docs for pixel blend mode

 .../amdgpu/display/dcn2_cm_drm_current.svg| 1370 +++
 .../amdgpu/display/dcn3_cm_drm_current.svg| 1529 +
 .../gpu/amdgpu/display/display-manager.rst|  142 ++
 Documentation/gpu/drm-kms.rst |2 +
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   |  121 +-
 drivers/gpu/drm/amd/display/dc/dc.h   |   74 +-
 drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h   |   91 +-
 7 files changed, 3270 insertions(+), 59 deletions(-)
 create mode 100644 Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg
 create mode 100644 Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg

-- 
2.35.1



[PATCH] drm/amd/display: move dcn31_update_soc_for_wm_a func to dml fpu folder

2022-07-16 Thread Melissa Wen
Although dcn31_update_soc_for_wm_a() is only called in dml/dcn31/dcn31_fpu via
dc->res_pool->funcs->update_soc_for_wm_a(dc, context), it is declared in
dcn31_resource, which is not FPU-protected. Move this function to the dcn31_fpu
file as part of the work to isolate FPU code.
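
For reference, helpers that live under dml/ use the kernel FPU and are
expected to run only inside the DC FPU guards; the calling convention (a
sketch of the existing DC_FP helpers, not a change made by this patch) is:

	DC_FP_START();
	dcn31_update_soc_for_wm_a(dc, context);
	DC_FP_END();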

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 9 -
 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h | 1 -
 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 9 +
 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h  | 2 ++
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 16bbccc69fdc..17c776e88514 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -1716,15 +1716,6 @@ int dcn31_populate_dml_pipes_from_context(
return pipe_cnt;
 }
 
-void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
-{
-   if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) {
-   context->bw_ctx.dml.soc.dram_clock_change_latency_us = 
dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us;
-   context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = 
dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us;
-   context->bw_ctx.dml.soc.sr_exit_time_us = 
dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us;
-   }
-}
-
 void dcn31_calculate_wm_and_dlg(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
index 393458015d6a..41f8ec99da6b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
@@ -59,7 +59,6 @@ dcn31_set_mcif_arb_params(struct dc *dc,
  struct dc_state *context,
  display_e2e_pipe_params_st *pipes,
  int pipe_cnt);
-void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
 
 struct resource_pool *dcn31_create_resource_pool(
const struct dc_init_data *init_data,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
index 7be3476989ce..facac3daeaca 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -435,6 +435,15 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
 };
 
+void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
+{
+   if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) {
+   context->bw_ctx.dml.soc.dram_clock_change_latency_us = 
dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us;
+   context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = 
dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us;
+   context->bw_ctx.dml.soc.sr_exit_time_us = 
dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us;
+   }
+}
+
 void dcn31_calculate_wm_and_dlg_fp(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
index 24ac19c83687..0a10de80c1a4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
@@ -31,6 +31,8 @@
 #define DCN3_15_MIN_COMPBUF_SIZE_KB 128
 #define DCN3_16_DEFAULT_DET_SIZE 192
 
+void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
+
 void dcn31_calculate_wm_and_dlg_fp(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
-- 
2.35.1



Re: [PATCH] drm/amd/pm: enable mode1 reset for smu_v13_0_7

2022-07-16 Thread Wang, Yang(Kevin)
[AMD Official Use Only - General]

Reviewed-by: Yang Wang 

Best Regards,
Kevin

From: amd-gfx  on behalf of Kenneth Feng 

Sent: Saturday, July 16, 2022 12:43 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Feng, Kenneth 
Subject: [PATCH] drm/amd/pm: enable mode1 reset for smu_v13_0_7

enable mode1 reset for smu_v13_0_7 since it's missing.

Signed-off-by: Kenneth Feng 
---
 drivers/gpu/drm/amd/amdgpu/soc21.c   | 1 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c 
b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 765c3543ad18..00e9b7089feb 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -320,6 +320,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev)

 switch (adev->ip_versions[MP1_HWIP][0]) {
 case IP_VERSION(13, 0, 0):
+   case IP_VERSION(13, 0, 7):
 return AMD_RESET_METHOD_MODE1;
 case IP_VERSION(13, 0, 4):
 return AMD_RESET_METHOD_MODE2;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 6259a85bc818..6f0548714566 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -118,6 +118,7 @@ static struct cmn2asic_msg_mapping 
smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
 MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize,  
0),
 MSG_MAP(AllowGfxOff,PPSMC_MSG_AllowGfxOff, 
0),
 MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff,  
0),
+   MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset,  
0),
 MSG_MAP(PrepareMp1ForUnload,PPSMC_MSG_PrepareMp1ForUnload, 
0),
 };

--
2.25.1



Re: [PATCH v2] drm/amdgpu: add HW_IP_VCN_UNIFIED type

2022-07-16 Thread Christian König

On 15.07.22 at 17:25, Dong, Ruijing wrote:

[AMD Official Use Only - General]


Why exactly do we need a new define for this? Essentially the encode queue is 
extended with new functionality, isn't it?
So I think we should just stick to AMDGPU_HW_IP_VCN_ENC and not add an alias 
for it.

Yes, it extended the encode queue to include new functionality, and that looks
a little confusing when sending decoding jobs to the encoding queue. Then I
assume this alias can reduce the confusion.

Does this change make sense in this regard? certainly we can stick to 
AMDGPU_HW_IP_VCN_ENC.


I'm a bit on the edge with that.

On the one hand I agree with you that using AMDGPU_HW_IP_VCN_ENC for 
decoding is then a bit confusing, but on the other hand adding another 
enum with the same value as AMDGPU_HW_IP_VCN_ENC might be even more 
confusing.


I think the best middle way would be to at least add a comment 
explaining what's going on.


Regards,
Christian.



Thanks,
Ruijing

-Original Message-
From: Koenig, Christian 
Sent: Friday, July 15, 2022 11:18 AM
To: Dong, Ruijing ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Liu, Leo 
Subject: Re: [PATCH v2] drm/amdgpu: add HW_IP_VCN_UNIFIED type

On 15.07.22 at 16:44, Ruijing Dong wrote:

Define HW_IP_VCN_UNIFIED type the same as HW_IP_VCN_ENC.

VCN4 support for libdrm needs a new definition for the unified queue,
so that it can align to the kernel.

link:
https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits

Signed-off-by: Ruijing Dong 
---
   include/uapi/drm/amdgpu_drm.h | 1 +
   1 file changed, 1 insertion(+)

diff --git a/include/uapi/drm/amdgpu_drm.h
b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..fe33db8441bc
100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -560,6 +560,7 @@ struct drm_amdgpu_gem_va {
   #define AMDGPU_HW_IP_UVD_ENC  5
   #define AMDGPU_HW_IP_VCN_DEC  6
   #define AMDGPU_HW_IP_VCN_ENC  7
+#define AMDGPU_HW_IP_VCN_UNIFIED  AMDGPU_HW_IP_VCN_ENC

Why exactly do we need a new define for this? Essentially the encode queue is 
extended with new functionality, isn't it?

So I think we should just stick to AMDGPU_HW_IP_VCN_ENC and not add an alias 
for it.

Regards,
Christian.


   #define AMDGPU_HW_IP_VCN_JPEG 8
   #define AMDGPU_HW_IP_NUM  9





[PATCH v4] drm/amdgpu: add HW_IP_VCN_UNIFIED type

2022-07-16 Thread Ruijing Dong
From VCN4, AMDGPU_HW_IP_VCN_UNIFIED is used to support
both encoding and decoding jobs, it re-uses the same
queue number of AMDGPU_HW_IP_VCN_ENC.

link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits

Signed-off-by: Ruijing Dong 
---
 include/uapi/drm/amdgpu_drm.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 18d3246d636e..e268cd3cdb12 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -560,6 +560,12 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_HW_IP_UVD_ENC  5
 #define AMDGPU_HW_IP_VCN_DEC  6
 #define AMDGPU_HW_IP_VCN_ENC  7
+/**
+ * From VCN4, AMDGPU_HW_IP_VCN_UNIFIED is used to support
+ * both encoding and decoding jobs, it re-uses the same
+ * queue number of AMDGPU_HW_IP_VCN_ENC.
+ */
+#define AMDGPU_HW_IP_VCN_UNIFIED  AMDGPU_HW_IP_VCN_ENC
 #define AMDGPU_HW_IP_VCN_JPEG 8
 #define AMDGPU_HW_IP_NUM  9
 
-- 
2.25.1



Re: Linux 5.19-rc6

2022-07-16 Thread Sudip Mukherjee
Hi Russell,

On Fri, Jul 15, 2022 at 12:34 AM Russell Currey  wrote:
>
> Hi Linus,
>
> On Wed, 2022-07-13 at 14:32 -0700, Linus Torvalds wrote:
> > On Wed, Jul 13, 2022 at 2:01 PM Alex Deucher 
> > wrote:
> > >
> > > If you want to apply Guenter's patch original patch:
> > > https://patchwork.freedesktop.org/patch/490184/
> > > That's fine with me.
> >
> > Honestly, by this time I feel that it's too little, too late.
> >
> > The ppc people apparently didn't care at all about the fact that this
> > driver didn't compile.
> >
> > At least Michael Ellerman and Daniel Axtens were cc'd on that thread
> > with the proposed fix originally.
> >
> > I don't see any replies from ppc people as to why it happened, even
> > though apparently a bog-standard "make allmodconfig" just doesn't
> > build.
>
> I believe Michael Ellerman has been on holiday for some time, and
> Daniel Axtens no longer works on powerpc (and wasn't the one that
> submitted the patch, it was submitted by Paul Mackerras, who wasn't on
> CC).
>
> The proposed fix didn't get sent to linuxppc-dev either, so it's
> unlikely many ppc people knew about it.
>
> We certainly should have noticed allmodconfig was broken, and should
> have more than just Michael keeping an eye on all his automated builds.

Not sure if I have added the correct people in my other mail, but
that's also ppc allmodconfig with gcc-12.
https://lore.kernel.org/lkml/Ys%2FaDKZNhhsENH9S@debian/



-- 
Regards
Sudip


[PATCH v9 00/14] Add MEMORY_DEVICE_COHERENT for coherent device memory mapping

2022-07-16 Thread Alex Sierra
This is our MEMORY_DEVICE_COHERENT patch series rebased and updated
for current 5.19.0-rc6

Changes since the last version:
- Fixed problems with migration during long-term pinning in
get_user_pages
- Open coded vm_normal_lru_pages as suggested in previous code review
- Update hmm_gup_test with more get_user_pages calls, include
hmm_cow_in_device in hmm-test.

This patch series introduces MEMORY_DEVICE_COHERENT, a type of memory
owned by a device that can be mapped into CPU page tables like
MEMORY_DEVICE_GENERIC and can also be migrated like
MEMORY_DEVICE_PRIVATE.

This patch series is mostly self-contained except for a few places where
it needs to update other subsystems to handle the new memory type.

System stability and performance are not affected according to our
ongoing testing, including xfstests.

How it works: The system BIOS advertises the GPU device memory
(aka VRAM) as SPM (special purpose memory) in the UEFI system address
map.

The amdgpu driver registers the memory with devmap as
MEMORY_DEVICE_COHERENT using devm_memremap_pages. The initial user for
this hardware page migration capability is the Frontier supercomputer
project. This functionality is not AMD-specific. We expect other GPU
vendors to find this functionality useful, and possibly other hardware
types in the future.
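
As a rough sketch (dev, the VRAM range and the ops/owner tokens below are
placeholders, and MEMORY_DEVICE_COHERENT is the new type added by this
series), the registration boils down to:

	struct dev_pagemap *pgmap;
	void *addr;

	pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return -ENOMEM;

	pgmap->type = MEMORY_DEVICE_COHERENT;    /* new zone device type */
	pgmap->range.start = vram_start;         /* SPM range from the UEFI map */
	pgmap->range.end = vram_start + vram_size - 1;
	pgmap->nr_range = 1;
	pgmap->ops = &my_pgmap_ops;              /* must implement page_free() */
	pgmap->owner = my_owner;                 /* token used to match migrations */

	addr = devm_memremap_pages(dev, pgmap);
	if (IS_ERR(addr))
		return PTR_ERR(addr);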

Our test nodes in the lab are similar to the Frontier configuration,
with 0.5 TB of system memory plus 256 GB of device memory split across
4 GPUs, all in a single coherent address space. Page migration is
expected to improve application efficiency significantly. We will
report empirical results as they become available.

Coherent device type pages at gup are now migrated back to system
memory if they are being pinned long-term (FOLL_LONGTERM). The reason
is that long-term pinning would interfere with the device memory
manager owning the device-coherent pages (e.g. evictions in TTM).
This series incorporates Alistair Popple's patches to do this
migration from pin_user_pages() calls. hmm_gup_test has been added to
hmm-test to test different get_user_pages calls.
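
For reference, the pinning path that now triggers that migration is the
regular GUP fast path, e.g. (a sketch using the 5.19-era signatures):

	struct page *page;
	int pinned;

	/* With this series, a device-coherent page at addr is first migrated
	 * back to system memory, so the long-term pin never targets VRAM. */
	pinned = pin_user_pages_fast(addr, 1, FOLL_WRITE | FOLL_LONGTERM, &page);
	if (pinned == 1)
		unpin_user_page(page);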

This series includes handling of device-managed anonymous pages
returned by vm_normal_pages. Although they behave like normal pages
for purposes of mapping in CPU page tables and for COW, they do not
support LRU lists, NUMA migration or THP.

We also introduced a FOLL_LRU flag that adds the same behaviour to
follow_page and related APIs, to allow callers to specify that they
expect to put pages on an LRU list.

v2:
- Rebase to latest 5.18-rc7.
- Drop patch "mm: add device coherent checker to remove migration pte"
and modify try_to_migrate_one, to let DEVICE_COHERENT pages fall
through to normal page path. Based on Alistair Popple's comment.
- Fix comment formatting.
- Reword comment in vm_normal_page about pte_devmap().
- Merge "drm/amdkfd: coherent type as sys mem on migration to ram" to
"drm/amdkfd: add SPM support for SVM".

v3:
- Rebase to latest 5.18.0.
- Patch "mm: handling Non-LRU pages returned by vm_normal_pages"
reordered.
- Add WARN_ON_ONCE for thp device coherent case.

v4:
- Rebase to latest 5.18.0
- Fix consistency between pages with FOLL_LRU flag set and pte_devmap
at follow_page_pte.

v5:
- Remove unused zone_device_type from lib/test_hmm and
selftest/vm/hmm-test.c.

v6:
- Rebase to 5.19.0-rc4
- Rename is_pinnable_page to is_longterm_pinnable_page and add a
coherent device checker.
- Add a new gup test to hmm-test to cover fast pinnable case with
FOLL_LONGTERM.

v7:
- Reorder patch series.
- Remove FOLL_LRU and check on each caller for LRU pages handling
instead.

v8:
- Add "mm: move page zone helpers into new header-specific file"
patch. The intention is to centralize all page zone helpers and keep
them independent from mm.h and memremap.h.

v9:
- Rebase to 5.19.0-rc6
- Include latest Alistair's patch
"mm/gup: migrate device coherent pages when pinning instead of failing"
with changes based on David Hildenbrand comments.
- Replace moving page zone helpers into new header-specific file.
Instead, those were moved to mmzone.h.
Patch "mm: move page zone helpers from mm.h to mmzone.h"

Alex Sierra (13):
  mm: rename is_pinnable_pages to is_longterm_pinnable_pages
  mm: move page zone helpers from mm.h to mmzone.h
  mm: add zone device coherent type memory support
  mm: handling Non-LRU pages returned by vm_normal_pages
  mm: add device coherent vma selection for memory migration
  drm/amdkfd: add SPM support for SVM
  lib: test_hmm add ioctl to get zone device type
  lib: test_hmm add module param for zone device type
  lib: add support for device coherent type in test_hmm
  tools: update hmm-test to support device coherent type
  tools: update test_hmm script to support SP config
  tools: add hmm gup tests for device coherent type
  tools: add selftests to hmm for COW in device memory

Alistair Popple (1):
  mm/gup: migrate device coherent pages when pinning instead of failing

 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  34 ++-
 fs/proc/task_mmu.c  

Re: [PATCH] drm/amdgpu: align between libdrm and drm api

2022-07-16 Thread Christian König

Hi Ruijing,

ok in this case please prepare a kernel patch and send it to the mailing 
list with full description why we do this change.


Thanks,
Christian.

On 15.07.22 at 15:33, Dong, Ruijing wrote:

[AMD Official Use Only - General]

Hi Christian,

You are right, when process the libdrm code review (not committed yet), we 
realized the corresponding file needs to align to the kernel.
So we will need to have this header file changed first, then to process libdrm 
code again.

Thanks,
Ruijing

-Original Message-
From: Christian König 
Sent: Friday, July 15, 2022 4:41 AM
To: Dong, Ruijing ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Liu, Leo ; 
Koenig, Christian 
Subject: Re: [PATCH] drm/amdgpu: align between libdrm and drm api

On 14.07.22 at 23:22, Ruijing Dong wrote:

define HW_IP_VCN_UNIFIED the same as HW_IP_VCN_ENC

Usually that should be the other way around, libdrm aligns to the kernel.

Why was that modification committed to libdrm first? There are usually plenty 
of warnings before we can do that.

Regards,
Christian.


Signed-off-by: Ruijing Dong 
---
   include/uapi/drm/amdgpu_drm.h | 1 +
   1 file changed, 1 insertion(+)

diff --git a/include/uapi/drm/amdgpu_drm.h
b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..fe33db8441bc
100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -560,6 +560,7 @@ struct drm_amdgpu_gem_va {
   #define AMDGPU_HW_IP_UVD_ENC  5
   #define AMDGPU_HW_IP_VCN_DEC  6
   #define AMDGPU_HW_IP_VCN_ENC  7
+#define AMDGPU_HW_IP_VCN_UNIFIED  AMDGPU_HW_IP_VCN_ENC
   #define AMDGPU_HW_IP_VCN_JPEG 8
   #define AMDGPU_HW_IP_NUM  9





Re: [PATCH] drm/amdgpu: Fix for drm buddy memory corruption

2022-07-16 Thread Christian König

On 14.07.22 at 12:12, Arunpravin Paneer Selvam wrote:

Users reported GPU page faults when running graphics applications, and
in some cases garbled graphics are observed as soon as X starts. This
patch fixes all of these issues.

Fix the typecast issue for the fpfn and lpfn variables, thus preventing
the 32-bit overflow that caused the memory corruption.
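
The root cause is that the shifts were evaluated in 32-bit arithmetic before
being assigned to a 64-bit variable; an illustrative snippet (not part of the
patch, assuming PAGE_SHIFT == 12):

	u32 lpfn = 0x140000;                 /* pfn of the 5 GiB boundary */
	u64 wrong = lpfn << PAGE_SHIFT;      /* shift done in 32 bits: wraps to 1 GiB */
	u64 right = (u64)lpfn << PAGE_SHIFT; /* widen first: 5 GiB as intended */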

Signed-off-by: Arunpravin Paneer Selvam 
Reported-by: Mike Lothian 
Tested-by: Mike Lothian 


Reviewed-by: Christian König 

I've re-applied the patches to drm-misc-next, solved the conflict in 
drm-tip and then pushed this to drm-misc-next-fixes.


With a little bit of luck everything should now be in place, but fingers 
crossed.


Regards,
Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 16 
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h |  2 +-
  2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 49e4092f447f..34d789054ec8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -366,11 +366,11 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,
unsigned long pages_per_block;
int r;
  
-	lpfn = place->lpfn << PAGE_SHIFT;

+   lpfn = (u64)place->lpfn << PAGE_SHIFT;
if (!lpfn)
lpfn = man->size;
  
-	fpfn = place->fpfn << PAGE_SHIFT;

+   fpfn = (u64)place->fpfn << PAGE_SHIFT;
  
  	max_bytes = adev->gmc.mc_vram_size;

if (tbo->type != ttm_bo_type_kernel)
@@ -410,12 +410,12 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,
/* Allocate blocks in desired range */
vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
  
-	remaining_size = vres->base.num_pages << PAGE_SHIFT;

+   remaining_size = (u64)vres->base.num_pages << PAGE_SHIFT;
  
	mutex_lock(&mgr->lock);

while (remaining_size) {
if (tbo->page_alignment)
-   min_block_size = tbo->page_alignment << PAGE_SHIFT;
+   min_block_size = (u64)tbo->page_alignment << PAGE_SHIFT;
else
min_block_size = mgr->default_page_size;
  
@@ -424,12 +424,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,

/* Limit maximum size to 2GiB due to SG table limitations */
size = min(remaining_size, 2ULL << 30);
  
-		if (size >= pages_per_block << PAGE_SHIFT)

-   min_block_size = pages_per_block << PAGE_SHIFT;
+   if (size >= (u64)pages_per_block << PAGE_SHIFT)
+   min_block_size = (u64)pages_per_block << PAGE_SHIFT;
  
  		cur_size = size;
  
-		if (fpfn + size != place->lpfn << PAGE_SHIFT) {

+   if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) {
/*
 * Except for actual range allocation, modify the size 
and
 * min_block_size conforming to continuous flag 
enablement
@@ -469,7 +469,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
LIST_HEAD(temp);
  
		trim_list = &vres->blocks;

-   original_size = vres->base.num_pages << PAGE_SHIFT;
+   original_size = (u64)vres->base.num_pages << PAGE_SHIFT;
  
  		/*

 * If size value is rounded up to min_block_size, trim the last
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
index 9a2db87186c7..bef0f561ba60 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -50,7 +50,7 @@ static inline u64 amdgpu_vram_mgr_block_start(struct 
drm_buddy_block *block)
  
  static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)

  {
-   return PAGE_SIZE << drm_buddy_block_order(block);
+   return (u64)PAGE_SIZE << drm_buddy_block_order(block);
  }
  
  static inline struct drm_buddy_block *




[PATCH] drm/amd/pm: enable mode1 reset for smu_v13_0_7

2022-07-16 Thread Kenneth Feng
enable mode1 reset for smu_v13_0_7 since it's missing.

Signed-off-by: Kenneth Feng 
---
 drivers/gpu/drm/amd/amdgpu/soc21.c   | 1 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c 
b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 765c3543ad18..00e9b7089feb 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -320,6 +320,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev)
 
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 0):
+   case IP_VERSION(13, 0, 7):
return AMD_RESET_METHOD_MODE1;
case IP_VERSION(13, 0, 4):
return AMD_RESET_METHOD_MODE2;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 6259a85bc818..6f0548714566 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -118,6 +118,7 @@ static struct cmn2asic_msg_mapping 
smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize,   
   0),
MSG_MAP(AllowGfxOff,PPSMC_MSG_AllowGfxOff,  
   0),
MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff,   
   0),
+   MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset,  
0),
MSG_MAP(PrepareMp1ForUnload,PPSMC_MSG_PrepareMp1ForUnload,  
   0),
 };
 
-- 
2.25.1



RE: [PATCH v4] drm/amdgpu: add HW_IP_VCN_UNIFIED type

2022-07-16 Thread Liu, Leo
[AMD Official Use Only - General]

Reviewed-by: Leo Liu 

-Original Message-
From: Dong, Ruijing 
Sent: July 15, 2022 4:04 PM
To: Koenig, Christian ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Liu, Leo ; 
Dong, Ruijing 
Subject: [PATCH v4] drm/amdgpu: add HW_IP_VCN_UNIFIED type

From VCN4, AMDGPU_HW_IP_VCN_UNIFIED is used to support both encoding and
decoding jobs, it re-uses the same queue number of AMDGPU_HW_IP_VCN_ENC.

link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits

Signed-off-by: Ruijing Dong 
---
 include/uapi/drm/amdgpu_drm.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h 
index 18d3246d636e..e268cd3cdb12 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -560,6 +560,12 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_HW_IP_UVD_ENC  5
 #define AMDGPU_HW_IP_VCN_DEC  6
 #define AMDGPU_HW_IP_VCN_ENC  7
+/**
+ * From VCN4, AMDGPU_HW_IP_VCN_UNIFIED is used to support
+ * both encoding and decoding jobs, it re-uses the same
+ * queue number of AMDGPU_HW_IP_VCN_ENC.
+ */
+#define AMDGPU_HW_IP_VCN_UNIFIED  AMDGPU_HW_IP_VCN_ENC
 #define AMDGPU_HW_IP_VCN_JPEG 8
 #define AMDGPU_HW_IP_NUM  9

--
2.25.1



[PATCH 22/31] drm/amd/display: Move phantom stream to FPU code

2022-07-16 Thread Rodrigo Siqueira
This commit moves the phantom stream FPU code to the dcn32_fpu file.

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c | 89 +--
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 84 +
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |  8 ++
 3 files changed, 94 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 1c124231b00a..a1bf24ad0787 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1831,93 +1831,6 @@ static void dcn32_enable_phantom_plane(struct dc *dc,
}
 }
 
-/**
- * 
***
- * dcn32_set_phantom_stream_timing: Set timing params for the phantom stream
- *
- * Set timing params of the phantom stream based on calculated output from DML.
- * This function first gets the DML pipe index using the DC pipe index, then
- * calls into DML (get_subviewport_lines_needed_in_mall) to get the number of
- * lines required for SubVP MCLK switching and assigns to the phantom stream
- * accordingly.
- *
- * - The number of SubVP lines calculated in DML does not take into account
- * FW processing delays and required pstate allow width, so we must include
- * that separately.
- *
- * - Set phantom backporch = vstartup of main pipe
- *
- * @param [in] dc: current dc state
- * @param [in] context: new dc state
- * @param [in] ref_pipe: Main pipe for the phantom stream
- * @param [in] pipes: DML pipe params
- * @param [in] pipe_cnt: number of DML pipes
- * @param [in] dc_pipe_idx: DC pipe index for the main pipe (i.e. ref_pipe)
- *
- * @return: void
- *
- * 
***
- */
-static void dcn32_set_phantom_stream_timing(struct dc *dc,
-   struct dc_state *context,
-   struct pipe_ctx *ref_pipe,
-   struct dc_stream_state *phantom_stream,
-   display_e2e_pipe_params_st *pipes,
-   unsigned int pipe_cnt,
-   unsigned int dc_pipe_idx)
-{
-   unsigned int i, pipe_idx;
-   struct pipe_ctx *pipe;
-   uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines;
-   unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel;
-   unsigned int dcfclk = 
context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
-   unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel];
-
-   // Find DML pipe index (pipe_idx) using dc_pipe_idx
-   for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
-   pipe = &context->res_ctx.pipe_ctx[i];
-
-   if (!pipe->stream)
-   continue;
-
-   if (i == dc_pipe_idx)
-   break;
-
-   pipe_idx++;
-   }
-
-   // Calculate lines required for pstate allow width and FW processing 
delays
-   pstate_width_fw_delay_lines = 
((double)(dc->caps.subvp_fw_processing_delay_us +
-   dc->caps.subvp_pstate_allow_width_us) / 100) *
-   (ref_pipe->stream->timing.pix_clk_100hz * 100) /
-   (double)ref_pipe->stream->timing.h_total;
-
-   // Update clks_cfg for calling into recalculate
-   pipes[0].clks_cfg.voltage = vlevel;
-   pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
-   pipes[0].clks_cfg.socclk_mhz = socclk;
-
-   // DML calculation for MALL region doesn't take into account FW delay
-   // and required pstate allow width for multi-display cases
-   phantom_vactive = 
get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, 
pipe_idx) +
-   pstate_width_fw_delay_lines;
-
-   // For backporch of phantom pipe, use vstartup of the main pipe
-   phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, 
pipe_idx);
-
-   phantom_stream->dst.y = 0;
-   phantom_stream->dst.height = phantom_vactive;
-   phantom_stream->src.y = 0;
-   phantom_stream->src.height = phantom_vactive;
-
-   phantom_stream->timing.v_addressable = phantom_vactive;
-   phantom_stream->timing.v_front_porch = 1;
-   phantom_stream->timing.v_total = phantom_stream->timing.v_addressable +
-   
phantom_stream->timing.v_front_porch +
-   
phantom_stream->timing.v_sync_width +
-   phantom_bp;
-}
-
 static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
@@ -1939,7 +1852,9 @@ static struct dc_stream_state 
*dcn32_enable_phantom_stream(struct dc *dc,

[PATCH 05/31] drm/amd/display: fix trigger_hotplug to support mst case

2022-07-16 Thread Rodrigo Siqueira
From: Wayne Lin 

[Why & How]
Correct a few problems below so that the debugfs trigger_hotplug entry
supports the mst case (see the usage note after the list)

* Adjust the place for acquiring the hpd_lock. We'll also access
  dc_link when simulating unplug
* When detecting that the connector is an mst root, call
  reset_cur_dp_mst_topology() to simulate unplug
* Don't support hotplug caused by CSN messages since we can't change
  mst topology info directly. We can't simulate that
* Clean up redundant code
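
For reference, the entry lives under the connector's debugfs directory;
assuming card 0 and a connector named DP-1 (both placeholders), writing 1 to
/sys/kernel/debug/dri/0/DP-1/trigger_hotplug simulates a plug and writing 0
simulates an unplug.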

Reviewed-by: Hersen Wu 
Acked-by: Alan Liu 
Signed-off-by: Wayne Lin 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c   | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index e0646db6fdbf..b764198eca5c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -1273,14 +1273,22 @@ static ssize_t trigger_hotplug(struct file *f, const 
char __user *buf,
return -EINVAL;
}
 
+   kfree(wr_buf);
+
if (param_nums <= 0) {
DRM_DEBUG_DRIVER("user data not be read\n");
-   kfree(wr_buf);
+   return -EINVAL;
+   }
+
+   mutex_lock(&aconnector->hpd_lock);
+
+   /* Don't support for mst end device*/
+   if (aconnector->mst_port) {
+   mutex_unlock(&aconnector->hpd_lock);
return -EINVAL;
}
 
if (param[0] == 1) {
-   mutex_lock(&aconnector->hpd_lock);
 
if (!dc_link_detect_sink(aconnector->dc_link, 
&new_connection_type) &&
new_connection_type != dc_connection_none)
@@ -1317,6 +1325,10 @@ static ssize_t trigger_hotplug(struct file *f, const 
char __user *buf,
 
amdgpu_dm_update_connector_after_detect(aconnector);
 
+   /* If the aconnector is the root node in mst topology */
+   if (aconnector->mst_mgr.mst_state == true)
+   reset_cur_dp_mst_topology(link);
+
drm_modeset_lock_all(dev);
dm_restore_drm_connector_state(dev, connector);
drm_modeset_unlock_all(dev);
@@ -1327,7 +1339,6 @@ static ssize_t trigger_hotplug(struct file *f, const char 
__user *buf,
 unlock:
	mutex_unlock(&aconnector->hpd_lock);
 
-   kfree(wr_buf);
return size;
 }
 
-- 
2.37.0



Re: [PATCH 2/3] drm/amdkfd: track unified memory reservation with xnack off

2022-07-16 Thread Felix Kuehling

On 2022-07-11 21:56, Alex Sierra wrote:

[WHY]
Unified memory with xnack off should be tracked, as userptr mappings
and legacy allocations are, to avoid oversubscribing system memory
when xnack is off.
[How]
Expose the functions reserve_mem_limit and unreserve_mem_limit to the
SVM API and call them on every prange creation and free.


One question and two nit-picks inline. Otherwise this looks good to me.




Signed-off-by: Alex Sierra 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  4 ++
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 25 
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 60 +--
  3 files changed, 60 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 73bf8b5f2aa9..83d955f0c52f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -305,6 +305,10 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device 
*adev, struct kgd_mem *
  void amdgpu_amdkfd_block_mmu_notifications(void *p);
  int amdgpu_amdkfd_criu_resume(void *p);
  bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+   uint64_t size, u32 alloc_flag);
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
+   uint64_t size, u32 alloc_flag);
  
  #if IS_ENABLED(CONFIG_HSA_AMD)

  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2bc36ff0aa0f..7480e7333e5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -129,7 +129,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
   *
   * Return: returns -ENOMEM in case of error, ZERO otherwise
   */
-static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag)
  {
uint64_t reserved_for_pt =
@@ -169,7 +169,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
amdgpu_device *adev,
 kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
 kfd_mem_limit.max_ttm_mem_limit) ||
-   (adev->kfd.vram_used + vram_needed >
+   (adev && adev->kfd.vram_used + vram_needed >
 adev->gmc.real_vram_size -
 atomic64_read(&adev->vram_pin_size) -
 reserved_for_pt)) {
@@ -180,7 +180,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
amdgpu_device *adev,
/* Update memory accounting by decreasing available system
 * memory, TTM memory and GPU memory as computed above
 */
-   adev->kfd.vram_used += vram_needed;
+   WARN_ONCE(vram_needed && !adev,
+ "adev reference can't be null when vram is used");
+   if (adev)
+   adev->kfd.vram_used += vram_needed;
kfd_mem_limit.system_mem_used += system_mem_needed;
kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
  
@@ -189,7 +192,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,

return ret;
  }
  
-static void unreserve_mem_limit(struct amdgpu_device *adev,

+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag)
  {
	spin_lock(&kfd_mem_limit.mem_limit_lock);
@@ -198,7 +201,10 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
kfd_mem_limit.system_mem_used -= size;
kfd_mem_limit.ttm_mem_used -= size;
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
-   adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
+   WARN_ONCE(!adev,
+ "adev reference can't be null when alloc mem flags vram is 
set");
+   if (adev)
+   adev->kfd.vram_used -= ALIGN(size, 
VRAM_ALLOCATION_ALIGN);
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
kfd_mem_limit.system_mem_used -= size;
} else if (!(alloc_flag &
@@ -207,11 +213,8 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
goto release;
}
-
-   WARN_ONCE(adev->kfd.vram_used < 0,
+   WARN_ONCE(adev && adev->kfd.vram_used < 0,
  "KFD VRAM memory accounting unbalanced");
-   WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
- "KFD TTM memory accounting unbalanced");


This looks like an unrelated change. Why are you removing this warning?



WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
  "KFD system memory accounting unbalanced");
  
@@ -225,7 +228,7 @@ void 

[PATCH v9 08/14] lib: test_hmm add ioctl to get zone device type

2022-07-16 Thread Alex Sierra
A new ioctl cmd is added to query the zone device type. This will be
used once test_hmm adds the zone device coherent type.

Signed-off-by: Alex Sierra 
Acked-by: Felix Kuehling 
Reviewed-by: Alistair Popple 
Signed-off-by: Christoph Hellwig 
---
 lib/test_hmm.c  | 11 +--
 lib/test_hmm_uapi.h | 14 ++
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index cfe632047839..915ef6b5b0d4 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -87,6 +87,7 @@ struct dmirror_chunk {
 struct dmirror_device {
struct cdev cdevice;
struct hmm_devmem   *devmem;
+   unsigned intzone_device_type;
 
unsigned intdevmem_capacity;
unsigned intdevmem_count;
@@ -1260,14 +1261,20 @@ static void dmirror_device_remove(struct dmirror_device 
*mdevice)
 static int __init hmm_dmirror_init(void)
 {
int ret;
-   int id;
+   int id = 0;
+   int ndevices = 0;
 
ret = alloc_chrdev_region(_dev, 0, DMIRROR_NDEVICES,
  "HMM_DMIRROR");
if (ret)
goto err_unreg;
 
-   for (id = 0; id < DMIRROR_NDEVICES; id++) {
+   memset(dmirror_devices, 0, DMIRROR_NDEVICES * 
sizeof(dmirror_devices[0]));
+   dmirror_devices[ndevices++].zone_device_type =
+   HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
+   dmirror_devices[ndevices++].zone_device_type =
+   HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
+   for (id = 0; id < ndevices; id++) {
ret = dmirror_device_init(dmirror_devices + id, id);
if (ret)
goto err_chrdev;
diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h
index f14dea5dcd06..0511af7464ee 100644
--- a/lib/test_hmm_uapi.h
+++ b/lib/test_hmm_uapi.h
@@ -31,10 +31,11 @@ struct hmm_dmirror_cmd {
 /* Expose the address space of the calling process through hmm device file */
 #define HMM_DMIRROR_READ   _IOWR('H', 0x00, struct hmm_dmirror_cmd)
 #define HMM_DMIRROR_WRITE  _IOWR('H', 0x01, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_MIGRATE_IOWR('H', 0x02, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_SNAPSHOT   _IOWR('H', 0x03, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_EXCLUSIVE  _IOWR('H', 0x04, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_CHECK_EXCLUSIVE_IOWR('H', 0x05, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_MIGRATE_TO_DEV _IOWR('H', 0x02, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_MIGRATE_TO_SYS _IOWR('H', 0x03, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_SNAPSHOT   _IOWR('H', 0x04, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_EXCLUSIVE  _IOWR('H', 0x05, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_CHECK_EXCLUSIVE_IOWR('H', 0x06, struct hmm_dmirror_cmd)
 
 /*
  * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
@@ -62,4 +63,9 @@ enum {
HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE = 0x30,
 };
 
+enum {
+   /* 0 is reserved to catch uninitialized type fields */
+   HMM_DMIRROR_MEMORY_DEVICE_PRIVATE = 1,
+};
+
 #endif /* _LIB_TEST_HMM_UAPI_H */
-- 
2.32.0



[PATCH] drm/amdkfd: track unified memory reservation with xnack off

2022-07-16 Thread Alex Sierra
[Why]
Unified memory with xnack off should be tracked, as userptr mappings
and legacy allocations are, to avoid oversubscribing system memory
when xnack is off.
[How]
Expose the functions reserve_mem_limit and unreserve_mem_limit to the
SVM API and call them on every prange creation and free.

Signed-off-by: Alex Sierra 
---
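As a side note, here is a tiny self-contained model of the accounting this
change extends to SVM ranges: reserve on prange creation, reject anything that
would oversubscribe the budget, release on free. The struct, names and the
1 GiB budget are illustrative only and do not mirror the kernel structures.

#include <stdint.h>
#include <stdio.h>

struct mem_limit {
	uint64_t system_mem_used;
	uint64_t max_system_mem_limit;
};

static int reserve_mem(struct mem_limit *lim, uint64_t size)
{
	if (lim->system_mem_used + size > lim->max_system_mem_limit)
		return -1;	/* would oversubscribe: reject the allocation */
	lim->system_mem_used += size;
	return 0;
}

static void unreserve_mem(struct mem_limit *lim, uint64_t size)
{
	lim->system_mem_used -= size;
}

int main(void)
{
	struct mem_limit lim = { 0, 1ULL << 30 };	/* pretend 1 GiB budget */

	if (reserve_mem(&lim, 512ULL << 20) == 0)	/* prange creation */
		printf("reserved, used=%llu bytes\n",
		       (unsigned long long)lim.system_mem_used);
	unreserve_mem(&lim, 512ULL << 20);		/* prange free */
	return 0;
}
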
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  4 ++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 23 ---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 60 +--
 3 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 73bf8b5f2aa9..83d955f0c52f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -305,6 +305,10 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device 
*adev, struct kgd_mem *
 void amdgpu_amdkfd_block_mmu_notifications(void *p);
 int amdgpu_amdkfd_criu_resume(void *p);
 bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+   uint64_t size, u32 alloc_flag);
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
+   uint64_t size, u32 alloc_flag);
 
 #if IS_ENABLED(CONFIG_HSA_AMD)
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2bc36ff0aa0f..39d589394160 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -129,7 +129,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
  *
  * Return: returns -ENOMEM in case of error, ZERO otherwise
  */
-static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag)
 {
uint64_t reserved_for_pt =
@@ -169,7 +169,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
amdgpu_device *adev,
 kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
 kfd_mem_limit.max_ttm_mem_limit) ||
-   (adev->kfd.vram_used + vram_needed >
+   (adev && adev->kfd.vram_used + vram_needed >
 adev->gmc.real_vram_size -
 atomic64_read(>vram_pin_size) -
 reserved_for_pt)) {
@@ -180,7 +180,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
amdgpu_device *adev,
/* Update memory accounting by decreasing available system
 * memory, TTM memory and GPU memory as computed above
 */
-   adev->kfd.vram_used += vram_needed;
+   WARN_ONCE(vram_needed && !adev,
+ "adev reference can't be null when vram is used");
+   if (adev)
+   adev->kfd.vram_used += vram_needed;
kfd_mem_limit.system_mem_used += system_mem_needed;
kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
 
@@ -189,7 +192,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
amdgpu_device *adev,
return ret;
 }
 
-static void unreserve_mem_limit(struct amdgpu_device *adev,
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag)
 {
	spin_lock(&kfd_mem_limit.mem_limit_lock);
@@ -198,7 +201,10 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
kfd_mem_limit.system_mem_used -= size;
kfd_mem_limit.ttm_mem_used -= size;
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
-   adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
+   WARN_ONCE(!adev,
+ "adev reference can't be null when alloc mem flags 
vram is set");
+   if (adev)
+   adev->kfd.vram_used -= ALIGN(size, 
VRAM_ALLOCATION_ALIGN);
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
kfd_mem_limit.system_mem_used -= size;
} else if (!(alloc_flag &
@@ -207,8 +213,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
goto release;
}
-
-   WARN_ONCE(adev->kfd.vram_used < 0,
+   WARN_ONCE(adev && adev->kfd.vram_used < 0,
  "KFD VRAM memory accounting unbalanced");
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
  "KFD TTM memory accounting unbalanced");
@@ -225,7 +230,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
u32 alloc_flags = bo->kfd_bo->alloc_flags;
u64 size = amdgpu_bo_size(bo);
 
-   unreserve_mem_limit(adev, size, alloc_flags);
+   amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags);
 
kfree(bo->kfd_bo);
 }
@@ -1788,7 +1793,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
   

[PATCH 11/31] drm/amd/display: Fix hard hang if DSC is disabled

2022-07-16 Thread Rodrigo Siqueira
We want to calculate the DTB clock values when DSC is enabled; however,
this is not the current behavior implemented in DCN32. Right now, DML is
trying to calculate DSC values even if DSC is disabled; as a result, we
can have a hard hang due to wrong clock calculation. This commit fixes
this issue by moving the calculation after the DSC check.

Signed-off-by: Rodrigo Siqueira 
---
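For illustration only, a standalone sketch of the ordering this patch
enforces: the non-DSC DTBCLK requirement is returned before any
DSC-dependent math is touched. The helper is not the DML function itself,
and the pixel clock / bpp values in main() are arbitrary.

#include <stdio.h>

static double required_dtbclk(int dsc_enable, double pixel_clock_mhz,
			      double output_bpp)
{
	if (!dsc_enable) {
		/* Same fallback as the early return in the hunk below. */
		double dtbclk = pixel_clock_mhz / 4.0 * output_bpp / 24.0;
		return dtbclk > 25.0 ? dtbclk : 25.0;
	}
	/* DSC-dependent terms (slice math, HCActive/HCBlank rates) would
	 * only be evaluated here, once DSC is known to be enabled. */
	return 25.0;
}

int main(void)
{
	printf("DTBCLK (DSC off): %.2f MHz\n", required_dtbclk(0, 594.0, 30.0));
	return 0;
}
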
 .../dc/dml/dcn32/display_mode_vba_util_32.c   | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index 5a701d9df0f7..febaff7d7343 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -1686,17 +1686,22 @@ double dml32_RequiredDTBCLK(
unsigned int  AudioRate,
unsigned int  AudioLayout)
 {
-   double PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
-   double HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
-   dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
-   double HCBlank = 64 + 32 *
-   dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
-   double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
-   double HActiveTribyteRate = PixelWordRate * HCActive / HActive;
+   double PixelWordRate;
+   double HCActive;
+   double HCBlank;
+   double AverageTribyteRate;
+   double HActiveTribyteRate;
 
if (DSCEnable != true)
return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
 
+   PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
+   HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
+   dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
+   HCBlank = 64 + 32 *
+   dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
+   AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
+   HActiveTribyteRate = PixelWordRate * HCActive / HActive;
	return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
 }
 
-- 
2.37.0



[PATCH 16/31] drm/amd/display: Update Cursor Attribute MALL cache

2022-07-16 Thread Rodrigo Siqueira
From: Chris Park 

[Why]
Cursor size can update without MALL cache update.
Update the register on cursor attribute as well.

[How]
Update cursor MALL cache on cursor attribute update.

Reviewed-by: Alvin Lee 
Acked-by: Alan Liu 
Signed-off-by: Chris Park 
---
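A quick standalone check of the size threshold used by the new
hubp32_cursor_set_attributes() below: the cursor is placed in MALL only when
its footprint (width * height * 4 bytes per pixel) exceeds 16 KiB. The helper
name here is illustrative, not driver code.

#include <stdbool.h>
#include <stdio.h>

static bool cursor_needs_mall(unsigned int width, unsigned int height)
{
	return (unsigned long)width * height * 4 > 16384;
}

int main(void)
{
	printf("64x64 cursor   -> MALL: %d\n", cursor_needs_mall(64, 64));	/* 16384 bytes: no  */
	printf("128x128 cursor -> MALL: %d\n", cursor_needs_mall(128, 128));	/* 65536 bytes: yes */
	return 0;
}
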
 .../gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c | 40 ++-
 .../gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h |  3 ++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c
index 0a7d64306481..3176b04a7740 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c
@@ -94,6 +94,44 @@ void hubp32_phantom_hubp_post_enable(struct hubp *hubp)
}
 }
 
+void hubp32_cursor_set_attributes(
+   struct hubp *hubp,
+   const struct dc_cursor_attributes *attr)
+{
+   struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+   enum cursor_pitch hw_pitch = hubp1_get_cursor_pitch(attr->pitch);
+   enum cursor_lines_per_chunk lpc = hubp2_get_lines_per_chunk(
+   attr->width, attr->color_format);
+
+   hubp->curs_attr = *attr;
+
+   REG_UPDATE(CURSOR_SURFACE_ADDRESS_HIGH,
+   CURSOR_SURFACE_ADDRESS_HIGH, attr->address.high_part);
+   REG_UPDATE(CURSOR_SURFACE_ADDRESS,
+   CURSOR_SURFACE_ADDRESS, attr->address.low_part);
+
+   REG_UPDATE_2(CURSOR_SIZE,
+   CURSOR_WIDTH, attr->width,
+   CURSOR_HEIGHT, attr->height);
+
+   REG_UPDATE_4(CURSOR_CONTROL,
+   CURSOR_MODE, attr->color_format,
+   CURSOR_2X_MAGNIFY, attr->attribute_flags.bits.ENABLE_MAGNIFICATION,
+   CURSOR_PITCH, hw_pitch,
+   CURSOR_LINES_PER_CHUNK, lpc);
+
+   REG_SET_2(CURSOR_SETTINGS, 0,
+   /* no shift of the cursor HDL schedule */
+   CURSOR0_DST_Y_OFFSET, 0,
+/* used to shift the cursor chunk request deadline */
+   CURSOR0_CHUNK_HDL_ADJUST, 3);
+
+   if (attr->width * attr->height * 4 > 16384)
+   REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, true);
+   else
+   REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, false);
+}
+
 static struct hubp_funcs dcn32_hubp_funcs = {
.hubp_enable_tripleBuffer = hubp2_enable_triplebuffer,
.hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled,
@@ -106,7 +144,7 @@ static struct hubp_funcs dcn32_hubp_funcs = {
.set_blank = hubp2_set_blank,
.dcc_control = hubp3_dcc_control,
.mem_program_viewport = min_set_viewport,
-   .set_cursor_attributes  = hubp2_cursor_set_attributes,
+   .set_cursor_attributes  = hubp32_cursor_set_attributes,
.set_cursor_position= hubp2_cursor_set_position,
.hubp_clk_cntl = hubp2_clk_cntl,
.hubp_vtg_sel = hubp2_vtg_sel,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h
index 00b4211389c2..c4315d50fbb0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h
@@ -58,6 +58,9 @@ void hubp32_prepare_subvp_buffering(struct hubp *hubp, bool 
enable);
 
 void hubp32_phantom_hubp_post_enable(struct hubp *hubp);
 
+void hubp32_cursor_set_attributes(struct hubp *hubp,
+   const struct dc_cursor_attributes *attr);
+
 bool hubp32_construct(
struct dcn20_hubp *hubp2,
struct dc_context *ctx,
-- 
2.37.0



[PATCH v9 12/14] tools: update test_hmm script to support SP config

2022-07-16 Thread Alex Sierra
Add two more parameters to set spm_addr_dev0 & spm_addr_dev1
addresses. These two parameters configure the start SP
addresses for each device in test_hmm driver.
Consequently, this configures zone device type as coherent.

Signed-off-by: Alex Sierra 
Acked-by: Felix Kuehling 
Reviewed-by: Alistair Popple 
Signed-off-by: Christoph Hellwig 
---
 tools/testing/selftests/vm/test_hmm.sh | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/vm/test_hmm.sh 
b/tools/testing/selftests/vm/test_hmm.sh
index 0647b525a625..539c9371e592 100755
--- a/tools/testing/selftests/vm/test_hmm.sh
+++ b/tools/testing/selftests/vm/test_hmm.sh
@@ -40,11 +40,26 @@ check_test_requirements()
 
 load_driver()
 {
-   modprobe $DRIVER > /dev/null 2>&1
+   if [ $# -eq 0 ]; then
+   modprobe $DRIVER > /dev/null 2>&1
+   else
+   if [ $# -eq 2 ]; then
+   modprobe $DRIVER spm_addr_dev0=$1 spm_addr_dev1=$2
+   > /dev/null 2>&1
+   else
+   echo "Missing module parameters. Make sure pass"\
+   "spm_addr_dev0 and spm_addr_dev1"
+   usage
+   fi
+   fi
if [ $? == 0 ]; then
major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
mknod /dev/hmm_dmirror0 c $major 0
mknod /dev/hmm_dmirror1 c $major 1
+   if [ $# -eq 2 ]; then
+   mknod /dev/hmm_dmirror2 c $major 2
+   mknod /dev/hmm_dmirror3 c $major 3
+   fi
fi
 }
 
@@ -58,7 +73,7 @@ run_smoke()
 {
echo "Running smoke test. Note, this test provides basic coverage."
 
-   load_driver
+   load_driver $1 $2
$(dirname "${BASH_SOURCE[0]}")/hmm-tests
unload_driver
 }
@@ -75,6 +90,9 @@ usage()
echo "# Smoke testing"
echo "./${TEST_NAME}.sh smoke"
echo
+   echo "# Smoke testing with SPM enabled"
+   echo "./${TEST_NAME}.sh smoke  "
+   echo
exit 0
 }
 
@@ -84,7 +102,7 @@ function run_test()
usage
else
if [ "$1" = "smoke" ]; then
-   run_smoke
+   run_smoke $2 $3
else
usage
fi
-- 
2.32.0



[PATCH 10/31] drm/amd/display: remove number of DSC slices override in DML

2022-07-16 Thread Rodrigo Siqueira
From: Wenjing Liu 

[why]
Number of DSC slices is an input to DML with high dependency
on display specific capability. This isn't something DML can decide
on its own. DML has to use the original number of DSC slices input
to DML during validation without modification. Otherwise the
computed DSC delay will not reflect the current configuration
and therefore causes validation failures.

[how]
Remove DML override for number of DSC slices parameter.

Reviewed-by: Alvin Lee 
Acked-by: Alan Liu 
Signed-off-by: Wenjing Liu 
---
 .../dc/dml/dcn32/display_mode_vba_32.c| 20 ---
 1 file changed, 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index c6c3a9e6731a..1712843dafaa 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -1897,26 +1897,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
v->MaximumSwathWidthInLineBufferChroma);
}
 
-   /*Number Of DSC Slices*/
-   for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
-   if (mode_lib->vba.BlendingAndTiming[k] == k) {
-   if (mode_lib->vba.PixelClockBackEnd[k] > 4800) {
-   mode_lib->vba.NumberOfDSCSlices[k] = 
dml_ceil(mode_lib->vba.PixelClockBackEnd[k] / 600,
-   4);
-   } else if (mode_lib->vba.PixelClockBackEnd[k] > 2400) {
-   mode_lib->vba.NumberOfDSCSlices[k] = 8;
-   } else if (mode_lib->vba.PixelClockBackEnd[k] > 1200) {
-   mode_lib->vba.NumberOfDSCSlices[k] = 4;
-   } else if (mode_lib->vba.PixelClockBackEnd[k] > 340) {
-   mode_lib->vba.NumberOfDSCSlices[k] = 2;
-   } else {
-   mode_lib->vba.NumberOfDSCSlices[k] = 1;
-   }
-   } else {
-   mode_lib->vba.NumberOfDSCSlices[k] = 0;
-   }
-   }
-
dml32_CalculateSwathAndDETConfiguration(
mode_lib->vba.DETSizeOverride,
mode_lib->vba.UsesMALLForPStateChange,
-- 
2.37.0



[PATCH v9 07/14] drm/amdkfd: add SPM support for SVM

2022-07-16 Thread Alex Sierra
When the CPU is connected through XGMI, it has coherent
access to the VRAM resource. In this case that resource
is taken from a table in the device gmc aperture base.
This resource is used along with the device type, which could
be DEVICE_PRIVATE or DEVICE_COHERENT, to create the device
page map region.
Also, the MIGRATE_VMA_SELECT_DEVICE_COHERENT flag is selected for
the coherent type case during migration to device.

Signed-off-by: Alex Sierra 
Reviewed-by: Felix Kuehling 
Signed-off-by: Christoph Hellwig 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 34 +++-
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index e44376c2ecdc..f73e3e340413 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -671,13 +671,15 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
migrate.vma = vma;
migrate.start = start;
migrate.end = end;
-   migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
+   if (adev->gmc.xgmi.connected_to_cpu)
+   migrate.flags = MIGRATE_VMA_SELECT_DEVICE_COHERENT;
+   else
+   migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
 
buf = kvcalloc(npages,
   2 * sizeof(*migrate.src) + sizeof(uint64_t) + 
sizeof(dma_addr_t),
   GFP_KERNEL);
-
if (!buf)
goto out;
 
@@ -947,7 +949,7 @@ int svm_migrate_init(struct amdgpu_device *adev)
 {
struct kfd_dev *kfddev = adev->kfd.dev;
struct dev_pagemap *pgmap;
-   struct resource *res;
+   struct resource *res = NULL;
unsigned long size;
void *r;
 
@@ -962,28 +964,34 @@ int svm_migrate_init(struct amdgpu_device *adev)
 * should remove reserved size
 */
size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
-   res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
-   if (IS_ERR(res))
-   return -ENOMEM;
+   if (adev->gmc.xgmi.connected_to_cpu) {
+   pgmap->range.start = adev->gmc.aper_base;
+   pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size - 
1;
+   pgmap->type = MEMORY_DEVICE_COHERENT;
+   } else {
+   res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
+   if (IS_ERR(res))
+   return -ENOMEM;
+   pgmap->range.start = res->start;
+   pgmap->range.end = res->end;
+   pgmap->type = MEMORY_DEVICE_PRIVATE;
+   }
 
-   pgmap->type = MEMORY_DEVICE_PRIVATE;
pgmap->nr_range = 1;
-   pgmap->range.start = res->start;
-   pgmap->range.end = res->end;
pgmap->ops = _migrate_pgmap_ops;
pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
-   pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
-
+   pgmap->flags = 0;
/* Device manager releases device-specific resources, memory region and
 * pgmap when driver disconnects from device.
 */
r = devm_memremap_pages(adev->dev, pgmap);
if (IS_ERR(r)) {
pr_err("failed to register HMM device memory\n");
-
/* Disable SVM support capability */
pgmap->type = 0;
-   devm_release_mem_region(adev->dev, res->start, 
resource_size(res));
+   if (pgmap->type == MEMORY_DEVICE_PRIVATE)
+   devm_release_mem_region(adev->dev, res->start,
+   res->end - res->start + 1);
return PTR_ERR(r);
}
 
-- 
2.32.0



[PATCH v9 04/14] mm: handling Non-LRU pages returned by vm_normal_pages

2022-07-16 Thread Alex Sierra
With DEVICE_COHERENT, we'll soon have vm_normal_pages() return
device-managed anonymous pages that are not LRU pages. Although they
behave like normal pages for purposes of mapping in CPU page tables and
for COW, they do not support LRU lists, NUMA migration or THP.

Callers to follow_page() currently don't expect ZONE_DEVICE pages,
however, with DEVICE_COHERENT we might now return ZONE_DEVICE. Check
for ZONE_DEVICE pages in applicable users of follow_page() as well.

Signed-off-by: Alex Sierra 
Acked-by: Felix Kuehling  (v2)
Reviewed-by: Alistair Popple  (v6)
---
 fs/proc/task_mmu.c |  2 +-
 mm/huge_memory.c   |  2 +-
 mm/khugepaged.c|  9 ++---
 mm/ksm.c   |  6 +++---
 mm/madvise.c   |  4 ++--
 mm/memory.c| 10 +-
 mm/mempolicy.c |  2 +-
 mm/migrate.c   |  4 ++--
 mm/mlock.c |  2 +-
 mm/mprotect.c  |  2 +-
 10 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2d04e3470d4c..2dd8c8a66924 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1792,7 +1792,7 @@ static struct page *can_gather_numa_stats(pte_t pte, 
struct vm_area_struct *vma,
return NULL;
 
page = vm_normal_page(vma, addr, pte);
-   if (!page)
+   if (!page || is_zone_device_page(page))
return NULL;
 
if (PageReserved(page))
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 834f288b3769..c47e95b02244 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2910,7 +2910,7 @@ static int split_huge_pages_pid(int pid, unsigned long 
vaddr_start,
 
if (IS_ERR(page))
continue;
-   if (!page)
+   if (!page || is_zone_device_page(page))
continue;
 
if (!is_transparent_hugepage(page))
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 16be62d493cd..671ac7800e53 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -618,7 +618,7 @@ static int __collapse_huge_page_isolate(struct 
vm_area_struct *vma,
goto out;
}
page = vm_normal_page(vma, address, pteval);
-   if (unlikely(!page)) {
+   if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
result = SCAN_PAGE_NULL;
goto out;
}
@@ -1267,7 +1267,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
writable = true;
 
page = vm_normal_page(vma, _address, pteval);
-   if (unlikely(!page)) {
+   if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
result = SCAN_PAGE_NULL;
goto out_unmap;
}
@@ -1479,7 +1479,8 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, 
unsigned long addr)
goto abort;
 
page = vm_normal_page(vma, addr, *pte);
-
+   if (WARN_ON_ONCE(page && is_zone_device_page(page)))
+   page = NULL;
/*
 * Note that uprobe, debugger, or MAP_PRIVATE may change the
 * page table, but the new page will not be a subpage of hpage.
@@ -1497,6 +1498,8 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, 
unsigned long addr)
if (pte_none(*pte))
continue;
page = vm_normal_page(vma, addr, *pte);
+   if (WARN_ON_ONCE(page && is_zone_device_page(page)))
+   goto abort;
page_remove_rmap(page, vma, false);
}
 
diff --git a/mm/ksm.c b/mm/ksm.c
index 54f78c9eecae..831b18a7a50b 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -475,7 +475,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned 
long addr)
cond_resched();
page = follow_page(vma, addr,
FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
-   if (IS_ERR_OR_NULL(page))
+   if (IS_ERR_OR_NULL(page) || is_zone_device_page(page))
break;
if (PageKsm(page))
ret = handle_mm_fault(vma, addr,
@@ -560,7 +560,7 @@ static struct page *get_mergeable_page(struct rmap_item 
*rmap_item)
goto out;
 
page = follow_page(vma, addr, FOLL_GET);
-   if (IS_ERR_OR_NULL(page))
+   if (IS_ERR_OR_NULL(page) || is_zone_device_page(page))
goto out;
if (PageAnon(page)) {
flush_anon_page(vma, page, addr);
@@ -2308,7 +2308,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct 
page **page)
if (ksm_test_exit(mm))
break;
*page = follow_page(vma, ksm_scan.address, FOLL_GET);
-   if (IS_ERR_OR_NULL(*page)) {
+   if (IS_ERR_OR_NULL(*page) || 

[PATCH v9 14/14] tools: add selftests to hmm for COW in device memory

2022-07-16 Thread Alex Sierra
The objective is to test the device migration mechanism on pages marked
as COW, for both the private and coherent device types. When writing to
COW private page(s), a page fault first migrates the pages back to
system memory and then duplicates them. For the COW device coherent
type, pages are duplicated directly from device memory.

Signed-off-by: Alex Sierra 
Acked-by: Felix Kuehling 
---
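For context, a plain-C illustration of the copy-on-write expectation the new
test builds on, with no device memory involved; the real test below adds the
migrate-to-device step and the snapshot checks on top of this behaviour.

#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	pid_t pid;

	if (buf == MAP_FAILED)
		return 1;
	buf[0] = 1;

	pid = fork();
	if (pid == 0) {
		/* Child still sees the value from before the parent's write. */
		sleep(1);
		printf("child sees %d\n", buf[0]);
		_exit(0);
	}
	/* Parent write hits the write-protected shared page and forces a
	 * copy-on-write duplication, so the child's view stays at 1. */
	buf[0] = 2;
	printf("parent sees %d\n", buf[0]);
	waitpid(pid, NULL, 0);
	return 0;
}
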
 tools/testing/selftests/vm/hmm-tests.c | 80 ++
 1 file changed, 80 insertions(+)

diff --git a/tools/testing/selftests/vm/hmm-tests.c 
b/tools/testing/selftests/vm/hmm-tests.c
index bb38b9777610..716b62c05e3d 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1874,4 +1874,84 @@ TEST_F(hmm, hmm_gup_test)
close(gup_fd);
hmm_buffer_free(buffer);
 }
+
+/*
+ * Test copy-on-write in device pages.
+ * In case of writing to COW private page(s), a page fault will migrate pages
+ * back to system memory first. Then, these pages will be duplicated. In case
+ * of COW device coherent type, pages are duplicated directly from device
+ * memory.
+ */
+TEST_F(hmm, hmm_cow_in_device)
+{
+   struct hmm_buffer *buffer;
+   unsigned long npages;
+   unsigned long size;
+   unsigned long i;
+   int *ptr;
+   int ret;
+   unsigned char *m;
+   pid_t pid;
+   int status;
+
+   npages = 4;
+   size = npages << self->page_shift;
+
+   buffer = malloc(sizeof(*buffer));
+   ASSERT_NE(buffer, NULL);
+
+   buffer->fd = -1;
+   buffer->size = size;
+   buffer->mirror = malloc(size);
+   ASSERT_NE(buffer->mirror, NULL);
+
+   buffer->ptr = mmap(NULL, size,
+  PROT_READ | PROT_WRITE,
+  MAP_PRIVATE | MAP_ANONYMOUS,
+  buffer->fd, 0);
+   ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+   /* Initialize buffer in system memory. */
+   for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+   ptr[i] = i;
+
+   /* Migrate memory to device. */
+
+   ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+   ASSERT_EQ(ret, 0);
+   ASSERT_EQ(buffer->cpages, npages);
+
+   pid = fork();
+   if (pid == -1)
+   ASSERT_EQ(pid, 0);
+   if (!pid) {
+   /* Child process waitd for SIGTERM from the parent. */
+   while (1) {
+   }
+   perror("Should not reach this\n");
+   exit(0);
+   }
+   /* Parent process writes to COW pages(s) and gets a
+* new copy in system. In case of device private pages,
+* this write causes a migration to system mem first.
+*/
+   for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+   ptr[i] = i;
+
+   /* Terminate child and wait */
+   EXPECT_EQ(0, kill(pid, SIGTERM));
+   EXPECT_EQ(pid, waitpid(pid, &status, 0));
+   EXPECT_NE(0, WIFSIGNALED(status));
+   EXPECT_EQ(SIGTERM, WTERMSIG(status));
+
+   /* Take snapshot to CPU pagetables */
+   ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+   ASSERT_EQ(ret, 0);
+   ASSERT_EQ(buffer->cpages, npages);
+   m = buffer->mirror;
+   for (i = 0; i < npages; i++)
+   ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[i]);
+
+   hmm_buffer_free(buffer);
+}
 TEST_HARNESS_MAIN
-- 
2.32.0



[PATCH v9 06/14] mm/gup: migrate device coherent pages when pinning instead of failing

2022-07-16 Thread Alex Sierra
From: Alistair Popple 

Currently any attempts to pin a device coherent page will fail. This is
because device coherent pages need to be managed by a device driver, and
pinning them would prevent a driver from migrating them off the device.

However this is no reason to fail pinning of these pages. They are
coherent and accessible from the CPU, so they can be migrated just like
pinned ZONE_MOVABLE pages. So instead of failing all attempts to pin
them, first try migrating them out of ZONE_DEVICE.

[hch: rebased to the split device memory checks,
  moved migrate_device_page to migrate_device.c]

Signed-off-by: Alistair Popple 
Acked-by: Felix Kuehling 
Signed-off-by: Christoph Hellwig 
---
 mm/gup.c| 50 +--
 mm/internal.h   |  1 +
 mm/migrate_device.c | 52 +
 3 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index b65fe8bf5af4..22b97ab61cd9 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1881,7 +1881,7 @@ static long check_and_migrate_movable_pages(unsigned long 
nr_pages,
unsigned long isolation_error_count = 0, i;
struct folio *prev_folio = NULL;
LIST_HEAD(movable_page_list);
-   bool drain_allow = true;
+   bool drain_allow = true, coherent_pages = false;
int ret = 0;
 
for (i = 0; i < nr_pages; i++) {
@@ -1891,9 +1891,38 @@ static long check_and_migrate_movable_pages(unsigned 
long nr_pages,
continue;
prev_folio = folio;
 
-   if (folio_is_longterm_pinnable(folio))
+   /*
+* Device coherent pages are managed by a driver and should not
+* be pinned indefinitely as it prevents the driver moving the
+* page. So when trying to pin with FOLL_LONGTERM instead try
+* to migrate the page out of device memory.
+*/
+   if (folio_is_device_coherent(folio)) {
+   /*
+* We always want a new GUP lookup with device coherent
+* pages.
+*/
+   pages[i] = 0;
+   coherent_pages = true;
+
+   /*
+* Migration will fail if the page is pinned, so convert
+* the pin on the source page to a normal reference.
+*/
+   if (gup_flags & FOLL_PIN) {
+   get_page(>page);
+   unpin_user_page(>page);
+   }
+
+   ret = migrate_device_coherent_page(&folio->page);
+   if (ret)
+   goto unpin_pages;
+
continue;
+   }
 
+   if (folio_is_longterm_pinnable(folio))
+   continue;
/*
 * Try to move out any movable page before pinning the range.
 */
@@ -1919,7 +1948,8 @@ static long check_and_migrate_movable_pages(unsigned long 
nr_pages,
folio_nr_pages(folio));
}
 
-   if (!list_empty(&movable_page_list) || isolation_error_count)
+   if (!list_empty(&movable_page_list) || isolation_error_count
+   || coherent_pages)
goto unpin_pages;
 
/*
@@ -1929,10 +1959,16 @@ static long check_and_migrate_movable_pages(unsigned 
long nr_pages,
return nr_pages;
 
 unpin_pages:
-   if (gup_flags & FOLL_PIN) {
-   unpin_user_pages(pages, nr_pages);
-   } else {
-   for (i = 0; i < nr_pages; i++)
+   /*
+* pages[i] might be NULL if any device coherent pages were found.
+*/
+   for (i = 0; i < nr_pages; i++) {
+   if (!pages[i])
+   continue;
+
+   if (gup_flags & FOLL_PIN)
+   unpin_user_page(pages[i]);
+   else
put_page(pages[i]);
}
 
diff --git a/mm/internal.h b/mm/internal.h
index c0f8fbe0445b..899dab512c5a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -853,6 +853,7 @@ int numa_migrate_prep(struct page *page, struct 
vm_area_struct *vma,
  unsigned long addr, int page_nid, int *flags);
 
 void free_zone_device_page(struct page *page);
+int migrate_device_coherent_page(struct page *page);
 
 /*
  * mm/gup.c
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 18bc6483f63a..7feeb447e3b9 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -686,6 +686,12 @@ void migrate_vma_pages(struct migrate_vma *migrate)
}
 
if (!page) {
+   /*
+* The only time there is no vma is when called from
+* migrate_device_coherent_page(). However this isn't
+* called if 

[PATCH 13/31] drm/amd/display: Update de-tile override to anticipate pipe splitting

2022-07-16 Thread Rodrigo Siqueira
From: Taimur Hassan 

[Why]
For certain MPO configurations, DML will split a pipe after DET buffer has
already been allocated by driver, resulting in allocation of more DET
segments than the configurable return buffer has, causing underflow.

[How]
Determine during DET override calculation whether or not a pipe will be
split later on by DML, and distribute DET segments based on expected
number of pipes.

Reviewed-by: Dmytro Laktyushkin 
Acked-by: Alan Liu 
Signed-off-by: Taimur Hassan 
---
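A rough standalone sketch of the per-pipe DET split the driver policy applies
once the expected (post-split) pipe count is known; the two sizes below are
placeholders, not the DCN3.2 register values, and the helper is illustrative.

#include <stdio.h>

#define MAX_DET_KB	1024	/* placeholder for the total DET budget */
#define DEFAULT_DET_KB	 256	/* placeholder per-pipe default size   */

static int det_size_for(int expected_pipes)
{
	/* Split the budget while few pipes are expected, otherwise fall
	 * back to the default per-pipe allocation. */
	return expected_pipes < 4 ? MAX_DET_KB / expected_pipes : DEFAULT_DET_KB;
}

int main(void)
{
	for (int pipes = 1; pipes <= 4; pipes++)
		printf("%d pipe(s): %d KB of DET each\n", pipes, det_size_for(pipes));
	return 0;
}
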
 .../drm/amd/display/dc/dcn32/dcn32_resource.c |  22 ++--
 .../drm/amd/display/dc/dcn32/dcn32_resource.h |   6 +-
 .../display/dc/dcn32/dcn32_resource_helpers.c | 112 +-
 3 files changed, 69 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 92d87745d933..631876832dfa 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -2984,7 +2984,7 @@ int dcn32_populate_dml_pipes_from_context(
int i, pipe_cnt;
	struct resource_context *res_ctx = &context->res_ctx;
struct pipe_ctx *pipe;
-   bool subvp_in_use = false;
+   bool subvp_in_use = false, is_pipe_split_expected[MAX_PIPES];
 
dcn20_populate_dml_pipes_from_context(dc, context, pipes, 
fast_validate);
 
@@ -3046,6 +3046,9 @@ int dcn32_populate_dml_pipes_from_context(
if (dc->debug.enable_single_display_2to1_odm_policy)
pipes[pipe_cnt].pipe.dest.odm_combine_policy = 
dm_odm_combine_policy_2to1;
}
+
+   is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, 
pipes[i].pipe, i);
+
pipe_cnt++;
}
 
@@ -3053,8 +3056,7 @@ int dcn32_populate_dml_pipes_from_context(
 * the DET available for each pipe). Use the DET override input to 
maintain our driver
 * policy.
 */
-   switch (pipe_cnt) {
-   case 1:
+   if (pipe_cnt == 1 && !is_pipe_split_expected[0]) {
pipes[0].pipe.src.det_size_override = DCN3_2_MAX_DET_SIZE;
if (pipe->plane_state && !dc->debug.disable_z9_mpc) {
if (!is_dual_plane(pipe->plane_state->format)) {
@@ -3065,18 +3067,8 @@ int dcn32_populate_dml_pipes_from_context(
pipes[0].pipe.src.det_size_override = 
320; // 5K or higher
}
}
-   break;
-   case 2:
-   case 3:
-   case 4:
-   // For 2 and 3 pipes, use (MAX_DET_SIZE / pipe_cnt), for 4 
pipes use default size for each pipe
-   for (i = 0; i < pipe_cnt; i++) {
-   pipes[i].pipe.src.det_size_override = (pipe_cnt < 4) ? 
(DCN3_2_MAX_DET_SIZE / pipe_cnt) : DCN3_2_DEFAULT_DET_SIZE;
-   }
-   break;
-   }
-
-   dcn32_update_det_override_for_mpo(dc, context, pipes);
+   } else
+   dcn32_determine_det_override(context, pipes, 
is_pipe_split_expected, pipe_cnt);
 
// In general cases we want to keep the dram clock change requirement
// (prefer configs that support MCLK switch). Only override to false
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
index db4546317cb5..10254ab7e9d9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
@@ -100,7 +100,9 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
 bool dcn32_subvp_in_use(struct dc *dc,
struct dc_state *context);
 
-void dcn32_update_det_override_for_mpo(struct dc *dc, struct dc_state *context,
-   display_e2e_pipe_params_st *pipes);
+bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st 
pipe, int index);
+
+void dcn32_determine_det_override(struct dc_state *context, 
display_e2e_pipe_params_st *pipes,
+   bool *is_pipe_split_expected, int pipe_cnt);
 
 #endif /* _DCN32_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index e001f6d1f6c3..a6ef1dba01fe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -26,6 +26,8 @@
 // header file of functions being implemented
 #include "dcn32_resource.h"
 #include "dcn20/dcn20_resource.h"
+#include "dml/dcn32/display_mode_vba_util_32.h"
+
 /**
  * 

  * dcn32_helper_populate_phantom_dlg_params: Get DLG params for phantom pipes 
and populate pipe_ctx
@@ -195,66 +197,68 @@ bool dcn32_subvp_in_use(struct dc *dc,
return false;
 }
 
-/* For MPO we adjust the DET allocation to ensure we have enough 

[PATCH 14/31] drm/amd/display: Disable GPUVM in IP resource configuration

2022-07-16 Thread Rodrigo Siqueira
From: Vladimir Stempen 

[Why]
VM enabled in the IP configuration causes UCLK not to
reach DPM0. The expectation for VM enable is that KMD
will indicate to DAL when VM is enabled; DAL will then
set the bit accordingly.

[How]
Set gpuvm_enable to zero in DCN3_20 and DCN3_21 resource.

Reviewed-by: Martin Leung 
Acked-by: Alan Liu 
Signed-off-by: Vladimir Stempen 
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 631876832dfa..0cb44ea9753b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -120,7 +120,7 @@ static const struct IP_BASE DCN_BASE = { { { { 0x0012, 
0x00C0, 0x34C
 #define DCN3_2_MIN_COMPBUF_SIZE_KB 128
 
 struct _vcs_dpi_ip_params_st dcn3_2_ip = {
-   .gpuvm_enable = 1,
+   .gpuvm_enable = 0,
.gpuvm_max_page_table_levels = 4,
.hostvm_enable = 0,
.rob_buffer_size_kbytes = 128,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
index ebbeebf972dc..d218c6dd71aa 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
@@ -123,7 +123,7 @@ static const struct IP_BASE DCN_BASE = { { { { 0x0012, 
0x00C0, 0x34C
 #define DCN3_2_DEFAULT_DET_SIZE 256
 
 struct _vcs_dpi_ip_params_st dcn3_21_ip = {
-   .gpuvm_enable = 1,
+   .gpuvm_enable = 0,
.gpuvm_max_page_table_levels = 4,
.hostvm_enable = 0,
.rob_buffer_size_kbytes = 128,
-- 
2.37.0



[PATCH 03/31] drm/amd/display: Update in dml

2022-07-16 Thread Rodrigo Siqueira
From: Alvin Lee 

Update DML to configure drr_display in vba struct.

Reviewed-by: Dmytro Laktyushkin 
Acked-by: Alan Liu 
Signed-off-by: Alvin Lee 
---
 drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h | 1 +
 drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index 87c9b9f9976e..e8b094006d95 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -499,6 +499,7 @@ struct _vcs_dpi_display_pipe_dest_params_st {
unsigned int refresh_rate;
bool synchronize_timings;
unsigned int odm_combine_policy;
+   bool drr_display;
 };
 
 struct _vcs_dpi_display_pipe_params_st {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 39f93072b5e0..083f89e276d6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -697,6 +697,7 @@ static void fetch_pipe_params(struct display_mode_lib 
*mode_lib)
mode_lib->vba.PixelClock[mode_lib->vba.NumberOfActivePlanes] = 
dst->pixel_rate_mhz;

mode_lib->vba.PixelClockBackEnd[mode_lib->vba.NumberOfActivePlanes] = 
dst->pixel_rate_mhz;
mode_lib->vba.DPPCLK[mode_lib->vba.NumberOfActivePlanes] = 
clks->dppclk_mhz;
+   mode_lib->vba.DRRDisplay[mode_lib->vba.NumberOfActiveSurfaces] 
= dst->drr_display;
if (ip->is_line_buffer_bpp_fixed)

mode_lib->vba.LBBitPerPixel[mode_lib->vba.NumberOfActivePlanes] =
ip->line_buffer_fixed_bpp;
-- 
2.37.0



[PATCH v9 13/14] tools: add hmm gup tests for device coherent type

2022-07-16 Thread Alex Sierra
The intention is to test the hmm device coherent type under different
get_user_pages paths. Also, test gup with the FOLL_LONGTERM flag set on
device coherent pages. These pages should get migrated back to system
memory.

Signed-off-by: Alex Sierra 
Reviewed-by: Alistair Popple 
---
 tools/testing/selftests/vm/hmm-tests.c | 110 +
 1 file changed, 110 insertions(+)

diff --git a/tools/testing/selftests/vm/hmm-tests.c 
b/tools/testing/selftests/vm/hmm-tests.c
index 4b547188ec40..bb38b9777610 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -36,6 +36,7 @@
  * in the usual include/uapi/... directory.
  */
 #include "../../../../lib/test_hmm_uapi.h"
+#include "../../../../mm/gup_test.h"
 
 struct hmm_buffer {
void*ptr;
@@ -59,6 +60,9 @@ enum {
 #define NTIMES 10
 
 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+/* Just the flags we need, copied from mm.h: */
+#define FOLL_WRITE 0x01 /* check pte is writable */
+#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite */
 
 FIXTURE(hmm)
 {
@@ -1764,4 +1768,110 @@ TEST_F(hmm, exclusive_cow)
hmm_buffer_free(buffer);
 }
 
+static int gup_test_exec(int gup_fd, unsigned long addr, int cmd,
+int npages, int size, int flags)
+{
+   struct gup_test gup = {
+   .nr_pages_per_call  = npages,
+   .addr   = addr,
+   .gup_flags  = FOLL_WRITE | flags,
+   .size   = size,
+   };
+
+   if (ioctl(gup_fd, cmd, &gup)) {
+   perror("ioctl on error\n");
+   return errno;
+   }
+
+   return 0;
+}
+
+/*
+ * Test get user device pages through gup_test. Setting PIN_LONGTERM flag.
+ * This should trigger a migration back to system memory for both, private
+ * and coherent type pages.
+ * This test makes use of gup_test module. Make sure GUP_TEST_CONFIG is added
+ * to your configuration before you run it.
+ */
+TEST_F(hmm, hmm_gup_test)
+{
+   struct hmm_buffer *buffer;
+   int gup_fd;
+   unsigned long npages;
+   unsigned long size;
+   unsigned long i;
+   int *ptr;
+   int ret;
+   unsigned char *m;
+
+   gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+   if (gup_fd == -1)
+   SKIP(return, "Skipping test, could not find gup_test driver");
+
+   npages = 4;
+   size = npages << self->page_shift;
+
+   buffer = malloc(sizeof(*buffer));
+   ASSERT_NE(buffer, NULL);
+
+   buffer->fd = -1;
+   buffer->size = size;
+   buffer->mirror = malloc(size);
+   ASSERT_NE(buffer->mirror, NULL);
+
+   buffer->ptr = mmap(NULL, size,
+  PROT_READ | PROT_WRITE,
+  MAP_PRIVATE | MAP_ANONYMOUS,
+  buffer->fd, 0);
+   ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+   /* Initialize buffer in system memory. */
+   for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+   ptr[i] = i;
+
+   /* Migrate memory to device. */
+   ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+   ASSERT_EQ(ret, 0);
+   ASSERT_EQ(buffer->cpages, npages);
+   /* Check what the device read. */
+   for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+   ASSERT_EQ(ptr[i], i);
+
+   ASSERT_EQ(gup_test_exec(gup_fd,
+   (unsigned long)buffer->ptr,
+   GUP_BASIC_TEST, 1, self->page_size, 0), 0);
+   ASSERT_EQ(gup_test_exec(gup_fd,
+   (unsigned long)buffer->ptr + 1 * 
self->page_size,
+   GUP_FAST_BENCHMARK, 1, self->page_size, 0), 0);
+   ASSERT_EQ(gup_test_exec(gup_fd,
+   (unsigned long)buffer->ptr + 2 * 
self->page_size,
+   PIN_FAST_BENCHMARK, 1, self->page_size, 
FOLL_LONGTERM), 0);
+   ASSERT_EQ(gup_test_exec(gup_fd,
+   (unsigned long)buffer->ptr + 3 * 
self->page_size,
+   PIN_LONGTERM_BENCHMARK, 1, self->page_size, 0), 
0);
+
+   /* Take snapshot to CPU pagetables */
+   ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+   ASSERT_EQ(ret, 0);
+   ASSERT_EQ(buffer->cpages, npages);
+   m = buffer->mirror;
+   if (hmm_is_coherent_type(variant->device_number)) {
+   ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | 
HMM_DMIRROR_PROT_WRITE, m[0]);
+   ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | 
HMM_DMIRROR_PROT_WRITE, m[1]);
+   } else {
+   ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[0]);
+   ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[1]);
+   }
+   ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[2]);
+   ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[3]);
+   /*
+* Check again the content 

[PATCH 26/31] drm/amd/display: Move ntuple to insert entry

2022-07-16 Thread Rodrigo Siqueira
Move get_optimal_ntuple to the FPU code and call it inside
insert_entry_into_table_sorted.

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
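A standalone model of what get_optimal_ntuple() computes: starting from one
known clock, derive the other two members of the tuple so all three express
the same effective bandwidth, following the formulas visible in the removed
hunk below. The SoC constants here are placeholders, not the dcn3_2_soc
values.

#include <stdio.h>

#define RETURN_BUS_WIDTH_BYTES		64.0	/* placeholder */
#define NUM_CHANS			 8.0	/* placeholder */
#define DRAM_CHANNEL_WIDTH_BYTES	 2.0	/* placeholder */
#define PCT_SDP				90.0	/* placeholder */
#define PCT_FABRIC			90.0	/* placeholder */
#define PCT_DRAM			65.0	/* placeholder */

int main(void)
{
	double dcfclk_mhz = 1000.0;

	/* Bandwidth carried on the SDP at this DCFCLK ... */
	double bw_on_sdp = dcfclk_mhz * RETURN_BUS_WIDTH_BYTES * (PCT_SDP / 100.0);
	/* ... and the fabric clock / DRAM speed that carry the same bandwidth. */
	double fabricclk_mhz = bw_on_sdp /
		(RETURN_BUS_WIDTH_BYTES * (PCT_FABRIC / 100.0));
	double dram_speed_mts = bw_on_sdp /
		(NUM_CHANS * DRAM_CHANNEL_WIDTH_BYTES * (PCT_DRAM / 100.0));

	printf("dcfclk=%.0f MHz -> fabricclk=%.0f MHz, dram=%.0f MT/s\n",
	       dcfclk_mhz, fabricclk_mhz, dram_speed_mts);
	return 0;
}
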
 .../drm/amd/display/dc/dcn32/dcn32_resource.c | 28 ---
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 25 +
 2 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 32edb3e5715a..adcc83e6ea55 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1956,29 +1956,6 @@ void dcn32_calculate_wm_and_dlg(struct dc *dc, struct 
dc_state *context,
 DC_FP_END();
 }
 
-static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry)
-{
-   if (entry->dcfclk_mhz > 0) {
-   float bw_on_sdp = entry->dcfclk_mhz * 
dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100);
-
-   entry->fabricclk_mhz = bw_on_sdp / 
(dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100));
-   entry->dram_speed_mts = bw_on_sdp / (dcn3_2_soc.num_chans *
-   dcn3_2_soc.dram_channel_width_bytes * 
((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
-   } else if (entry->fabricclk_mhz > 0) {
-   float bw_on_fabric = entry->fabricclk_mhz * 
dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100);
-
-   entry->dcfclk_mhz = bw_on_fabric / 
(dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100));
-   entry->dram_speed_mts = bw_on_fabric / (dcn3_2_soc.num_chans *
-   dcn3_2_soc.dram_channel_width_bytes * 
((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100));
-   } else if (entry->dram_speed_mts > 0) {
-   float bw_on_dram = entry->dram_speed_mts * dcn3_2_soc.num_chans 
*
-   dcn3_2_soc.dram_channel_width_bytes * 
((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
-
-   entry->fabricclk_mhz = bw_on_dram / 
(dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100));
-   entry->dcfclk_mhz = bw_on_dram / 
(dcn3_2_soc.return_bus_width_bytes * 
((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100));
-   }
-}
-
 static void remove_entry_from_table_at_index(struct 
_vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
unsigned int index)
 {
@@ -2062,7 +2039,6 @@ static int build_synthetic_soc_states(struct 
clk_bw_params *bw_params,
entry.fabricclk_mhz = 0;
entry.dram_speed_mts = 0;
 
-   get_optimal_ntuple(&entry);
DC_FP_START();
	insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
@@ -2073,7 +2049,6 @@ static int build_synthetic_soc_states(struct 
clk_bw_params *bw_params,
entry.fabricclk_mhz = 0;
entry.dram_speed_mts = 0;
 
-   get_optimal_ntuple(&entry);
DC_FP_START();
	insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
@@ -2084,7 +2059,6 @@ static int build_synthetic_soc_states(struct 
clk_bw_params *bw_params,
entry.fabricclk_mhz = 0;
entry.dram_speed_mts = 
bw_params->clk_table.entries[i].memclk_mhz * 16;
 
-   get_optimal_ntuple(&entry);
DC_FP_START();
	insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
@@ -2097,7 +2071,6 @@ static int build_synthetic_soc_states(struct 
clk_bw_params *bw_params,
entry.fabricclk_mhz = 
bw_params->clk_table.entries[i].fclk_mhz;
entry.dram_speed_mts = 0;
 
-   get_optimal_ntuple(&entry);
DC_FP_START();
	insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
@@ -2109,7 +2082,6 @@ static int build_synthetic_soc_states(struct 
clk_bw_params *bw_params,
entry.fabricclk_mhz = max_fclk_mhz;
entry.dram_speed_mts = 0;
 
-   get_optimal_ntuple(&entry);
DC_FP_START();
	insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 66102db87265..7c60a954737b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -353,6 +353,29 @@ static float calculate_net_bw_in_kbytes_sec(struct 
_vcs_dpi_voltage_scaling_st *
return 

[PATCH 19/31] drm/amd/display: Move populate phaton function to dml

2022-07-16 Thread Rodrigo Siqueira
The function dcn32_helper_populate_phantom_dlg_params uses FPU
operations. For this reason, this commit moves this function to the
dcn32_fpu file, and we ensure that we only invoke it under the
kernel_fpu protection.

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c |  7 +++
 .../display/dc/dcn32/dcn32_resource_helpers.c | 44 ---
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 43 ++
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |  5 +++
 4 files changed, 55 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 39214a0dcdf2..411ce13847c2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -88,6 +88,7 @@
 #include "dml/dcn30/display_mode_vba_30.h"
 #include "vm_helper.h"
 #include "dcn20/dcn20_vmid.h"
+#include "dml/dcn32/dcn32_fpu.h"
 
 #define DCN_BASE__INST0_SEG1   0x00C0
 #define DCN_BASE__INST0_SEG2   0x34C0
@@ -312,6 +313,7 @@ enum dcn32_clk_src_array_id {
.reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \
regBIF_BX0_ ## reg_name
 
+#undef CTX
 #define CTX ctx
 #define REG(reg_name) \
(DCN_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## 
reg_name)
@@ -2667,6 +2669,11 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
memset(merge, 0, MAX_PIPES * sizeof(bool));
*vlevel = dcn20_validate_apply_pipe_split_flags(dc, 
context, *vlevel, split, merge);
 
+   // Most populate phantom DLG params before programming 
hardware / timing for phantom pipe
+   DC_FP_START();
+   dcn32_helper_populate_phantom_dlg_params(dc, context, 
pipes, *pipe_cnt);
+   DC_FP_END();
+
// Note: We can't apply the phantom pipes to hardware 
at this time. We have to wait
// until driver has acquired the DMCUB lock to do it 
safely.
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index a6ef1dba01fe..633d3ee18cfa 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -28,50 +28,6 @@
 #include "dcn20/dcn20_resource.h"
 #include "dml/dcn32/display_mode_vba_util_32.h"
 
-/**
- * 

- * dcn32_helper_populate_phantom_dlg_params: Get DLG params for phantom pipes 
and populate pipe_ctx
- * with those params.
- *
- * This function must be called AFTER the phantom pipes are added to context 
and run through DML
- * (so that the DLG params for the phantom pipes can be populated), and BEFORE 
we program the
- * timing for the phantom pipes.
- *
- * @param [in] dc: current dc state
- * @param [in] context: new dc state
- * @param [in] pipes: DML pipe params array
- * @param [in] pipe_cnt: DML pipe count
- *
- * @return: void
- *
- * 

- */
-void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
-   struct dc_state *context,
-   display_e2e_pipe_params_st *pipes,
-   int pipe_cnt)
-{
-   uint32_t i, pipe_idx;
-   for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
-   struct pipe_ctx *pipe = >res_ctx.pipe_ctx[i];
-   if (!pipe->stream)
-   continue;
-
-   if (pipe->plane_state && pipe->stream->mall_stream_config.type 
== SUBVP_PHANTOM) {
-   pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt,
-   pipe_idx);
-   pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
-   pipe_idx);
-   pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt,
-   pipe_idx);
-   pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
-   pipe_idx);
-   pipe->pipe_dlg_param = pipes[pipe_idx].pipe.dest;
-   }
-   pipe_idx++;
-   }
-}
-
 /**
  * 

  * dcn32_helper_calculate_num_ways_for_subvp: Calculate number of ways needed 
for SubVP
diff --git 

[PATCH 12/31] drm/amd/display: Don't set dram clock change requirement for SubVP

2022-07-16 Thread Rodrigo Siqueira
From: Alvin Lee 

[Description]
In general cases we want to keep the dram clock change requirement (we
prefer configs that support MCLK switch). Only override to false for
SubVP.

Acked-by: Alan Liu 
Signed-off-by: Alvin Lee 
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index f913daabcca5..92d87745d933 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -2984,6 +2984,7 @@ int dcn32_populate_dml_pipes_from_context(
int i, pipe_cnt;
	struct resource_context *res_ctx = &context->res_ctx;
struct pipe_ctx *pipe;
+   bool subvp_in_use = false;
 
dcn20_populate_dml_pipes_from_context(dc, context, pipes, 
fast_validate);
 
@@ -3006,6 +3007,7 @@ int dcn32_populate_dml_pipes_from_context(
switch (pipe->stream->mall_stream_config.type) {
case SUBVP_MAIN:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = 
dm_use_mall_pstate_change_sub_viewport;
+   subvp_in_use = true;
break;
case SUBVP_PHANTOM:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = 
dm_use_mall_pstate_change_phantom_pipe;
@@ -3076,6 +3078,14 @@ int dcn32_populate_dml_pipes_from_context(
 
dcn32_update_det_override_for_mpo(dc, context, pipes);
 
+   // In general cases we want to keep the dram clock change requirement
+   // (prefer configs that support MCLK switch). Only override to false
+   // for SubVP
+   if (subvp_in_use)
+   context->bw_ctx.dml.soc.dram_clock_change_requirement_final = 
false;
+   else
+   context->bw_ctx.dml.soc.dram_clock_change_requirement_final = 
true;
+
return pipe_cnt;
 }
 
-- 
2.37.0



RE: [PATCH v3] drm/amdgpu: add comments to HW_IP_VCN_ENC

2022-07-16 Thread Liu, Leo
[AMD Official Use Only - General]

Reviewed-by: Leo Liu 

-Original Message-
From: Dong, Ruijing 
Sent: July 15, 2022 12:09 PM
To: Koenig, Christian ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Liu, Leo ; 
Dong, Ruijing 
Subject: [PATCH v3] drm/amdgpu: add comments to HW_IP_VCN_ENC

>From VCN4, HW_IP_VCN_ENC will be used as unified queue, and support both 
>encoding and decoding jobs, HW_IP_VCN_DEC is retired from VCN4.

link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits

Signed-off-by: Ruijing Dong 
---
 include/uapi/drm/amdgpu_drm.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h 
index 18d3246d636e..29e4a1ece2ce 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -559,7 +559,14 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_HW_IP_VCE  4
 #define AMDGPU_HW_IP_UVD_ENC  5
 #define AMDGPU_HW_IP_VCN_DEC  6
+
+/**
+ * From VCN4, AMDGPU_HW_IP_VCN_ENC will be used as unified queue
+ * and support both encoding and decoding jobs, AMDGPU_HW_IP_VCN_DEC
+ * is retired from VCN4.
+ */
 #define AMDGPU_HW_IP_VCN_ENC  7
+
 #define AMDGPU_HW_IP_VCN_JPEG 8
 #define AMDGPU_HW_IP_NUM  9

--
2.25.1



[PATCH 07/31] drm/amd/display: Add tags for indicating mst progress status

2022-07-16 Thread Rodrigo Siqueira
From: Wayne Lin 

[Why & How]
In order to leverage the igt tool to maintain the mst feature, expose a
new debugfs entry "mst_progress_status".

In our dm flow, record the result of each mst phase so the user can
examine the mst result by checking whether each phase completed
successfully.

Reviewed-by: Hersen Wu 
Acked-by: Alan Liu 
Signed-off-by: Wayne Lin 
---
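For testing by hand, a small userspace sketch that dumps the new entry; the
DRI index and the "DP-1" connector name are assumptions for illustration. Per
the show callback added below, the output is either "disabled" or one
"<phase>:<done|not_done>" line per phase (probe, remote_edid,
allocate_new_payload, clear_allocated_payload).

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/dri/0/DP-1/mst_progress_status";
	char line[128];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* e.g. "probe:done" */
	fclose(f);
	return 0;
}
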
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 20 
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 46 ++-
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 18 +++-
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   | 13 ++
 4 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 33d66d4897dc..cdfd32c4128c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -571,6 +571,14 @@ struct dsc_preferred_settings {
bool dsc_force_disable_passthrough;
 };
 
+enum mst_progress_status {
+   MST_STATUS_DEFAULT = 0,
+   MST_PROBE = BIT(0),
+   MST_REMOTE_EDID = BIT(1),
+   MST_ALLOCATE_NEW_PAYLOAD = BIT(2),
+   MST_CLEAR_ALLOCATED_PAYLOAD = BIT(3),
+};
+
 struct amdgpu_dm_connector {
 
struct drm_connector base;
@@ -623,8 +631,20 @@ struct amdgpu_dm_connector {
struct drm_display_mode freesync_vid_base;
 
int psr_skip_count;
+
+   /* Record progress status of mst*/
+   uint8_t mst_status;
 };
 
+static inline void amdgpu_dm_set_mst_status(uint8_t *status,
+   uint8_t flags, bool set)
+{
+   if (set)
+   *status |= flags;
+   else
+   *status &= ~flags;
+}
+
 #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, 
base)
 
 extern const struct amdgpu_ip_block_version dm_ip_block;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 991e58a3a78c..cd8db385eda0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -49,6 +49,13 @@ struct dmub_debugfs_trace_entry {
uint32_t param1;
 };
 
+static const char *const mst_progress_status[] = {
+   "probe",
+   "remote_edid",
+   "allocate_new_payload",
+   "clear_allocated_payload",
+};
+
 static inline const char *yesno(bool v)
 {
return v ? "yes" : "no";
@@ -2607,6 +2614,41 @@ static int dp_is_mst_connector_show(struct seq_file *m, 
void *unused)
return 0;
 }
 
+/*
+ * function description: Read out the mst progress status
+ *
+ * This function helps to determine the mst progress status of
+ * a mst connector.
+ *
+ * Access it with the following command:
+ *
+ * cat /sys/kernel/debug/dri/0/DP-X/mst_progress_status
+ *
+ */
+static int dp_mst_progress_status_show(struct seq_file *m, void *unused)
+{
+   struct drm_connector *connector = m->private;
+   struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+   struct amdgpu_device *adev = drm_to_adev(connector->dev);
+   int i;
+
+   mutex_lock(&aconnector->hpd_lock);
+   mutex_lock(&adev->dm.dc_lock);
+
+   if (aconnector->mst_status == MST_STATUS_DEFAULT) {
+   seq_puts(m, "disabled\n");
+   } else {
+   for (i = 0; i < sizeof(mst_progress_status)/sizeof(char *); i++)
+   seq_printf(m, "%s:%s\n",
+   mst_progress_status[i],
+   aconnector->mst_status & BIT(i) ? "done" : 
"not_done");
+   }
+
+   mutex_unlock(&adev->dm.dc_lock);
+   mutex_unlock(&aconnector->hpd_lock);
+
+   return 0;
+}
 
 DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
 DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
@@ -2619,6 +2661,7 @@ DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability);
 DEFINE_SHOW_ATTRIBUTE(internal_display);
 DEFINE_SHOW_ATTRIBUTE(psr_capability);
 DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector);
+DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status);
 
 static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
.owner = THIS_MODULE,
@@ -2762,7 +2805,8 @@ static const struct {
{"dp_dsc_fec_support", _dsc_fec_support_fops},
{"max_bpc", _max_bpc_debugfs_fops},
{"dsc_disable_passthrough", 
_dsc_disable_passthrough_debugfs_fops},
-   {"is_mst_connector", _is_mst_connector_fops}
+   {"is_mst_connector", _is_mst_connector_fops},
+   {"mst_progress_status", _mst_progress_status_fops}
 };
 
 #ifdef CONFIG_DRM_AMD_DC_HDCP
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 137645d40b72..d66e3cd64ebd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -312,6 +312,8 @@ bool 

[PATCH 18/31] drm/amd/display: Drop FPU flags from dcn32_clk_mgr

2022-07-16 Thread Rodrigo Siqueira
We are working to isolate FPU operations inside the DML folder, and the
file dcn32_clk_mgr has some of these operations. This commit moves the
FPU operations out of the clock manager and creates the dcn32_fpu file
to aggregate them. Note that there is no functional change here, just
moving code from one place to another.
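
Once the FPU code lives under the DML folder (which is already built
with the hard-float flags), the per-file flags dropped from the clk_mgr
Makefile below are no longer needed; the clock manager only has to
bracket the call into the moved helper with the kernel FPU markers. A
minimal sketch of that pattern, assuming the moved helper is exported
from dcn32_fpu.h as dcn32_build_wm_range_table_fpu (the same pattern is
repeated by the later FPU-isolation patches in this series):

	/* dcn32_clk_mgr.c: delegate the FPU work to the DML-side helper */
	static void dcn32_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
	{
		DC_FP_START();
		dcn32_build_wm_range_table_fpu(clk_mgr);
		DC_FP_END();
	}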

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
 .../gpu/drm/amd/display/dc/clk_mgr/Makefile   |  25 
 .../display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  |  81 +
 drivers/gpu/drm/amd/display/dc/dml/Makefile   |   2 +
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 113 ++
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |  34 ++
 5 files changed, 153 insertions(+), 102 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
index 053084121db2..a48453612d10 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
@@ -188,31 +188,6 @@ CLK_MGR_DCN32 = dcn32_clk_mgr.o dcn32_clk_mgr_smu_msg.o
 
 AMD_DAL_CLK_MGR_DCN32 = $(addprefix 
$(AMDDALPATH)/dc/clk_mgr/dcn32/,$(CLK_MGR_DCN32))
 
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := 
-mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -msse2
-endif
-endif
-
 AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32)
 
 endif
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 08f07f31fe73..10726571007d 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -42,6 +42,7 @@
 #include "dcn/dcn_3_2_0_sh_mask.h"
 
 #include "dcn32/dcn32_clk_mgr.h"
+#include "dml/dcn32/dcn32_fpu.h"
 
 #define DCN_BASE__INST0_SEG1   0x00C0
 
@@ -146,83 +147,9 @@ static void dcn32_init_single_clock(struct 
clk_mgr_internal *clk_mgr, PPCLK_e cl
 
 static void dcn32_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
 {
-   /* defaults */
-   double pstate_latency_us = 
clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us;
-   double fclk_change_latency_us = 
clk_mgr->base.ctx->dc->dml.soc.fclk_change_latency_us;
-   double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us;
-   double sr_enter_plus_exit_time_us = 
clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us;
-   /* For min clocks use as reported by PM FW and report those as min */
-   uint16_t min_uclk_mhz   = 
clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz;
-   uint16_t min_dcfclk_mhz = 
clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz;
-   uint16_t setb_min_uclk_mhz  = min_uclk_mhz;
-   uint16_t dcfclk_mhz_for_the_second_state = 
clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz;
-
-   /* For Set B ranges use min clocks state 2 when available, and report 
those to PM FW */
-   if (dcfclk_mhz_for_the_second_state)
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = 
dcfclk_mhz_for_the_second_state;
-   else
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = 
clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz;
-
-   if (clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz)
-   setb_min_uclk_mhz = 
clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz;
-
-   /* Set A - Normal - default values */
-   clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us 
= pstate_latency_us;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us
 = fclk_change_latency_us;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = 
sr_exit_time_us;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us
 = sr_enter_plus_exit_time_us;
-   
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = 
WATERMARKS_CLOCK_RANGE;
-   

[PATCH 31/31] drm/amd/display: 3.2.195

2022-07-16 Thread Rodrigo Siqueira
From: Aric Cyr 

This version brings along the following fixes:

- Isolate FPU operation for DCN32/321 under the DML folder
- Create a specific file for CRTC and plane based on amdgpu_dm
- Fix DSC issues
- Update DML logic

Acked-by: Alan Liu 
Signed-off-by: Aric Cyr 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index be41f9fcf1dd..d05bbe193bfa 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.194"
+#define DC_VER "3.2.195"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
2.37.0



[PATCH 00/31] DC Patches July 15, 2022

2022-07-16 Thread Rodrigo Siqueira
This DC patchset brings improvements in multiple areas. In summary, we
highlight:

- Isolate FPU operation for DCN32/321 under the DML folder
- Create a specific file for CRTC and plane based on amdgpu_dm
- Fix DSC issues
- Update DML logic

Cc: Daniel Wheeler 

Thanks
Siqueira

Alvin Lee (2):
  drm/amd/display: Update in dml
  drm/amd/display: Don't set dram clock change requirement for SubVP

Aric Cyr (1):
  drm/amd/display: 3.2.195

Chris Park (1):
  drm/amd/display: Update Cursor Attribute MALL cache

Jun Lei (2):
  drm/amd/display: Remove unused variable
  drm/amd/display: Update DML logic for unbounded req handling

Rodrigo Siqueira (16):
  drm/amd/display: Create a file dedicated to planes
  drm/amd/display: Create a file dedicated for CRTC
  drm/amd/display: Fix hard hang if DSC is disabled
  drm/amd/display: Drop FPU flags from dcn32_clk_mgr
  drm/amd/display: Move populate phaton function to dml
  drm/amd/display: Move predict pipe to dml fpu folder
  drm/amd/display: Move insert entry table to the FPU code
  drm/amd/display: Move phanton stream to FPU code
  drm/amd/display: Move SubVP functions to dcn32_fpu
  drm/amd/display: Move wm and dlg calculation to FPU code
  drm/amd/display: Move dlg params calculation
  drm/amd/display: Move ntuple to insert entry
  drm/amd/display: Move bounding box to FPU folder
  drm/amd/display: Drop FPU flags from dcn32 Makefile
  drm/amd/display: Create dcn321_fpu file
  drm/amd/display: Drop FPU code from dcn321 resource

Taimur Hassan (2):
  drm/amd/display: Update de-tile override to anticipate pipe splitting
  drm/amd/display: Loop through all pipes for DET allocation

Vladimir Stempen (1):
  drm/amd/display: Disable GPUVM in IP resource configuration

Wayne Lin (5):
  drm/amd/display: Support vertical interrupt 0 for all dcn ASIC
  drm/amd/display: Expose function reset_cur_dp_mst_topology
  drm/amd/display: fix trigger_hotplug to support mst case
  drm/amd/display: Add is_mst_connector debugfs entry
  drm/amd/display: Add tags for indicating mst progress status

Wenjing Liu (1):
  drm/amd/display: remove number of DSC slices override in DML

 .../gpu/drm/amd/display/amdgpu_dm/Makefile|8 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2557 +++--
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |   20 +
 .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c|  463 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_crtc.h|   51 +
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c |  114 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c |   18 +-
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   |   13 +
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 1637 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.h   |   73 +
 .../gpu/drm/amd/display/dc/clk_mgr/Makefile   |   25 -
 .../display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  |   81 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link.c |2 +-
 drivers/gpu/drm/amd/display/dc/dc.h   |3 +-
 drivers/gpu/drm/amd/display/dc/dc_link.h  |3 +
 drivers/gpu/drm/amd/display/dc/dcn32/Makefile |   28 -
 .../gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c |   40 +-
 .../gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h |3 +
 .../drm/amd/display/dc/dcn32/dcn32_resource.c | 2039 +
 .../drm/amd/display/dc/dcn32/dcn32_resource.h |   15 +-
 .../display/dc/dcn32/dcn32_resource_helpers.c |  130 +-
 .../gpu/drm/amd/display/dc/dcn321/Makefile|   25 -
 .../amd/display/dc/dcn321/dcn321_resource.c   |  649 +
 .../amd/display/dc/dcn321/dcn321_resource.h   |3 +
 drivers/gpu/drm/amd/display/dc/dml/Makefile   |4 +
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 2244 +++
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |   74 +
 .../dc/dml/dcn32/display_mode_vba_32.c|   64 +-
 .../dc/dml/dcn32/display_mode_vba_util_32.c   |   70 +-
 .../dc/dml/dcn32/display_mode_vba_util_32.h   |   10 +-
 .../amd/display/dc/dml/dcn321/dcn321_fpu.c|  684 +
 .../amd/display/dc/dml/dcn321/dcn321_fpu.h|   38 +
 .../amd/display/dc/dml/display_mode_structs.h |1 +
 .../drm/amd/display/dc/dml/display_mode_vba.c |2 +
 .../display/dc/irq/dcn30/irq_service_dcn30.c  |   14 +-
 .../dc/irq/dcn303/irq_service_dcn303.c|   19 +
 36 files changed, 6035 insertions(+), 5189 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h

-- 
2.37.0



[PATCH 06/31] drm/amd/display: Add is_mst_connector debugfs entry

2022-07-16 Thread Rodrigo Siqueira
From: Wayne Lin 

[Why & How]
Add "is_mst_connector" debugfs entry to help distinguish whether
a connector is in a mst topology or not.

Access it with the following command:
cat /sys/kernel/debug/dri/0/DP-X/is_mst_connector

Result:
- "root" means the connector is the root of the topology
- "branch" means the connector is a branch device of the topology
- "end" means the connector is a leaf node of the topology
- "no" means the connector is not part of an MST topology

Reviewed-by: Hersen Wu 
Acked-by: Alan Liu 
Signed-off-by: Wayne Lin 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 53 +++
 1 file changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index b764198eca5c..991e58a3a78c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -2557,6 +2557,57 @@ static int target_backlight_show(struct seq_file *m, 
void *unused)
return 0;
 }
 
+/*
+ * function description: Determine if the connector is mst connector
+ *
+ * This function helps to determine whether a connector is a mst connector.
+ * - "root" stands for the root connector of the topology
+ * - "branch" stands for branch device of the topology
+ * - "end" stands for leaf node connector of the topology
+ * - "no" stands for the connector is not a device of a mst topology
+ * Access it with the following command:
+ *
+ * cat /sys/kernel/debug/dri/0/DP-X/is_mst_connector
+ *
+ */
+static int dp_is_mst_connector_show(struct seq_file *m, void *unused)
+{
+   struct drm_connector *connector = m->private;
+   struct amdgpu_dm_connector *aconnector = 
to_amdgpu_dm_connector(connector);
+   struct drm_dp_mst_topology_mgr *mgr = NULL;
+   struct drm_dp_mst_port *port = NULL;
+   char *role = NULL;
+
+   mutex_lock(&aconnector->hpd_lock);
+
+   if (aconnector->mst_mgr.mst_state) {
+   role = "root";
+   } else if (aconnector->mst_port &&
+   aconnector->mst_port->mst_mgr.mst_state) {
+
+   role = "end";
+
+   mgr = &aconnector->mst_port->mst_mgr;
+   port = aconnector->port;
+
+   drm_modeset_lock(&mgr->base.lock, NULL);
+   if (port->pdt == DP_PEER_DEVICE_MST_BRANCHING &&
+   port->mcs)
+   role = "branch";
+   drm_modeset_unlock(&mgr->base.lock);
+
+   } else {
+   role = "no";
+   }
+
+   seq_printf(m, "%s\n", role);
+
+   mutex_unlock(&aconnector->hpd_lock);
+
+   return 0;
+}
+
+
 DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
 DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
 DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
@@ -2567,6 +2618,7 @@ DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability);
 #endif
 DEFINE_SHOW_ATTRIBUTE(internal_display);
 DEFINE_SHOW_ATTRIBUTE(psr_capability);
+DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector);
 
 static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
.owner = THIS_MODULE,
@@ -2710,6 +2762,7 @@ static const struct {
{"dp_dsc_fec_support", _dsc_fec_support_fops},
{"max_bpc", _max_bpc_debugfs_fops},
{"dsc_disable_passthrough", 
_dsc_disable_passthrough_debugfs_fops},
+   {"is_mst_connector", _is_mst_connector_fops}
 };
 
 #ifdef CONFIG_DRM_AMD_DC_HDCP
-- 
2.37.0



[PATCH 24/31] drm/amd/display: Move wm and dlg calculation to FPU code

2022-07-16 Thread Rodrigo Siqueira
Move dcn32_calculate_wm_and_dlg from dcn32 resources to the FPU code.
Additionally, this commit adds an interface to it.

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c | 196 +-
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 185 +
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |   5 +
 3 files changed, 195 insertions(+), 191 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index d508909ff7a9..45768eff9315 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -2281,187 +2281,6 @@ int dcn32_populate_dml_pipes_from_context(
return pipe_cnt;
 }
 
-void dcn32_calculate_wm_and_dlg_fp(
-   struct dc *dc, struct dc_state *context,
-   display_e2e_pipe_params_st *pipes,
-   int pipe_cnt,
-   int vlevel)
-{
-   int i, pipe_idx, vlevel_temp = 0;
-   double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz;
-   double dcfclk_from_validation = 
context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
-   unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
-   bool pstate_en = 
context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
 !=
-   dm_dram_clock_change_unsupported;
-
-   // Override DRAMClockChangeSupport for SubVP + DRR case where the DRR 
cannot switch without stretching it's VBLANK
-   if (!pstate_en && dcn32_subvp_in_use(dc, context)) {
-   
context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
 = dm_dram_clock_change_vblank_w_mall_sub_vp;
-   pstate_en = true;
-   }
-
-   /* Set B:
-* For Set B calculations use clocks from clock_limits[2] when 
available i.e. when SMU is present,
-* otherwise use arbitrary low value from spreadsheet for DCFCLK as 
lower is safer for watermark
-* calculations to cover bootup clocks.
-* DCFCLK: soc.clock_limits[2] when available
-* UCLK: soc.clock_limits[2] when available
-*/
-   if (dcn3_2_soc.num_states > 2) {
-   vlevel_temp = 2;
-   dcfclk = dcn3_2_soc.clock_limits[2].dcfclk_mhz;
-   } else
-   dcfclk = 615; //DCFCLK Vmin_lv
-
-   pipes[0].clks_cfg.voltage = vlevel_temp;
-   pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
-   pipes[0].clks_cfg.socclk_mhz = 
context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz;
-
-   if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) {
-   context->bw_ctx.dml.soc.dram_clock_change_latency_us = 
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us;
-   context->bw_ctx.dml.soc.fclk_change_latency_us = 
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us;
-   context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = 
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us;
-   context->bw_ctx.dml.soc.sr_exit_time_us = 
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us;
-   }
-   context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = 
get_wm_urgent(>bw_ctx.dml, pipes, pipe_cnt) * 1000;
-   
context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = 
get_wm_stutter_enter_exit(>bw_ctx.dml, pipes, pipe_cnt) * 1000;
-   context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = 
get_wm_stutter_exit(>bw_ctx.dml, pipes, pipe_cnt) * 1000;
-   context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = 
get_wm_dram_clock_change(>bw_ctx.dml, pipes, pipe_cnt) * 1000;
-   context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = 
get_wm_memory_trip(>bw_ctx.dml, pipes, pipe_cnt) * 1000;
-   context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = 
get_fraction_of_urgent_bandwidth(>bw_ctx.dml, pipes, pipe_cnt) * 1000;
-   context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = 
get_fraction_of_urgent_bandwidth_imm_flip(>bw_ctx.dml, pipes, 
pipe_cnt) * 1000;
-   context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = 
get_urgent_latency(>bw_ctx.dml, pipes, pipe_cnt) * 1000;
-   context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns 
= get_fclk_watermark(>bw_ctx.dml, pipes, pipe_cnt) * 1000;
-   context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = 
get_usr_retraining_watermark(>bw_ctx.dml, pipes, pipe_cnt) * 1000;
-
-   /* Set D:
-* All clocks min.
-* DCFCLK: Min, as reported by PM FW when available
-* UCLK  : Min, as reported by PM FW when available
-* sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after 
bringup or later, 

[PATCH 30/31] drm/amd/display: Drop FPU code from dcn321 resource

2022-07-16 Thread Rodrigo Siqueira
This commit fully moves the remaining FPU operations from the dcn321
resource file to dcn321_fpu. It also removes the FPU flags from the
Makefile.

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
 .../gpu/drm/amd/display/dc/dcn321/Makefile|  25 -
 .../amd/display/dc/dcn321/dcn321_resource.c   | 452 +-
 .../amd/display/dc/dml/dcn321/dcn321_fpu.c| 446 +
 .../amd/display/dc/dml/dcn321/dcn321_fpu.h|   2 +
 4 files changed, 450 insertions(+), 475 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
index e554fd6c16f2..0a199c83bb5b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
@@ -12,31 +12,6 @@
 
 DCN321 = dcn321_resource.o dcn321_dio_link_encoder.o
 
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o := -mhard-float -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o += 
-mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o += -msse2
-endif
-endif
-
 AMD_DAL_DCN321 = $(addprefix $(AMDDALPATH)/dc/dcn321/,$(DCN321))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN321)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
index 6619bcb30de7..9ac0fcf79bed 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
@@ -1570,459 +1570,11 @@ static struct dc_cap_funcs cap_funcs = {
.get_dcc_compression_cap = dcn20_get_dcc_compression_cap
 };
 
-
-static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
-   unsigned int *optimal_dcfclk,
-   unsigned int *optimal_fclk)
-{
-   double bw_from_dram, bw_from_dram1, bw_from_dram2;
-
-   bw_from_dram1 = uclk_mts * dcn3_21_soc.num_chans *
-   dcn3_21_soc.dram_channel_width_bytes * 
(dcn3_21_soc.max_avg_dram_bw_use_normal_percent / 100);
-   bw_from_dram2 = uclk_mts * dcn3_21_soc.num_chans *
-   dcn3_21_soc.dram_channel_width_bytes * 
(dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100);
-
-   bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : 
bw_from_dram2;
-
-   if (optimal_fclk)
-   *optimal_fclk = bw_from_dram /
-   (dcn3_21_soc.fabric_datapath_to_dcn_data_return_bytes * 
(dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100));
-
-   if (optimal_dcfclk)
-   *optimal_dcfclk =  bw_from_dram /
-   (dcn3_21_soc.return_bus_width_bytes * 
(dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100));
-}
-
-static void remove_entry_from_table_at_index(struct 
_vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
-   unsigned int index)
-{
-   int i;
-
-   if (*num_entries == 0)
-   return;
-
-   for (i = index; i < *num_entries - 1; i++) {
-   table[i] = table[i + 1];
-   }
-   memset([--(*num_entries)], 0, sizeof(struct 
_vcs_dpi_voltage_scaling_st));
-}
-
-static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
-   struct _vcs_dpi_voltage_scaling_st *table, unsigned int 
*num_entries)
+static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params 
*bw_params)
 {
-   int i, j;
-   struct _vcs_dpi_voltage_scaling_st entry = {0};
-
-   unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 
0,
-   max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 
0, max_uclk_mhz = 0;
-
-   unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299;
-
-   static const unsigned int num_dcfclk_stas = 5;
-   unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 
1324, 1564};
-
-   unsigned int num_uclk_dpms = 0;
-   unsigned int num_fclk_dpms = 0;
-   unsigned int num_dcfclk_dpms = 0;
-
-   for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
-   if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
-   max_dcfclk_mhz = 
bw_params->clk_table.entries[i].dcfclk_mhz;
-   if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz)
-   max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
-   if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz)
-   max_uclk_mhz = 
bw_params->clk_table.entries[i].memclk_mhz;
-   if 

[PATCH v9 09/14] lib: test_hmm add module param for zone device type

2022-07-16 Thread Alex Sierra
In order to configure the device coherent type in test_hmm, two module
parameters must be passed, spm_addr_dev0 & spm_addr_dev1, which
correspond to the SP start addresses of the two devices. If no
parameters are passed, the private device type is configured.
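
For example, assuming two special-purpose memory ranges have already
been reserved (the addresses below are illustrative and must point at
real SP ranges of at least DEVMEM_CHUNK_SIZE each):

modprobe test_hmm spm_addr_dev0=0x100000000 spm_addr_dev1=0x140000000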

Signed-off-by: Alex Sierra 
Acked-by: Felix Kuehling 
Reviewed-by: Alistair Poppple 
Signed-off-by: Christoph Hellwig 
---
 lib/test_hmm.c  | 73 -
 lib/test_hmm_uapi.h |  1 +
 2 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 915ef6b5b0d4..afb30af9f3ff 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -37,6 +37,16 @@
 #define DEVMEM_CHUNK_SIZE  (256 * 1024 * 1024U)
 #define DEVMEM_CHUNKS_RESERVE  16
 
+static unsigned long spm_addr_dev0;
+module_param(spm_addr_dev0, long, 0644);
+MODULE_PARM_DESC(spm_addr_dev0,
+   "Specify start address for SPM (special purpose memory) used 
for device 0. By setting this Coherent device type will be used. Make sure 
spm_addr_dev1 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");
+
+static unsigned long spm_addr_dev1;
+module_param(spm_addr_dev1, long, 0644);
+MODULE_PARM_DESC(spm_addr_dev1,
+   "Specify start address for SPM (special purpose memory) used 
for device 1. By setting this Coherent device type will be used. Make sure 
spm_addr_dev0 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");
+
 static const struct dev_pagemap_ops dmirror_devmem_ops;
 static const struct mmu_interval_notifier_ops dmirror_min_ops;
 static dev_t dmirror_dev;
@@ -455,28 +465,44 @@ static int dmirror_write(struct dmirror *dmirror, struct 
hmm_dmirror_cmd *cmd)
return ret;
 }
 
-static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
+static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
   struct page **ppage)
 {
struct dmirror_chunk *devmem;
-   struct resource *res;
+   struct resource *res = NULL;
unsigned long pfn;
unsigned long pfn_first;
unsigned long pfn_last;
void *ptr;
+   int ret = -ENOMEM;
 
devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
if (!devmem)
-   return false;
+   return ret;
 
-   res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
- "hmm_dmirror");
-   if (IS_ERR(res))
+   switch (mdevice->zone_device_type) {
+   case HMM_DMIRROR_MEMORY_DEVICE_PRIVATE:
+   res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
+ "hmm_dmirror");
+   if (IS_ERR_OR_NULL(res))
+   goto err_devmem;
+   devmem->pagemap.range.start = res->start;
+   devmem->pagemap.range.end = res->end;
+   devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
+   break;
+   case HMM_DMIRROR_MEMORY_DEVICE_COHERENT:
+   devmem->pagemap.range.start = (MINOR(mdevice->cdevice.dev) - 2) 
?
+   spm_addr_dev0 :
+   spm_addr_dev1;
+   devmem->pagemap.range.end = devmem->pagemap.range.start +
+   DEVMEM_CHUNK_SIZE - 1;
+   devmem->pagemap.type = MEMORY_DEVICE_COHERENT;
+   break;
+   default:
+   ret = -EINVAL;
goto err_devmem;
+   }
 
-   devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
-   devmem->pagemap.range.start = res->start;
-   devmem->pagemap.range.end = res->end;
devmem->pagemap.nr_range = 1;
devmem->pagemap.ops = &dmirror_devmem_ops;
devmem->pagemap.owner = mdevice;
@@ -497,10 +523,14 @@ static bool dmirror_allocate_chunk(struct dmirror_device 
*mdevice,
mdevice->devmem_capacity = new_capacity;
mdevice->devmem_chunks = new_chunks;
}
-
ptr = memremap_pages(&devmem->pagemap, numa_node_id());
-   if (IS_ERR(ptr))
+   if (IS_ERR_OR_NULL(ptr)) {
+   if (ptr)
+   ret = PTR_ERR(ptr);
+   else
+   ret = -EFAULT;
goto err_release;
+   }
 
devmem->mdevice = mdevice;
pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
@@ -529,15 +559,17 @@ static bool dmirror_allocate_chunk(struct dmirror_device 
*mdevice,
}
spin_unlock(&mdevice->lock);
 
-   return true;
+   return 0;
 
 err_release:
mutex_unlock(&mdevice->devmem_lock);
-   release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
+   if (res && devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
+   release_mem_region(devmem->pagemap.range.start,
+  range_len(&devmem->pagemap.range));
 err_devmem:
kfree(devmem);
 
-   return false;
+  

[PATCH 23/31] drm/amd/display: Move SubVP functions to dcn32_fpu

2022-07-16 Thread Rodrigo Siqueira
Much of the code related to SubVP uses FPU operations, and many static
functions are part of this feature. This commit is a little large, but
it only moves the SubVP operations from one file to another; it had to
be done in a single change due to dependencies between functions.

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c | 597 +-
 .../drm/amd/display/dc/dcn32/dcn32_resource.h |   2 +
 .../display/dc/dcn32/dcn32_resource_helpers.c |  11 +
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 597 ++
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |   8 +
 5 files changed, 620 insertions(+), 595 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a1bf24ad0787..d508909ff7a9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1625,171 +1625,6 @@ bool dcn32_release_post_bldn_3dlut(
return ret;
 }
 
-/**
- 

- * dcn32_get_num_free_pipes: Calculate number of free pipes
- *
- * This function assumes that a "used" pipe is a pipe that has
- * both a stream and a plane assigned to it.
- *
- * @param [in] dc: current dc state
- * @param [in] context: new dc state
- *
- * @return: Number of free pipes available in the context
- *
- 

- */
-static unsigned int dcn32_get_num_free_pipes(struct dc *dc, struct dc_state 
*context)
-{
-   unsigned int i;
-   unsigned int free_pipes = 0;
-   unsigned int num_pipes = 0;
-
-   for (i = 0; i < dc->res_pool->pipe_count; i++) {
-   struct pipe_ctx *pipe = >res_ctx.pipe_ctx[i];
-
-   if (pipe->stream && !pipe->top_pipe) {
-   while (pipe) {
-   num_pipes++;
-   pipe = pipe->bottom_pipe;
-   }
-   }
-   }
-
-   free_pipes = dc->res_pool->pipe_count - num_pipes;
-   return free_pipes;
-}
-
-/**
- 

- * dcn32_assign_subvp_pipe: Function to decide which pipe will use Sub-VP.
- *
- * We enter this function if we are Sub-VP capable (i.e. enough pipes 
available)
- * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if
- * we are forcing SubVP P-State switching on the current config.
- *
- * The number of pipes used for the chosen surface must be less than or equal 
to the
- * number of free pipes available.
- *
- * In general we choose surfaces with the longest frame time first (better for 
SubVP + VBLANK).
- * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide 
enough info on its own
- * for determining which should be the SubVP pipe (need a way to determine if 
a pipe / plane doesn't
- * support MCLK switching naturally [i.e. ACTIVE or VBLANK]).
- *
- * @param [in] dc: current dc state
- * @param [in] context: new dc state
- * @param [out] index: dc pipe index for the pipe chosen to have phantom pipes 
assigned
- *
- * @return: True if a valid pipe assignment was found for Sub-VP. Otherwise 
false.
- *
- 

- */
-
-static bool dcn32_assign_subvp_pipe(struct dc *dc,
-   struct dc_state *context,
-   unsigned int *index)
-{
-   unsigned int i, pipe_idx;
-   unsigned int max_frame_time = 0;
-   bool valid_assignment_found = false;
-   unsigned int free_pipes = dcn32_get_num_free_pipes(dc, context);
-   bool current_assignment_freesync = false;
-
-   for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
-   struct pipe_ctx *pipe = >res_ctx.pipe_ctx[i];
-   unsigned int num_pipes = 0;
-
-   if (!pipe->stream)
-   continue;
-
-   if (pipe->plane_state && !pipe->top_pipe &&
-   pipe->stream->mall_stream_config.type == 
SUBVP_NONE) {
-   while (pipe) {
-   num_pipes++;
-   pipe = pipe->bottom_pipe;
-   }
-
-   pipe = >res_ctx.pipe_ctx[i];
-   if (num_pipes <= free_pipes) {
-   struct dc_stream_state *stream = pipe->stream;
-   unsigned int frame_us = (stream->timing.v_total 
* stream->timing.h_total /
-   
(double)(stream->timing.pix_clk_100hz * 100)) * 100;
-   if (frame_us > max_frame_time && 

[PATCH 27/31] drm/amd/display: Move bounding box to FPU folder

2022-07-16 Thread Rodrigo Siqueira
The final part of the DCN32 code that uses FPU is the bounding box code,
and this commit moves it to dcn32_fpu.

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c | 460 +
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 470 ++
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |   2 +
 3 files changed, 474 insertions(+), 458 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index adcc83e6ea55..b2e7d59e743f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1923,29 +1923,6 @@ static struct dc_cap_funcs cap_funcs = {
.get_dcc_compression_cap = dcn20_get_dcc_compression_cap
 };
 
-
-static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
-   unsigned int *optimal_dcfclk,
-   unsigned int *optimal_fclk)
-{
-   double bw_from_dram, bw_from_dram1, bw_from_dram2;
-
-   bw_from_dram1 = uclk_mts * dcn3_2_soc.num_chans *
-   dcn3_2_soc.dram_channel_width_bytes * 
(dcn3_2_soc.max_avg_dram_bw_use_normal_percent / 100);
-   bw_from_dram2 = uclk_mts * dcn3_2_soc.num_chans *
-   dcn3_2_soc.dram_channel_width_bytes * 
(dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100);
-
-   bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : 
bw_from_dram2;
-
-   if (optimal_fclk)
-   *optimal_fclk = bw_from_dram /
-   (dcn3_2_soc.fabric_datapath_to_dcn_data_return_bytes * 
(dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100));
-
-   if (optimal_dcfclk)
-   *optimal_dcfclk =  bw_from_dram /
-   (dcn3_2_soc.return_bus_width_bytes * 
(dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100));
-}
-
 void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
@@ -1956,444 +1933,11 @@ void dcn32_calculate_wm_and_dlg(struct dc *dc, struct 
dc_state *context,
 DC_FP_END();
 }
 
-static void remove_entry_from_table_at_index(struct 
_vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
-   unsigned int index)
-{
-   int i;
-
-   if (*num_entries == 0)
-   return;
-
-   for (i = index; i < *num_entries - 1; i++) {
-   table[i] = table[i + 1];
-   }
-   memset([--(*num_entries)], 0, sizeof(struct 
_vcs_dpi_voltage_scaling_st));
-}
-
-static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
-   struct _vcs_dpi_voltage_scaling_st *table, unsigned int 
*num_entries)
+static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params 
*bw_params)
 {
-   int i, j;
-   struct _vcs_dpi_voltage_scaling_st entry = {0};
-
-   unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 
0,
-   max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 
0, max_uclk_mhz = 0;
-
-   unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299;
-
-   static const unsigned int num_dcfclk_stas = 5;
-   unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 
1324, 1564};
-
-   unsigned int num_uclk_dpms = 0;
-   unsigned int num_fclk_dpms = 0;
-   unsigned int num_dcfclk_dpms = 0;
-
-   for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
-   if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
-   max_dcfclk_mhz = 
bw_params->clk_table.entries[i].dcfclk_mhz;
-   if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz)
-   max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
-   if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz)
-   max_uclk_mhz = 
bw_params->clk_table.entries[i].memclk_mhz;
-   if (bw_params->clk_table.entries[i].dispclk_mhz > 
max_dispclk_mhz)
-   max_dispclk_mhz = 
bw_params->clk_table.entries[i].dispclk_mhz;
-   if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
-   max_dppclk_mhz = 
bw_params->clk_table.entries[i].dppclk_mhz;
-   if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
-   max_phyclk_mhz = 
bw_params->clk_table.entries[i].phyclk_mhz;
-   if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz)
-   max_dtbclk_mhz = 
bw_params->clk_table.entries[i].dtbclk_mhz;
-
-   if (bw_params->clk_table.entries[i].memclk_mhz > 0)
-   num_uclk_dpms++;
-   if (bw_params->clk_table.entries[i].fclk_mhz > 0)
-   num_fclk_dpms++;
-   if 

[PATCH 29/31] drm/amd/display: Create dcn321_fpu file

2022-07-16 Thread Rodrigo Siqueira
The file dcn321_resource has a lot of FPU operations that should be
inside the dml folder. This commit introduces the dcn321_fpu file and
moves some of the FPU operation functions to this new file.

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
 .../amd/display/dc/dcn321/dcn321_resource.c   | 225 ++---
 .../amd/display/dc/dcn321/dcn321_resource.h   |   3 +
 drivers/gpu/drm/amd/display/dc/dml/Makefile   |   2 +
 .../amd/display/dc/dml/dcn321/dcn321_fpu.c| 238 ++
 .../amd/display/dc/dml/dcn321/dcn321_fpu.h|  36 +++
 5 files changed, 296 insertions(+), 208 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h

diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
index d218c6dd71aa..6619bcb30de7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
@@ -37,6 +37,8 @@
 #include "dcn20/dcn20_resource.h"
 #include "dcn30/dcn30_resource.h"
 
+#include "dml/dcn321/dcn321_fpu.h"
+
 #include "dcn10/dcn10_ipp.h"
 #include "dcn30/dcn30_hubbub.h"
 #include "dcn31/dcn31_hubbub.h"
@@ -120,134 +122,6 @@ static const struct IP_BASE DCN_BASE = { { { { 
0x0012, 0x00C0, 0x34C
 #define fixed16_to_double(x) (((double)x) / ((double) (1 << 16)))
 #define fixed16_to_double_to_cpu(x) fixed16_to_double(le32_to_cpu(x))
 
-#define DCN3_2_DEFAULT_DET_SIZE 256
-
-struct _vcs_dpi_ip_params_st dcn3_21_ip = {
-   .gpuvm_enable = 0,
-   .gpuvm_max_page_table_levels = 4,
-   .hostvm_enable = 0,
-   .rob_buffer_size_kbytes = 128,
-   .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE,
-   .config_return_buffer_size_in_kbytes = 1280,
-   .compressed_buffer_segment_size_in_kbytes = 64,
-   .meta_fifo_size_in_kentries = 22,
-   .zero_size_buffer_entries = 512,
-   .compbuf_reserved_space_64b = 256,
-   .compbuf_reserved_space_zs = 64,
-   .dpp_output_buffer_pixels = 2560,
-   .opp_output_buffer_lines = 1,
-   .pixel_chunk_size_kbytes = 8,
-   .alpha_pixel_chunk_size_kbytes = 4, // not appearing in spreadsheet, 
match c code from hw team
-   .min_pixel_chunk_size_bytes = 1024,
-   .dcc_meta_buffer_size_bytes = 6272,
-   .meta_chunk_size_kbytes = 2,
-   .min_meta_chunk_size_bytes = 256,
-   .writeback_chunk_size_kbytes = 8,
-   .ptoi_supported = false,
-   .num_dsc = 4,
-   .maximum_dsc_bits_per_component = 12,
-   .maximum_pixels_per_line_per_dsc_unit = 6016,
-   .dsc422_native_support = true,
-   .is_line_buffer_bpp_fixed = true,
-   .line_buffer_fixed_bpp = 57,
-   .line_buffer_size_bits = 1171920, //DPP doc, DCN3_2_DisplayMode_73.xlsm 
still shows as 986880 bits with 48 bpp
-   .max_line_buffer_lines = 32,
-   .writeback_interface_buffer_size_kbytes = 90,
-   .max_num_dpp = 4,
-   .max_num_otg = 4,
-   .max_num_hdmi_frl_outputs = 1,
-   .max_num_wb = 1,
-   .max_dchub_pscl_bw_pix_per_clk = 4,
-   .max_pscl_lb_bw_pix_per_clk = 2,
-   .max_lb_vscl_bw_pix_per_clk = 4,
-   .max_vscl_hscl_bw_pix_per_clk = 4,
-   .max_hscl_ratio = 6,
-   .max_vscl_ratio = 6,
-   .max_hscl_taps = 8,
-   .max_vscl_taps = 8,
-   .dpte_buffer_size_in_pte_reqs_luma = 64,
-   .dpte_buffer_size_in_pte_reqs_chroma = 34,
-   .dispclk_ramp_margin_percent = 1,
-   .max_inter_dcn_tile_repeaters = 8,
-   .cursor_buffer_size = 16,
-   .cursor_chunk_size = 2,
-   .writeback_line_buffer_buffer_size = 0,
-   .writeback_min_hscl_ratio = 1,
-   .writeback_min_vscl_ratio = 1,
-   .writeback_max_hscl_ratio = 1,
-   .writeback_max_vscl_ratio = 1,
-   .writeback_max_hscl_taps = 1,
-   .writeback_max_vscl_taps = 1,
-   .dppclk_delay_subtotal = 47,
-   .dppclk_delay_scl = 50,
-   .dppclk_delay_scl_lb_only = 16,
-   .dppclk_delay_cnvc_formatter = 28,
-   .dppclk_delay_cnvc_cursor = 6,
-   .dispclk_delay_subtotal = 125,
-   .dynamic_metadata_vm_enabled = false,
-   .odm_combine_4to1_supported = false,
-   .dcc_supported = true,
-   .max_num_dp2p0_outputs = 2,
-   .max_num_dp2p0_streams = 4,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = {
-   .clock_limits = {
-   {
-   .state = 0,
-   .dcfclk_mhz = 1564.0,
-   .fabricclk_mhz = 400.0,
-   .dispclk_mhz = 2150.0,
-   .dppclk_mhz = 2150.0,
-   .phyclk_mhz = 810.0,
-   .phyclk_d18_mhz = 667.0,
-   .phyclk_d32_mhz = 625.0,
-   .socclk_mhz = 1200.0,
-   .dscclk_mhz = 716.667,
-   .dram_speed_mts = 

[PATCH 20/31] drm/amd/display: Move predict pipe to dml fpu folder

2022-07-16 Thread Rodrigo Siqueira
The function dcn32_predict_pipe_split uses FPU operations. This commit
moves this function to the dcn32_fpu file, and we ensure that it is only
invoked under kernel FPU protection (DC_FP_START/DC_FP_END).

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c |  2 +
 .../drm/amd/display/dc/dcn32/dcn32_resource.h |  2 -
 .../display/dc/dcn32/dcn32_resource_helpers.c | 33 
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 39 ++-
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |  4 ++
 5 files changed, 44 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 411ce13847c2..a56d87140eba 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -3054,7 +3054,9 @@ int dcn32_populate_dml_pipes_from_context(
pipes[pipe_cnt].pipe.dest.odm_combine_policy = 
dm_odm_combine_policy_2to1;
}
 
+   DC_FP_START();
is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, 
pipes[i].pipe, i);
+   DC_FP_END();
 
pipe_cnt++;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
index 10254ab7e9d9..901aa7e13bd2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
@@ -100,8 +100,6 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
 bool dcn32_subvp_in_use(struct dc *dc,
struct dc_state *context);
 
-bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st 
pipe, int index);
-
 void dcn32_determine_det_override(struct dc_state *context, 
display_e2e_pipe_params_st *pipes,
bool *is_pipe_split_expected, int pipe_cnt);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index 633d3ee18cfa..796e3d966a76 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -153,39 +153,6 @@ bool dcn32_subvp_in_use(struct dc *dc,
return false;
 }
 
-bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st 
pipe, int index)
-{
-   double pscl_throughput, pscl_throughput_chroma, dpp_clk_single_dpp, 
clock,
-   clk_frequency = 0.0, vco_speed = 
context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz;
-
-   
dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe.scale_ratio_depth.hscl_ratio,
-   pipe.scale_ratio_depth.hscl_ratio_c,
-   pipe.scale_ratio_depth.vscl_ratio,
-   pipe.scale_ratio_depth.vscl_ratio_c,
-   context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk,
-   context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk,
-   pipe.dest.pixel_rate_mhz,
-   pipe.src.source_format,
-   pipe.scale_taps.htaps,
-   pipe.scale_taps.htaps_c,
-   pipe.scale_taps.vtaps,
-   pipe.scale_taps.vtaps_c,
-
-   /* Output */
-   &pscl_throughput, &pscl_throughput_chroma,
-   &dpp_clk_single_dpp);
-
-   clock = dpp_clk_single_dpp * (1 + 
context->bw_ctx.dml.soc.dcn_downspread_percent / 100);
-
-   if (clock > 0)
-   clk_frequency = vco_speed * 4.0 / ((int) (vco_speed * 4.0));
-
-   if (clk_frequency > 
context->bw_ctx.dml.soc.clock_limits[index].dppclk_mhz)
-   return true;
-   else
-   return false;
-}
-
 void dcn32_determine_det_override(struct dc_state *context, 
display_e2e_pipe_params_st *pipes,
bool *is_pipe_split_expected, int pipe_cnt)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 253ff9659b0d..1b9e34f1232a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -24,7 +24,7 @@
  *
  */
 #include "dcn32_fpu.h"
-
+#include "display_mode_vba_util_32.h"
 // We need this includes for WATERMARKS_* defines
 #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h"
 
@@ -154,3 +154,40 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc 
*dc,
}
 }
 
+bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st 
pipe, int index)
+{
+   double pscl_throughput;
+   double pscl_throughput_chroma;
+   double dpp_clk_single_dpp, clock;
+   double clk_frequency = 0.0;
+   double vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz;
+

[PATCH v9 10/14] lib: add support for device coherent type in test_hmm

2022-07-16 Thread Alex Sierra
The device coherent type uses device memory that is coherently accessible
by the CPU. This can show up as an SP (special purpose) memory range in
the BIOS-e820 memory enumeration. If no SP memory is supported by the
system, it can be faked by setting CONFIG_EFI_FAKE_MEMMAP.

Currently, test_hmm only supports two different SP ranges of at least
256MB size. This could be specified in the kernel parameter variable
efi_fake_mem. Ex. Two SP ranges of 1GB starting at 0x1 &
0x14000 physical address. Ex.
efi_fake_mem=1G@0x1:0x4,1G@0x14000:0x4

Private and coherent device mirror instances can be created in the same
probe. This is done by passing the module parameters spm_addr_dev0 &
spm_addr_dev1. In this case, four instances of device_mirror will be
created. The first two correspond to the private device type, the last
two to the coherent type. They can then be easily accessed from user
space through /dev/hmm_mirror. Usually num_device 0 and 1 are for
private, and 2 and 3 for coherent types. If no module parameters are
passed, only two instances of private type device_mirror will be
created.

Signed-off-by: Alex Sierra 
Acked-by: Felix Kuehling 
Reviewed-by: Alistair Poppple 
---
 lib/test_hmm.c  | 253 +---
 lib/test_hmm_uapi.h |   4 +
 2 files changed, 196 insertions(+), 61 deletions(-)

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index afb30af9f3ff..7930853e7fc5 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -32,11 +32,22 @@
 
 #include "test_hmm_uapi.h"
 
-#define DMIRROR_NDEVICES   2
+#define DMIRROR_NDEVICES   4
 #define DMIRROR_RANGE_FAULT_TIMEOUT1000
 #define DEVMEM_CHUNK_SIZE  (256 * 1024 * 1024U)
 #define DEVMEM_CHUNKS_RESERVE  16
 
+/*
+ * For device_private pages, dpage is just a dummy struct page
+ * representing a piece of device memory. dmirror_devmem_alloc_page
+ * allocates a real system memory page as backing storage to fake a
+ * real device. zone_device_data points to that backing page. But
+ * for device_coherent memory, the struct page represents real
+ * physical CPU-accessible memory that we can use directly.
+ */
+#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
+  (page)->zone_device_data : (page))
+
 static unsigned long spm_addr_dev0;
 module_param(spm_addr_dev0, long, 0644);
 MODULE_PARM_DESC(spm_addr_dev0,
@@ -125,6 +136,21 @@ static int dmirror_bounce_init(struct dmirror_bounce 
*bounce,
return 0;
 }
 
+static bool dmirror_is_private_zone(struct dmirror_device *mdevice)
+{
+   return (mdevice->zone_device_type ==
+   HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false;
+}
+
+static enum migrate_vma_direction
+dmirror_select_device(struct dmirror *dmirror)
+{
+   return (dmirror->mdevice->zone_device_type ==
+   HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
+   MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
+   MIGRATE_VMA_SELECT_DEVICE_COHERENT;
+}
+
 static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
 {
vfree(bounce->ptr);
@@ -575,16 +601,19 @@ static int dmirror_allocate_chunk(struct dmirror_device 
*mdevice,
 static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
 {
struct page *dpage = NULL;
-   struct page *rpage;
+   struct page *rpage = NULL;
 
/*
-* This is a fake device so we alloc real system memory to store
-* our device memory.
+* For ZONE_DEVICE private type, this is a fake device so we allocate
+* real system memory to store our device memory.
+* For ZONE_DEVICE coherent type we use the actual dpage to store the
+* data and ignore rpage.
 */
-   rpage = alloc_page(GFP_HIGHUSER);
-   if (!rpage)
-   return NULL;
-
+   if (dmirror_is_private_zone(mdevice)) {
+   rpage = alloc_page(GFP_HIGHUSER);
+   if (!rpage)
+   return NULL;
+   }
spin_lock(&mdevice->lock);
 
if (mdevice->free_pages) {
@@ -603,7 +632,8 @@ static struct page *dmirror_devmem_alloc_page(struct 
dmirror_device *mdevice)
return dpage;
 
 error:
-   __free_page(rpage);
+   if (rpage)
+   __free_page(rpage);
return NULL;
 }
 
@@ -629,12 +659,16 @@ static void dmirror_migrate_alloc_and_copy(struct 
migrate_vma *args,
 * unallocated pte_none() or read-only zero page.
 */
spage = migrate_pfn_to_page(*src);
+   if (WARN(spage && is_zone_device_page(spage),
+"page already in device spage pfn: 0x%lx\n",
+page_to_pfn(spage)))
+   continue;
 
dpage = dmirror_devmem_alloc_page(mdevice);
if (!dpage)
continue;
 
-   rpage = dpage->zone_device_data;
+   rpage = BACKING_PAGE(dpage);

[PATCH 09/31] drm/amd/display: Create a file dedicated for CRTC

2022-07-16 Thread Rodrigo Siqueira
[Why]
The amdgpu_dm file contains most of the code that works as an interface
between the DRM API and DC. As a result, this file has become very
large, since it comprises multiple abstractions such as CRTC
manipulation.

[How]
This commit extracts the CRTC code into its own file named
amdgpu_dm_crtc. This change does not modify anything inside the
functions; the only exception is converting some static functions to
global functions.

Reviewed-by: Harry Wentland 
Acked-by: Alan Liu 
Signed-off-by: Rodrigo Siqueira 
---
 .../gpu/drm/amd/display/amdgpu_dm/Makefile|   1 +
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 434 +---
 .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c| 463 ++
 .../amd/display/amdgpu_dm/amdgpu_dm_crtc.h|  51 ++
 4 files changed, 516 insertions(+), 433 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile 
b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index ec559ea902a3..90fb0f3cdb6f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -28,6 +28,7 @@
 AMDGPUDM = \
amdgpu_dm.o \
amdgpu_dm_plane.o \
+   amdgpu_dm_crtc.o \
amdgpu_dm_irq.o \
amdgpu_dm_mst_types.o \
amdgpu_dm_color.o
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index ceac70e93ece..bf01ed340ec3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -47,6 +47,7 @@
 #include "atom.h"
 #include "amdgpu_dm.h"
 #include "amdgpu_dm_plane.h"
+#include "amdgpu_dm_crtc.h"
 #ifdef CONFIG_DRM_AMD_DC_HDCP
 #include "amdgpu_dm_hdcp.h"
 #include 
@@ -204,9 +205,6 @@ static int amdgpu_dm_initialize_drm_device(struct 
amdgpu_device *adev);
 /* removes and deallocates the drm structures, created by the above function */
 static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm);
 
-static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
-  struct drm_plane *plane,
-  uint32_t link_index);
 static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
struct amdgpu_dm_connector 
*amdgpu_dm_connector,
uint32_t link_index,
@@ -335,20 +333,6 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev,
return NULL;
 }
 
-static inline bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc)
-{
-   return acrtc->dm_irq_params.freesync_config.state ==
-  VRR_STATE_ACTIVE_VARIABLE ||
-  acrtc->dm_irq_params.freesync_config.state ==
-  VRR_STATE_ACTIVE_FIXED;
-}
-
-static inline bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state)
-{
-   return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE ||
-  dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED;
-}
-
 static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state,
  struct dm_crtc_state *new_state)
 {
@@ -464,26 +448,6 @@ static void dm_pflip_high_irq(void *interrupt_params)
 vrr_active, (int) !e);
 }
 
-static void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc)
-{
-   struct drm_crtc *crtc = &acrtc->base;
-   struct drm_device *dev = crtc->dev;
-   unsigned long flags;
-
-   drm_crtc_handle_vblank(crtc);
-
-   spin_lock_irqsave(&dev->event_lock, flags);
-
-   /* Send completion event for cursor-only commits */
-   if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
-   drm_crtc_send_vblank_event(crtc, acrtc->event);
-   drm_crtc_vblank_put(crtc);
-   acrtc->event = NULL;
-   }
-
-   spin_unlock_irqrestore(&dev->event_lock, flags);
-}
-
 static void dm_vupdate_high_irq(void *interrupt_params)
 {
struct common_irq_params *irq_params = interrupt_params;
@@ -1261,52 +1225,6 @@ static void mmhub_read_system_context(struct 
amdgpu_device *adev, struct dc_phy_
 
 }
 
-static void vblank_control_worker(struct work_struct *work)
-{
-   struct vblank_control_work *vblank_work =
-   container_of(work, struct vblank_control_work, work);
-   struct amdgpu_display_manager *dm = vblank_work->dm;
-
-   mutex_lock(&dm->dc_lock);
-
-   if (vblank_work->enable)
-   dm->active_vblank_irq_count++;
-   else if(dm->active_vblank_irq_count)
-   dm->active_vblank_irq_count--;
-
-   dc_allow_idle_optimizations(dm->dc, dm->active_vblank_irq_count == 0);
-
-   DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", 
dm->active_vblank_irq_count == 0);
-
-   /*
-* Control PSR based on vblank 

[PATCH 17/31] drm/amd/display: Update DML logic for unbounded req handling

2022-07-16 Thread Rodrigo Siqueira
From: Jun Lei 

[why]
Unbounded request logic in resource/DML has some issues where unbounded
request is being enabled incorrectly. SW today enables unbounded request
unconditionally in hardware, on the assumption that HW can always
support it in single pipe scenarios.

This worked until now because the same assumption was made in DML. A new
DML update is needed to fix a bug in which there are single-pipe
scenarios where unbounded requesting cannot be enabled; this DML change
needs to be ported in and the dcn32 resource logic fixed accordingly.

[how]
First, dcn32_resource should program unbounded req in HW according to
unbounded req enablement output from DML, as opposed to DML input.

Second, port in the DML update which disables unbounded requesting in
some scenarios, fixing an issue with poor stutter performance.

Signed-off-by: Jun Lei 
Reviewed-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c | 11 +++-
 .../dc/dml/dcn32/display_mode_vba_32.c| 44 +---
 .../dc/dml/dcn32/display_mode_vba_util_32.c   | 51 ---
 .../dc/dml/dcn32/display_mode_vba_util_32.h   | 10 +++-
 .../drm/amd/display/dc/dml/display_mode_vba.c |  1 +
 5 files changed, 103 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 32da47e24839..39214a0dcdf2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -3322,6 +3322,7 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct 
dc_state *context, display
 {
int i, pipe_idx;
bool usr_retraining_support = false;
+   bool unbounded_req_enabled = false;
 
/* Writeback MCIF_WB arbitration parameters */
dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt);
@@ -3357,6 +3358,14 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct 
dc_state *context, display
if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
context->bw_ctx.bw.dcn.clk.dispclk_khz = 
dc->debug.min_disp_clk_khz;
 
+   unbounded_req_enabled = 
get_unbounded_request_enabled(>bw_ctx.dml, pipes, pipe_cnt);
+
+   if (unbounded_req_enabled && pipe_cnt > 1) {
+   // Unbounded requesting should not ever be used when more than 
1 pipe is enabled.
+   ASSERT(false);
+   unbounded_req_enabled = false;
+   }
+
for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
if (!context->res_ctx.pipe_ctx[i].stream)
continue;
@@ -3375,7 +3384,7 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct 
dc_state *context, display
} else {
context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 
get_det_buffer_size_kbytes(>bw_ctx.dml, pipes, pipe_cnt,
pipe_idx);
-   context->res_ctx.pipe_ctx[i].unbounded_req = 
pipes[pipe_idx].pipe.src.unbounded_req_mode;
+   context->res_ctx.pipe_ctx[i].unbounded_req = 
unbounded_req_enabled;
}
if (context->bw_ctx.bw.dcn.clk.dppclk_khz < 
pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
context->bw_ctx.bw.dcn.clk.dppclk_khz = 
pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 1712843dafaa..092782b6e341 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -226,6 +226,9 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
mode_lib->vba.NumberOfActiveSurfaces,
mode_lib->vba.nomDETInKByte,
mode_lib->vba.UseUnboundedRequesting,
+   
mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+   mode_lib->vba.ip.pixel_chunk_size_kbytes,
+   mode_lib->vba.ip.rob_buffer_size_kbytes,

mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal,
v->dummy_vars

.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
@@ -287,6 +290,10 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
mode_lib->vba.DETBufferSizeC,
>UnboundedRequestEnabled,
>CompressedBufferSizeInkByte,
+   >CompBufReservedSpaceKBytes,
+   >dummy_vars
+   

[PATCH 02/31] drm/amd/display: Remove unused variable

2022-07-16 Thread Rodrigo Siqueira
From: Jun Lei 

Remove an unused variable "remove_disconnect_edp" which was a workaround
bit.

Acked-by: Alan Liu 
Signed-off-by: Jun Lei 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 7c42377f0aae..be41f9fcf1dd 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -683,7 +683,6 @@ struct dc_debug_options {
bool hdmi20_disable;
bool skip_detection_link_training;
uint32_t edid_read_retry_times;
-   bool remove_disconnect_edp;
unsigned int force_odm_combine; //bit vector based on otg inst
unsigned int seamless_boot_odm_combine;
unsigned int force_odm_combine_4to1; //bit vector based on otg inst
-- 
2.37.0



[PATCH 15/31] drm/amd/display: Loop through all pipes for DET allocation

2022-07-16 Thread Rodrigo Siqueira
From: Taimur Hassan 

[Why & How]
There are cases where the populated pipes are not all at the top of the
pipes list under the context. Loop through all pipes for DET allocation
instead of just the number of populated ones, even if some unpopulated
pipes are iterated through unnecessarily.
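
The looping pattern this boils down to is simply the following (an
illustrative sketch, not the exact code inside dcn32_determine_det_override):

	for (i = 0; i < dc->res_pool->pipe_count; i++) {
		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];

		/* Unpopulated pipes have no stream; just skip them. */
		if (!pipe_ctx->stream)
			continue;

		/* assign a DET segment to this pipe ... */
	}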

Reviewed-by: Alvin Lee 
Acked-by: Alan Liu 
Signed-off-by: Taimur Hassan 
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 0cb44ea9753b..32da47e24839 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -3068,7 +3068,7 @@ int dcn32_populate_dml_pipes_from_context(
}
}
} else
-   dcn32_determine_det_override(context, pipes, 
is_pipe_split_expected, pipe_cnt);
+   dcn32_determine_det_override(context, pipes, 
is_pipe_split_expected, dc->res_pool->pipe_count);
 
// In general cases we want to keep the dram clock change requirement
// (prefer configs that support MCLK switch). Only override to false
-- 
2.37.0



[PATCH 28/31] drm/amd/display: Drop FPU flags from dcn32 Makefile

2022-07-16 Thread Rodrigo Siqueira
This is the final commit of the FPU isolation work for DCN32, so we can
finally remove the FPU-related compiler flags from this Makefile.
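
With the per-file flags gone, any remaining FPU use in DCN32 is expected
to live in the dc/dml FPU files and be bracketed by the kernel-FPU
helpers at the call site, roughly like this (a sketch; the callee name
is illustrative):

	/* caller in dcn32_resource.c, built without special CFLAGS */
	DC_FP_START();
	dcn32_some_fpu_helper(dc, context, pipes, pipe_cnt);	/* lives in dc/dml/dcn32/dcn32_fpu.c */
	DC_FP_END();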

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/dcn32/Makefile | 28 ---
 1 file changed, 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
index 932d85fa4262..e943b643ab6b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
@@ -15,34 +15,6 @@ DCN32 = dcn32_resource.o dcn32_hubbub.o dcn32_hwseq.o 
dcn32_init.o \
dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o 
dcn32_hpo_dp_link_encoder.o \
dcn32_resource_helpers.o dcn32_mpc.o
 
-ifdef CONFIG_X86
-dcn32_ccflags := -mhard-float -msse
-endif
-
-ifdef CONFIG_PPC64
-dcn32_ccflags := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-dcn32_ccflags += -mpreferred-stack-boundary=4
-else
-dcn32_ccflags += -msse2
-endif
-endif
-
-CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource_helpers.o := $(dcn32_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource.o := $(dcn32_ccflags)
-
 AMD_DAL_DCN32 = $(addprefix $(AMDDALPATH)/dc/dcn32/,$(DCN32))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN32)
-- 
2.37.0



[PATCH v9 03/14] mm: add zone device coherent type memory support

2022-07-16 Thread Alex Sierra
Device memory that is cache coherent from device and CPU point of view.
This is used on platforms that have an advanced system bus (like CAPI
or CXL). Any page of a process can be migrated to such memory. However,
no one should be allowed to pin such memory so that it can always be
evicted.
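
As an illustration of how a driver is expected to use the new type,
hotplugging such memory boils down to a dev_pagemap registration; a
minimal sketch, with hypothetical names and a placeholder page_free
callback:

#include <linux/memremap.h>
#include <linux/err.h>

static void example_page_free(struct page *page)
{
	/* hand the page back to the device's own allocator */
}

static const struct dev_pagemap_ops example_pgmap_ops = {
	.page_free = example_page_free,
};

static struct dev_pagemap example_pgmap;

static int example_add_coherent_memory(u64 base, u64 size)
{
	void *addr;

	example_pgmap.type = MEMORY_DEVICE_COHERENT;
	example_pgmap.range.start = base;
	example_pgmap.range.end = base + size - 1;
	example_pgmap.nr_range = 1;
	example_pgmap.ops = &example_pgmap_ops;
	example_pgmap.owner = &example_pgmap;	/* matched against pgmap_owner on migration */

	addr = memremap_pages(&example_pgmap, NUMA_NO_NODE);
	return IS_ERR(addr) ? PTR_ERR(addr) : 0;
}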

Signed-off-by: Alex Sierra 
Acked-by: Felix Kuehling 
Reviewed-by: Alistair Popple 
[hch: rebased on top of the refcount changes,
  removed is_dev_private_or_coherent_page]
Signed-off-by: Christoph Hellwig 
Acked-by: David Hildenbrand 
---
 include/linux/memremap.h | 19 +++
 include/linux/mm.h   |  5 -
 mm/memcontrol.c  |  7 ---
 mm/memory-failure.c  |  8 ++--
 mm/memremap.c| 10 ++
 mm/migrate_device.c  | 16 +++-
 mm/rmap.c|  5 +++--
 7 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 77229165c914..f27b142fd3d0 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -41,6 +41,13 @@ struct vmem_altmap {
  * A more complete discussion of unaddressable memory may be found in
  * include/linux/hmm.h and Documentation/vm/hmm.rst.
  *
+ * MEMORY_DEVICE_COHERENT:
+ * Device memory that is cache coherent from device and CPU point of view. This
+ * is used on platforms that have an advanced system bus (like CAPI or CXL). A
+ * driver can hotplug the device memory using ZONE_DEVICE and with that memory
+ * type. Any page of a process can be migrated to such memory. However no one
+ * should be allowed to pin such memory so that it can always be evicted.
+ *
  * MEMORY_DEVICE_FS_DAX:
  * Host memory that has similar access semantics as System RAM i.e. DMA
  * coherent and supports page pinning. In support of coordinating page
@@ -61,6 +68,7 @@ struct vmem_altmap {
 enum memory_type {
/* 0 is reserved to catch uninitialized type fields */
MEMORY_DEVICE_PRIVATE = 1,
+   MEMORY_DEVICE_COHERENT,
MEMORY_DEVICE_FS_DAX,
MEMORY_DEVICE_GENERIC,
MEMORY_DEVICE_PCI_P2PDMA,
@@ -150,6 +158,17 @@ static inline bool is_pci_p2pdma_page(const struct page 
*page)
page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
 }
 
+static inline bool is_device_coherent_page(const struct page *page)
+{
+   return is_zone_device_page(page) &&
+   page->pgmap->type == MEMORY_DEVICE_COHERENT;
+}
+
+static inline bool folio_is_device_coherent(const struct folio *folio)
+{
+   return is_device_coherent_page(>page);
+}
+
 #ifdef CONFIG_ZONE_DEVICE
 void *memremap_pages(struct dev_pagemap *pgmap, int nid);
 void memunmap_pages(struct dev_pagemap *pgmap);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2df8c2b98d36..3ed101dfbfab 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct mempolicy;
 struct anon_vma;
@@ -1522,7 +1523,9 @@ static inline bool is_longterm_pinnable_page(struct page 
*page)
if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
return false;
 #endif
-   return !is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page));
+   return !(is_device_coherent_page(page) ||
+is_zone_movable_page(page) ||
+is_zero_pfn(page_to_pfn(page)));
 }
 #else
 static inline bool is_longterm_pinnable_page(struct page *page)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 618c366a2f07..5d37a85c67da 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5665,8 +5665,8 @@ static int mem_cgroup_move_account(struct page *page,
  *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
  * target for charge migration. if @target is not NULL, the entry is stored
  * in target->ent.
- *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE  but page is 
MEMORY_DEVICE_PRIVATE
- * (so ZONE_DEVICE page and thus not on the lru).
+ *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE  but page is device memory and
+ *   thus not on the lru.
  * For now we such page is charge like a regular page would be as for all
  * intent and purposes it is just special memory taking the place of a
  * regular page.
@@ -5704,7 +5704,8 @@ static enum mc_target_type get_mctgt_type(struct 
vm_area_struct *vma,
 */
if (page_memcg(page) == mc.from) {
ret = MC_TARGET_PAGE;
-   if (is_device_private_page(page))
+   if (is_device_private_page(page) ||
+   is_device_coherent_page(page))
ret = MC_TARGET_DEVICE;
if (target)
target->page = page;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index da39ec8afca8..79f175eeb190 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1685,12 +1685,16 @@ static int memory_failure_dev_pagemap(unsigned long 
pfn, int 

[PATCH 04/31] drm/amd/display: Expose function reset_cur_dp_mst_topology

2022-07-16 Thread Rodrigo Siqueira
From: Wayne Lin 

[Why & How]
Need to leverage this function out of dc_link.c. Change it to public.

Reviewed-by: Hersen Wu 
Acked-by: Alan Liu 
Signed-off-by: Wayne Lin 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +-
 drivers/gpu/drm/amd/display/dc/dc_link.h  | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 858ee51f930a..ef54b96affa8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -845,7 +845,7 @@ static bool discover_dp_mst_topology(struct dc_link *link, 
enum dc_detect_reason
return link->type == dc_connection_mst_branch;
 }
 
-static bool reset_cur_dp_mst_topology(struct dc_link *link)
+bool reset_cur_dp_mst_topology(struct dc_link *link)
 {
bool result = false;
DC_LOGGER_INIT(link->ctx->logger);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h 
b/drivers/gpu/drm/amd/display/dc/dc_link.h
index 023774b94da3..a0af0f6afeef 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -514,4 +514,7 @@ bool dc_dp_trace_is_logged(struct dc_link *link,
 struct dp_trace_lt_counts *dc_dp_trace_get_lt_counts(struct dc_link *link,
bool in_detection);
 unsigned int dc_dp_trace_get_link_loss_count(struct dc_link *link);
+
+/* Destruct the mst topology of the link and reset the allocated payload table 
*/
+bool reset_cur_dp_mst_topology(struct dc_link *link);
 #endif /* DC_LINK_H_ */
-- 
2.37.0



[PATCH 25/31] drm/amd/display: Move dlg params calculation

2022-07-16 Thread Rodrigo Siqueira
Move dlg params calculation to the FPU folder and make it static.

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c | 485 +
 .../drm/amd/display/dc/dcn32/dcn32_resource.h |   6 -
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 506 +-
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |  13 +-
 4 files changed, 513 insertions(+), 497 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 45768eff9315..32edb3e5715a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1753,368 +1753,6 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct 
dc_state *context,
}
 }
 
-static bool dcn32_split_stream_for_mpc_or_odm(
-   const struct dc *dc,
-   struct resource_context *res_ctx,
-   struct pipe_ctx *pri_pipe,
-   struct pipe_ctx *sec_pipe,
-   bool odm)
-{
-   int pipe_idx = sec_pipe->pipe_idx;
-   const struct resource_pool *pool = dc->res_pool;
-
-   if (pri_pipe->plane_state) {
-   /* ODM + window MPO, where MPO window is on left half only */
-   if (pri_pipe->plane_state->clip_rect.x + 
pri_pipe->plane_state->clip_rect.width <=
-   pri_pipe->stream->src.x + 
pri_pipe->stream->src.width/2)
-   return true;
-
-   /* ODM + window MPO, where MPO window is on right half only */
-   if (pri_pipe->plane_state->clip_rect.x >= 
pri_pipe->stream->src.width/2)
-   return true;
-   }
-
-   *sec_pipe = *pri_pipe;
-
-   sec_pipe->pipe_idx = pipe_idx;
-   sec_pipe->plane_res.mi = pool->mis[pipe_idx];
-   sec_pipe->plane_res.hubp = pool->hubps[pipe_idx];
-   sec_pipe->plane_res.ipp = pool->ipps[pipe_idx];
-   sec_pipe->plane_res.xfm = pool->transforms[pipe_idx];
-   sec_pipe->plane_res.dpp = pool->dpps[pipe_idx];
-   sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst;
-   sec_pipe->stream_res.dsc = NULL;
-   if (odm) {
-   if (pri_pipe->next_odm_pipe) {
-   ASSERT(pri_pipe->next_odm_pipe != sec_pipe);
-   sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe;
-   sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe;
-   }
-   if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) {
-   pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = 
sec_pipe;
-   sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe;
-   }
-   if (pri_pipe->bottom_pipe && 
pri_pipe->bottom_pipe->next_odm_pipe) {
-   pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = 
sec_pipe;
-   sec_pipe->bottom_pipe = 
pri_pipe->bottom_pipe->next_odm_pipe;
-   }
-   pri_pipe->next_odm_pipe = sec_pipe;
-   sec_pipe->prev_odm_pipe = pri_pipe;
-   ASSERT(sec_pipe->top_pipe == NULL);
-
-   if (!sec_pipe->top_pipe)
-   sec_pipe->stream_res.opp = pool->opps[pipe_idx];
-   else
-   sec_pipe->stream_res.opp = 
sec_pipe->top_pipe->stream_res.opp;
-   if (sec_pipe->stream->timing.flags.DSC == 1) {
-   dcn20_acquire_dsc(dc, res_ctx, 
_pipe->stream_res.dsc, pipe_idx);
-   ASSERT(sec_pipe->stream_res.dsc);
-   if (sec_pipe->stream_res.dsc == NULL)
-   return false;
-   }
-   } else {
-   if (pri_pipe->bottom_pipe) {
-   ASSERT(pri_pipe->bottom_pipe != sec_pipe);
-   sec_pipe->bottom_pipe = pri_pipe->bottom_pipe;
-   sec_pipe->bottom_pipe->top_pipe = sec_pipe;
-   }
-   pri_pipe->bottom_pipe = sec_pipe;
-   sec_pipe->top_pipe = pri_pipe;
-
-   ASSERT(pri_pipe->plane_state);
-   }
-
-   return true;
-}
-
-static struct pipe_ctx *dcn32_find_split_pipe(
-   struct dc *dc,
-   struct dc_state *context,
-   int old_index)
-{
-   struct pipe_ctx *pipe = NULL;
-   int i;
-
-   if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == 
NULL) {
-   pipe = >res_ctx.pipe_ctx[old_index];
-   pipe->pipe_idx = old_index;
-   }
-
-   if (!pipe)
-   for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) {
-   if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == 
NULL
-   && 
dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) {
-   if 

[PATCH 08/31] drm/amd/display: Create a file dedicated to planes

2022-07-16 Thread Rodrigo Siqueira
[Why]
The amdgpu_dm file contains most of the code that works as an interface
between the DRM API and DC. As a result, this file has become very large,
since it comprises multiple abstractions such as plane manipulation.

[How]
This commit extracts the plane code into its own file named
amdgpu_dm_plane. It does not change anything inside the
functions; the only exception is converting some static functions into
global functions.

Reviewed-by: Harry Wentland 
Acked-by: Alan Liu 
Signed-off-by: Rodrigo Siqueira 
---
 .../gpu/drm/amd/display/amdgpu_dm/Makefile|7 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2135 +++--
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 1637 +
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.h   |   73 +
 4 files changed, 2057 insertions(+), 1795 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile 
b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index 718e123a3230..ec559ea902a3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -25,7 +25,12 @@
 
 
 
-AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o amdgpu_dm_color.o
+AMDGPUDM = \
+   amdgpu_dm.o \
+   amdgpu_dm_plane.o \
+   amdgpu_dm_irq.o \
+   amdgpu_dm_mst_types.o \
+   amdgpu_dm_color.o
 
 ifdef CONFIG_DRM_AMD_DC_DCN
 AMDGPUDM += dc_fpu.o
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index dae998e014b0..ceac70e93ece 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -46,6 +46,7 @@
 #include "amdgpu_ucode.h"
 #include "atom.h"
 #include "amdgpu_dm.h"
+#include "amdgpu_dm_plane.h"
 #ifdef CONFIG_DRM_AMD_DC_HDCP
 #include "amdgpu_dm_hdcp.h"
 #include 
@@ -203,10 +204,6 @@ static int amdgpu_dm_initialize_drm_device(struct 
amdgpu_device *adev);
 /* removes and deallocates the drm structures, created by the above function */
 static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm);
 
-static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
-   struct drm_plane *plane,
-   unsigned long possible_crtcs,
-   const struct dc_plane_cap *plane_cap);
 static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
   struct drm_plane *plane,
   uint32_t link_index);
@@ -225,12 +222,6 @@ static void amdgpu_dm_atomic_commit_tail(struct 
drm_atomic_state *state);
 static int amdgpu_dm_atomic_check(struct drm_device *dev,
  struct drm_atomic_state *state);
 
-static void handle_cursor_update(struct drm_plane *plane,
-struct drm_plane_state *old_plane_state);
-
-static const struct drm_format_info *
-amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd);
-
 static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector);
 static void handle_hpd_rx_irq(void *param);
 
@@ -4315,11 +4306,11 @@ static int amdgpu_dm_initialize_drm_device(struct 
amdgpu_device *adev)
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
-   case IP_VERSION(3, 1, 4):
case IP_VERSION(3, 1, 5):
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
+   case IP_VERSION(3, 1, 4):
case IP_VERSION(2, 1, 0):
if (register_outbox_irq_handlers(dm->adev)) {
DRM_ERROR("DM: Failed to initialize IRQ\n");
@@ -4707,1104 +4698,222 @@ static const struct drm_encoder_funcs 
amdgpu_dm_encoder_funcs = {
.destroy = amdgpu_dm_encoder_destroy,
 };
 
-
-static void get_min_max_dc_plane_scaling(struct drm_device *dev,
-struct drm_framebuffer *fb,
-int *min_downscale, int *max_upscale)
-{
-   struct amdgpu_device *adev = drm_to_adev(dev);
-   struct dc *dc = adev->dm.dc;
-   /* Caps for all supported planes are the same on DCE and DCN 1 - 3 */
-   struct dc_plane_cap *plane_cap = >caps.planes[0];
-
-   switch (fb->format->format) {
-   case DRM_FORMAT_P010:
-   case DRM_FORMAT_NV12:
-   case DRM_FORMAT_NV21:
-   *max_upscale = plane_cap->max_upscale_factor.nv12;
-   *min_downscale = plane_cap->max_downscale_factor.nv12;
-   break;
-
-   case DRM_FORMAT_XRGB16161616F:
-   case DRM_FORMAT_ARGB16161616F:
-   case DRM_FORMAT_XBGR16161616F:
-   case DRM_FORMAT_ABGR16161616F:
-   *max_upscale = plane_cap->max_upscale_factor.fp16;
-   *min_downscale = 

[PATCH v3] drm/amdgpu: add comments to HW_IP_VCN_ENC

2022-07-16 Thread Ruijing Dong
From VCN4 onwards, HW_IP_VCN_ENC will be used as a unified queue
and will support both encoding and decoding jobs; HW_IP_VCN_DEC
is retired from VCN4.
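
In practice this means that, from VCN4 on, user space submits decode IBs
on the ENC ring type as well; with libdrm that looks roughly like this
(a hedged example: buffer-object setup, dependencies and error handling
are elided, and the function name is illustrative):

#include <amdgpu.h>
#include <amdgpu_drm.h>

/* Submit an already-built VCN decode IB on the unified (ENC) queue. */
static int submit_vcn_decode_ib(amdgpu_context_handle ctx,
				uint64_t ib_mc_address, uint32_t ib_size_dw)
{
	struct amdgpu_cs_ib_info ib = {
		.ib_mc_address = ib_mc_address,
		.size = ib_size_dw,
	};
	struct amdgpu_cs_request req = {
		.ip_type = AMDGPU_HW_IP_VCN_ENC,	/* unified queue on VCN4+ */
		.ring = 0,
		.number_of_ibs = 1,
		.ibs = &ib,
	};

	return amdgpu_cs_submit(ctx, 0, &req, 1);
}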

link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits

Signed-off-by: Ruijing Dong 
---
 include/uapi/drm/amdgpu_drm.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 18d3246d636e..29e4a1ece2ce 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -559,7 +559,14 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_HW_IP_VCE  4
 #define AMDGPU_HW_IP_UVD_ENC  5
 #define AMDGPU_HW_IP_VCN_DEC  6
+
+/**
+ * From VCN4, AMDGPU_HW_IP_VCN_ENC will be used as unified queue
+ * and support both encoding and decoding jobs, AMDGPU_HW_IP_VCN_DEC
+ * is retired from VCN4.
+ */
 #define AMDGPU_HW_IP_VCN_ENC  7
+
 #define AMDGPU_HW_IP_VCN_JPEG 8
 #define AMDGPU_HW_IP_NUM  9
 
-- 
2.25.1



Re: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails

2022-07-16 Thread Andrey Grodzovsky



On 2022-07-15 05:28, Zhu, Jiadong wrote:

[AMD Official Use Only - General]

Updated some comments

-Original Message-
From: Zhu, Jiadong
Sent: Friday, July 15, 2022 5:13 PM
To: Christian König ; 
amd-gfx@lists.freedesktop.org; Grodzovsky, Andrey 
Cc: Huang, Ray ; Liu, Aaron 
Subject: RE: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails

Hi Christian,

The resubmitted job in function amdgpu_ib_preempt_job_recovery returns the same 
hw fence because of this commit:

static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched) {
 struct drm_sched_job *s_job;
 struct dma_fence *fence;

        spin_lock(&sched->job_list_lock);
        list_for_each_entry(s_job, &sched->pending_list, list) {
                fence = sched->ops->run_job(s_job);   //fence returned has the same address with swapped fences
                dma_fence_put(fence);
        }
        spin_unlock(&sched->job_list_lock);
}



commit c530b02f39850a639b72d01ebbf7e5d745c60831
Author: Jack Zhang 
Date:   Wed May 12 15:06:35 2021 +0800

 drm/amd/amdgpu embed hw_fence into amdgpu_job

 Why: Previously hw fence is alloced separately with job.
 It caused historical lifetime issues and corner cases.
 The ideal situation is to take fence to manage both job
 and fence's lifetime, and simplify the design of gpu-scheduler.

 How:
 We propose to embed hw_fence into amdgpu_job.
 1. We cover the normal job submission by this method.
 2. For ib_test, and submit without a parent job keep the
 legacy way to create a hw fence separately.
 v2:
 use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is
 embedded in a job.
 v3:
 remove redundant variable ring in amdgpu_job
 v4:
 add tdr sequence support for this feature. Add a job_run_counter to
 indicate whether this job is a resubmit job.
 v5
 add missing handling in amdgpu_fence_enable_signaling

 Signed-off-by: Jingwen Chen 
 Signed-off-by: Jack Zhang 
 Reviewed-by: Andrey Grodzovsky 
 Reviewed by: Monk Liu 
 Signed-off-by: Alex Deucher 


Thus the fence we swapped out is signaled and put twice in the following 2 functions, and
we get "refcount_t: underflow; use-after-free." errors later.

 /* wait for jobs finished */
 amdgpu_fence_wait_empty(ring); //wait on the resubmitted fence 
which is signaled and put somewhere else. The refcount decreased by 1 after 
amdgpu_fence_wait_empty.

 /* signal the old fences */
 amdgpu_ib_preempt_signal_fences(fences, length);   //signal 
and put the previous swapped fence, signal would return -22.

Thanks,
Jiadong



Did you have the commit 'drm/amdgpu: Follow up change to previous drm
scheduler change.' in your branch when you encountered this problem? I
don't see an underflow issue for the preempted job when inspecting the
code with this commit in mind -

amdgpu_fence_emit
    dma_fence_init 1
    dma_fence_get(fence) 2
    rcu_assign_pointer(*ptr, dma_fence_get(fence)); 3

drm_sched_main
    s_fence->parent = dma_fence_get(fence); 4
    dma_fence_put(fence); 3

amdgpu_ib_preempt_job_recovery
    amdgpu_fence_emit
        if (job && job->job_run_counter) -> dma_fence_get(fence); 4
        rcu_assign_pointer(*ptr, dma_fence_get(fence)); 5

    dma_fence_put(fence); 4

amdgpu_fence_wait_empty
    dma_fence_get_rcu(fence) 5
    dma_fence_put(fence) 4

amdgpu_process_fence (EOP interrupt for re-submission of preempted job)
    dma_fence_put 3

amdgpu_ib_preempt_signal_fences
    dma_fence_put 2

amdgpu_job_free_cb
    dma_fence_put(&job->hw_fence) 1

drm_sched_fence_release_scheduled
    dma_fence_put(fence->parent); 0

Also take a look here for reference - 
https://drive.google.com/file/d/1yEoeW6OQC9WnwmzFW6NBLhFP_jD0xcHm/view


Andrey





Andrey





-Original Message-
From: Christian König 
Sent: Friday, July 15, 2022 4:48 PM
To: Zhu, Jiadong ; amd-gfx@lists.freedesktop.org; Grodzovsky, 
Andrey 
Cc: Huang, Ray ; Liu, Aaron 
Subject: Re: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails

[CAUTION: External Email]

On 15.07.22 at 10:43, jiadong@amd.com wrote:

From: "Jiadong.Zhu" 

Dma_fence_signal returning non-zero indicates that the fence is
signaled and put somewhere else.
Skip dma_fence_put to make the fence refcount correct.

Well quite a big NAK on this.

Reference counting should be completely independent where a fence signals.

Andrey can you take a look at this as well?

Thanks,
Christian.


Signed-off-by: Jiadong.Zhu 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
   1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index f4ed0785d523..93c1a5e83835 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1500,8 +1500,8 @@ static void 

[PATCH 01/31] drm/amd/display: Support vertical interrupt 0 for all dcn ASIC

2022-07-16 Thread Rodrigo Siqueira
From: Wayne Lin 

[Why]
When CONFIG_DRM_AMD_SECURE_DISPLAY is enabled, it will try
to register vertical interrupt 0 for a specific task.

Currently, only dcn10 has defined the relevant info for vertical
interrupt 0. If we enable CONFIG_DRM_AMD_SECURE_DISPLAY for other dcn
ASICs, we will get DC_IRQ_SOURCE_INVALID when calling
dc_interrupt_to_irq_source(), causing pointer errors.

[How]
Add support for vertical interrupt 0 on all dcn ASICs.

Acked-by: Alan Liu 
Signed-off-by: Wayne Lin 
---
 .../display/dc/irq/dcn30/irq_service_dcn30.c  | 14 +++---
 .../dc/irq/dcn303/irq_service_dcn303.c| 19 +++
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c 
b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
index 146cd1819912..2aa74ee1502a 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
@@ -289,6 +289,13 @@ static const struct irq_source_info_funcs 
vline0_irq_info_funcs = {
.funcs = _irq_info_funcs\
}
 
+#define dmub_trace_int_entry()\
+   [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\
+   IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, 
DMCUB_OUTBOX0_READY_INT_EN,\
+   DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\
+   .funcs = _trace_irq_info_funcs\
+   }
+
 #define vline0_int_entry(reg_num)\
[DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\
IRQ_REG_ENTRY(OTG, reg_num,\
@@ -297,13 +304,6 @@ static const struct irq_source_info_funcs 
vline0_irq_info_funcs = {
.funcs = _irq_info_funcs\
}
 
-#define dmub_trace_int_entry()\
-   [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\
-   IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, 
DMCUB_OUTBOX0_READY_INT_EN,\
-   DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\
-   .funcs = _trace_irq_info_funcs\
-   }
-
 #define dummy_irq_entry() \
{\
.funcs = _irq_info_funcs\
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c 
b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c
index 66e60762388e..1d149d290147 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c
@@ -24,6 +24,10 @@ static enum dc_irq_source to_dal_irq_source_dcn303(struct 
irq_service *irq_servi
return DC_IRQ_SOURCE_VBLANK1;
case DCN_1_0__SRCID__DC_D2_OTG_VSTARTUP:
return DC_IRQ_SOURCE_VBLANK2;
+   case DCN_1_0__SRCID__OTG1_VERTICAL_INTERRUPT0_CONTROL:
+   return DC_IRQ_SOURCE_DC1_VLINE0;
+   case DCN_1_0__SRCID__OTG2_VERTICAL_INTERRUPT0_CONTROL:
+   return DC_IRQ_SOURCE_DC2_VLINE0;
case DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT:
return DC_IRQ_SOURCE_PFLIP1;
case DCN_1_0__SRCID__HUBP1_FLIP_INTERRUPT:
@@ -96,6 +100,11 @@ static const struct irq_source_info_funcs 
vblank_irq_info_funcs = {
.ack = NULL
 };
 
+static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+   .set = NULL,
+   .ack = NULL
+};
+
 #undef BASE_INNER
 #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
 
@@ -164,6 +173,14 @@ static const struct irq_source_info_funcs 
vblank_irq_info_funcs = {
.funcs = _irq_info_funcs\
}
 
+#define vline0_int_entry(reg_num)\
+   [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\
+   IRQ_REG_ENTRY(OTG, reg_num,\
+   OTG_VERTICAL_INTERRUPT0_CONTROL, 
OTG_VERTICAL_INTERRUPT0_INT_ENABLE,\
+   OTG_VERTICAL_INTERRUPT0_CONTROL, 
OTG_VERTICAL_INTERRUPT0_CLEAR),\
+   .funcs = _irq_info_funcs\
+   }
+
 #define dummy_irq_entry() { .funcs = _irq_info_funcs }
 
 #define i2c_int_entry(reg_num) \
@@ -236,6 +253,8 @@ static const struct irq_source_info 
irq_source_info_dcn303[DAL_IRQ_SOURCES_NUMBE
vupdate_no_lock_int_entry(1),
vblank_int_entry(0),
vblank_int_entry(1),
+   vline0_int_entry(0),
+   vline0_int_entry(1),
 };
 
 static const struct irq_service_funcs irq_service_funcs_dcn303 = {
-- 
2.37.0



[PATCH 21/31] drm/amd/display: Move insert entry table to the FPU code

2022-07-16 Thread Rodrigo Siqueira
The insert_entry_into_table_sorted function uses FPU operations and calls
other supporting static functions. This commit moves the insert entry
function, together with all the required structs and static functions, to
the FPU file.

Reviewed-by: Harry Wentland 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Rodrigo Siqueira 
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c | 188 +-
 .../drm/amd/display/dc/dcn32/dcn32_resource.h |   3 +
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 186 +
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |   9 +
 4 files changed, 208 insertions(+), 178 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a56d87140eba..1c124231b00a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -115,137 +115,6 @@ static const struct IP_BASE DCN_BASE = { { { { 
0x0012, 0x00C0, 0x34C
 
 #define DC_LOGGER_INIT(logger)
 
-#define DCN3_2_DEFAULT_DET_SIZE 256
-#define DCN3_2_MAX_DET_SIZE 1152
-#define DCN3_2_MIN_DET_SIZE 128
-#define DCN3_2_MIN_COMPBUF_SIZE_KB 128
-
-struct _vcs_dpi_ip_params_st dcn3_2_ip = {
-   .gpuvm_enable = 0,
-   .gpuvm_max_page_table_levels = 4,
-   .hostvm_enable = 0,
-   .rob_buffer_size_kbytes = 128,
-   .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE,
-   .config_return_buffer_size_in_kbytes = 1280,
-   .compressed_buffer_segment_size_in_kbytes = 64,
-   .meta_fifo_size_in_kentries = 22,
-   .zero_size_buffer_entries = 512,
-   .compbuf_reserved_space_64b = 256,
-   .compbuf_reserved_space_zs = 64,
-   .dpp_output_buffer_pixels = 2560,
-   .opp_output_buffer_lines = 1,
-   .pixel_chunk_size_kbytes = 8,
-   .alpha_pixel_chunk_size_kbytes = 4, // not appearing in spreadsheet, 
match c code from hw team
-   .min_pixel_chunk_size_bytes = 1024,
-   .dcc_meta_buffer_size_bytes = 6272,
-   .meta_chunk_size_kbytes = 2,
-   .min_meta_chunk_size_bytes = 256,
-   .writeback_chunk_size_kbytes = 8,
-   .ptoi_supported = false,
-   .num_dsc = 4,
-   .maximum_dsc_bits_per_component = 12,
-   .maximum_pixels_per_line_per_dsc_unit = 6016,
-   .dsc422_native_support = true,
-   .is_line_buffer_bpp_fixed = true,
-   .line_buffer_fixed_bpp = 57,
-   .line_buffer_size_bits = 1171920, //DPP doc, DCN3_2_DisplayMode_73.xlsm 
still shows as 986880 bits with 48 bpp
-   .max_line_buffer_lines = 32,
-   .writeback_interface_buffer_size_kbytes = 90,
-   .max_num_dpp = 4,
-   .max_num_otg = 4,
-   .max_num_hdmi_frl_outputs = 1,
-   .max_num_wb = 1,
-   .max_dchub_pscl_bw_pix_per_clk = 4,
-   .max_pscl_lb_bw_pix_per_clk = 2,
-   .max_lb_vscl_bw_pix_per_clk = 4,
-   .max_vscl_hscl_bw_pix_per_clk = 4,
-   .max_hscl_ratio = 6,
-   .max_vscl_ratio = 6,
-   .max_hscl_taps = 8,
-   .max_vscl_taps = 8,
-   .dpte_buffer_size_in_pte_reqs_luma = 64,
-   .dpte_buffer_size_in_pte_reqs_chroma = 34,
-   .dispclk_ramp_margin_percent = 1,
-   .max_inter_dcn_tile_repeaters = 8,
-   .cursor_buffer_size = 16,
-   .cursor_chunk_size = 2,
-   .writeback_line_buffer_buffer_size = 0,
-   .writeback_min_hscl_ratio = 1,
-   .writeback_min_vscl_ratio = 1,
-   .writeback_max_hscl_ratio = 1,
-   .writeback_max_vscl_ratio = 1,
-   .writeback_max_hscl_taps = 1,
-   .writeback_max_vscl_taps = 1,
-   .dppclk_delay_subtotal = 47,
-   .dppclk_delay_scl = 50,
-   .dppclk_delay_scl_lb_only = 16,
-   .dppclk_delay_cnvc_formatter = 28,
-   .dppclk_delay_cnvc_cursor = 6,
-   .dispclk_delay_subtotal = 125,
-   .dynamic_metadata_vm_enabled = false,
-   .odm_combine_4to1_supported = false,
-   .dcc_supported = true,
-   .max_num_dp2p0_outputs = 2,
-   .max_num_dp2p0_streams = 4,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = {
-   .clock_limits = {
-   {
-   .state = 0,
-   .dcfclk_mhz = 1564.0,
-   .fabricclk_mhz = 400.0,
-   .dispclk_mhz = 2150.0,
-   .dppclk_mhz = 2150.0,
-   .phyclk_mhz = 810.0,
-   .phyclk_d18_mhz = 667.0,
-   .phyclk_d32_mhz = 625.0,
-   .socclk_mhz = 1200.0,
-   .dscclk_mhz = 716.667,
-   .dram_speed_mts = 1600.0,
-   .dtbclk_mhz = 1564.0,
-   },
-   },
-   .num_states = 1,
-   .sr_exit_time_us = 5.20,
-   .sr_enter_plus_exit_time_us = 9.60,
-   .sr_exit_z8_time_us = 285.0,
-   .sr_enter_plus_exit_z8_time_us = 320,
-   .writeback_latency_us = 12.0,
-   .round_trip_ping_latency_dcfclk_cycles = 263,
-   .urgent_latency_pixel_data_only_us = 4.0,
-   

[PATCH v9 11/14] tools: update hmm-test to support device coherent type

2022-07-16 Thread Alex Sierra
Test cases such as migrate_fault and migrate_multiple were modified to
explicitly migrate from device to system memory without the need for
page faults, when using the device coherent type.

The snapshot test case was updated to read the memory device type first
and, based on that, check the proper returned results. A
migrate_ping_pong test case was added to test explicit migration from
device to system memory for both private and coherent zone types.

Helpers to migrate from device to system memory and vice versa
were also added.

Signed-off-by: Alex Sierra 
Acked-by: Felix Kuehling 
Reviewed-by: Alistair Popple 
Signed-off-by: Christoph Hellwig 
---
 tools/testing/selftests/vm/hmm-tests.c | 121 -
 1 file changed, 100 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/vm/hmm-tests.c 
b/tools/testing/selftests/vm/hmm-tests.c
index 203323967b50..4b547188ec40 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -46,6 +46,13 @@ struct hmm_buffer {
uint64_tfaults;
 };
 
+enum {
+   HMM_PRIVATE_DEVICE_ONE,
+   HMM_PRIVATE_DEVICE_TWO,
+   HMM_COHERENCE_DEVICE_ONE,
+   HMM_COHERENCE_DEVICE_TWO,
+};
+
 #define TWOMEG (1 << 21)
 #define HMM_BUFFER_SIZE (1024 << 12)
 #define HMM_PATH_MAX64
@@ -60,6 +67,21 @@ FIXTURE(hmm)
unsigned intpage_shift;
 };
 
+FIXTURE_VARIANT(hmm)
+{
+   int device_number;
+};
+
+FIXTURE_VARIANT_ADD(hmm, hmm_device_private)
+{
+   .device_number = HMM_PRIVATE_DEVICE_ONE,
+};
+
+FIXTURE_VARIANT_ADD(hmm, hmm_device_coherent)
+{
+   .device_number = HMM_COHERENCE_DEVICE_ONE,
+};
+
 FIXTURE(hmm2)
 {
int fd0;
@@ -68,6 +90,24 @@ FIXTURE(hmm2)
unsigned intpage_shift;
 };
 
+FIXTURE_VARIANT(hmm2)
+{
+   int device_number0;
+   int device_number1;
+};
+
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_private)
+{
+   .device_number0 = HMM_PRIVATE_DEVICE_ONE,
+   .device_number1 = HMM_PRIVATE_DEVICE_TWO,
+};
+
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_coherent)
+{
+   .device_number0 = HMM_COHERENCE_DEVICE_ONE,
+   .device_number1 = HMM_COHERENCE_DEVICE_TWO,
+};
+
 static int hmm_open(int unit)
 {
char pathname[HMM_PATH_MAX];
@@ -81,12 +121,19 @@ static int hmm_open(int unit)
return fd;
 }
 
+static bool hmm_is_coherent_type(int dev_num)
+{
+   return (dev_num >= HMM_COHERENCE_DEVICE_ONE);
+}
+
 FIXTURE_SETUP(hmm)
 {
self->page_size = sysconf(_SC_PAGE_SIZE);
self->page_shift = ffs(self->page_size) - 1;
 
-   self->fd = hmm_open(0);
+   self->fd = hmm_open(variant->device_number);
+   if (self->fd < 0 && hmm_is_coherent_type(variant->device_number))
+   SKIP(exit(0), "DEVICE_COHERENT not available");
ASSERT_GE(self->fd, 0);
 }
 
@@ -95,9 +142,11 @@ FIXTURE_SETUP(hmm2)
self->page_size = sysconf(_SC_PAGE_SIZE);
self->page_shift = ffs(self->page_size) - 1;
 
-   self->fd0 = hmm_open(0);
+   self->fd0 = hmm_open(variant->device_number0);
+   if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0))
+   SKIP(exit(0), "DEVICE_COHERENT not available");
ASSERT_GE(self->fd0, 0);
-   self->fd1 = hmm_open(1);
+   self->fd1 = hmm_open(variant->device_number1);
ASSERT_GE(self->fd1, 0);
 }
 
@@ -211,6 +260,20 @@ static void hmm_nanosleep(unsigned int n)
nanosleep(, NULL);
 }
 
+static int hmm_migrate_sys_to_dev(int fd,
+  struct hmm_buffer *buffer,
+  unsigned long npages)
+{
+   return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages);
+}
+
+static int hmm_migrate_dev_to_sys(int fd,
+  struct hmm_buffer *buffer,
+  unsigned long npages)
+{
+   return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages);
+}
+
 /*
  * Simple NULL test of device open/close.
  */
@@ -875,7 +938,7 @@ TEST_F(hmm, migrate)
ptr[i] = i;
 
/* Migrate memory to device. */
-   ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+   ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
 
@@ -923,7 +986,7 @@ TEST_F(hmm, migrate_fault)
ptr[i] = i;
 
/* Migrate memory to device. */
-   ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+   ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
 
@@ -936,7 +999,7 @@ TEST_F(hmm, migrate_fault)
ASSERT_EQ(ptr[i], i);
 
/* Migrate memory to the device again. */
-   ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+   ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 

Re: [PATCH v2] drm/amdgpu: add HW_IP_VCN_UNIFIED type

2022-07-16 Thread Christian König

On 15.07.22 at 16:44, Ruijing Dong wrote:

Define HW_IP_VCN_UNIFIED type the same as HW_IP_VCN_ENC.

VCN4 support for libdrm needs a new definition for
the unified queue, so that it can align to the kernel.

link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits

Signed-off-by: Ruijing Dong 
---
  include/uapi/drm/amdgpu_drm.h | 1 +
  1 file changed, 1 insertion(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 18d3246d636e..fe33db8441bc 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -560,6 +560,7 @@ struct drm_amdgpu_gem_va {
  #define AMDGPU_HW_IP_UVD_ENC  5
  #define AMDGPU_HW_IP_VCN_DEC  6
  #define AMDGPU_HW_IP_VCN_ENC  7
+#define AMDGPU_HW_IP_VCN_UNIFIED  AMDGPU_HW_IP_VCN_ENC


Why exactly do we need a new define for this? Essentially the encode 
queue is extended with new functionality, isn't it?


So I think we should just stick to AMDGPU_HW_IP_VCN_ENC and not add an 
alias for it.


Regards,
Christian.


  #define AMDGPU_HW_IP_VCN_JPEG 8
  #define AMDGPU_HW_IP_NUM  9
  




RE: [PATCH v2] drm/amdgpu: add HW_IP_VCN_UNIFIED type

2022-07-16 Thread Dong, Ruijing
[AMD Official Use Only - General]

>> Why exactly do we need a new define for this? Essentially the encode queue 
>> is extended with new functionality, isn't it?
>> So I think we should just stick to AMDGPU_HW_IP_VCN_ENC and not add an alias 
>> for it.

Yes, it extended the encode queue to include new functionality, and that
looks a little confusing when sending decoding jobs to the encoding
queue. So I assume this alias can reduce the confusion.

Does this change make sense in this regard? Certainly we can stick to
AMDGPU_HW_IP_VCN_ENC.

Thanks,
Ruijing

-Original Message-
From: Koenig, Christian 
Sent: Friday, July 15, 2022 11:18 AM
To: Dong, Ruijing ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Liu, Leo 
Subject: Re: [PATCH v2] drm/amdgpu: add HW_IP_VCN_UNIFIED type

On 15.07.22 at 16:44, Ruijing Dong wrote:
> Define HW_IP_VCN_UNIFIED type the same as HW_IP_VCN_ENC.
>
> VCN4 support for libdrm needs a new definition for the unified queue,
> so that it can align to the kernel.
>
> link:
> https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits
>
> Signed-off-by: Ruijing Dong 
> ---
>   include/uapi/drm/amdgpu_drm.h | 1 +
>   1 file changed, 1 insertion(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h
> b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..fe33db8441bc
> 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -560,6 +560,7 @@ struct drm_amdgpu_gem_va {
>   #define AMDGPU_HW_IP_UVD_ENC  5
>   #define AMDGPU_HW_IP_VCN_DEC  6
>   #define AMDGPU_HW_IP_VCN_ENC  7
> +#define AMDGPU_HW_IP_VCN_UNIFIED  AMDGPU_HW_IP_VCN_ENC

Why exactly do we need a new define for this? Essentially the encode queue is 
extended with new functionality, isn't it?

So I think we should just stick to AMDGPU_HW_IP_VCN_ENC and not add an alias 
for it.

Regards,
Christian.

>   #define AMDGPU_HW_IP_VCN_JPEG 8
>   #define AMDGPU_HW_IP_NUM  9
>



RE: [PATCH] drm/amdgpu: align between libdrm and drm api

2022-07-16 Thread Dong, Ruijing
[AMD Official Use Only - General]

Hi Christian,

You are right; while processing the libdrm code review (not committed yet), we
realized the corresponding file needs to align to the kernel.
So we will need to have this header file changed first, and then process the
libdrm code again.

Thanks,
Ruijing

-Original Message-
From: Christian König 
Sent: Friday, July 15, 2022 4:41 AM
To: Dong, Ruijing ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Liu, Leo ; 
Koenig, Christian 
Subject: Re: [PATCH] drm/amdgpu: align between libdrm and drm api

On 14.07.22 at 23:22, Ruijing Dong wrote:
> define HW_IP_VCN_UNIFIED the same as HW_IP_VCN_ENC

Usually that should be the other way around, libdrm aligns to the kernel.

Why was that modification committed to libdrm first? There are usually plenty 
of warnings before we can do that.

Regards,
Christian.

>
> Signed-off-by: Ruijing Dong 
> ---
>   include/uapi/drm/amdgpu_drm.h | 1 +
>   1 file changed, 1 insertion(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h
> b/include/uapi/drm/amdgpu_drm.h index 18d3246d636e..fe33db8441bc
> 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -560,6 +560,7 @@ struct drm_amdgpu_gem_va {
>   #define AMDGPU_HW_IP_UVD_ENC  5
>   #define AMDGPU_HW_IP_VCN_DEC  6
>   #define AMDGPU_HW_IP_VCN_ENC  7
> +#define AMDGPU_HW_IP_VCN_UNIFIED  AMDGPU_HW_IP_VCN_ENC
>   #define AMDGPU_HW_IP_VCN_JPEG 8
>   #define AMDGPU_HW_IP_NUM  9
>



[PATCH v9 01/14] mm: rename is_pinnable_pages to is_longterm_pinnable_pages

2022-07-16 Thread Alex Sierra
is_pinnable_page() and folio_is_pinnable() were renamed to
is_longterm_pinnable_page() and folio_is_longterm_pinnable()
respectively. These functions are used in the FOLL_LONGTERM flag
context.

Signed-off-by: Alex Sierra 
Reviewed-by: David Hildenbrand 
---
 include/linux/mm.h | 8 
 mm/gup.c   | 4 ++--
 mm/gup_test.c  | 2 +-
 mm/hugetlb.c   | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index cf3d0d673f6b..3b31b33bd5be 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1592,7 +1592,7 @@ static inline bool page_needs_cow_for_dma(struct 
vm_area_struct *vma,
 
 /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */
 #ifdef CONFIG_MIGRATION
-static inline bool is_pinnable_page(struct page *page)
+static inline bool is_longterm_pinnable_page(struct page *page)
 {
 #ifdef CONFIG_CMA
int mt = get_pageblock_migratetype(page);
@@ -1603,15 +1603,15 @@ static inline bool is_pinnable_page(struct page *page)
return !is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page));
 }
 #else
-static inline bool is_pinnable_page(struct page *page)
+static inline bool is_longterm_pinnable_page(struct page *page)
 {
return true;
 }
 #endif
 
-static inline bool folio_is_pinnable(struct folio *folio)
+static inline bool folio_is_longterm_pinnable(struct folio *folio)
 {
-   return is_pinnable_page(>page);
+   return is_longterm_pinnable_page(>page);
 }
 
 static inline void set_page_zone(struct page *page, enum zone_type zone)
diff --git a/mm/gup.c b/mm/gup.c
index 551264407624..b65fe8bf5af4 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -133,7 +133,7 @@ struct folio *try_grab_folio(struct page *page, int refs, 
unsigned int flags)
 * path.
 */
if (unlikely((flags & FOLL_LONGTERM) &&
-!is_pinnable_page(page)))
+!is_longterm_pinnable_page(page)))
return NULL;
 
/*
@@ -1891,7 +1891,7 @@ static long check_and_migrate_movable_pages(unsigned long 
nr_pages,
continue;
prev_folio = folio;
 
-   if (folio_is_pinnable(folio))
+   if (folio_is_longterm_pinnable(folio))
continue;
 
/*
diff --git a/mm/gup_test.c b/mm/gup_test.c
index d974dec19e1c..12b0a91767d3 100644
--- a/mm/gup_test.c
+++ b/mm/gup_test.c
@@ -53,7 +53,7 @@ static void verify_dma_pinned(unsigned int cmd, struct page 
**pages,
dump_page(page, "gup_test failure");
break;
} else if (cmd == PIN_LONGTERM_BENCHMARK &&
-   WARN(!is_pinnable_page(page),
+   WARN(!is_longterm_pinnable_page(page),
 "pages[%lu] is NOT pinnable but pinned\n",
 i)) {
dump_page(page, "gup_test failure");
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a57e1be41401..368fd33787b0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1135,7 +1135,7 @@ static struct page *dequeue_huge_page_node_exact(struct 
hstate *h, int nid)
 
lockdep_assert_held(_lock);
list_for_each_entry(page, >hugepage_freelists[nid], lru) {
-   if (pin && !is_pinnable_page(page))
+   if (pin && !is_longterm_pinnable_page(page))
continue;
 
if (PageHWPoison(page))
-- 
2.32.0



[PATCH v9 02/14] mm: move page zone helpers from mm.h to mmzone.h

2022-07-16 Thread Alex Sierra
[WHY]
It makes more sense to have these helpers in the zone-specific header
file rather than in the generic mm.h.

Signed-off-by: Alex Sierra 
---
 include/linux/memremap.h |  2 +-
 include/linux/mm.h   | 78 ---
 include/linux/mmzone.h   | 80 
 3 files changed, 81 insertions(+), 79 deletions(-)

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 8af304f6b504..77229165c914 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -2,7 +2,7 @@
 #ifndef _LINUX_MEMREMAP_H_
 #define _LINUX_MEMREMAP_H_
 
-#include 
+#include 
 #include 
 #include 
 #include 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3b31b33bd5be..2df8c2b98d36 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1049,84 +1049,6 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
  *   back into memory.
  */
 
-/*
- * The zone field is never updated after free_area_init_core()
- * sets it, so none of the operations on it need to be atomic.
- */
-
-/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */
-#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
-#define NODES_PGOFF(SECTIONS_PGOFF - NODES_WIDTH)
-#define ZONES_PGOFF(NODES_PGOFF - ZONES_WIDTH)
-#define LAST_CPUPID_PGOFF  (ZONES_PGOFF - LAST_CPUPID_WIDTH)
-#define KASAN_TAG_PGOFF(LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
-
-/*
- * Define the bit shifts to access each section.  For non-existent
- * sections we define the shift as 0; that plus a 0 mask ensures
- * the compiler will optimise away reference to them.
- */
-#define SECTIONS_PGSHIFT   (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
-#define NODES_PGSHIFT  (NODES_PGOFF * (NODES_WIDTH != 0))
-#define ZONES_PGSHIFT  (ZONES_PGOFF * (ZONES_WIDTH != 0))
-#define LAST_CPUPID_PGSHIFT(LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0))
-#define KASAN_TAG_PGSHIFT  (KASAN_TAG_PGOFF * (KASAN_TAG_WIDTH != 0))
-
-/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
-#ifdef NODE_NOT_IN_PAGE_FLAGS
-#define ZONEID_SHIFT   (SECTIONS_SHIFT + ZONES_SHIFT)
-#define ZONEID_PGOFF   ((SECTIONS_PGOFF < ZONES_PGOFF)? \
-   SECTIONS_PGOFF : ZONES_PGOFF)
-#else
-#define ZONEID_SHIFT   (NODES_SHIFT + ZONES_SHIFT)
-#define ZONEID_PGOFF   ((NODES_PGOFF < ZONES_PGOFF)? \
-   NODES_PGOFF : ZONES_PGOFF)
-#endif
-
-#define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0))
-
-#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1)
-#define NODES_MASK ((1UL << NODES_WIDTH) - 1)
-#define SECTIONS_MASK  ((1UL << SECTIONS_WIDTH) - 1)
-#define LAST_CPUPID_MASK   ((1UL << LAST_CPUPID_SHIFT) - 1)
-#define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1)
-#define ZONEID_MASK((1UL << ZONEID_SHIFT) - 1)
-
-static inline enum zone_type page_zonenum(const struct page *page)
-{
-   ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT);
-   return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
-}
-
-static inline enum zone_type folio_zonenum(const struct folio *folio)
-{
-   return page_zonenum(>page);
-}
-
-#ifdef CONFIG_ZONE_DEVICE
-static inline bool is_zone_device_page(const struct page *page)
-{
-   return page_zonenum(page) == ZONE_DEVICE;
-}
-extern void memmap_init_zone_device(struct zone *, unsigned long,
-   unsigned long, struct dev_pagemap *);
-#else
-static inline bool is_zone_device_page(const struct page *page)
-{
-   return false;
-}
-#endif
-
-static inline bool folio_is_zone_device(const struct folio *folio)
-{
-   return is_zone_device_page(>page);
-}
-
-static inline bool is_zone_movable_page(const struct page *page)
-{
-   return page_zonenum(page) == ZONE_MOVABLE;
-}
-
 #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
 DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index aab70355d64f..47fc41f43c48 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -730,6 +730,86 @@ static inline bool zone_is_empty(struct zone *zone)
return zone->spanned_pages == 0;
 }
 
+#ifndef BUILD_VDSO32_64
+/*
+ * The zone field is never updated after free_area_init_core()
+ * sets it, so none of the operations on it need to be atomic.
+ */
+
+/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */
+#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
+#define NODES_PGOFF(SECTIONS_PGOFF - NODES_WIDTH)
+#define ZONES_PGOFF(NODES_PGOFF - ZONES_WIDTH)
+#define LAST_CPUPID_PGOFF  (ZONES_PGOFF - LAST_CPUPID_WIDTH)
+#define KASAN_TAG_PGOFF(LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
+
+/*
+ * Define the bit shifts to access 

[PATCH v9 05/14] mm: add device coherent vma selection for memory migration

2022-07-16 Thread Alex Sierra
This case is used to migrate pages from device memory back to system
memory. Device coherent type memory is cache coherent from both the device
and CPU point of view.
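
As an illustration (not part of the patch itself), a driver pulling one
of its coherent pages back to system memory would select it with the new
flag roughly like this; names are hypothetical and the copy step is
elided:

static int example_migrate_coherent_page_to_ram(struct vm_area_struct *vma,
						unsigned long addr,
						void *pgmap_owner)
{
	unsigned long src_pfn = 0, dst_pfn = 0;
	struct migrate_vma args = {
		.vma		= vma,
		.start		= addr,
		.end		= addr + PAGE_SIZE,
		.src		= &src_pfn,
		.dst		= &dst_pfn,
		.pgmap_owner	= pgmap_owner,
		.flags		= MIGRATE_VMA_SELECT_DEVICE_COHERENT,
	};
	int ret;

	ret = migrate_vma_setup(&args);
	if (ret)
		return ret;

	/* allocate a system page, copy the data, set dst_pfn here ... */

	migrate_vma_pages(&args);
	migrate_vma_finalize(&args);
	return 0;
}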

Signed-off-by: Alex Sierra 
Acked-by: Felix Kuehling 
Reviewed-by: Alistair Poppple 
Signed-off-by: Christoph Hellwig 
Reviewed-by: David Hildenbrand 
---
 include/linux/migrate.h |  1 +
 mm/migrate_device.c | 12 +---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 069a89e847f3..b84908debe5c 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -148,6 +148,7 @@ static inline unsigned long migrate_pfn(unsigned long pfn)
 enum migrate_vma_direction {
MIGRATE_VMA_SELECT_SYSTEM = 1 << 0,
MIGRATE_VMA_SELECT_DEVICE_PRIVATE = 1 << 1,
+   MIGRATE_VMA_SELECT_DEVICE_COHERENT = 1 << 2,
 };
 
 struct migrate_vma {
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index a4847ad65da3..18bc6483f63a 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -148,15 +148,21 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
if (is_writable_device_private_entry(entry))
mpfn |= MIGRATE_PFN_WRITE;
} else {
-   if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
-   goto next;
pfn = pte_pfn(pte);
-   if (is_zero_pfn(pfn)) {
+   if (is_zero_pfn(pfn) &&
+   (migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) {
mpfn = MIGRATE_PFN_MIGRATE;
migrate->cpages++;
goto next;
}
page = vm_normal_page(migrate->vma, addr, pte);
+   if (page && !is_zone_device_page(page) &&
+   !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
+   goto next;
+   else if (page && is_device_coherent_page(page) &&
+   (!(migrate->flags & 
MIGRATE_VMA_SELECT_DEVICE_COHERENT) ||
+page->pgmap->owner != migrate->pgmap_owner))
+   goto next;
mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
}
-- 
2.32.0



[pull] amdgpu drm-fixes-5.19

2022-07-16 Thread Alex Deucher
Hi Dave, Daniel,

One more stable fix for 5.19.

The following changes since commit 3283c83eb6fcfbda8ea03d7149d8e42e71c5d45e:

  drm/amd/display: Ensure valid event timestamp for cursor-only commits 
(2022-07-13 12:20:37 -0400)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-fixes-5.19-2022-07-15

for you to fetch changes up to 2d4bd81fea1ad6ebba543bd6da3ef5179d130e6a:

  drm/amd/display: Fix new dmub notification enabling in DM (2022-07-15 
10:04:59 -0400)


amd-drm-fixes-5.19-2022-07-15:

amdgpu:
- DMUB display fix


Stylon Wang (1):
  drm/amd/display: Fix new dmub notification enabling in DM

 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 27 ---
 1 file changed, 19 insertions(+), 8 deletions(-)


Re: [PATCH v1 1/6] dma-buf: Add _unlocked postfix to function names

2022-07-16 Thread Dmitry Osipenko
On 7/15/22 10:19, Christian König wrote:
>> -struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment
>> *attach,
>> -    enum dma_data_direction direction)
>> +struct sg_table *
>> +dma_buf_map_attachment_unlocked(struct dma_buf_attachment *attach,
>> +    enum dma_data_direction direction)
> 
> The locking state of mapping and unmapping operations depends on
> whether the attachment is dynamic or not.
> 
> So this here is not a good idea at all since it suggests that the
> function is always called without holding the lock.

I had the same thought while I was working on this patch and initially
was thinking about adding an "unlocked" alias to dma_buf_map_attachment().
In the end I decided that it would create even more confusion and that
it's simpler just to rename this function here, since there are only two
drivers using the dynamic mapping.

Do you have suggestions on how to improve it?

-- 
Best regards,
Dmitry


Re: [PATCH v1 4/6] dma-buf: Acquire wait-wound context on attachment

2022-07-16 Thread Dmitry Osipenko
On 7/15/22 09:50, Christian König wrote:
On 15.07.22 at 02:52, Dmitry Osipenko wrote:
>> Intel i915 GPU driver uses wait-wound mutex to lock multiple GEMs on the
>> attachment to the i915 dma-buf. In order to let all drivers utilize
>> shared
>> wait-wound context during attachment in a general way, make dma-buf
>> core to
>> acquire the ww context internally for the attachment operation and update
>> i915 driver to use the importer's ww context instead of the internal one.
>>
>>  From now on all dma-buf exporters shall use the importer's ww context
>> for
>> the attachment operation.
>>
>> Signed-off-by: Dmitry Osipenko 
>> ---
>>   drivers/dma-buf/dma-buf.c |  8 +-
>>   drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  2 +-
>>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  2 +-
>>   drivers/gpu/drm/i915/gem/i915_gem_object.h    |  6 ++---
>>   drivers/gpu/drm/i915/i915_gem_evict.c |  2 +-
>>   drivers/gpu/drm/i915/i915_gem_ww.c    | 26 +++
>>   drivers/gpu/drm/i915/i915_gem_ww.h    | 15 +--
>>   7 files changed, 47 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
>> index 0ee588276534..37545ecb845a 100644
>> --- a/drivers/dma-buf/dma-buf.c
>> +++ b/drivers/dma-buf/dma-buf.c
>> @@ -807,6 +807,8 @@ static struct sg_table * __map_dma_buf(struct
>> dma_buf_attachment *attach,
>>    * Optionally this calls &dma_buf_ops.attach to allow device-specific attach
>>    * functionality.
>>    *
>> + * Exporters shall use ww_ctx acquired by this function.
>> + *
>>    * Returns:
>>    *
>>    * A pointer to newly created &dma_buf_attachment on success, or a negative
>> @@ -822,6 +824,7 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf
>> *dmabuf, struct device *dev,
>>   void *importer_priv)
>>   {
>>   struct dma_buf_attachment *attach;
>> +    struct ww_acquire_ctx ww_ctx;
>>   int ret;
>>     if (WARN_ON(!dmabuf || !dev))
>> @@ -841,7 +844,8 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf
>> *dmabuf, struct device *dev,
>>   attach->importer_ops = importer_ops;
>>   attach->importer_priv = importer_priv;
>>   -    dma_resv_lock(dmabuf->resv, NULL);
>> +    ww_acquire_init(&ww_ctx, &reservation_ww_class);
>> +    dma_resv_lock(dmabuf->resv, &ww_ctx);
> 
> That won't work like this. The core property of a WW context is that you
> need to unwind all the locks and re-quire them with the contended one
> first.
> 
> When you statically lock the imported one here you can't do that any more.

You're right. I felt that something was missing here, but couldn't
pinpoint it. I'll think more about this and enable
CONFIG_DEBUG_WW_MUTEX_SLOWPATH. Thank you!
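
For the record, the backoff cycle in question looks roughly like this,
adapted from Documentation/locking/ww-mutex-design.rst rather than from this
patch; a real caller keeps looping until every lock is held:

static int lock_both(struct ww_mutex *a, struct ww_mutex *b,
		     struct ww_acquire_ctx *ctx)
{
	int ret;

	ww_acquire_init(ctx, &reservation_ww_class);

	ret = ww_mutex_lock(a, ctx);
	if (ret)
		goto out;

	ret = ww_mutex_lock(b, ctx);
	if (ret == -EDEADLK) {
		/* back off: drop everything held so far... */
		ww_mutex_unlock(a);
		/* ...then sleep on the contended lock and retry the rest */
		ww_mutex_lock_slow(b, ctx);
		ret = ww_mutex_lock(a, ctx);
		if (ret)
			ww_mutex_unlock(b);
	}

	if (!ret)
		ww_acquire_done(ctx);
out:
	if (ret)
		ww_acquire_fini(ctx);
	return ret;
}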

-- 
Best regards,
Dmitry


[PATCH] mm/gup: migrate device coherent pages when pinning instead of failing

2022-07-16 Thread Alistair Popple
Currently any attempts to pin a device coherent page will fail. This is
because device coherent pages need to be managed by a device driver, and
pinning them would prevent a driver from migrating them off the device.

However this is no reason to fail pinning of these pages. They are
coherent and accessible from the CPU, so they can be migrated just as
ZONE_MOVABLE pages are when pinned. So instead of failing all attempts to
pin them, first try migrating them out of ZONE_DEVICE.

[hch: rebased to the split device memory checks,
  moved migrate_device_page to migrate_device.c]

Signed-off-by: Alistair Popple 
Acked-by: Felix Kuehling 
Signed-off-by: Christoph Hellwig 
---

This patch hopefully addresses all of David's comments. It replaces both my "mm:
remove the vma check in migrate_vma_setup()" and "mm/gup: migrate device
coherent pages when pinning instead of failing" patches. I'm not sure what the
best way of including this is, perhaps Alex can respin the series with this
patch instead?

 - Alistair

 mm/gup.c| 50 +--
 mm/internal.h   |  1 +
 mm/migrate_device.c | 52 +
 3 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index b65fe8bf5af4..22b97ab61cd9 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1881,7 +1881,7 @@ static long check_and_migrate_movable_pages(unsigned long 
nr_pages,
unsigned long isolation_error_count = 0, i;
struct folio *prev_folio = NULL;
LIST_HEAD(movable_page_list);
-   bool drain_allow = true;
+   bool drain_allow = true, coherent_pages = false;
int ret = 0;
 
for (i = 0; i < nr_pages; i++) {
@@ -1891,9 +1891,38 @@ static long check_and_migrate_movable_pages(unsigned 
long nr_pages,
continue;
prev_folio = folio;
 
-   if (folio_is_longterm_pinnable(folio))
+   /*
+* Device coherent pages are managed by a driver and should not
+* be pinned indefinitely as it prevents the driver moving the
+* page. So when trying to pin with FOLL_LONGTERM instead try
+* to migrate the page out of device memory.
+*/
+   if (folio_is_device_coherent(folio)) {
+   /*
+* We always want a new GUP lookup with device coherent
+* pages.
+*/
+   pages[i] = 0;
+   coherent_pages = true;
+
+   /*
+* Migration will fail if the page is pinned, so convert
+* the pin on the source page to a normal reference.
+*/
+   if (gup_flags & FOLL_PIN) {
+   get_page(&folio->page);
+   unpin_user_page(&folio->page);
+   }
+
+   ret = migrate_device_coherent_page(&folio->page);
+   if (ret)
+   goto unpin_pages;
+
continue;
+   }
 
+   if (folio_is_longterm_pinnable(folio))
+   continue;
/*
 * Try to move out any movable page before pinning the range.
 */
@@ -1919,7 +1948,8 @@ static long check_and_migrate_movable_pages(unsigned long 
nr_pages,
folio_nr_pages(folio));
}
 
-   if (!list_empty(&movable_page_list) || isolation_error_count)
+   if (!list_empty(&movable_page_list) || isolation_error_count
+   || coherent_pages)
goto unpin_pages;
 
/*
@@ -1929,10 +1959,16 @@ static long check_and_migrate_movable_pages(unsigned 
long nr_pages,
return nr_pages;
 
 unpin_pages:
-   if (gup_flags & FOLL_PIN) {
-   unpin_user_pages(pages, nr_pages);
-   } else {
-   for (i = 0; i < nr_pages; i++)
+   /*
+* pages[i] might be NULL if any device coherent pages were found.
+*/
+   for (i = 0; i < nr_pages; i++) {
+   if (!pages[i])
+   continue;
+
+   if (gup_flags & FOLL_PIN)
+   unpin_user_page(pages[i]);
+   else
put_page(pages[i]);
}
 
diff --git a/mm/internal.h b/mm/internal.h
index c0f8fbe0445b..899dab512c5a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -853,6 +853,7 @@ int numa_migrate_prep(struct page *page, struct 
vm_area_struct *vma,
  unsigned long addr, int page_nid, int *flags);
 
 void free_zone_device_page(struct page *page);
+int migrate_device_coherent_page(struct page *page);
 
 /*
  * mm/gup.c
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 18bc6483f63a..7feeb447e3b9 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -686,6 +686,12 @@ void 

[PATCH v1 2/6] drm/gem: Take reservation lock for vmap/vunmap operations

2022-07-16 Thread Dmitry Osipenko
The new common dma-buf locking convention will require buffer importers
to hold the reservation lock around mapping operations. Make DRM GEM core
to take the lock around the vmapping operations and update QXL and i915
drivers to use the locked functions for the case where DRM core now holds
the lock. This patch prepares DRM core and drivers to transition to the
common dma-buf locking convention where vmapping of exported GEMs will
be done under the held reservation lock.

Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/drm_client.c |  4 +--
 drivers/gpu/drm/drm_gem.c| 28 
 drivers/gpu/drm/drm_gem_framebuffer_helper.c |  6 ++---
 drivers/gpu/drm/drm_prime.c  |  4 +--
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c   |  2 +-
 drivers/gpu/drm/qxl/qxl_object.c | 17 ++--
 drivers/gpu/drm/qxl/qxl_prime.c  |  4 +--
 include/drm/drm_gem.h|  3 +++
 8 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
index 2b230b4d6942..fbcb1e995384 100644
--- a/drivers/gpu/drm/drm_client.c
+++ b/drivers/gpu/drm/drm_client.c
@@ -323,7 +323,7 @@ drm_client_buffer_vmap(struct drm_client_buffer *buffer,
 * fd_install step out of the driver backend hooks, to make that
 * final step optional for internal users.
 */
-   ret = drm_gem_vmap(buffer->gem, map);
+   ret = drm_gem_vmap_unlocked(buffer->gem, map);
if (ret)
return ret;
 
@@ -345,7 +345,7 @@ void drm_client_buffer_vunmap(struct drm_client_buffer 
*buffer)
 {
struct iosys_map *map = &buffer->map;
 
-   drm_gem_vunmap(buffer->gem, map);
+   drm_gem_vunmap_unlocked(buffer->gem, map);
 }
 EXPORT_SYMBOL(drm_client_buffer_vunmap);
 
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index eb0c2d041f13..9769c33cad99 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1155,6 +1155,8 @@ void drm_gem_print_info(struct drm_printer *p, unsigned 
int indent,
 
 int drm_gem_pin(struct drm_gem_object *obj)
 {
+   dma_resv_assert_held(obj->resv);
+
if (obj->funcs->pin)
return obj->funcs->pin(obj);
else
@@ -1163,6 +1165,8 @@ int drm_gem_pin(struct drm_gem_object *obj)
 
 void drm_gem_unpin(struct drm_gem_object *obj)
 {
+   dma_resv_assert_held(obj->resv);
+
if (obj->funcs->unpin)
obj->funcs->unpin(obj);
 }
@@ -1171,6 +1175,8 @@ int drm_gem_vmap(struct drm_gem_object *obj, struct 
iosys_map *map)
 {
int ret;
 
+   dma_resv_assert_held(obj->resv);
+
if (!obj->funcs->vmap)
return -EOPNOTSUPP;
 
@@ -1186,6 +1192,8 @@ EXPORT_SYMBOL(drm_gem_vmap);
 
 void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
 {
+   dma_resv_assert_held(obj->resv);
+
if (iosys_map_is_null(map))
return;
 
@@ -1197,6 +1205,26 @@ void drm_gem_vunmap(struct drm_gem_object *obj, struct 
iosys_map *map)
 }
 EXPORT_SYMBOL(drm_gem_vunmap);
 
+int drm_gem_vmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map)
+{
+   int ret;
+
+   dma_resv_lock(obj->resv, NULL);
+   ret = drm_gem_vmap(obj, map);
+   dma_resv_unlock(obj->resv);
+
+   return ret;
+}
+EXPORT_SYMBOL(drm_gem_vmap_unlocked);
+
+void drm_gem_vunmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map)
+{
+   dma_resv_lock(obj->resv, NULL);
+   drm_gem_vunmap(obj, map);
+   dma_resv_unlock(obj->resv);
+}
+EXPORT_SYMBOL(drm_gem_vunmap_unlocked);
+
 /**
  * drm_gem_lock_reservations - Sets up the ww context and acquires
  * the lock on an array of GEM objects.
diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c 
b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
index 880a4975507f..e35e224e6303 100644
--- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c
+++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
@@ -354,7 +354,7 @@ int drm_gem_fb_vmap(struct drm_framebuffer *fb, struct 
iosys_map *map,
ret = -EINVAL;
goto err_drm_gem_vunmap;
}
-   ret = drm_gem_vmap(obj, &map[i]);
+   ret = drm_gem_vmap_unlocked(obj, &map[i]);
if (ret)
goto err_drm_gem_vunmap;
}
@@ -376,7 +376,7 @@ int drm_gem_fb_vmap(struct drm_framebuffer *fb, struct 
iosys_map *map,
obj = drm_gem_fb_get_obj(fb, i);
if (!obj)
continue;
-   drm_gem_vunmap(obj, &map[i]);
+   drm_gem_vunmap_unlocked(obj, &map[i]);
}
return ret;
 }
@@ -403,7 +403,7 @@ void drm_gem_fb_vunmap(struct drm_framebuffer *fb, struct 
iosys_map *map)
continue;
if (iosys_map_is_null(&map[i]))
continue;
-   drm_gem_vunmap(obj, &map[i]);
+   drm_gem_vunmap_unlocked(obj, &map[i]);

Re: Linux 5.19-rc6

2022-07-16 Thread Geert Uytterhoeven
On Thu, Jul 14, 2022 at 7:24 PM Guenter Roeck  wrote:
> On 7/14/22 09:48, Linus Torvalds wrote:
> > And some look positively strange. Like that
> >
> >drivers/mfd/asic3.c: error: unused variable 'asic'
> > [-Werror=unused-variable]:  => 941:23
> >
> > which is clearly used three lines later by
> >
> >  iounmap(asic->tmio_cnf);
> >
> > and I can't find any case of 'iounmap()' having been defined to an
> > empty macro or anything like that to explain it. The error in
> > drivers/tty/serial/sh-sci.c looks to be exactly the same issue, just
> > with ioremap() instead of iounmap().
> >
> > It would be good to have some way to find which build/architecture it
> > is, because right now it just looks bogus.
> >
> > Do you perhaps use some broken compiler that complains when the empty
> > inline functions don't use their arguments? Because that's what those
> > ioremap/iounmap() ones look like to me, but there might be some
> > magical architecture / config that has issues that aren't obvious.
> >
> > IOW, I'd love to get those fixed, but I would also want a little bit more 
> > info.
> >
> Geert gave the necessary hint - it looks like sh-nommu used defines
> for iomap() and iounmap(), which made the variable unused. According
> to Geert that was fixed a couple of days ago.

Yes, post-rc6 should be fine, as the fix went in... for the third time.
Combine people that keep on switching back to macros without reading
a file's history with unresponsive maintainers...
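
For anyone else chasing the same warning, the difference boils down to
something like this (hypothetical names, not the actual sh headers):

struct my_dev { void __iomem *regs; };

#ifdef NOMMU_MACRO_STYLE
#define my_iounmap(addr)	/* expands to nothing, the argument disappears */
#else
static inline void my_iounmap(volatile void __iomem *addr) { }
#endif

static void my_teardown(struct my_dev *dev)
{
	void __iomem *regs = dev->regs;

	/*
	 * With the macro variant the compiler never sees 'regs' being used,
	 * hence -Wunused-variable; the static inline keeps it "used".
	 */
	my_iounmap(regs);
}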

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH] mm/gup: migrate device coherent pages when pinning instead of failing

2022-07-16 Thread Sierra Guiza, Alejandro (Alex)



On 7/14/2022 9:11 PM, Alistair Popple wrote:

Currently any attempts to pin a device coherent page will fail. This is
because device coherent pages need to be managed by a device driver, and
pinning them would prevent a driver from migrating them off the device.

However this is no reason to fail pinning of these pages. They are
coherent and accessible from the CPU, so they can be migrated just as
ZONE_MOVABLE pages are when pinned. So instead of failing all attempts to
pin them, first try migrating them out of ZONE_DEVICE.

[hch: rebased to the split device memory checks,
   moved migrate_device_page to migrate_device.c]

Signed-off-by: Alistair Popple 
Acked-by: Felix Kuehling 
Signed-off-by: Christoph Hellwig 
---

This patch hopefully addresses all of David's comments. It replaces both my "mm:
remove the vma check in migrate_vma_setup()" and "mm/gup: migrate device
coherent pages when pinning instead of failing" patches. I'm not sure what the
best way of including this is, perhaps Alex can respin the series with this
patch instead?


For sure Alistair. I'll include this in my next patch series version.

Thanks,
Alex Sierra



  - Alistair

  mm/gup.c| 50 +--
  mm/internal.h   |  1 +
  mm/migrate_device.c | 52 +
  3 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index b65fe8bf5af4..22b97ab61cd9 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1881,7 +1881,7 @@ static long check_and_migrate_movable_pages(unsigned long 
nr_pages,
unsigned long isolation_error_count = 0, i;
struct folio *prev_folio = NULL;
LIST_HEAD(movable_page_list);
-   bool drain_allow = true;
+   bool drain_allow = true, coherent_pages = false;
int ret = 0;
  
  	for (i = 0; i < nr_pages; i++) {

@@ -1891,9 +1891,38 @@ static long check_and_migrate_movable_pages(unsigned 
long nr_pages,
continue;
prev_folio = folio;
  
-		if (folio_is_longterm_pinnable(folio))

+   /*
+* Device coherent pages are managed by a driver and should not
+* be pinned indefinitely as it prevents the driver moving the
+* page. So when trying to pin with FOLL_LONGTERM instead try
+* to migrate the page out of device memory.
+*/
+   if (folio_is_device_coherent(folio)) {
+   /*
+* We always want a new GUP lookup with device coherent
+* pages.
+*/
+   pages[i] = 0;
+   coherent_pages = true;
+
+   /*
+* Migration will fail if the page is pinned, so convert
+* the pin on the source page to a normal reference.
+*/
+   if (gup_flags & FOLL_PIN) {
+   get_page(&folio->page);
+   unpin_user_page(&folio->page);
+   }
+
+   ret = migrate_device_coherent_page(&folio->page);
+   if (ret)
+   goto unpin_pages;
+
continue;
+   }
  
+		if (folio_is_longterm_pinnable(folio))

+   continue;
/*
 * Try to move out any movable page before pinning the range.
 */
@@ -1919,7 +1948,8 @@ static long check_and_migrate_movable_pages(unsigned long 
nr_pages,
folio_nr_pages(folio));
}
  
-	if (!list_empty(&movable_page_list) || isolation_error_count)

+   if (!list_empty(&movable_page_list) || isolation_error_count
+   || coherent_pages)
goto unpin_pages;
  
  	/*

@@ -1929,10 +1959,16 @@ static long check_and_migrate_movable_pages(unsigned 
long nr_pages,
return nr_pages;
  
  unpin_pages:

-   if (gup_flags & FOLL_PIN) {
-   unpin_user_pages(pages, nr_pages);
-   } else {
-   for (i = 0; i < nr_pages; i++)
+   /*
+* pages[i] might be NULL if any device coherent pages were found.
+*/
+   for (i = 0; i < nr_pages; i++) {
+   if (!pages[i])
+   continue;
+
+   if (gup_flags & FOLL_PIN)
+   unpin_user_page(pages[i]);
+   else
put_page(pages[i]);
}
  
diff --git a/mm/internal.h b/mm/internal.h

index c0f8fbe0445b..899dab512c5a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -853,6 +853,7 @@ int numa_migrate_prep(struct page *page, struct 
vm_area_struct *vma,
  unsigned long addr, int page_nid, int *flags);
  
  void free_zone_device_page(struct page *page);

+int migrate_device_coherent_page(struct page *page);
  
  /*

   * mm/gup.c
diff --git a/mm/migrate_device.c 

Re: [PATCH v2 16/29] ACPI: video: Add Nvidia WMI EC brightness control detection

2022-07-16 Thread Hans de Goede
Hi Daniel,

On 7/12/22 22:13, Daniel Dadap wrote:
> Thanks, Hans:
> 
> On 7/12/22 14:38, Hans de Goede wrote:
>> On some new laptop designs a new Nvidia specific WMI interface is present
>> which gives info about panel brightness control and may allow controlling
>> the brightness through this interface when the embedded controller is used
>> for brightness control.
>>
>> When this WMI interface is present and indicates that the EC is used,
>> then this interface should be used for brightness control.
>>
>> Signed-off-by: Hans de Goede 
>> ---
>>   drivers/acpi/Kconfig   |  1 +
>>   drivers/acpi/video_detect.c    | 35 ++
>>   drivers/gpu/drm/gma500/Kconfig |  2 ++
>>   drivers/gpu/drm/i915/Kconfig   |  2 ++
>>   include/acpi/video.h   |  1 +
>>   5 files changed, 41 insertions(+)
>>
>> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
>> index 1e34f846508f..c372385cfc3f 100644
>> --- a/drivers/acpi/Kconfig
>> +++ b/drivers/acpi/Kconfig
>> @@ -212,6 +212,7 @@ config ACPI_VIDEO
>>   tristate "Video"
>>   depends on X86 && BACKLIGHT_CLASS_DEVICE
>>   depends on INPUT
>> +    depends on ACPI_WMI
>>   select THERMAL
>>   help
>>     This driver implements the ACPI Extensions For Display Adapters
>> diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
>> index 8c2863403040..7b89dc9a04a2 100644
>> --- a/drivers/acpi/video_detect.c
>> +++ b/drivers/acpi/video_detect.c
>> @@ -75,6 +75,35 @@ find_video(acpi_handle handle, u32 lvl, void *context, 
>> void **rv)
>>   return AE_OK;
>>   }
>>   +#define WMI_BRIGHTNESS_METHOD_SOURCE    2
>> +#define WMI_BRIGHTNESS_MODE_GET    0
>> +#define WMI_BRIGHTNESS_SOURCE_EC    2
>> +
>> +struct wmi_brightness_args {
>> +    u32 mode;
>> +    u32 val;
>> +    u32 ret;
>> +    u32 ignored[3];
>> +};
>> +
>> +static bool nvidia_wmi_ec_supported(void)
>> +{
>> +    struct wmi_brightness_args args = {
>> +    .mode = WMI_BRIGHTNESS_MODE_GET,
>> +    .val = 0,
>> +    .ret = 0,
>> +    };
>> +    struct acpi_buffer buf = { (acpi_size)sizeof(args), &args };
>> +    acpi_status status;
>> +
>> +    status = wmi_evaluate_method("603E9613-EF25-4338-A3D0-C46177516DB7", 0,
>> + WMI_BRIGHTNESS_METHOD_SOURCE, &buf, &buf);
>> +    if (ACPI_FAILURE(status))
>> +    return false;
>> +
>> +    return args.ret == WMI_BRIGHTNESS_SOURCE_EC;
>> +}
>> +
> 
> 
> The code duplication here with nvidia-wmi-ec-backlight.c is a little 
> unfortunate. Can we move the constants, struct definition, and WMI GUID from 
> that file to a header file that's used both by the EC backlight driver and 
> the ACPI video driver?

Yes, that is a good idea. I suggest moving the shared definitions to
include/linux/platform_data/x86/nvidia-wmi-ec-backlight.h.

If you can submit 2 patches on top of this series:

1. Moving the definitions from drivers/platform/x86/nvidia-wmi-ec-backlight.c to
   include/linux/platform_data/x86/nvidia-wmi-ec-backlight.h

2. Switching the code from this patch over to using the new 
nvidia-wmi-ec-backlight.h

Then for the next version I'll add patch 1. to the series and squash patch 2.
into this one.
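
As a rough sketch, the shared header would only need to carry the
definitions that already appear in this patch, e.g. (exact header name,
location and macro names to be settled by those follow-up patches):

/* include/linux/platform_data/x86/nvidia-wmi-ec-backlight.h */
#ifndef __PLATFORM_DATA_X86_NVIDIA_WMI_EC_BACKLIGHT_H
#define __PLATFORM_DATA_X86_NVIDIA_WMI_EC_BACKLIGHT_H

#include <linux/types.h>

#define WMI_BRIGHTNESS_GUID		"603E9613-EF25-4338-A3D0-C46177516DB7"

#define WMI_BRIGHTNESS_METHOD_SOURCE	2
#define WMI_BRIGHTNESS_MODE_GET		0
#define WMI_BRIGHTNESS_SOURCE_EC	2

struct wmi_brightness_args {
	u32 mode;
	u32 val;
	u32 ret;
	u32 ignored[3];
};

#endif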

> I was thinking it might be nice to add a wrapper around 
> wmi_brightness_notify() in nvidia-wmi-ec-backlight.c that does this source == 
> WMI_BRIGHTNESS_SOURCE_EC test, and then export it so that it can be called 
> both here and in the EC backlight driver's probe routine, but then I guess 
> that would make video.ko depend on nvidia-wmi-ec-backlight.ko, which seems 
> wrong. It also seems wrong to implement the WMI plumbing in the ACPI video 
> driver, and export it so that the EC backlight driver can use it, so I guess 
> I can live with the duplication of the relatively simple WMI stuff here, it 
> would just be nice to not have to define all of the API constants, structure, 
> and GUID twice.

Agreed.

> 
> 
>>   /* Force to use vendor driver when the ACPI device is known to be
>>    * buggy */
>>   static int video_detect_force_vendor(const struct dmi_system_id *d)
>> @@ -518,6 +547,7 @@ static const struct dmi_system_id 
>> video_detect_dmi_table[] = {
>>   static enum acpi_backlight_type __acpi_video_get_backlight_type(bool 
>> native)
>>   {
>>   static DEFINE_MUTEX(init_mutex);
>> +    static bool nvidia_wmi_ec_present;
>>   static bool native_available;
>>   static bool init_done;
>>   static long video_caps;
>> @@ -530,6 +560,7 @@ static enum acpi_backlight_type 
>> __acpi_video_get_backlight_type(bool native)
>>   acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
>>   ACPI_UINT32_MAX, find_video, NULL,
>>   &video_caps, NULL);
>> +    nvidia_wmi_ec_present = nvidia_wmi_ec_supported();
>>   init_done = true;
>>   }
>>   if (native)
>> @@ -547,6 +578,10 @@ static enum acpi_backlight_type 
>> __acpi_video_get_backlight_type(bool native)
>>   

[PATCH v1 3/6] dma-buf: Move all dma-bufs to dynamic locking specification

2022-07-16 Thread Dmitry Osipenko
This patch moves the non-dynamic dma-buf users over to the dynamic
locking specification. From now on all dma-buf importers are responsible
for holding the dma-buf's reservation lock around operations performed
over dma-bufs. This strict locking convention prevents deadlock
situations for dma-buf importers and exporters.

Previously the "unlocked" versions of the dma-buf API functions weren't
taking the reservation lock; this patch makes them take the lock.

The Intel and AMD GPU drivers were already mapping imported dma-bufs
under the held lock, hence "locked" variants of the functions are added
for them and those drivers are updated to use the "locked" versions.

The i915 driver is also updated to not lock the exported buffer on
attachment, since the lock is now held by the importer. We also need to
move the ww context acquisition from exporters (the i915 driver) to
importers, otherwise lockdep won't be happy. This will be done in the
next patch, since i915 is the only driver that uses a ww context on
attachment today and it's acceptable to make this change separately.

Signed-off-by: Dmitry Osipenko 
---
 drivers/dma-buf/dma-buf.c  | 125 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|   4 +-
 drivers/gpu/drm/drm_prime.c|   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  12 +-
 include/linux/dma-buf.h|   6 +
 5 files changed, 104 insertions(+), 47 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index d16237a6ffaa..0ee588276534 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -841,14 +841,14 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, 
struct device *dev,
attach->importer_ops = importer_ops;
attach->importer_priv = importer_priv;
 
+   dma_resv_lock(dmabuf->resv, NULL);
+
if (dmabuf->ops->attach) {
ret = dmabuf->ops->attach(dmabuf, attach);
if (ret)
goto err_attach;
}
-   dma_resv_lock(dmabuf->resv, NULL);
list_add(&attach->node, &dmabuf->attachments);
-   dma_resv_unlock(dmabuf->resv);
 
/* When either the importer or the exporter can't handle dynamic
 * mappings we cache the mapping here to avoid issues with the
@@ -859,7 +859,6 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, 
struct device *dev,
struct sg_table *sgt;
 
if (dma_buf_is_dynamic(attach->dmabuf)) {
-   dma_resv_lock(attach->dmabuf->resv, NULL);
ret = dmabuf->ops->pin(attach);
if (ret)
goto err_unlock;
@@ -872,15 +871,16 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, 
struct device *dev,
ret = PTR_ERR(sgt);
goto err_unpin;
}
-   if (dma_buf_is_dynamic(attach->dmabuf))
-   dma_resv_unlock(attach->dmabuf->resv);
attach->sgt = sgt;
attach->dir = DMA_BIDIRECTIONAL;
}
 
+   dma_resv_unlock(dmabuf->resv);
+
return attach;
 
 err_attach:
+   dma_resv_unlock(attach->dmabuf->resv);
kfree(attach);
return ERR_PTR(ret);
 
@@ -889,8 +889,7 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, 
struct device *dev,
dmabuf->ops->unpin(attach);
 
 err_unlock:
-   if (dma_buf_is_dynamic(attach->dmabuf))
-   dma_resv_unlock(attach->dmabuf->resv);
+   dma_resv_unlock(dmabuf->resv);
 
dma_buf_detach_unlocked(dmabuf, attach);
return ERR_PTR(ret);
@@ -937,24 +936,23 @@ void dma_buf_detach_unlocked(struct dma_buf *dmabuf,
if (WARN_ON(!dmabuf || !attach))
return;
 
-   if (attach->sgt) {
-   if (dma_buf_is_dynamic(attach->dmabuf))
-   dma_resv_lock(attach->dmabuf->resv, NULL);
+   if (WARN_ON(dmabuf != attach->dmabuf))
+   return;
 
+   dma_resv_lock(dmabuf->resv, NULL);
+
+   if (attach->sgt) {
__unmap_dma_buf(attach, attach->sgt, attach->dir);
 
-   if (dma_buf_is_dynamic(attach->dmabuf)) {
+   if (dma_buf_is_dynamic(attach->dmabuf))
dmabuf->ops->unpin(attach);
-   dma_resv_unlock(attach->dmabuf->resv);
-   }
}
-
-   dma_resv_lock(dmabuf->resv, NULL);
list_del(&attach->node);
-   dma_resv_unlock(dmabuf->resv);
+
if (dmabuf->ops->detach)
dmabuf->ops->detach(dmabuf, attach);
 
+   dma_resv_unlock(dmabuf->resv);
kfree(attach);
 }
 EXPORT_SYMBOL_NS_GPL(dma_buf_detach_unlocked, DMA_BUF);
@@ -1030,10 +1028,11 @@ EXPORT_SYMBOL_NS_GPL(dma_buf_unpin, DMA_BUF);
  *
  * Important: Dynamic importers must wait for the exclusive fence of the struct
  * dma_resv attached to the DMA-BUF first.
+ *
+ * Importer is responsible for 

[PATCH v2] drm/amdgpu: add HW_IP_VCN_UNIFIED type

2022-07-16 Thread Ruijing Dong
Define the HW_IP_VCN_UNIFIED type with the same value as HW_IP_VCN_ENC.

VCN4 support in libdrm needs a new definition for
the unified queue, so that it can align with the kernel.

link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits

Signed-off-by: Ruijing Dong 
---
 include/uapi/drm/amdgpu_drm.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 18d3246d636e..fe33db8441bc 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -560,6 +560,7 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_HW_IP_UVD_ENC  5
 #define AMDGPU_HW_IP_VCN_DEC  6
 #define AMDGPU_HW_IP_VCN_ENC  7
+#define AMDGPU_HW_IP_VCN_UNIFIED  AMDGPU_HW_IP_VCN_ENC
 #define AMDGPU_HW_IP_VCN_JPEG 8
 #define AMDGPU_HW_IP_NUM  9
 
-- 
2.25.1



[PATCH v1 6/6] dma-buf: Remove internal lock

2022-07-16 Thread Dmitry Osipenko
The internal dma-buf lock isn't needed anymore because the updated
locking specification requires that the dma-buf reservation be locked
by importers, and thus the internal data is already protected by the
reservation lock. Remove the obsolete internal lock.

Signed-off-by: Dmitry Osipenko 
---
 drivers/dma-buf/dma-buf.c | 5 -
 include/linux/dma-buf.h   | 9 -
 2 files changed, 14 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 37545ecb845a..4cc739537ebd 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -656,7 +656,6 @@ struct dma_buf *dma_buf_export(const struct 
dma_buf_export_info *exp_info)
 
dmabuf->file = file;
 
-   mutex_init(&dmabuf->lock);
INIT_LIST_HEAD(&dmabuf->attachments);
 
mutex_lock(&db_list.lock);
@@ -1459,7 +1458,6 @@ int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct 
iosys_map *map)
return -EINVAL;
 
dma_resv_lock(dmabuf->resv, NULL);
-   mutex_lock(&dmabuf->lock);
if (dmabuf->vmapping_counter) {
dmabuf->vmapping_counter++;
BUG_ON(iosys_map_is_null(&dmabuf->vmap_ptr));
@@ -1479,7 +1477,6 @@ int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct 
iosys_map *map)
*map = dmabuf->vmap_ptr;
 
 out_unlock:
-   mutex_unlock(&dmabuf->lock);
dma_resv_unlock(dmabuf->resv);
return ret;
 }
@@ -1500,13 +1497,11 @@ void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, 
struct iosys_map *map)
BUG_ON(!iosys_map_is_equal(&dmabuf->vmap_ptr, map));
 
dma_resv_lock(dmabuf->resv, NULL);
-   mutex_lock(&dmabuf->lock);
if (--dmabuf->vmapping_counter == 0) {
if (dmabuf->ops->vunmap)
dmabuf->ops->vunmap(dmabuf, map);
iosys_map_clear(&dmabuf->vmap_ptr);
}
-   mutex_unlock(&dmabuf->lock);
dma_resv_unlock(dmabuf->resv);
 }
 EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap_unlocked, DMA_BUF);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index da924a56d58f..abdd99042c77 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -326,15 +326,6 @@ struct dma_buf {
/** @ops: dma_buf_ops associated with this buffer object. */
const struct dma_buf_ops *ops;
 
-   /**
-* @lock:
-*
-* Used internally to serialize list manipulation, attach/detach and
-* vmap/unmap. Note that in many cases this is superseeded by
-* dma_resv_lock() on @resv.
-*/
-   struct mutex lock;
-
/**
 * @vmapping_counter:
 *
-- 
2.36.1



[PATCH v1 0/6] Move all drivers to a common dma-buf locking convention

2022-07-16 Thread Dmitry Osipenko
Hello,

This series moves all drivers to a dynamic dma-buf locking specification.
From now on all dma-buf importers are made responsible for holding the
dma-buf's reservation lock around all operations performed over dma-bufs.
This common locking convention allows us to utilize the reservation lock
more broadly around the kernel without fear of potential deadlocks.
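
In practice an importer either sticks to the *_unlocked() helpers, which
now take the lock internally, or wraps its own accesses in the reservation
lock. A minimal sketch (not part of the series; helper names as introduced
in patches 1 and 3):

static int importer_vmap(struct dma_buf *dmabuf, struct iosys_map *map)
{
	/* the *_unlocked() helpers take dmabuf->resv themselves */
	return dma_buf_vmap_unlocked(dmabuf, map);
}

static void importer_access(struct dma_buf *dmabuf)
{
	/* any direct poking at the buffer happens under the lock */
	dma_resv_lock(dmabuf->resv, NULL);
	/* ... access the buffer / call the locked variants here ... */
	dma_resv_unlock(dmabuf->resv);
}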

This patchset passes all i915 selftests. It was also tested with the
VirtIO, Panfrost, Lima and Tegra drivers. I tested the display+GPU,
display+V4L and GPU+V4L dma-buf sharing cases, which covers the majority of
kernel drivers since the rest of the drivers share the same or similar code
paths.

This is a continuation of [1] where Christian König asked to factor out
the dma-buf locking changes into separate series.

[1] 
https://lore.kernel.org/dri-devel/20220526235040.678984-1-dmitry.osipe...@collabora.com/

Dmitry Osipenko (6):
  dma-buf: Add _unlocked postfix to function names
  drm/gem: Take reservation lock for vmap/vunmap operations
  dma-buf: Move all dma-bufs to dynamic locking specification
  dma-buf: Acquire wait-wound context on attachment
  media: videobuf2: Stop using internal dma-buf lock
  dma-buf: Remove internal lock

 drivers/dma-buf/dma-buf.c | 198 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c   |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |   4 +-
 drivers/gpu/drm/armada/armada_gem.c   |  14 +-
 drivers/gpu/drm/drm_client.c  |   4 +-
 drivers/gpu/drm/drm_gem.c |  28 +++
 drivers/gpu/drm/drm_gem_cma_helper.c  |   6 +-
 drivers/gpu/drm/drm_gem_framebuffer_helper.c  |   6 +-
 drivers/gpu/drm/drm_gem_shmem_helper.c|   6 +-
 drivers/gpu/drm/drm_prime.c   |  12 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c   |   6 +-
 drivers/gpu/drm/exynos/exynos_drm_gem.c   |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c|  20 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c|   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|   6 +-
 .../drm/i915/gem/selftests/i915_gem_dmabuf.c  |  20 +-
 drivers/gpu/drm/i915/i915_gem_evict.c |   2 +-
 drivers/gpu/drm/i915/i915_gem_ww.c|  26 ++-
 drivers/gpu/drm/i915/i915_gem_ww.h|  15 +-
 drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c |   8 +-
 drivers/gpu/drm/qxl/qxl_object.c  |  17 +-
 drivers/gpu/drm/qxl/qxl_prime.c   |   4 +-
 drivers/gpu/drm/tegra/gem.c   |  27 +--
 drivers/infiniband/core/umem_dmabuf.c |  11 +-
 .../common/videobuf2/videobuf2-dma-contig.c   |  26 +--
 .../media/common/videobuf2/videobuf2-dma-sg.c |  23 +-
 .../common/videobuf2/videobuf2-vmalloc.c  |  17 +-
 .../platform/nvidia/tegra-vde/dmabuf-cache.c  |  12 +-
 drivers/misc/fastrpc.c|  12 +-
 drivers/xen/gntdev-dmabuf.c   |  14 +-
 include/drm/drm_gem.h |   3 +
 include/linux/dma-buf.h   |  49 ++---
 32 files changed, 347 insertions(+), 257 deletions(-)

-- 
2.36.1



[PATCH v1 5/6] media: videobuf2: Stop using internal dma-buf lock

2022-07-16 Thread Dmitry Osipenko
All drivers that use dma-bufs have been moved to the updated locking
specification and now dma-buf reservation is guaranteed to be locked
by importers during the mapping operations. There is no need to take
the internal dma-buf lock anymore. Remove locking from the videobuf2
memory allocators.

Signed-off-by: Dmitry Osipenko 
---
 drivers/media/common/videobuf2/videobuf2-dma-contig.c | 11 +--
 drivers/media/common/videobuf2/videobuf2-dma-sg.c | 11 +--
 drivers/media/common/videobuf2/videobuf2-vmalloc.c| 11 +--
 3 files changed, 3 insertions(+), 30 deletions(-)

diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c 
b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
index de762dbdaf78..2c69bf0470e7 100644
--- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c
+++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
@@ -382,18 +382,12 @@ static struct sg_table *vb2_dc_dmabuf_ops_map(
struct dma_buf_attachment *db_attach, enum dma_data_direction dma_dir)
 {
struct vb2_dc_attachment *attach = db_attach->priv;
-   /* stealing dmabuf mutex to serialize map/unmap operations */
-   struct mutex *lock = &db_attach->dmabuf->lock;
struct sg_table *sgt;
 
-   mutex_lock(lock);
-
sgt = &attach->sgt;
/* return previously mapped sg table */
-   if (attach->dma_dir == dma_dir) {
-   mutex_unlock(lock);
+   if (attach->dma_dir == dma_dir)
return sgt;
-   }
 
/* release any previous cache */
if (attach->dma_dir != DMA_NONE) {
@@ -409,14 +403,11 @@ static struct sg_table *vb2_dc_dmabuf_ops_map(
if (dma_map_sgtable(db_attach->dev, sgt, dma_dir,
DMA_ATTR_SKIP_CPU_SYNC)) {
pr_err("failed to map scatterlist\n");
-   mutex_unlock(lock);
return ERR_PTR(-EIO);
}
 
attach->dma_dir = dma_dir;
 
-   mutex_unlock(lock);
-
return sgt;
 }
 
diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c 
b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
index 39e11600304a..e63e718c0bf7 100644
--- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c
+++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
@@ -424,18 +424,12 @@ static struct sg_table *vb2_dma_sg_dmabuf_ops_map(
struct dma_buf_attachment *db_attach, enum dma_data_direction dma_dir)
 {
struct vb2_dma_sg_attachment *attach = db_attach->priv;
-   /* stealing dmabuf mutex to serialize map/unmap operations */
-   struct mutex *lock = &db_attach->dmabuf->lock;
struct sg_table *sgt;
 
-   mutex_lock(lock);
-
sgt = &attach->sgt;
/* return previously mapped sg table */
-   if (attach->dma_dir == dma_dir) {
-   mutex_unlock(lock);
+   if (attach->dma_dir == dma_dir)
return sgt;
-   }
 
/* release any previous cache */
if (attach->dma_dir != DMA_NONE) {
@@ -446,14 +440,11 @@ static struct sg_table *vb2_dma_sg_dmabuf_ops_map(
/* mapping to the client with new direction */
if (dma_map_sgtable(db_attach->dev, sgt, dma_dir, 0)) {
pr_err("failed to map scatterlist\n");
-   mutex_unlock(lock);
return ERR_PTR(-EIO);
}
 
attach->dma_dir = dma_dir;
 
-   mutex_unlock(lock);
-
return sgt;
 }
 
diff --git a/drivers/media/common/videobuf2/videobuf2-vmalloc.c 
b/drivers/media/common/videobuf2/videobuf2-vmalloc.c
index 7831bf545874..41db707e43a4 100644
--- a/drivers/media/common/videobuf2/videobuf2-vmalloc.c
+++ b/drivers/media/common/videobuf2/videobuf2-vmalloc.c
@@ -267,18 +267,12 @@ static struct sg_table *vb2_vmalloc_dmabuf_ops_map(
struct dma_buf_attachment *db_attach, enum dma_data_direction dma_dir)
 {
struct vb2_vmalloc_attachment *attach = db_attach->priv;
-   /* stealing dmabuf mutex to serialize map/unmap operations */
-   struct mutex *lock = &db_attach->dmabuf->lock;
struct sg_table *sgt;
 
-   mutex_lock(lock);
-
sgt = &attach->sgt;
/* return previously mapped sg table */
-   if (attach->dma_dir == dma_dir) {
-   mutex_unlock(lock);
+   if (attach->dma_dir == dma_dir)
return sgt;
-   }
 
/* release any previous cache */
if (attach->dma_dir != DMA_NONE) {
@@ -289,14 +283,11 @@ static struct sg_table *vb2_vmalloc_dmabuf_ops_map(
/* mapping to the client with new direction */
if (dma_map_sgtable(db_attach->dev, sgt, dma_dir, 0)) {
pr_err("failed to map scatterlist\n");
-   mutex_unlock(lock);
return ERR_PTR(-EIO);
}
 
attach->dma_dir = dma_dir;
 
-   mutex_unlock(lock);
-
return sgt;
 }
 
-- 
2.36.1



Re: [PATCH v2 20/29] platform/x86: acer-wmi: Move backlight DMI quirks to acpi/video_detect.c

2022-07-16 Thread Hans de Goede
Hi,

On 7/12/22 22:24, Daniel Dadap wrote:
> I'll ask around to see if there's some DMI property we can match in order to 
> detect whether a system is expected to use the EC backlight driver: if so, 
> maybe we can avoid the WMI interactions in patch 16/29 of this series. 
> Although I suppose even if there were a DMI property, we'd still need to call 
> the WMI-wrapped ACPI method to check whether the system is currently 
> configured to drive the backlight through the EC, unless the system somehow 
> exports a different DMI table depending on the current backlight control 
> configuration, which I imagine to be unlikely.

IMHO the duplication is fine; it is also important that
the video_detect.c code and the actual backlight driver use
the same detection mechanism where possible.
Otherwise acpi_video_get_backlight_type() may return
acpi_backlight_nvidia_wmi_ec while the EC backlight driver
refuses to load...

Regards,

Hans





> 
> This change looks fine to me, although I suppose somebody who maintains the 
> acer-wmi driver should comment. The bugzilla links are a nice touch.
> 
> On 7/12/22 14:39, Hans de Goede wrote:
>> Move the backlight DMI quirks to acpi/video_detect.c, so that
>> the driver no longer needs to call acpi_video_set_dmi_backlight_type().
>>
>> acpi_video_set_dmi_backlight_type() is troublesome because it may end up
>> getting called after other backlight drivers have already called
>> acpi_video_get_backlight_type() resulting in the other drivers
>> already being registered even though they should not.
>>
>> Note that even though the DMI quirk table name was video_vendor_dmi_table,
>> 5/6 quirks were actually quirks to use the GPU native backlight.
>>
>> These 5 quirks also had a callback in their dmi_system_id entry which
>> disabled the acer-wmi vendor driver; and any DMI match resulted in:
>>
>> acpi_video_set_dmi_backlight_type(acpi_backlight_vendor);
>>
>> which disabled the acpi_video driver, so only the native driver was left.
>> The new entries for these 5/6 devices correctly marks these as needing
>> the native backlight driver.
>>
>> Also note that other changes in this series change the native backlight
>> drivers to no longer unconditionally register their backlight. Instead
>> these drivers now do this check:
>>
>> if (acpi_video_get_backlight_type(false) != acpi_backlight_native)
>>     return 0; /* bail */
>>
>> which without this patch would have broken these 5/6 "special" quirks.
>>
>> Since I had to look at all the commits adding the quirks anyways, to make
>> sure that I understood the code correctly, I've also added links to
>> the various original bugzillas for these quirks to the new entries.
>>
>> Signed-off-by: Hans de Goede 
>> ---
>>   drivers/acpi/video_detect.c | 53 ++
>>   drivers/platform/x86/acer-wmi.c | 66 -
>>   2 files changed, 53 insertions(+), 66 deletions(-)
>>
>> diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
>> index a514adaec14d..cd51cb0d7821 100644
>> --- a/drivers/acpi/video_detect.c
>> +++ b/drivers/acpi/video_detect.c
>> @@ -147,6 +147,15 @@ static const struct dmi_system_id 
>> video_detect_dmi_table[] = {
>>   DMI_MATCH(DMI_BOARD_NAME, "X360"),
>>   },
>>   },
>> +    {
>> + /* https://bugzilla.redhat.com/show_bug.cgi?id=1128309 */
>> + .callback = video_detect_force_vendor,
>> + /* Acer KAV80 */
>> + .matches = {
>> +    DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
>> +    DMI_MATCH(DMI_PRODUCT_NAME, "KAV80"),
>> +    },
>> +    },
>>   {
>>   .callback = video_detect_force_vendor,
>>   /* Asus UL30VT */
>> @@ -427,6 +436,41 @@ static const struct dmi_system_id 
>> video_detect_dmi_table[] = {
>>   DMI_MATCH(DMI_BOARD_NAME, "JV50"),
>>   },
>>   },
>> +    {
>> + /* https://bugzilla.redhat.com/show_bug.cgi?id=1012674 */
>> + .callback = video_detect_force_native,
>> + /* Acer Aspire 5741 */
>> + .matches = {
>> +    DMI_MATCH(DMI_BOARD_VENDOR, "Acer"),
>> +    DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5741"),
>> +    },
>> +    },
>> +    {
>> + /* https://bugzilla.kernel.org/show_bug.cgi?id=42993 */
>> + .callback = video_detect_force_native,
>> + /* Acer Aspire 5750 */
>> + .matches = {
>> +    DMI_MATCH(DMI_BOARD_VENDOR, "Acer"),
>> +    DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5750"),
>> +    },
>> +    },
>> +    {
>> + /* https://bugzilla.kernel.org/show_bug.cgi?id=42833 */
>> + .callback = video_detect_force_native,
>> + /* Acer Extensa 5235 */
>> + .matches = {
>> +    DMI_MATCH(DMI_BOARD_VENDOR, "Acer"),
>> +    DMI_MATCH(DMI_PRODUCT_NAME, "Extensa 5235"),
>> +    },
>> +    },
>> +    {
>> + .callback = video_detect_force_native,
>> + /* Acer TravelMate 4750 */
>> + .matches = {
>> +    DMI_MATCH(DMI_BOARD_VENDOR, "Acer"),
>> +    DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 

[PATCH v1 4/6] dma-buf: Acquire wait-wound context on attachment

2022-07-16 Thread Dmitry Osipenko
The Intel i915 GPU driver uses a wait-wound mutex to lock multiple GEMs on
attachment to the i915 dma-buf. In order to let all drivers utilize a shared
wait-wound context during attachment in a general way, make the dma-buf core
acquire the ww context internally for the attachment operation and update
the i915 driver to use the importer's ww context instead of the internal one.

From now on all dma-buf exporters shall use the importer's ww context for
the attachment operation.

Signed-off-by: Dmitry Osipenko 
---
 drivers/dma-buf/dma-buf.c |  8 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c|  2 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  6 ++---
 drivers/gpu/drm/i915/i915_gem_evict.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_ww.c| 26 +++
 drivers/gpu/drm/i915/i915_gem_ww.h| 15 +--
 7 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 0ee588276534..37545ecb845a 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -807,6 +807,8 @@ static struct sg_table * __map_dma_buf(struct 
dma_buf_attachment *attach,
  * Optionally this calls &dma_buf_ops.attach to allow device-specific attach
  * functionality.
  *
+ * Exporters shall use ww_ctx acquired by this function.
+ *
  * Returns:
  *
  * A pointer to newly created &dma_buf_attachment on success, or a negative
@@ -822,6 +824,7 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, 
struct device *dev,
void *importer_priv)
 {
struct dma_buf_attachment *attach;
+   struct ww_acquire_ctx ww_ctx;
int ret;
 
if (WARN_ON(!dmabuf || !dev))
@@ -841,7 +844,8 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, 
struct device *dev,
attach->importer_ops = importer_ops;
attach->importer_priv = importer_priv;
 
-   dma_resv_lock(dmabuf->resv, NULL);
+   ww_acquire_init(&ww_ctx, &reservation_ww_class);
+   dma_resv_lock(dmabuf->resv, &ww_ctx);
 
if (dmabuf->ops->attach) {
ret = dmabuf->ops->attach(dmabuf, attach);
@@ -876,11 +880,13 @@ dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, 
struct device *dev,
}
 
dma_resv_unlock(dmabuf->resv);
+   ww_acquire_fini(&ww_ctx);
 
return attach;
 
 err_attach:
dma_resv_unlock(attach->dmabuf->resv);
+   ww_acquire_fini(&ww_ctx);
kfree(attach);
return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index c199bf71c373..9173f0232b16 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -173,7 +173,7 @@ static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf,
if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM))
return -EOPNOTSUPP;
 
-   for_i915_gem_ww(&ww, err, true) {
+   for_i915_dmabuf_ww(&ww, dmabuf, err, true) {
err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM);
if (err)
continue;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 30fe847c6664..ad7d602fc43a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -3409,7 +3409,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
 
-   ww_acquire_done(&eb.ww.ctx);
+   ww_acquire_done(eb.ww.ctx);
eb_capture_stage();
 
out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index e11d82a9f7c3..5ae38f94a5c7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -178,9 +178,9 @@ static inline int __i915_gem_object_lock(struct 
drm_i915_gem_object *obj,
int ret;
 
if (intr)
-   ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL);
+   ret = dma_resv_lock_interruptible(obj->base.resv, ww ? ww->ctx : NULL);
else
-   ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
+   ret = dma_resv_lock(obj->base.resv, ww ? ww->ctx : NULL);
 
if (!ret && ww) {
i915_gem_object_get(obj);
@@ -216,7 +216,7 @@ static inline bool i915_gem_object_trylock(struct 
drm_i915_gem_object *obj,
if (!ww)
return dma_resv_trylock(obj->base.resv);
else
-   return ww_mutex_trylock(&obj->base.resv->lock, &ww->ctx);
+   return ww_mutex_trylock(&obj->base.resv->lock, ww->ctx);
 }
 
 static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
b/drivers/gpu/drm/i915/i915_gem_evict.c
index 

[PATCH v1 1/6] dma-buf: Add _unlocked postfix to function names

2022-07-16 Thread Dmitry Osipenko
Add an _unlocked postfix to the dma-buf API function names in preparation
for moving all non-dynamic dma-buf users over to the dynamic locking
specification. This patch only renames API functions, preparing drivers
for the common locking convention. Later on we will make the "unlocked"
functions take the reservation lock.

Suggested-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/dma-buf/dma-buf.c | 76 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c   |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  4 +-
 drivers/gpu/drm/armada/armada_gem.c   | 14 ++--
 drivers/gpu/drm/drm_gem_cma_helper.c  |  6 +-
 drivers/gpu/drm/drm_gem_shmem_helper.c|  6 +-
 drivers/gpu/drm/drm_prime.c   | 12 +--
 drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c   |  6 +-
 drivers/gpu/drm/exynos/exynos_drm_gem.c   |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c| 12 +--
 .../drm/i915/gem/selftests/i915_gem_dmabuf.c  | 20 ++---
 drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c |  8 +-
 drivers/gpu/drm/tegra/gem.c   | 27 +++
 drivers/infiniband/core/umem_dmabuf.c | 11 +--
 .../common/videobuf2/videobuf2-dma-contig.c   | 15 ++--
 .../media/common/videobuf2/videobuf2-dma-sg.c | 12 +--
 .../common/videobuf2/videobuf2-vmalloc.c  |  6 +-
 .../platform/nvidia/tegra-vde/dmabuf-cache.c  | 12 +--
 drivers/misc/fastrpc.c| 12 +--
 drivers/xen/gntdev-dmabuf.c   | 14 ++--
 include/linux/dma-buf.h   | 34 +
 21 files changed, 161 insertions(+), 152 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 44574fbe7482..d16237a6ffaa 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -795,7 +795,7 @@ static struct sg_table * __map_dma_buf(struct 
dma_buf_attachment *attach,
 }
 
 /**
- * dma_buf_dynamic_attach - Add the device to dma_buf's attachments list
+ * dma_buf_dynamic_attach_unlocked - Add the device to dma_buf's attachments 
list
  * @dmabuf:[in]buffer to attach device to.
  * @dev:   [in]device to be attached.
  * @importer_ops:  [in]importer operations for the attachment
@@ -817,9 +817,9 @@ static struct sg_table * __map_dma_buf(struct 
dma_buf_attachment *attach,
  * indicated with the error code -EBUSY.
  */
 struct dma_buf_attachment *
-dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
-  const struct dma_buf_attach_ops *importer_ops,
-  void *importer_priv)
+dma_buf_dynamic_attach_unlocked(struct dma_buf *dmabuf, struct device *dev,
+   const struct dma_buf_attach_ops *importer_ops,
+   void *importer_priv)
 {
struct dma_buf_attachment *attach;
int ret;
@@ -892,25 +892,25 @@ dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct 
device *dev,
if (dma_buf_is_dynamic(attach->dmabuf))
dma_resv_unlock(attach->dmabuf->resv);
 
-   dma_buf_detach(dmabuf, attach);
+   dma_buf_detach_unlocked(dmabuf, attach);
return ERR_PTR(ret);
 }
-EXPORT_SYMBOL_NS_GPL(dma_buf_dynamic_attach, DMA_BUF);
+EXPORT_SYMBOL_NS_GPL(dma_buf_dynamic_attach_unlocked, DMA_BUF);
 
 /**
- * dma_buf_attach - Wrapper for dma_buf_dynamic_attach
+ * dma_buf_attach_unlocked - Wrapper for dma_buf_dynamic_attach
  * @dmabuf:[in]buffer to attach device to.
  * @dev:   [in]device to be attached.
  *
- * Wrapper to call dma_buf_dynamic_attach() for drivers which still use a 
static
- * mapping.
+ * Wrapper to call dma_buf_dynamic_attach_unlocked() for drivers which still
+ * use a static mapping.
  */
-struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
- struct device *dev)
+struct dma_buf_attachment *dma_buf_attach_unlocked(struct dma_buf *dmabuf,
+  struct device *dev)
 {
-   return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL);
+   return dma_buf_dynamic_attach_unlocked(dmabuf, dev, NULL, NULL);
 }
-EXPORT_SYMBOL_NS_GPL(dma_buf_attach, DMA_BUF);
+EXPORT_SYMBOL_NS_GPL(dma_buf_attach_unlocked, DMA_BUF);
 
 static void __unmap_dma_buf(struct dma_buf_attachment *attach,
struct sg_table *sg_table,
@@ -923,7 +923,7 @@ static void __unmap_dma_buf(struct dma_buf_attachment 
*attach,
 }
 
 /**
- * dma_buf_detach - Remove the given attachment from dmabuf's attachments list
+ * dma_buf_detach_unlocked - Remove the given attachment from dmabuf's 
attachments list
  * @dmabuf:[in]buffer to detach from.
  * @attach:[in]attachment to be detached; is free'd after this call.
  *
@@ -931,7 +931,8 @@ static void __unmap_dma_buf(struct dma_buf_attachment 
*attach,
  *
  * Optionally this calls &dma_buf_ops.detach for device-specific detach.
  */
-void dma_buf_detach(struct 

Re: [PATCH 12/12] drm/amd/display: Rewrite CalculateWriteBackDISPCLK function

2022-07-16 Thread André Almeida
On 14/07/22 13:45, Maíra Canal wrote:
> Based on dml30_CalculateWriteBackDISPCLK, it separates the DISPCLK
> calculation into three variables, making no functional changes, in order
> to make it more readable and better express that three values are being
> compared with dml_max.
> 
> Signed-off-by: Maíra Canal 
> ---
>  .../drm/amd/display/dc/dml/display_mode_vba.c | 31 ---
>  1 file changed, 20 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c 
> b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
> index c5a0a3649e9a..5fc1d16a2e15 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
> @@ -1113,20 +1113,29 @@ double CalculateWriteBackDISPCLK(
>   unsigned int HTotal,
>   unsigned int WritebackChromaLineBufferWidth)
>  {
> - double CalculateWriteBackDISPCLK = 1.01 * PixelClock * dml_max(
> - dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio,
> - dml_max((WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 
> 1) * dml_ceil(WritebackDestinationWidth / 4.0, 1)
> +
> + double DISPCLK_H = 0, DISPCLK_V = 0, DISPCLK_HB = 0;
> + double CalculateWriteBackDISPCLK = 0;
> +

Small nit: no need to initialize those variables to 0 here. They are
getting initialized below anyway.

> + DISPCLK_H = dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio;
> + DISPCLK_V = (WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1) * 
> dml_ceil(WritebackDestinationWidth / 4.0, 1)
>   + dml_ceil(WritebackDestinationWidth / 4.0, 1)) / 
> (double) HTotal + dml_ceil(1.0 / WritebackVRatio, 1)
> - * (dml_ceil(WritebackLumaVTaps / 4.0, 1) + 4.0) / 
> (double) HTotal,
> - dml_ceil(1.0 / WritebackVRatio, 1) * 
> WritebackDestinationWidth / (double) HTotal));
> + * (dml_ceil(WritebackLumaVTaps / 4.0, 1) + 4.0) / 
> (double) HTotal;
> + DISPCLK_HB = dml_ceil(1.0 / WritebackVRatio, 1) * 
> WritebackDestinationWidth / (double) HTotal;
> +
> + CalculateWriteBackDISPCLK = 1.01 * PixelClock * dml_max3(DISPCLK_H, 
> DISPCLK_V, DISPCLK_HB);
> +
>   if (WritebackPixelFormat != dm_444_32) {
> - CalculateWriteBackDISPCLK = dml_max(CalculateWriteBackDISPCLK, 
> 1.01 * PixelClock * dml_max(
> - dml_ceil(WritebackChromaHTaps / 2.0, 1) / (2 * 
> WritebackHRatio),
> - dml_max((WritebackChromaVTaps * dml_ceil(1 / (2 * 
> WritebackVRatio), 1) * dml_ceil(WritebackDestinationWidth / 2.0 / 2.0, 1)
> - + dml_ceil(WritebackDestinationWidth / 2.0 / 
> WritebackChromaLineBufferWidth, 1)) / HTotal
> - + dml_ceil(1 / (2 * WritebackVRatio), 1) * 
> (dml_ceil(WritebackChromaVTaps / 4.0, 1) + 4) / HTotal,
> - dml_ceil(1.0 / (2 * WritebackVRatio), 1) * 
> WritebackDestinationWidth / 2.0 / HTotal)));
> + DISPCLK_H = dml_ceil(WritebackChromaHTaps / 2.0, 1) / (2 * 
> WritebackHRatio);
> + DISPCLK_V = (WritebackChromaVTaps * dml_ceil(1 / (2 * 
> WritebackVRatio), 1) *
> + dml_ceil(WritebackDestinationWidth / 4.0, 1) +
> + dml_ceil(WritebackDestinationWidth / 2.0 / 
> WritebackChromaLineBufferWidth, 1)) / HTotal +
> + dml_ceil(1 / (2 * WritebackVRatio), 1) 
> *(dml_ceil(WritebackChromaVTaps / 4.0, 1) + 4) / HTotal;
> + DISPCLK_HB = dml_ceil(1.0 / (2 * WritebackVRatio), 1) * 
> WritebackDestinationWidth / 2.0 / HTotal;
> + CalculateWriteBackDISPCLK = dml_max(CalculateWriteBackDISPCLK,
> + 1.01 * PixelClock * dml_max3(DISPCLK_H, 
> DISPCLK_V, DISPCLK_HB));
>   }
> +
>   return CalculateWriteBackDISPCLK;
>  }
>