[RFC v1 3/3] drm/i915: Apply border adjustments and enable scaler on the crtc
If the userspace has enabled the border property on a given connector, then relevant adjustments to position and size are made in addition to enabling the scaler on the associated crtc. Similar to how the panel fitter is implemented, the visible area of the crtc is tracked using a struct drm_rect object that is part of the crtc_state. This object is added to the state checker and support for hardware readout is also included. Cc: Ville Syrjälä Cc: Matt Roper Signed-off-by: Vivek Kasireddy --- .../gpu/drm/i915/display/intel_connector.c| 29 +++ .../gpu/drm/i915/display/intel_connector.h| 2 ++ drivers/gpu/drm/i915/display/intel_display.c | 17 --- .../drm/i915/display/intel_display_types.h| 5 drivers/gpu/drm/i915/display/intel_dp.c | 9 ++ drivers/gpu/drm/i915/display/intel_hdmi.c | 9 ++ drivers/gpu/drm/i915/display/skl_scaler.c | 20 ++--- 7 files changed, 83 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_connector.c b/drivers/gpu/drm/i915/display/intel_connector.c index 05185db6635e..8c5dfbb98811 100644 --- a/drivers/gpu/drm/i915/display/intel_connector.c +++ b/drivers/gpu/drm/i915/display/intel_connector.c @@ -219,6 +219,35 @@ static const struct drm_prop_enum_list force_audio_names[] = { { HDMI_AUDIO_ON, "on" }, }; +int intel_connector_apply_border(struct intel_crtc_state *crtc_state, +void *border_data) +{ + const struct drm_display_mode *adjusted_mode = + _state->hw.adjusted_mode; + int width = adjusted_mode->crtc_hdisplay; + int height = adjusted_mode->crtc_vdisplay; + struct drm_rect *border = border_data; + int left = border->x1; + int top = border->y1; + int right = border->x2; + int bottom = border->y2; + + if (left < 0 || top < 0 || right < 0 || bottom < 0) + return -EINVAL; + + if (left + right >= width || top + bottom >= height) + return -EINVAL; + + width -= (left + right); + height -= (top + bottom); + + drm_rect_init(_state->border.dst, + left, top, width, height); + crtc_state->border.enabled = true; + + return 0; +} + 
void intel_attach_force_audio_property(struct drm_connector *connector) { diff --git a/drivers/gpu/drm/i915/display/intel_connector.h b/drivers/gpu/drm/i915/display/intel_connector.h index ab88b57d475b..93106d855452 100644 --- a/drivers/gpu/drm/i915/display/intel_connector.h +++ b/drivers/gpu/drm/i915/display/intel_connector.h @@ -26,6 +26,8 @@ bool intel_connector_get_hw_state(struct intel_connector *connector); enum pipe intel_connector_get_pipe(struct intel_connector *connector); int intel_connector_update_modes(struct drm_connector *connector, const struct drm_edid *drm_edid); +int intel_connector_apply_border(struct intel_crtc_state *crtc_state, +void *border_data); int intel_ddc_get_modes(struct drm_connector *c, struct i2c_adapter *ddc); void intel_attach_force_audio_property(struct drm_connector *connector); void intel_attach_broadcast_rgb_property(struct drm_connector *connector); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 0ea62c278948..af615e576fe7 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1679,9 +1679,12 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true); if (DISPLAY_VER(dev_priv) >= 9) { - const struct drm_rect *dst = _crtc_state->pch_pfit.dst; + const struct drm_rect *dst = new_crtc_state->pch_pfit.enabled ? 
+_crtc_state->pch_pfit.dst : +_crtc_state->border.dst; - if (new_crtc_state->pch_pfit.enabled) + if (new_crtc_state->pch_pfit.enabled || + new_crtc_state->border.enabled) skl_program_crtc_scaler(new_crtc_state, dst); } else { ilk_pfit_enable(new_crtc_state); @@ -5196,6 +5199,9 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_BOOL(pch_pfit.enabled); PIPE_CONF_CHECK_RECT(pch_pfit.dst); + PIPE_CONF_CHECK_BOOL(border.enabled); + PIPE_CONF_CHECK_RECT(border.dst); + PIPE_CONF_CHECK_I(scaler_state.scaler_id); PIPE_CONF_CHECK_I(pixel_rate); @@ -6564,9 +6570,12 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, /* on skylake this is done by detaching scalers */ if (DISPLAY
[RFC v1 1/3] drm/i915: Rename skl_pfit_enable() to skl_program_crtc_scaler()
Given that skl_pfit_enable() mostly enables (or programs) the scaler at the crtc level, it makes sense to change its name to skl_program_crtc_scaler(). Also, the rename and the addition of struct drm_rect * parameter helps if we'd like to use this function to enable the scaler at the crtc level for features other than panel fitting. Cc: Ville Syrjälä Cc: Matt Roper Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_display.c | 14 ++ drivers/gpu/drm/i915/display/skl_scaler.c| 7 ++- drivers/gpu/drm/i915/display/skl_scaler.h| 3 ++- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c5de4561f458..0ea62c278948 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1678,10 +1678,14 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, if (psl_clkgate_wa) glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true); - if (DISPLAY_VER(dev_priv) >= 9) - skl_pfit_enable(new_crtc_state); - else + if (DISPLAY_VER(dev_priv) >= 9) { + const struct drm_rect *dst = _crtc_state->pch_pfit.dst; + + if (new_crtc_state->pch_pfit.enabled) + skl_program_crtc_scaler(new_crtc_state, dst); + } else { ilk_pfit_enable(new_crtc_state); + } /* * On ILK+ LUT must be loaded before the pipe is running but with @@ -6560,8 +6564,10 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, /* on skylake this is done by detaching scalers */ if (DISPLAY_VER(dev_priv) >= 9) { + const struct drm_rect *dst = _crtc_state->pch_pfit.dst; + if (new_crtc_state->pch_pfit.enabled) - skl_pfit_enable(new_crtc_state); + skl_program_crtc_scaler(new_crtc_state, dst); } else if (HAS_PCH_SPLIT(dev_priv)) { if (new_crtc_state->pch_pfit.enabled) ilk_pfit_enable(new_crtc_state); diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index 8a934bada624..67a87cc0411a 
100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -704,13 +704,13 @@ static void skl_scaler_setup_filter(struct drm_i915_private *dev_priv, enum pipe } } -void skl_pfit_enable(const struct intel_crtc_state *crtc_state) +void skl_program_crtc_scaler(const struct intel_crtc_state *crtc_state, +const struct drm_rect *dst) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct intel_crtc_scaler_state *scaler_state = _state->scaler_state; - const struct drm_rect *dst = _state->pch_pfit.dst; u16 uv_rgb_hphase, uv_rgb_vphase; enum pipe pipe = crtc->pipe; int width = drm_rect_width(dst); @@ -722,9 +722,6 @@ void skl_pfit_enable(const struct intel_crtc_state *crtc_state) int id; u32 ps_ctrl; - if (!crtc_state->pch_pfit.enabled) - return; - if (drm_WARN_ON(_priv->drm, crtc_state->scaler_state.scaler_id < 0)) return; diff --git a/drivers/gpu/drm/i915/display/skl_scaler.h b/drivers/gpu/drm/i915/display/skl_scaler.h index 63f93ca03c89..45b9ac3ec779 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.h +++ b/drivers/gpu/drm/i915/display/skl_scaler.h @@ -24,7 +24,8 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state); -void skl_pfit_enable(const struct intel_crtc_state *crtc_state); +void skl_program_crtc_scaler(const struct intel_crtc_state *crtc_state, +const struct drm_rect *dst); void skl_program_plane_scaler(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, -- 2.43.0
[RFC v1 2/3] drm/i915: Attach the Border property to DP and HDMI connectors
The Border property is created as a blob if it doesn't exist and then attached to DP and HDMI connectors. When userspace wants to populate this blob, it is expected that it provides data of size sizeof(struct drm_rect). Cc: Ville Syrjälä Cc: Matt Roper Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_atomic.c | 29 ++- .../gpu/drm/i915/display/intel_connector.c| 20 + .../gpu/drm/i915/display/intel_connector.h| 1 + .../gpu/drm/i915/display/intel_display_core.h | 1 + .../drm/i915/display/intel_display_types.h| 1 + drivers/gpu/drm/i915/display/intel_dp.c | 2 ++ drivers/gpu/drm/i915/display/intel_hdmi.c | 2 ++ 7 files changed, 55 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index ec0d5168b503..76cdcad175cc 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -67,6 +67,9 @@ int intel_digital_connector_atomic_get_property(struct drm_connector *connector, *val = intel_conn_state->force_audio; else if (property == dev_priv->display.properties.broadcast_rgb) *val = intel_conn_state->broadcast_rgb; + else if (property == dev_priv->display.properties.border) + *val = (intel_conn_state->border) ? 
+ intel_conn_state->border->base.id : 0; else { drm_dbg_atomic(_priv->drm, "Unknown property [PROP:%d:%s]\n", @@ -95,6 +98,8 @@ int intel_digital_connector_atomic_set_property(struct drm_connector *connector, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(state); + bool replaced; + int ret; if (property == dev_priv->display.properties.force_audio) { intel_conn_state->force_audio = val; @@ -106,11 +111,32 @@ int intel_digital_connector_atomic_set_property(struct drm_connector *connector, return 0; } + if (property == dev_priv->display.properties.border) { + ret = drm_property_replace_blob_from_id(dev, + _conn_state->border, + val, + sizeof(struct drm_rect), -1, + ); + return ret; + } + drm_dbg_atomic(_priv->drm, "Unknown property [PROP:%d:%s]\n", property->base.id, property->name); return -EINVAL; } +static bool intel_connector_blob_equal(struct drm_property_blob *old_blob, + struct drm_property_blob *new_blob) +{ + if (!old_blob || !new_blob) + return false; + + if (old_blob->length != new_blob->length) + return false; + + return !memcmp(old_blob->data, new_blob->data, old_blob->length); +} + int intel_digital_connector_atomic_check(struct drm_connector *conn, struct drm_atomic_state *state) { @@ -142,7 +168,8 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn, new_conn_state->base.content_type != old_conn_state->base.content_type || new_conn_state->base.scaling_mode != old_conn_state->base.scaling_mode || new_conn_state->base.privacy_screen_sw_state != old_conn_state->base.privacy_screen_sw_state || - !drm_connector_atomic_hdr_metadata_equal(old_state, new_state)) + !drm_connector_atomic_hdr_metadata_equal(old_state, new_state) || + !intel_connector_blob_equal(old_conn_state->border, new_conn_state->border)) crtc_state->mode_changed = true; return 0; diff --git a/drivers/gpu/drm/i915/display/intel_connector.c 
b/drivers/gpu/drm/i915/display/intel_connector.c index c65887870ddc..05185db6635e 100644 --- a/drivers/gpu/drm/i915/display/intel_connector.c +++ b/drivers/gpu/drm/i915/display/intel_connector.c @@ -308,3 +308,23 @@ intel_attach_scaling_mode_property(struct drm_connector *connector) connector->state->scaling_mode = DRM_MODE_SCALE_ASPECT; } + +void +intel_attach_border_property(struct drm_connector *connector) +{ + struct drm_device *dev = connector->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_property *prop; + + prop = dev_priv->display.properties.border; + if (prop == NULL) { + prop = drm_property_create(dev, DRM_MODE_PROP_BLOB, + "Border", 0); + if (prop == NULL) + return; + +
[RFC v1 0/3] drm/i915: Add support for XRandR Border property
Some customers and users have expressed interest in adding borders (or margins) to certain displays in their multi-display configurations. To address this need, this patchset implements the XRandR Border property as defined here: https://cgit.freedesktop.org/xorg/proto/randrproto/tree/randrproto.txt#n2032 --- Patchset overview: Patch 1: Create skl_program_crtc_scaler() to program scaler for crtc Patch 2: Create and attach the Border property to DP and HDMI Patch 3: Implement Border property by enabling crtc scaler This series is tested using the following method: - Run the following xrandr command with different parameters: xrandr --output HDMI-3 --pos 1920x0 --mode 1280x1024 --fb 3840x2160 --scale 2.11x2.11 --set "Border" 150,0,150,0 The following patch was also added to the modesetting driver to implement the Border property: https://gitlab.freedesktop.org/Vivek/xserver/-/commit/62abfc438f0d17fe7f88bf2826c9784c2b36443b Cc: Ville Syrjälä Cc: Matt Roper Cc: Dongwon Kim Vivek Kasireddy (3): drm/i915: Rename skl_pfit_enable() to skl_program_crtc_scaler() drm/i915: Attach the Border property to DP and HDMI connectors drm/i915: Apply border adjustments and enable scaler on the crtc drivers/gpu/drm/i915/display/intel_atomic.c | 29 ++- .../gpu/drm/i915/display/intel_connector.c| 49 +++ .../gpu/drm/i915/display/intel_connector.h| 3 ++ drivers/gpu/drm/i915/display/intel_display.c | 25 -- .../gpu/drm/i915/display/intel_display_core.h | 1 + .../drm/i915/display/intel_display_types.h| 6 +++ drivers/gpu/drm/i915/display/intel_dp.c | 11 + drivers/gpu/drm/i915/display/intel_hdmi.c | 11 + drivers/gpu/drm/i915/display/skl_scaler.c | 27 ++ drivers/gpu/drm/i915/display/skl_scaler.h | 3 +- 10 files changed, 149 insertions(+), 16 deletions(-) -- 2.43.0
[Intel-gfx] [PATCH v2 1/1] drm/i915/tc: Don't default disconnected legacy Type-C ports to TBT mode (v2)
Commit 30e114ef4b16 ("drm/i915/tc: Check for DP-alt, legacy sinks before taking PHY ownership") defaults any disconnected Type-C ports to TBT-alt mode which presents a problem (which could most likely result in a system hang) when userspace forces a modeset on a Type-C port that is wired for legacy HDMI. The following warning is seen when Weston forces a modeset on a disconnected legacy Type-C port (HDMI) on a TGL based Gigabyte system: (https://www.gigabyte.com/Mini-PcBarebone/GB-BSi3-1115G4-rev-10#ov) Missing case (clock == 173000) WARNING: CPU: 1 PID: 438 at drivers/gpu/drm/i915/display/intel_ddi.c:245 icl_ddi_tc_enable_clock.cold+0x16a/0x1cf [i915] CPU: 1 PID: 438 Comm: kworker/u8:3 Tainted: G U W E 5.18.0-rc5-drm-tip+ #20 Hardware name: GIGABYTE GB-BSi3-1115G4/GB-BSi3-1115G4, BIOS F9 10/16/2021 Workqueue: i915_modeset intel_atomic_commit_work [i915] RIP: 0010:icl_ddi_tc_enable_clock.cold+0x16a/0x1cf [i915] Code: 74 6c 7f 10 81 fd d0 78 02 00 74 6d 81 fd b0 1e 04 00 74 70 48 63 d5 48 c7 c6 c0 7b ab c0 48 c7 c7 20 75 ab c0 e8 b8 b5 c1 f0 <0f> 0b 45 31 ed e9 fb fe ff ff 49 63 d5 48 c7 c6 80 7b ab c0 48 c7 RSP: 0018:8882522c78f0 EFLAGS: 00010282 RAX: RBX: 0003 RCX: RDX: 0027 RSI: 0004 RDI: ed104a458f10 RBP: 00011558 R08: b078de4e R09: 888269ca748b R10: ed104d394e91 R11: R12: 888255a318f8 R13: 0002 R14: 888255a3 R15: 88823ef00348 FS: () GS:888269c8() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 7fd7afa42000 CR3: 000255c02004 CR4: 007706e0 PKRU: 5554 Call Trace: intel_ddi_pre_enable.cold+0x96/0x5bf [i915] intel_encoders_pre_enable+0x10e/0x140 [i915] hsw_crtc_enable+0x207/0x99d [i915] ? ilk_crtc_enable.cold+0x2a/0x2a [i915] ? prepare_to_wait_exclusive+0x120/0x120 intel_enable_crtc+0x9a/0xf0 [i915] skl_commit_modeset_enables+0x466/0x820 [i915] ? intel_commit_modeset_enables+0xd0/0xd0 [i915] ? 
intel_mbus_dbox_update+0x1ed/0x250 [i915] intel_atomic_commit_tail+0xf2d/0x3040 [i915] _raw_spin_lock_irqsave+0x87/0xe0 _raw_read_unlock_irqrestore+0x40/0x40 __update_load_avg_cfs_rq+0x70/0x5c0 __i915_sw_fence_complete+0x85/0x3b0 [i915] ? intel_get_crtc_new_encoder+0x190/0x190 [i915] ? sysvec_irq_work+0x13/0x90 ? asm_sysvec_irq_work+0x12/0x20 ? _raw_spin_lock_irq+0x82/0xd0 ? read_word_at_a_time+0xe/0x20 ? process_one_work+0x393/0x690 process_one_work+0x393/0x690 worker_thread+0x2b7/0x620 ? process_one_work+0x690/0x690 kthread+0x15a/0x190 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Continuing with the modeset without setting the DDI clock results in more warnings and eventually a system hang. This does not seem to happen with disconnected legacy or DP-alt DP ports because the clock rate defaults to 162000 (which is a valid TBT clock) during the link training process. Therefore, to fix this issue, this patch avoids setting disconnected Type-C legacy ports to TBT-alt mode which prevents the selection of TBT PLL when a modeset is forced. v2: (Imre) - Retain the check for legacy hotplug live status to account for incorrect VBTs. 
Cc: Imre Deak Cc: José Roberto de Souza Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index b8b822ea3755..6773840f6cc7 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -494,7 +494,8 @@ static void icl_tc_phy_connect(struct intel_digital_port *dig_port, } live_status_mask = tc_port_live_status_mask(dig_port); - if (!(live_status_mask & (BIT(TC_PORT_DP_ALT) | BIT(TC_PORT_LEGACY { + if (!(live_status_mask & (BIT(TC_PORT_DP_ALT) | BIT(TC_PORT_LEGACY))) && + !dig_port->tc_legacy_port) { drm_dbg_kms(>drm, "Port %s: PHY ownership not required (live status %02x)\n", dig_port->tc_port_name, live_status_mask); goto out_set_tbt_alt_mode; -- 2.35.1
[Intel-gfx] [PATCH v2 0/1] drm/i915/tc: Prevent system hang when modesetting disconnected Type-C ports (v2)
The following patch tries to prevent a system hang when a modeset is forced by userspace (Weston) on legacy Type-C ports that are disconnected. This issue was accidentally discovered while trying to modeset one of the HDMI ports on the TGL based Gigabyte system (https://www.gigabyte.com/Mini-PcBarebone/GB-BSi3-1115G4-rev-10#ov) using the following Weston settings (configured via weston.ini): [output] name=HDMI-A-3 mode=173.00 1920 2048 2248 2576 1080 1083 1088 1120 -hsync +vsync force-on=true Entering the name of the HDMI connector incorrectly above (for example HDMI-A-3 (disconnected) instead of HDMI-A-2 (connected)) led to warnings in the log followed by a system hang. To fix this issue, the following patch prevents the selection of TBT PLL for legacy Type-C ports. v2: Drop the second patch (that rejects modesets on disconnected tc ports) from this series. Cc: Imre Deak Cc: José Roberto de Souza Cc: Ville Syrjälä Vivek Kasireddy (1): drm/i915/tc: Don't default disconnected legacy Type-C ports to TBT mode (v2) drivers/gpu/drm/i915/display/intel_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- 2.35.1
[Intel-gfx] [PATCH v1 1/2] drm/i915/tc: Don't default disconnected legacy Type-C ports to TBT mode
Commit 30e114ef4b16 ("drm/i915/tc: Check for DP-alt, legacy sinks before taking PHY ownership") defaults any disconnected Type-C ports to TBT-alt mode which presents a problem (which could most likely result in a system hang) when userspace forces a modeset on a Type-C port that is wired for legacy HDMI. The following warning is seen when Weston forces a modeset on a disconnected legacy Type-C port (HDMI) on a TGL based Gigabyte system: (https://www.gigabyte.com/Mini-PcBarebone/GB-BSi3-1115G4-rev-10#ov) Missing case (clock == 173000) WARNING: CPU: 1 PID: 438 at drivers/gpu/drm/i915/display/intel_ddi.c:245 icl_ddi_tc_enable_clock.cold+0x16a/0x1cf [i915] CPU: 1 PID: 438 Comm: kworker/u8:3 Tainted: G U W E 5.18.0-rc5-drm-tip+ #20 Hardware name: GIGABYTE GB-BSi3-1115G4/GB-BSi3-1115G4, BIOS F9 10/16/2021 Workqueue: i915_modeset intel_atomic_commit_work [i915] RIP: 0010:icl_ddi_tc_enable_clock.cold+0x16a/0x1cf [i915] Code: 74 6c 7f 10 81 fd d0 78 02 00 74 6d 81 fd b0 1e 04 00 74 70 48 63 d5 48 c7 c6 c0 7b ab c0 48 c7 c7 20 75 ab c0 e8 b8 b5 c1 f0 <0f> 0b 45 31 ed e9 fb fe ff ff 49 63 d5 48 c7 c6 80 7b ab c0 48 c7 RSP: 0018:8882522c78f0 EFLAGS: 00010282 RAX: RBX: 0003 RCX: RDX: 0027 RSI: 0004 RDI: ed104a458f10 RBP: 00011558 R08: b078de4e R09: 888269ca748b R10: ed104d394e91 R11: R12: 888255a318f8 R13: 0002 R14: 888255a3 R15: 88823ef00348 FS: () GS:888269c8() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 7fd7afa42000 CR3: 000255c02004 CR4: 007706e0 PKRU: 5554 Call Trace: intel_ddi_pre_enable.cold+0x96/0x5bf [i915] intel_encoders_pre_enable+0x10e/0x140 [i915] hsw_crtc_enable+0x207/0x99d [i915] ? ilk_crtc_enable.cold+0x2a/0x2a [i915] ? prepare_to_wait_exclusive+0x120/0x120 intel_enable_crtc+0x9a/0xf0 [i915] skl_commit_modeset_enables+0x466/0x820 [i915] ? intel_commit_modeset_enables+0xd0/0xd0 [i915] ? 
intel_mbus_dbox_update+0x1ed/0x250 [i915] intel_atomic_commit_tail+0xf2d/0x3040 [i915] _raw_spin_lock_irqsave+0x87/0xe0 _raw_read_unlock_irqrestore+0x40/0x40 __update_load_avg_cfs_rq+0x70/0x5c0 __i915_sw_fence_complete+0x85/0x3b0 [i915] ? intel_get_crtc_new_encoder+0x190/0x190 [i915] ? sysvec_irq_work+0x13/0x90 ? asm_sysvec_irq_work+0x12/0x20 ? _raw_spin_lock_irq+0x82/0xd0 ? read_word_at_a_time+0xe/0x20 ? process_one_work+0x393/0x690 process_one_work+0x393/0x690 worker_thread+0x2b7/0x620 ? process_one_work+0x690/0x690 kthread+0x15a/0x190 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Continuing with the modeset without setting the DDI clock results in more warnings and eventually a system hang. This does not seem to happen with disconnected legacy or DP-alt DP ports because the clock rate defaults to 162000 (which is a valid TBT clock) during the link training process. Therefore, to fix this issue, this patch avoids setting disconnected Type-C legacy ports to TBT-alt mode which prevents the selection of TBT PLL when a modeset is forced. Cc: Imre Deak Cc: José Roberto de Souza Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index b8b822ea3755..0c3304be0602 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -494,7 +494,8 @@ static void icl_tc_phy_connect(struct intel_digital_port *dig_port, } live_status_mask = tc_port_live_status_mask(dig_port); - if (!(live_status_mask & (BIT(TC_PORT_DP_ALT) | BIT(TC_PORT_LEGACY { + if (!(live_status_mask & BIT(TC_PORT_DP_ALT)) && + !dig_port->tc_legacy_port) { drm_dbg_kms(>drm, "Port %s: PHY ownership not required (live status %02x)\n", dig_port->tc_port_name, live_status_mask); goto out_set_tbt_alt_mode; -- 2.35.1
[Intel-gfx] [PATCH v1 2/2] drm/i915: Reject the atomic modeset if an associated Type-C port is disconnected
Although doing a modeset on any disconnected connector might be futile, it can be particularly problematic if the connector is a Type-C port without a sink. Also, the spec only says "Display software must not use a disconnected port" while referring to the Type-C DDI sequence; it does not spell out what happens if such an attempt is made. Experimental results have shown that this can lead to serious issues including a system hang. Therefore, reject the atomic modeset if we detect that the Type-C port is not connected. Cc: Ville Syrjälä Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_atomic.c | 20 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 40da7910f845..40576964b8c1 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -114,6 +114,8 @@ int intel_digital_connector_atomic_set_property(struct drm_connector *connector, int intel_digital_connector_atomic_check(struct drm_connector *conn, struct drm_atomic_state *state) { + struct drm_device *dev = conn->dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_connector_state *new_state = drm_atomic_get_new_connector_state(state, conn); struct intel_digital_connector_state *new_conn_state = @@ -122,6 +124,10 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn, drm_atomic_get_old_connector_state(state, conn); struct intel_digital_connector_state *old_conn_state = to_intel_digital_connector_state(old_state); + struct intel_encoder *encoder = + intel_attached_encoder(to_intel_connector(conn)); + struct intel_digital_port *dig_port = + encoder ? 
enc_to_dig_port(encoder) : NULL; struct drm_crtc_state *crtc_state; intel_hdcp_atomic_check(conn, old_state, new_state); @@ -131,6 +137,20 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn, crtc_state = drm_atomic_get_new_crtc_state(state, new_state->crtc); + /* +* The spec says that it is not safe to use a disconnected Type-C port. +* Therefore, check to see if this connector is connected and reject +* the modeset if there is no sink detected. +*/ + if (dig_port && !dig_port->connected(encoder) && + intel_phy_is_tc(dev_priv, + intel_port_to_phy(dev_priv, encoder->port))) { + drm_dbg_atomic(_priv->drm, + "[CONNECTOR:%d:%s] is not connected; rejecting the modeset\n", + conn->base.id, conn->name); + return -EINVAL; + } + /* * These properties are handled by fastset, and might not end * up in a modeset. -- 2.35.1
[Intel-gfx] [PATCH v1 0/2] drm/i915/tc: Prevent system hang when modesetting disconnected Type-C ports
The following two patches try to prevent a system hang when a modeset is forced by userspace (Weston) on legacy Type-C ports that are disconnected. This issue was accidentally discovered while trying to modeset one of the HDMI ports on the TGL based Gigabyte system (https://www.gigabyte.com/Mini-PcBarebone/GB-BSi3-1115G4-rev-10#ov) using the following Weston settings (configured via weston.ini): [output] name=HDMI-A-3 mode=173.00 1920 2048 2248 2576 1080 1083 1088 1120 -hsync +vsync force-on=true Entering the name of the HDMI connector incorrectly above (for example HDMI-A-3 (disconnected) instead of HDMI-A-2 (connected)) led to warnings in the log followed by a system hang. To fix this issue, the first patch prevents the selection of TBT PLL for legacy Type-C ports and the second one rejects any attempts to modeset disconnected Type-C ports. Cc: Imre Deak Cc: José Roberto de Souza Cc: Ville Syrjälä Vivek Kasireddy (2): drm/i915/tc: Don't default disconnected legacy Type-C ports to TBT mode drm/i915: Reject the atomic modeset if an associated Type-C port is disconnected drivers/gpu/drm/i915/display/intel_atomic.c | 20 drivers/gpu/drm/i915/display/intel_tc.c | 3 ++- 2 files changed, 22 insertions(+), 1 deletion(-) -- 2.35.1
[Intel-gfx] [PATCH v2] drm/i915/gem: Don't evict unmappable VMAs when pinning with PIN_MAPPABLE (v2)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced -- because there is no space for it in the aperture -- and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This whole thing results in a latency of ~10ms and happens every other repaint cycle. Therefore, to fix this issue, we just ensure that the misplaced VMA does not get evicted when we try to pin it with PIN_MAPPABLE -- by returning early if the mappable/fenceable flag is not set. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS (compared to ~59 FPS with this patch). Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. 
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: - Expand the code comments to describe the ping-pong issue. Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 13 - 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9747924cc57b..44741f842852 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -939,8 +939,19 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) return ERR_PTR(-ENOSPC); + /* +* If this misplaced vma is too big (i.e, at-least +* half the size of aperture) or hasn't been pinned +* mappable before, we ignore the misplacement when +* PIN_NONBLOCK is set in order to avoid the ping-pong +* issue described above. In other words, we try to +* avoid the costly operation of unbinding this vma +* from the GGTT and rebinding it back because there +* may not be enough space for this vma in the aperture. +*/ if (flags & PIN_MAPPABLE && - vma->fence_size > ggtt->mappable_end / 2) + (vma->fence_size > ggtt->mappable_end / 2 || + !i915_vma_is_map_and_fenceable(vma))) return ERR_PTR(-ENOSPC); } -- 2.35.1
[Intel-gfx] [PATCH v1] drm/i915/gem: Don't evict unmappable VMAs when pinning with PIN_MAPPABLE
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced -- because there is no space for it in the aperture -- and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This whole thing results in a latency of ~10ms and happens every other repaint cycle. Therefore, to fix this issue, we just ensure that the misplaced VMA does not get evicted when we try to pin it with PIN_MAPPABLE -- by returning early if the mappable/fenceable flag is not set. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS (compared to ~59 FPS with this patch). Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. 
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } Cc: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9747924cc57b..7307c5de1c58 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -939,8 +939,14 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) return ERR_PTR(-ENOSPC); + /* +* If this misplaced vma is too big (i.e, at-least +* half the size of aperture) or just unmappable, +* we would not be able to pin with PIN_MAPPABLE. +*/ if (flags & PIN_MAPPABLE && - vma->fence_size > ggtt->mappable_end / 2) + (vma->fence_size > ggtt->mappable_end / 2 || + !i915_vma_is_map_and_fenceable(vma))) return ERR_PTR(-ENOSPC); } -- 2.35.1
[Intel-gfx] [PATCH v6 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Therefore, to fix this issue, we try to see if there is space to map at least two objects of a given size and return early if there isn't. This would ensure that we do not try with PIN_MAPPABLE for any objects that are too big to map thereby preventing an unnecessary unbind. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. 
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. v3 (Ville): - Count how many fb objects can be fit into the available holes instead of checking for a hole twice the object size. - Take alignment constraints into account. - Limit this large scanout buffer check to >= Gen 11 platforms. v4: - Remove existing heuristic that checks just for size. (Ville) - Return early if we find space to map at-least two objects. (Tvrtko) - Slightly update the commit message. v5: (Tvrtko) - Rename the function to indicate that the object may be too big to map into the aperture. - Account for guard pages while calculating the total size required for the object. - Do not subject all objects to the heuristic check and instead consider objects only of a certain size. - Do the hole walk using the rbtree. - Preserve the existing PIN_NONBLOCK logic. - Drop the PIN_MAPPABLE check while pinning the VMA. v6: (Tvrtko) - Return 0 on success and the specific error code on failure to preserve the existing behavior. v7: (Ville) - Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and size < ggtt->mappable_end / 4 checks. - Drop the redundant check that is based on previous heuristic. v8: - Make sure that we are holding the mutex associated with ggtt vm as we traverse the hole nodes. 
v9: (Tvrtko) - Use mutex_lock_interruptible_nested() instead of mutex_lock(). Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Reviewed-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 128 +++- 1 file changed, 94 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9747924cc57b..e0d731b3f215 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -49,6 +49,7 @@ #include "gem/i915_gem_pm.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_userptr.h" +#include "gem/i915_gem_tiling.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -882,6 +883,96 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static int +i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj, +u64 alignment, u64 flags) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end, start, end; +
[Intel-gfx] [PATCH v6 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v6)
This iterator relies on drm_mm_first_hole() and drm_mm_next_hole() functions to identify suitable holes for an allocation of a given size by efficiently traversing the rbtree associated with the given allocator. It replaces the for loop in drm_mm_insert_node_in_range() and can also be used by drm drivers to quickly identify holes of a certain size within a given range. v2: (Tvrtko) - Prepend a double underscore for the newly exported first/next_hole - s/each_best_hole/each_suitable_hole/g - Mask out DRM_MM_INSERT_ONCE from the mode before calling first/next_hole and elsewhere. v3: (Tvrtko) - Reduce the number of hunks by retaining the "mode" variable name v4: - Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos v5: (Tvrtko) - Fixed another typo: should pass caller_mode instead of mode to the iterator in drm_mm_insert_node_in_range(). v6: (Tvrtko) - Fix the checkpatch warning that warns about precedence issues. Reviewed-by: Tvrtko Ursulin Acked-by: Christian König Suggested-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/drm_mm.c | 32 +++- include/drm/drm_mm.h | 36 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 8257f9d4f619..6ff98a0e4df3 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm *mm, u64 addr, u64 size) return node; } -static struct drm_mm_node * -first_hole(struct drm_mm *mm, - u64 start, u64 end, u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm, hole_stack); } } +EXPORT_SYMBOL(__drm_mm_first_hole); /** * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions @@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ 
DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right) DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left) -static struct drm_mm_node * -next_hole(struct drm_mm *mm, - struct drm_mm_node *node, - u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm, return >hole_stack == >hole_stack ? NULL : node; } } +EXPORT_SYMBOL(__drm_mm_next_hole); /** * drm_mm_reserve_node - insert an pre-initialized node @@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, u64 size, u64 alignment, unsigned long color, u64 range_start, u64 range_end, - enum drm_mm_insert_mode mode) + enum drm_mm_insert_mode caller_mode) { struct drm_mm_node *hole; u64 remainder_mask; - bool once; + enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE; DRM_MM_BUG_ON(range_start > range_end); @@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, if (alignment <= 1) alignment = 0; - once = mode & DRM_MM_INSERT_ONCE; - mode &= ~DRM_MM_INSERT_ONCE; - remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0; - for (hole = first_hole(mm, range_start, range_end, size, mode); -hole; -hole = once ? 
NULL : next_hole(mm, hole, size, mode)) { + drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end, + size, caller_mode) { u64 hole_start = __drm_mm_hole_node_start(hole); u64 hole_end = hole_start + hole->hole_size; u64 adj_start, adj_end; diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index ac33ba1b18bc..896754fa6d69 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct drm_mm_node *hole_node) 1 : 0; \ pos = list_next_entry(pos, hole_stack)) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode); + +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, +
[Intel-gfx] [PATCH v6 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation
The first patch is a drm core patch that replaces the for loop in drm_mm_insert_node_in_range() with the iterator and would not cause any functional changes. The second patch is an i915 driver-specific patch that also uses the iterator but solves a different problem. v2: - Added a new patch to this series to fix a potential NULL dereference. - Fixed a typo associated with the iterator introduced in the drm core patch. - Added locking around the snippet in the i915 patch that traverses the GGTT hole nodes. v3: (Tvrtko) - Replaced mutex_lock with mutex_lock_interruptible_nested() in the i915 patch. v4: (Tvrtko) - Dropped the patch added in v2 as it was deemed unnecessary. v5: (Tvrtko) - Fixed yet another typo in the drm core patch: should have passed caller_mode instead of mode to the iterator. v6: (Tvrtko) - Fixed the checkpatch warning that warns about precedence issues. Cc: Tvrtko Ursulin Cc: Nirmoy Das Cc: Christian König Vivek Kasireddy (2): drm/mm: Add an iterator to optimally walk over holes for an allocation (v6) drm/i915/gem: Don't try to map and fence large scanout buffers (v9) drivers/gpu/drm/drm_mm.c| 32 drivers/gpu/drm/i915/i915_gem.c | 128 +++- include/drm/drm_mm.h| 36 + 3 files changed, 145 insertions(+), 51 deletions(-) -- 2.35.1
[Intel-gfx] [CI 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v5)
This iterator relies on drm_mm_first_hole() and drm_mm_next_hole() functions to identify suitable holes for an allocation of a given size by efficiently traversing the rbtree associated with the given allocator. It replaces the for loop in drm_mm_insert_node_in_range() and can also be used by drm drivers to quickly identify holes of a certain size within a given range. v2: (Tvrtko) - Prepend a double underscore for the newly exported first/next_hole - s/each_best_hole/each_suitable_hole/g - Mask out DRM_MM_INSERT_ONCE from the mode before calling first/next_hole and elsewhere. v3: (Tvrtko) - Reduce the number of hunks by retaining the "mode" variable name v4: - Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos v5: (Tvrtko) - Fixed another typo: should pass caller_mode instead of mode to the iterator in drm_mm_insert_node_in_range(). Reviewed-by: Tvrtko Ursulin Acked-by: Christian König Suggested-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/drm_mm.c | 32 +++- include/drm/drm_mm.h | 36 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 8257f9d4f619..6ff98a0e4df3 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm *mm, u64 addr, u64 size) return node; } -static struct drm_mm_node * -first_hole(struct drm_mm *mm, - u64 start, u64 end, u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm, hole_stack); } } +EXPORT_SYMBOL(__drm_mm_first_hole); /** * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions @@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right) 
DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left) -static struct drm_mm_node * -next_hole(struct drm_mm *mm, - struct drm_mm_node *node, - u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm, return >hole_stack == >hole_stack ? NULL : node; } } +EXPORT_SYMBOL(__drm_mm_next_hole); /** * drm_mm_reserve_node - insert an pre-initialized node @@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, u64 size, u64 alignment, unsigned long color, u64 range_start, u64 range_end, - enum drm_mm_insert_mode mode) + enum drm_mm_insert_mode caller_mode) { struct drm_mm_node *hole; u64 remainder_mask; - bool once; + enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE; DRM_MM_BUG_ON(range_start > range_end); @@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, if (alignment <= 1) alignment = 0; - once = mode & DRM_MM_INSERT_ONCE; - mode &= ~DRM_MM_INSERT_ONCE; - remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0; - for (hole = first_hole(mm, range_start, range_end, size, mode); -hole; -hole = once ? 
NULL : next_hole(mm, hole, size, mode)) { + drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end, + size, caller_mode) { u64 hole_start = __drm_mm_hole_node_start(hole); u64 hole_end = hole_start + hole->hole_size; u64 adj_start, adj_end; diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index ac33ba1b18bc..dff6db627807 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct drm_mm_node *hole_node) 1 : 0; \ pos = list_next_entry(pos, hole_stack)) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode); + +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode); + +/** + * drm_mm_for_each_suitable_hole
[Intel-gfx] [CI 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation
The first patch is a drm core patch that replaces the for loop in drm_mm_insert_node_in_range() with the iterator and would not cause any functional changes. The second patch is an i915 driver-specific patch that also uses the iterator but solves a different problem. v2: - Added a new patch to this series to fix a potential NULL dereference. - Fixed a typo associated with the iterator introduced in the drm core patch. - Added locking around the snippet in the i915 patch that traverses the GGTT hole nodes. v3: (Tvrtko) - Replaced mutex_lock with mutex_lock_interruptible_nested() in the i915 patch. v4: (Tvrtko) - Dropped the patch added in v2 as it was deemed unnecessary. v5: (Tvrtko) - Fixed yet another typo in the drm core patch: should have passed caller_mode instead of mode to the iterator. Cc: Tvrtko Ursulin Cc: Nirmoy Das Cc: Christian König Vivek Kasireddy (2): drm/mm: Add an iterator to optimally walk over holes for an allocation (v5) drm/i915/gem: Don't try to map and fence large scanout buffers (v9) drivers/gpu/drm/drm_mm.c| 32 drivers/gpu/drm/i915/i915_gem.c | 128 +++- include/drm/drm_mm.h| 36 + 3 files changed, 145 insertions(+), 51 deletions(-) -- 2.35.1
[Intel-gfx] [CI 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Therefore, to fix this issue, we try to see if there is space to map at least two objects of a given size and return early if there isn't. This would ensure that we do not try with PIN_MAPPABLE for any objects that are too big to map thereby preventing an unnecessary unbind. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. 
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. v3 (Ville): - Count how many fb objects can be fit into the available holes instead of checking for a hole twice the object size. - Take alignment constraints into account. - Limit this large scanout buffer check to >= Gen 11 platforms. v4: - Remove existing heuristic that checks just for size. (Ville) - Return early if we find space to map at-least two objects. (Tvrtko) - Slightly update the commit message. v5: (Tvrtko) - Rename the function to indicate that the object may be too big to map into the aperture. - Account for guard pages while calculating the total size required for the object. - Do not subject all objects to the heuristic check and instead consider objects only of a certain size. - Do the hole walk using the rbtree. - Preserve the existing PIN_NONBLOCK logic. - Drop the PIN_MAPPABLE check while pinning the VMA. v6: (Tvrtko) - Return 0 on success and the specific error code on failure to preserve the existing behavior. v7: (Ville) - Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and size < ggtt->mappable_end / 4 checks. - Drop the redundant check that is based on previous heuristic. v8: - Make sure that we are holding the mutex associated with ggtt vm as we traverse the hole nodes. 
v9: (Tvrtko) - Use mutex_lock_interruptible_nested() instead of mutex_lock(). Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Reviewed-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 128 +++- 1 file changed, 94 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2e10187cd0a0..4bef9eaa8b2e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -49,6 +49,7 @@ #include "gem/i915_gem_pm.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_userptr.h" +#include "gem/i915_gem_tiling.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -879,6 +880,96 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static int +i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj, +u64 alignment, u64 flags) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end, start, end; +
[Intel-gfx] [CI 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation
The first patch is a drm core patch that replaces the for loop in drm_mm_insert_node_in_range() with the iterator and would not cause any functional changes. The second patch is an i915 driver-specific patch that also uses the iterator but solves a different problem. v2: - Added a new patch to this series to fix a potential NULL dereference. - Fixed a typo associated with the iterator introduced in the drm core patch. - Added locking around the snippet in the i915 patch that traverses the GGTT hole nodes. v3: (Tvrtko) - Replaced mutex_lock with mutex_lock_interruptible_nested() in the i915 patch. v4: (Tvrtko) - Dropped the patch added in v2 as it was deemed unnecessary. v5: (Tvrtko) - Fixed yet another typo in the drm core patch: should have passed caller_mode instead of mode to the iterator. Cc: Tvrtko Ursulin Cc: Nirmoy Das Cc: Christian König Vivek Kasireddy (2): drm/mm: Add an iterator to optimally walk over holes for an allocation (v5) drm/i915/gem: Don't try to map and fence large scanout buffers (v9) drivers/gpu/drm/drm_mm.c| 32 drivers/gpu/drm/i915/i915_gem.c | 128 +++- include/drm/drm_mm.h| 36 + 3 files changed, 145 insertions(+), 51 deletions(-) -- 2.34.1
[Intel-gfx] [CI 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Therefore, to fix this issue, we try to see if there is space to map at least two objects of a given size and return early if there isn't. This would ensure that we do not try with PIN_MAPPABLE for any objects that are too big to map thereby preventing an unnecessary unbind. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. 
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. v3 (Ville): - Count how many fb objects can be fit into the available holes instead of checking for a hole twice the object size. - Take alignment constraints into account. - Limit this large scanout buffer check to >= Gen 11 platforms. v4: - Remove existing heuristic that checks just for size. (Ville) - Return early if we find space to map at-least two objects. (Tvrtko) - Slightly update the commit message. v5: (Tvrtko) - Rename the function to indicate that the object may be too big to map into the aperture. - Account for guard pages while calculating the total size required for the object. - Do not subject all objects to the heuristic check and instead consider objects only of a certain size. - Do the hole walk using the rbtree. - Preserve the existing PIN_NONBLOCK logic. - Drop the PIN_MAPPABLE check while pinning the VMA. v6: (Tvrtko) - Return 0 on success and the specific error code on failure to preserve the existing behavior. v7: (Ville) - Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and size < ggtt->mappable_end / 4 checks. - Drop the redundant check that is based on previous heuristic. v8: - Make sure that we are holding the mutex associated with ggtt vm as we traverse the hole nodes. 
v9: (Tvrtko) - Use mutex_lock_interruptible_nested() instead of mutex_lock(). Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Reviewed-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 128 +++- 1 file changed, 94 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2e10187cd0a0..4bef9eaa8b2e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -49,6 +49,7 @@ #include "gem/i915_gem_pm.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_userptr.h" +#include "gem/i915_gem_tiling.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -879,6 +880,96 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static int +i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj, +u64 alignment, u64 flags) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end, start, end; +
[Intel-gfx] [CI 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v5)
This iterator relies on drm_mm_first_hole() and drm_mm_next_hole() functions to identify suitable holes for an allocation of a given size by efficiently traversing the rbtree associated with the given allocator. It replaces the for loop in drm_mm_insert_node_in_range() and can also be used by drm drivers to quickly identify holes of a certain size within a given range. v2: (Tvrtko) - Prepend a double underscore for the newly exported first/next_hole - s/each_best_hole/each_suitable_hole/g - Mask out DRM_MM_INSERT_ONCE from the mode before calling first/next_hole and elsewhere. v3: (Tvrtko) - Reduce the number of hunks by retaining the "mode" variable name v4: - Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos v5: (Tvrtko) - Fixed another typo: should pass caller_mode instead of mode to the iterator in drm_mm_insert_node_in_range(). Reviewed-by: Tvrtko Ursulin Acked-by: Christian König Suggested-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/drm_mm.c | 32 +++- include/drm/drm_mm.h | 36 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 8257f9d4f619..6ff98a0e4df3 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm *mm, u64 addr, u64 size) return node; } -static struct drm_mm_node * -first_hole(struct drm_mm *mm, - u64 start, u64 end, u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm, hole_stack); } } +EXPORT_SYMBOL(__drm_mm_first_hole); /** * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions @@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right) 
DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left) -static struct drm_mm_node * -next_hole(struct drm_mm *mm, - struct drm_mm_node *node, - u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm, return >hole_stack == >hole_stack ? NULL : node; } } +EXPORT_SYMBOL(__drm_mm_next_hole); /** * drm_mm_reserve_node - insert an pre-initialized node @@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, u64 size, u64 alignment, unsigned long color, u64 range_start, u64 range_end, - enum drm_mm_insert_mode mode) + enum drm_mm_insert_mode caller_mode) { struct drm_mm_node *hole; u64 remainder_mask; - bool once; + enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE; DRM_MM_BUG_ON(range_start > range_end); @@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, if (alignment <= 1) alignment = 0; - once = mode & DRM_MM_INSERT_ONCE; - mode &= ~DRM_MM_INSERT_ONCE; - remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0; - for (hole = first_hole(mm, range_start, range_end, size, mode); -hole; -hole = once ? 
NULL : next_hole(mm, hole, size, mode)) { + drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end, + size, caller_mode) { u64 hole_start = __drm_mm_hole_node_start(hole); u64 hole_end = hole_start + hole->hole_size; u64 adj_start, adj_end; diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index ac33ba1b18bc..dff6db627807 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct drm_mm_node *hole_node) 1 : 0; \ pos = list_next_entry(pos, hole_stack)) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode); + +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode); + +/** + * drm_mm_for_each_suitable_hole
[Intel-gfx] [CI 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation
The first patch is a drm core patch that replaces the for loop in drm_mm_insert_node_in_range() with the iterator and would not cause any functional changes. The second patch is an i915 driver-specific patch that also uses the iterator but solves a different problem. v2: - Added a new patch to this series to fix a potential NULL dereference. - Fixed a typo associated with the iterator introduced in the drm core patch. - Added locking around the snippet in the i915 patch that traverses the GGTT hole nodes. v3: (Tvrtko) - Replaced mutex_lock with mutex_lock_interruptible_nested() in the i915 patch. v4: (Tvrtko) - Dropped the patch added in v2 as it was deemed unnecessary. Cc: Tvrtko Ursulin Cc: Nirmoy Das Cc: Christian König Vivek Kasireddy (2): drm/mm: Add an iterator to optimally walk over holes for an allocation (v4) drm/i915/gem: Don't try to map and fence large scanout buffers (v9) drivers/gpu/drm/drm_mm.c| 32 drivers/gpu/drm/i915/i915_gem.c | 128 +++- include/drm/drm_mm.h| 36 + 3 files changed, 145 insertions(+), 51 deletions(-) -- 2.34.1
[Intel-gfx] [CI 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Therefore, to fix this issue, we try to see if there is space to map at least two objects of a given size and return early if there isn't. This would ensure that we do not try with PIN_MAPPABLE for any objects that are too big to map thereby preventing an unnecessary unbind. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. 
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. v3 (Ville): - Count how many fb objects can be fit into the available holes instead of checking for a hole twice the object size. - Take alignment constraints into account. - Limit this large scanout buffer check to >= Gen 11 platforms. v4: - Remove existing heuristic that checks just for size. (Ville) - Return early if we find space to map at-least two objects. (Tvrtko) - Slightly update the commit message. v5: (Tvrtko) - Rename the function to indicate that the object may be too big to map into the aperture. - Account for guard pages while calculating the total size required for the object. - Do not subject all objects to the heuristic check and instead consider objects only of a certain size. - Do the hole walk using the rbtree. - Preserve the existing PIN_NONBLOCK logic. - Drop the PIN_MAPPABLE check while pinning the VMA. v6: (Tvrtko) - Return 0 on success and the specific error code on failure to preserve the existing behavior. v7: (Ville) - Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and size < ggtt->mappable_end / 4 checks. - Drop the redundant check that is based on previous heuristic. v8: - Make sure that we are holding the mutex associated with ggtt vm as we traverse the hole nodes. 
v9: (Tvrtko) - Use mutex_lock_interruptible_nested() instead of mutex_lock(). Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Reviewed-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 128 +++- 1 file changed, 94 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2e10187cd0a0..4bef9eaa8b2e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -49,6 +49,7 @@ #include "gem/i915_gem_pm.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_userptr.h" +#include "gem/i915_gem_tiling.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -879,6 +880,96 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static int +i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj, +u64 alignment, u64 flags) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end, start, end; +
[Intel-gfx] [CI 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v4)
This iterator relies on drm_mm_first_hole() and drm_mm_next_hole() functions to identify suitable holes for an allocation of a given size by efficiently traversing the rbtree associated with the given allocator. It replaces the for loop in drm_mm_insert_node_in_range() and can also be used by drm drivers to quickly identify holes of a certain size within a given range. v2: (Tvrtko) - Prepend a double underscore for the newly exported first/next_hole - s/each_best_hole/each_suitable_hole/g - Mask out DRM_MM_INSERT_ONCE from the mode before calling first/next_hole and elsewhere. v3: (Tvrtko) - Reduce the number of hunks by retaining the "mode" variable name v4: - Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos Reviewed-by: Tvrtko Ursulin Acked-by: Christian König Suggested-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/drm_mm.c | 32 +++- include/drm/drm_mm.h | 36 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 8257f9d4f619..8efea548ae9f 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm *mm, u64 addr, u64 size) return node; } -static struct drm_mm_node * -first_hole(struct drm_mm *mm, - u64 start, u64 end, u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm, hole_stack); } } +EXPORT_SYMBOL(__drm_mm_first_hole); /** * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions @@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right) DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left) -static struct drm_mm_node * -next_hole(struct drm_mm *mm, - struct drm_mm_node 
*node, - u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm, return >hole_stack == >hole_stack ? NULL : node; } } +EXPORT_SYMBOL(__drm_mm_next_hole); /** * drm_mm_reserve_node - insert an pre-initialized node @@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, u64 size, u64 alignment, unsigned long color, u64 range_start, u64 range_end, - enum drm_mm_insert_mode mode) + enum drm_mm_insert_mode caller_mode) { struct drm_mm_node *hole; u64 remainder_mask; - bool once; + enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE; DRM_MM_BUG_ON(range_start > range_end); @@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, if (alignment <= 1) alignment = 0; - once = mode & DRM_MM_INSERT_ONCE; - mode &= ~DRM_MM_INSERT_ONCE; - remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0; - for (hole = first_hole(mm, range_start, range_end, size, mode); -hole; -hole = once ? 
NULL : next_hole(mm, hole, size, mode)) { + drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end, + size, mode) { u64 hole_start = __drm_mm_hole_node_start(hole); u64 hole_end = hole_start + hole->hole_size; u64 adj_start, adj_end; diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index ac33ba1b18bc..dff6db627807 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct drm_mm_node *hole_node) 1 : 0; \ pos = list_next_entry(pos, hole_stack)) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode); + +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode); + +/** + * drm_mm_for_each_suitable_hole - iterator to optimally walk over all + * holes that can fit an allocation of the given @size. + * @pos: _mm_node used internally to track pr
[Intel-gfx] [CI 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing a less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Therefore, to fix this issue, we try to see if there is space to map at least two objects of a given size and return early if there isn't. This would ensure that we do not try with PIN_MAPPABLE for any objects that are too big to map thereby preventing an unnecessary unbind. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. 
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. v3 (Ville): - Count how many fb objects can be fit into the available holes instead of checking for a hole twice the object size. - Take alignment constraints into account. - Limit this large scanout buffer check to >= Gen 11 platforms. v4: - Remove existing heuristic that checks just for size. (Ville) - Return early if we find space to map at-least two objects. (Tvrtko) - Slightly update the commit message. v5: (Tvrtko) - Rename the function to indicate that the object may be too big to map into the aperture. - Account for guard pages while calculating the total size required for the object. - Do not subject all objects to the heuristic check and instead consider objects only of a certain size. - Do the hole walk using the rbtree. - Preserve the existing PIN_NONBLOCK logic. - Drop the PIN_MAPPABLE check while pinning the VMA. v6: (Tvrtko) - Return 0 on success and the specific error code on failure to preserve the existing behavior. v7: (Ville) - Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and size < ggtt->mappable_end / 4 checks. - Drop the redundant check that is based on previous heuristic. v8: - Make sure that we are holding the mutex associated with ggtt vm as we traverse the hole nodes. 
v9: (Tvrtko) - Use mutex_lock_interruptible_nested() instead of mutex_lock(). Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Reviewed-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 128 +++- 1 file changed, 94 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2e10187cd0a0..4bef9eaa8b2e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -49,6 +49,7 @@ #include "gem/i915_gem_pm.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_userptr.h" +#include "gem/i915_gem_tiling.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -879,6 +880,96 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static int +i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj, +u64 alignment, u64 flags) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end, start, end; +
[Intel-gfx] [CI 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v4)
This iterator relies on drm_mm_first_hole() and drm_mm_next_hole() functions to identify suitable holes for an allocation of a given size by efficiently traversing the rbtree associated with the given allocator. It replaces the for loop in drm_mm_insert_node_in_range() and can also be used by drm drivers to quickly identify holes of a certain size within a given range. v2: (Tvrtko) - Prepend a double underscore for the newly exported first/next_hole - s/each_best_hole/each_suitable_hole/g - Mask out DRM_MM_INSERT_ONCE from the mode before calling first/next_hole and elsewhere. v3: (Tvrtko) - Reduce the number of hunks by retaining the "mode" variable name v4: - Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos Reviewed-by: Tvrtko Ursulin Acked-by: Christian König Suggested-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/drm_mm.c | 32 +++- include/drm/drm_mm.h | 36 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 8257f9d4f619..8efea548ae9f 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm *mm, u64 addr, u64 size) return node; } -static struct drm_mm_node * -first_hole(struct drm_mm *mm, - u64 start, u64 end, u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm, hole_stack); } } +EXPORT_SYMBOL(__drm_mm_first_hole); /** * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions @@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right) DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left) -static struct drm_mm_node * -next_hole(struct drm_mm *mm, - struct drm_mm_node 
*node, - u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm, return >hole_stack == >hole_stack ? NULL : node; } } +EXPORT_SYMBOL(__drm_mm_next_hole); /** * drm_mm_reserve_node - insert an pre-initialized node @@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, u64 size, u64 alignment, unsigned long color, u64 range_start, u64 range_end, - enum drm_mm_insert_mode mode) + enum drm_mm_insert_mode caller_mode) { struct drm_mm_node *hole; u64 remainder_mask; - bool once; + enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE; DRM_MM_BUG_ON(range_start > range_end); @@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, if (alignment <= 1) alignment = 0; - once = mode & DRM_MM_INSERT_ONCE; - mode &= ~DRM_MM_INSERT_ONCE; - remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0; - for (hole = first_hole(mm, range_start, range_end, size, mode); -hole; -hole = once ? 
NULL : next_hole(mm, hole, size, mode)) { + drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end, + size, mode) { u64 hole_start = __drm_mm_hole_node_start(hole); u64 hole_end = hole_start + hole->hole_size; u64 adj_start, adj_end; diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index ac33ba1b18bc..dff6db627807 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct drm_mm_node *hole_node) 1 : 0; \ pos = list_next_entry(pos, hole_stack)) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode); + +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode); + +/** + * drm_mm_for_each_suitable_hole - iterator to optimally walk over all + * holes that can fit an allocation of the given @size. + * @pos: _mm_node used internally to track pr
[Intel-gfx] [CI 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation
The first patch is a drm core patch that replaces the for loop in drm_mm_insert_node_in_range() with the iterator and would not cause any functional changes. The second patch is an i915 driver-specific patch that also uses the iterator but solves a different problem. v2: - Added a new patch to this series to fix a potential NULL dereference. - Fixed a typo associated with the iterator introduced in the drm core patch. - Added locking around the snippet in the i915 patch that traverses the GGTT hole nodes. v3: (Tvrtko) - Replaced mutex_lock with mutex_lock_interruptible_nested() in the i915 patch. v4: (Tvrtko) - Dropped the patch added in v2 as it was deemed unnecessary. Cc: Tvrtko Ursulin Cc: Nirmoy Das Cc: Christian König Vivek Kasireddy (2): drm/mm: Add an iterator to optimally walk over holes for an allocation (v4) drm/i915/gem: Don't try to map and fence large scanout buffers (v9) drivers/gpu/drm/drm_mm.c| 32 drivers/gpu/drm/i915/i915_gem.c | 128 +++- include/drm/drm_mm.h| 36 + 3 files changed, 145 insertions(+), 51 deletions(-) -- 2.34.1
[Intel-gfx] [PATCH v3 3/3] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing a less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Therefore, to fix this issue, we try to see if there is space to map at least two objects of a given size and return early if there isn't. This would ensure that we do not try with PIN_MAPPABLE for any objects that are too big to map thereby preventing an unnecessary unbind. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. 
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. v3 (Ville): - Count how many fb objects can be fit into the available holes instead of checking for a hole twice the object size. - Take alignment constraints into account. - Limit this large scanout buffer check to >= Gen 11 platforms. v4: - Remove existing heuristic that checks just for size. (Ville) - Return early if we find space to map at-least two objects. (Tvrtko) - Slightly update the commit message. v5: (Tvrtko) - Rename the function to indicate that the object may be too big to map into the aperture. - Account for guard pages while calculating the total size required for the object. - Do not subject all objects to the heuristic check and instead consider objects only of a certain size. - Do the hole walk using the rbtree. - Preserve the existing PIN_NONBLOCK logic. - Drop the PIN_MAPPABLE check while pinning the VMA. v6: (Tvrtko) - Return 0 on success and the specific error code on failure to preserve the existing behavior. v7: (Ville) - Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and size < ggtt->mappable_end / 4 checks. - Drop the redundant check that is based on previous heuristic. v8: - Make sure that we are holding the mutex associated with ggtt vm as we traverse the hole nodes. 
v9: (Tvrtko) - Use mutex_lock_interruptible_nested() instead of mutex_lock(). Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Reviewed-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 128 +++- 1 file changed, 94 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2e10187cd0a0..4bef9eaa8b2e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -49,6 +49,7 @@ #include "gem/i915_gem_pm.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_userptr.h" +#include "gem/i915_gem_tiling.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -879,6 +880,96 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static int +i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj, +u64 alignment, u64 flags) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end, start, end; +
[Intel-gfx] [PATCH v3 1/3] drm/mm: Ensure that the entry is not NULL before extracting rb_node
While looking for the next hole suitable for an allocation, make sure that the DECLARE_NEXT_HOLE_ADDR macro is using a valid node before it extracts the rb_node from it, even though an invalid (NULL) entry is highly unlikely. Cc: Tvrtko Ursulin Cc: Christian König Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/drm_mm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 8257f9d4f619..499d8874e4ed 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -389,11 +389,12 @@ first_hole(struct drm_mm *mm, #define DECLARE_NEXT_HOLE_ADDR(name, first, last) \ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ { \ - struct rb_node *parent, *node = >rb_hole_addr; \ + struct rb_node *parent, *node; \ \ - if (!entry || RB_EMPTY_NODE(node)) \ + if (!entry || RB_EMPTY_NODE(>rb_hole_addr)) \ return NULL;\ \ + node = >rb_hole_addr;\ if (usable_hole_addr(node->first, size)) { \ node = node->first; \ while (usable_hole_addr(node->last, size)) \ -- 2.34.1
[Intel-gfx] [PATCH v3 2/3] drm/mm: Add an iterator to optimally walk over holes for an allocation (v4)
This iterator relies on drm_mm_first_hole() and drm_mm_next_hole() functions to identify suitable holes for an allocation of a given size by efficiently traversing the rbtree associated with the given allocator. It replaces the for loop in drm_mm_insert_node_in_range() and can also be used by drm drivers to quickly identify holes of a certain size within a given range. v2: (Tvrtko) - Prepend a double underscore for the newly exported first/next_hole - s/each_best_hole/each_suitable_hole/g - Mask out DRM_MM_INSERT_ONCE from the mode before calling first/next_hole and elsewhere. v3: (Tvrtko) - Reduce the number of hunks by retaining the "mode" variable name v4: - Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos Reviewed-by: Tvrtko Ursulin Acked-by: Christian König Suggested-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/drm_mm.c | 32 +++- include/drm/drm_mm.h | 36 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 499d8874e4ed..f5339610361c 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm *mm, u64 addr, u64 size) return node; } -static struct drm_mm_node * -first_hole(struct drm_mm *mm, - u64 start, u64 end, u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm, hole_stack); } } +EXPORT_SYMBOL(__drm_mm_first_hole); /** * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions @@ -411,11 +412,11 @@ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right) DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left) -static struct drm_mm_node * -next_hole(struct drm_mm *mm, - struct drm_mm_node 
*node, - u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -433,6 +434,7 @@ next_hole(struct drm_mm *mm, return >hole_stack == >hole_stack ? NULL : node; } } +EXPORT_SYMBOL(__drm_mm_next_hole); /** * drm_mm_reserve_node - insert an pre-initialized node @@ -517,11 +519,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, u64 size, u64 alignment, unsigned long color, u64 range_start, u64 range_end, - enum drm_mm_insert_mode mode) + enum drm_mm_insert_mode caller_mode) { struct drm_mm_node *hole; u64 remainder_mask; - bool once; + enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE; DRM_MM_BUG_ON(range_start > range_end); @@ -534,13 +536,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, if (alignment <= 1) alignment = 0; - once = mode & DRM_MM_INSERT_ONCE; - mode &= ~DRM_MM_INSERT_ONCE; - remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0; - for (hole = first_hole(mm, range_start, range_end, size, mode); -hole; -hole = once ? 
NULL : next_hole(mm, hole, size, mode)) { + drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end, + size, mode) { u64 hole_start = __drm_mm_hole_node_start(hole); u64 hole_end = hole_start + hole->hole_size; u64 adj_start, adj_end; diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index ac33ba1b18bc..dff6db627807 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct drm_mm_node *hole_node) 1 : 0; \ pos = list_next_entry(pos, hole_stack)) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode); + +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode); + +/** + * drm_mm_for_each_suitable_hole - iterator to optimally walk over all + * holes that can fit an allocation of the given @size. + * @pos: _mm_node used internally to track pr
[Intel-gfx] [PATCH v3 0/3] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation
The first patch is a drm core patch that replaces the for loop in drm_mm_insert_node_in_range() with the iterator and would not cause any functional changes. The second patch is an i915 driver-specific patch that also uses the iterator but solves a different problem. v2: - Added a new patch to this series to fix a potential NULL dereference. - Fixed a typo associated with the iterator introduced in the drm core patch. - Added locking around the snippet in the i915 patch that traverses the GGTT hole nodes. v3: (Tvrtko) - Replaced mutex_lock with mutex_lock_interruptible_nested() in the i915 patch. Cc: Tvrtko Ursulin Cc: Nirmoy Das Cc: Christian König Vivek Kasireddy (3): drm/mm: Ensure that the entry is not NULL before extracting rb_node drm/mm: Add an iterator to optimally walk over holes for an allocation (v4) drm/i915/gem: Don't try to map and fence large scanout buffers (v9) drivers/gpu/drm/drm_mm.c| 37 + drivers/gpu/drm/i915/i915_gem.c | 128 +++- include/drm/drm_mm.h| 36 + 3 files changed, 148 insertions(+), 53 deletions(-) -- 2.34.1
[Intel-gfx] [PATCH v2 3/3] drm/i915/gem: Don't try to map and fence large scanout buffers (v8)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing a less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Therefore, to fix this issue, we try to see if there is space to map at least two objects of a given size and return early if there isn't. This would ensure that we do not try with PIN_MAPPABLE for any objects that are too big to map thereby preventing an unnecessary unbind. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. 
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. v3 (Ville): - Count how many fb objects can be fit into the available holes instead of checking for a hole twice the object size. - Take alignment constraints into account. - Limit this large scanout buffer check to >= Gen 11 platforms. v4: - Remove existing heuristic that checks just for size. (Ville) - Return early if we find space to map at-least two objects. (Tvrtko) - Slightly update the commit message. v5: (Tvrtko) - Rename the function to indicate that the object may be too big to map into the aperture. - Account for guard pages while calculating the total size required for the object. - Do not subject all objects to the heuristic check and instead consider objects only of a certain size. - Do the hole walk using the rbtree. - Preserve the existing PIN_NONBLOCK logic. - Drop the PIN_MAPPABLE check while pinning the VMA. v6: (Tvrtko) - Return 0 on success and the specific error code on failure to preserve the existing behavior. v7: (Ville) - Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and size < ggtt->mappable_end / 4 checks. - Drop the redundant check that is based on previous heuristic. v8: - Make sure that we are holding the mutex associated with ggtt vm as we traverse the hole nodes. 
Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Reviewed-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 124 +++- 1 file changed, 90 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2e10187cd0a0..db00e71ce328 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -49,6 +49,7 @@ #include "gem/i915_gem_pm.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_userptr.h" +#include "gem/i915_gem_tiling.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -879,6 +880,92 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static int +i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj, +u64 alignment, u64 flags) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end, start, end; + u64 fence_size, fence_alignment; + unsigned int count = 0; + + /* +* If the requi
[Intel-gfx] [PATCH v2 2/3] drm/mm: Add an iterator to optimally walk over holes for an allocation (v4)
This iterator relies on drm_mm_first_hole() and drm_mm_next_hole() functions to identify suitable holes for an allocation of a given size by efficiently traversing the rbtree associated with the given allocator. It replaces the for loop in drm_mm_insert_node_in_range() and can also be used by drm drivers to quickly identify holes of a certain size within a given range. v2: (Tvrtko) - Prepend a double underscore for the newly exported first/next_hole - s/each_best_hole/each_suitable_hole/g - Mask out DRM_MM_INSERT_ONCE from the mode before calling first/next_hole and elsewhere. v3: (Tvrtko) - Reduce the number of hunks by retaining the "mode" variable name v4: - Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos Reviewed-by: Tvrtko Ursulin Acked-by: Christian König Suggested-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/drm_mm.c | 32 +++- include/drm/drm_mm.h | 36 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 499d8874e4ed..f5339610361c 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm *mm, u64 addr, u64 size) return node; } -static struct drm_mm_node * -first_hole(struct drm_mm *mm, - u64 start, u64 end, u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm, hole_stack); } } +EXPORT_SYMBOL(__drm_mm_first_hole); /** * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions @@ -411,11 +412,11 @@ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right) DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left) -static struct drm_mm_node * -next_hole(struct drm_mm *mm, - struct drm_mm_node 
*node, - u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -433,6 +434,7 @@ next_hole(struct drm_mm *mm, return >hole_stack == >hole_stack ? NULL : node; } } +EXPORT_SYMBOL(__drm_mm_next_hole); /** * drm_mm_reserve_node - insert an pre-initialized node @@ -517,11 +519,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, u64 size, u64 alignment, unsigned long color, u64 range_start, u64 range_end, - enum drm_mm_insert_mode mode) + enum drm_mm_insert_mode caller_mode) { struct drm_mm_node *hole; u64 remainder_mask; - bool once; + enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE; DRM_MM_BUG_ON(range_start > range_end); @@ -534,13 +536,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, if (alignment <= 1) alignment = 0; - once = mode & DRM_MM_INSERT_ONCE; - mode &= ~DRM_MM_INSERT_ONCE; - remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0; - for (hole = first_hole(mm, range_start, range_end, size, mode); -hole; -hole = once ? 
NULL : next_hole(mm, hole, size, mode)) { + drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end, + size, mode) { u64 hole_start = __drm_mm_hole_node_start(hole); u64 hole_end = hole_start + hole->hole_size; u64 adj_start, adj_end; diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index ac33ba1b18bc..dff6db627807 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct drm_mm_node *hole_node) 1 : 0; \ pos = list_next_entry(pos, hole_stack)) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode); + +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode); + +/** + * drm_mm_for_each_suitable_hole - iterator to optimally walk over all + * holes that can fit an allocation of the given @size. + * @pos: _mm_node used internally to track pr
[Intel-gfx] [PATCH v2 1/3] drm/mm: Ensure that the entry is not NULL before extracting rb_node
While looking for the next hole suitable for an allocation, make sure that the DECLARE_NEXT_HOLE_ADDR macro is using a valid node before it extracts the rb_node from it, even though a NULL entry is highly unlikely. Cc: Tvrtko Ursulin Cc: Christian König Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/drm_mm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 8257f9d4f619..499d8874e4ed 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -389,11 +389,12 @@ first_hole(struct drm_mm *mm, #define DECLARE_NEXT_HOLE_ADDR(name, first, last) \ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ { \ - struct rb_node *parent, *node = &entry->rb_hole_addr; \ + struct rb_node *parent, *node; \ \ - if (!entry || RB_EMPTY_NODE(node)) \ + if (!entry || RB_EMPTY_NODE(&entry->rb_hole_addr)) \ return NULL;\ \ + node = &entry->rb_hole_addr;\ if (usable_hole_addr(node->first, size)) { \ node = node->first; \ while (usable_hole_addr(node->last, size)) \ -- 2.34.1
[Intel-gfx] [PATCH v2 0/3] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation
The first patch is a drm core patch that replaces the for loop in drm_mm_insert_node_in_range() with the iterator and does not cause any functional changes. The second patch is an i915 driver-specific patch that also uses the iterator but solves a different problem. v2: - Added a new patch to this series to fix a potential NULL dereference. - Fixed a typo associated with the iterator introduced in the drm core patch. - Added locking around the snippet in the i915 patch that traverses the GGTT hole nodes. Cc: Tvrtko Ursulin Cc: Nirmoy Das Cc: Christian König Vivek Kasireddy (3): drm/mm: Ensure that the entry is not NULL before extracting rb_node drm/mm: Add an iterator to optimally walk over holes for an allocation (v4) drm/i915/gem: Don't try to map and fence large scanout buffers (v8) drivers/gpu/drm/drm_mm.c| 37 +- drivers/gpu/drm/i915/i915_gem.c | 124 +++- include/drm/drm_mm.h| 36 ++ 3 files changed, 144 insertions(+), 53 deletions(-) -- 2.34.1
[Intel-gfx] [PATCH 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v3)
This iterator relies on drm_mm_first_hole() and drm_mm_next_hole() functions to identify suitable holes for an allocation of a given size by efficiently traversing the rbtree associated with the given allocator. It replaces the for loop in drm_mm_insert_node_in_range() and can also be used by drm drivers to quickly identify holes of a certain size within a given range. v2: (Tvrtko) - Prepend a double underscore for the newly exported first/next_hole - s/each_best_hole/each_suitable_hole/g - Mask out DRM_MM_INSERT_ONCE from the mode before calling first/next_hole and elsewhere. v3: (Tvrtko) - Reduce the number of hunks by retaining the "mode" variable name Cc: Christian König Reviewed-by: Tvrtko Ursulin Suggested-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/drm_mm.c | 32 +++- include/drm/drm_mm.h | 36 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 8257f9d4f619..8efea548ae9f 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm *mm, u64 addr, u64 size) return node; } -static struct drm_mm_node * -first_hole(struct drm_mm *mm, - u64 start, u64 end, u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm, hole_stack); } } +EXPORT_SYMBOL(__drm_mm_first_hole); /** * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions @@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right) DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left) -static struct drm_mm_node * -next_hole(struct drm_mm *mm, - struct drm_mm_node *node, - u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * 
+__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm, return >hole_stack == >hole_stack ? NULL : node; } } +EXPORT_SYMBOL(__drm_mm_next_hole); /** * drm_mm_reserve_node - insert an pre-initialized node @@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, u64 size, u64 alignment, unsigned long color, u64 range_start, u64 range_end, - enum drm_mm_insert_mode mode) + enum drm_mm_insert_mode caller_mode) { struct drm_mm_node *hole; u64 remainder_mask; - bool once; + enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE; DRM_MM_BUG_ON(range_start > range_end); @@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, if (alignment <= 1) alignment = 0; - once = mode & DRM_MM_INSERT_ONCE; - mode &= ~DRM_MM_INSERT_ONCE; - remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0; - for (hole = first_hole(mm, range_start, range_end, size, mode); -hole; -hole = once ? 
NULL : next_hole(mm, hole, size, mode)) { + drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end, + size, mode) { u64 hole_start = __drm_mm_hole_node_start(hole); u64 hole_end = hole_start + hole->hole_size; u64 adj_start, adj_end; diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index ac33ba1b18bc..777f659f9692 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct drm_mm_node *hole_node) 1 : 0; \ pos = list_next_entry(pos, hole_stack)) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode); + +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode); + +/** + * drm_mm_for_each_suitable_hole - iterator to optimally walk over all + * holes that can fit an allocation of the given @size. + * @pos: _mm_node used internally to track progress + * @mm: _mm allocator to walk + * @range_start: start of
[Intel-gfx] [PATCH 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v7)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks, thereby producing a less than optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called, which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Therefore, to fix this issue, we try to see if there is space to map at least two objects of a given size and return early if there isn't. This would ensure that we do not try with PIN_MAPPABLE for any objects that are too big to map, thereby preventing unnecessary unbinds. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame.
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. v3 (Ville): - Count how many fb objects can be fit into the available holes instead of checking for a hole twice the object size. - Take alignment constraints into account. - Limit this large scanout buffer check to >= Gen 11 platforms. v4: - Remove existing heuristic that checks just for size. (Ville) - Return early if we find space to map at-least two objects. (Tvrtko) - Slightly update the commit message. v5: (Tvrtko) - Rename the function to indicate that the object may be too big to map into the aperture. - Account for guard pages while calculating the total size required for the object. - Do not subject all objects to the heuristic check and instead consider objects only of a certain size. - Do the hole walk using the rbtree. - Preserve the existing PIN_NONBLOCK logic. - Drop the PIN_MAPPABLE check while pinning the VMA. v6: (Tvrtko) - Return 0 on success and the specific error code on failure to preserve the existing behavior. v7: (Ville) - Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and size < ggtt->mappable_end / 4 checks. - Drop the redundant check that is based on previous heuristic. 
Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Reviewed-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 120 +++- 1 file changed, 86 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2e10187cd0a0..260cd3961ca1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -49,6 +49,7 @@ #include "gem/i915_gem_pm.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_userptr.h" +#include "gem/i915_gem_tiling.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -879,6 +880,88 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static int +i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj, +u64 alignment, u64 flags) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end, start, end; + u64 fence_size, fence_alignment; + unsigned int count = 0; + + /* +* If the required space is larger than the available +* aperture, we will not able to find a slot for the +
[Intel-gfx] [PATCH 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v3)
This iterator relies on drm_mm_first_hole() and drm_mm_next_hole() functions to identify suitable holes for an allocation of a given size by efficiently traversing the rbtree associated with the given allocator. It replaces the for loop in drm_mm_insert_node_in_range() and can also be used by drm drivers to quickly identify holes of a certain size within a given range. v2: (Tvrtko) - Prepend a double underscore for the newly exported first/next_hole - s/each_best_hole/each_suitable_hole/g - Mask out DRM_MM_INSERT_ONCE from the mode before calling first/next_hole and elsewhere. v3: (Tvrtko) - Reduce the number of hunks by retaining the "mode" variable name Reviewed-by: Tvrtko Ursulin Suggested-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/drm_mm.c | 32 +++- include/drm/drm_mm.h | 36 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 8257f9d4f619..8efea548ae9f 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm *mm, u64 addr, u64 size) return node; } -static struct drm_mm_node * -first_hole(struct drm_mm *mm, - u64 start, u64 end, u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm, hole_stack); } } +EXPORT_SYMBOL(__drm_mm_first_hole); /** * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions @@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right) DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left) -static struct drm_mm_node * -next_hole(struct drm_mm *mm, - struct drm_mm_node *node, - u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_next_hole(struct 
drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm, return >hole_stack == >hole_stack ? NULL : node; } } +EXPORT_SYMBOL(__drm_mm_next_hole); /** * drm_mm_reserve_node - insert an pre-initialized node @@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, u64 size, u64 alignment, unsigned long color, u64 range_start, u64 range_end, - enum drm_mm_insert_mode mode) + enum drm_mm_insert_mode caller_mode) { struct drm_mm_node *hole; u64 remainder_mask; - bool once; + enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE; DRM_MM_BUG_ON(range_start > range_end); @@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, if (alignment <= 1) alignment = 0; - once = mode & DRM_MM_INSERT_ONCE; - mode &= ~DRM_MM_INSERT_ONCE; - remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0; - for (hole = first_hole(mm, range_start, range_end, size, mode); -hole; -hole = once ? NULL : next_hole(mm, hole, size, mode)) { + drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end, + size, mode) { u64 hole_start = __drm_mm_hole_node_start(hole); u64 hole_end = hole_start + hole->hole_size; u64 adj_start, adj_end; diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index ac33ba1b18bc..777f659f9692 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct drm_mm_node *hole_node) 1 : 0; \ pos = list_next_entry(pos, hole_stack)) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode); + +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode); + +/** + * drm_mm_for_each_suitable_hole - iterator to optimally walk over all + * holes that can fit an allocation of the given @size. 
+ * @pos: _mm_node used internally to track progress + * @mm: _mm allocator to walk + * @range_start: start of the allowed range for the allocati
[Intel-gfx] [PATCH 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation
The first patch is a drm core patch that replaces the for loop in drm_mm_insert_node_in_range() with the iterator and does not cause any functional changes. The second patch is an i915 driver-specific patch that also uses the iterator but solves a different problem. Cc: Tvrtko Ursulin Cc: Nirmoy Das Cc: Christian König Vivek Kasireddy (2): drm/mm: Add an iterator to optimally walk over holes for an allocation (v3) drm/i915/gem: Don't try to map and fence large scanout buffers (v7) drivers/gpu/drm/drm_mm.c| 32 - drivers/gpu/drm/i915/i915_gem.c | 120 +++- include/drm/drm_mm.h| 36 ++ 3 files changed, 137 insertions(+), 51 deletions(-) -- 2.34.1
[Intel-gfx] [PATCH 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v6)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks, thereby producing a less than optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called, which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Therefore, to fix this issue, we try to see if there is space to map at least two objects of a given size and return early if there isn't. This would ensure that we do not try with PIN_MAPPABLE for any objects that are too big to map, thereby preventing unnecessary unbinds. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame.
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. v3 (Ville): - Count how many fb objects can be fit into the available holes instead of checking for a hole twice the object size. - Take alignment constraints into account. - Limit this large scanout buffer check to >= Gen 11 platforms. v4: - Remove existing heuristic that checks just for size. (Ville) - Return early if we find space to map at-least two objects. (Tvrtko) - Slightly update the commit message. v5: (Tvrtko) - Rename the function to indicate that the object may be too big to map into the aperture. - Account for guard pages while calculating the total size required for the object. - Do not subject all objects to the heuristic check and instead consider objects only of a certain size. - Do the hole walk using the rbtree. - Preserve the existing PIN_NONBLOCK logic. - Drop the PIN_MAPPABLE check while pinning the VMA. v6: (Tvrtko) - Return 0 on success and the specific error code on failure to preserve the existing behavior. 
Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 120 1 file changed, 90 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e3a2c2a0e156..39f0d17550c3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,6 +46,7 @@ #include "gem/i915_gem_mman.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_userptr.h" +#include "gem/i915_gem_tiling.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -876,6 +877,92 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static int +i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj, +u64 alignment, u64 flags) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end, start, end; + u64 fence_size, fence_alignment; + unsigned int count = 0; + + /* +* If the required space is larger than the available +* aperture, we will not able to find a slot for the +* object and unbinding the object now will be in +* vain. Worse, doing so may cause us to ping-pong +* the object in and out of the Global GTT and +* waste a lot of c
[Intel-gfx] [PATCH 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v2)
This iterator relies on drm_mm_first_hole() and drm_mm_next_hole() functions to identify suitable holes for an allocation of a given size by efficiently traversing the rbtree associated with the given allocator. It replaces the for loop in drm_mm_insert_node_in_range() and can also be used by drm drivers to quickly identify holes of a certain size within a given range. v2: (Tvrtko) - Prepend a double underscore for the newly exported first/next_hole - s/each_best_hole/each_suitable_hole/g - Mask out DRM_MM_INSERT_ONCE from the mode before calling first/next_hole and elsewhere. Suggested-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/drm_mm.c | 38 ++ include/drm/drm_mm.h | 36 2 files changed, 54 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 8257f9d4f619..b6da1dffcfcb 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm *mm, u64 addr, u64 size) return node; } -static struct drm_mm_node * -first_hole(struct drm_mm *mm, - u64 start, u64 end, u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm, hole_stack); } } +EXPORT_SYMBOL(__drm_mm_first_hole); /** * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions @@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right) DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left) -static struct drm_mm_node * -next_hole(struct drm_mm *mm, - struct drm_mm_node *node, - u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +__drm_mm_next_hole(struct drm_mm *mm, + struct drm_mm_node *node, + u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { 
default: @@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm, return >hole_stack == >hole_stack ? NULL : node; } } +EXPORT_SYMBOL(__drm_mm_next_hole); /** * drm_mm_reserve_node - insert an pre-initialized node @@ -520,7 +522,6 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, { struct drm_mm_node *hole; u64 remainder_mask; - bool once; DRM_MM_BUG_ON(range_start > range_end); @@ -533,22 +534,19 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, if (alignment <= 1) alignment = 0; - once = mode & DRM_MM_INSERT_ONCE; - mode &= ~DRM_MM_INSERT_ONCE; - remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0; - for (hole = first_hole(mm, range_start, range_end, size, mode); -hole; -hole = once ? NULL : next_hole(mm, hole, size, mode)) { + drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end, + size, mode) { u64 hole_start = __drm_mm_hole_node_start(hole); u64 hole_end = hole_start + hole->hole_size; u64 adj_start, adj_end; u64 col_start, col_end; + enum drm_mm_insert_mode placement = mode & ~DRM_MM_INSERT_ONCE; - if (mode == DRM_MM_INSERT_LOW && hole_start >= range_end) + if (placement == DRM_MM_INSERT_LOW && hole_start >= range_end) break; - if (mode == DRM_MM_INSERT_HIGH && hole_end <= range_start) + if (placement == DRM_MM_INSERT_HIGH && hole_end <= range_start) break; col_start = hole_start; @@ -562,7 +560,7 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, if (adj_end <= adj_start || adj_end - adj_start < size) continue; - if (mode == DRM_MM_INSERT_HIGH) + if (placement == DRM_MM_INSERT_HIGH) adj_start = adj_end - size; if (alignment) { @@ -574,7 +572,7 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, div64_u64_rem(adj_start, alignment, ); if (rem) { adj_start -= rem; - if (mode != DRM_MM_INSERT_HIGH) + if (placement != DRM_MM_INSERT_HIGH) adj_start += alignment; if (adj_start < max(col
[Intel-gfx] [PATCH 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v5)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks, thereby producing a less than optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called, which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Therefore, to fix this issue, we try to see if there is space to map at least two objects of a given size and return early if there isn't. This would ensure that we do not try with PIN_MAPPABLE for any objects that are too big to map, thereby preventing unnecessary unbinds. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame.
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. v3 (Ville): - Count how many fb objects can be fit into the available holes instead of checking for a hole twice the object size. - Take alignment constraints into account. - Limit this large scanout buffer check to >= Gen 11 platforms. v4: - Remove existing heuristic that checks just for size. (Ville) - Return early if we find space to map at-least two objects. (Tvrtko) - Slightly update the commit message. v5: (Tvrtko) - Rename the function to indicate that the object may be too big to map into the aperture. - Account for guard pages while calculating the total size required for the object. - Do not subject all objects to the heuristic check and instead consider objects only of a certain size. - Do the hole walk using the rbtree. - Preserve the existing PIN_NONBLOCK logic. - Drop the PIN_MAPPABLE check while pinning the VMA. 
Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 117 1 file changed, 88 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e3a2c2a0e156..752fec2b4c60 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,6 +46,7 @@ #include "gem/i915_gem_mman.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_userptr.h" +#include "gem/i915_gem_tiling.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -876,6 +877,92 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static bool +i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj, +u64 alignment, u64 flags) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end, start, end; + u64 fence_size, fence_alignment; + unsigned int count = 0; + + /* +* If the required space is larger than the available +* aperture, we will not able to find a slot for the +* object and unbinding the object now will be in +* vain. Worse, doing so may cause us to ping-pong +* the object in and out of the Global GTT and +* waste a lot of cycles under the mutex. +*/ + if (obj->base.size > ggtt->mappable_end) + return true; + +
[Intel-gfx] [PATCH 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation
This iterator relies on drm_mm_first_hole() and drm_mm_next_hole() functions to identify suitable holes for an allocation of a given size by efficently traversing the rbtree associated with the given allocator. It replaces the for loop in drm_mm_insert_node_in_range() and can also be used by drm drivers to quickly identify holes of a certain size within a given range. Suggested-by: Tvrtko Ursulin Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/drm_mm.c | 28 include/drm/drm_mm.h | 32 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 8257f9d4f619..416c849c10e5 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm *mm, u64 addr, u64 size) return node; } -static struct drm_mm_node * -first_hole(struct drm_mm *mm, - u64 start, u64 end, u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm, hole_stack); } } +EXPORT_SYMBOL(drm_mm_first_hole); /** * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions @@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size) \ DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right) DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left) -static struct drm_mm_node * -next_hole(struct drm_mm *mm, - struct drm_mm_node *node, - u64 size, - enum drm_mm_insert_mode mode) +struct drm_mm_node * +drm_mm_next_hole(struct drm_mm *mm, +struct drm_mm_node *node, +u64 size, +enum drm_mm_insert_mode mode) { switch (mode) { default: @@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm, return >hole_stack == >hole_stack ? 
NULL : node; } } +EXPORT_SYMBOL(drm_mm_next_hole); /** * drm_mm_reserve_node - insert an pre-initialized node @@ -520,7 +522,6 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, { struct drm_mm_node *hole; u64 remainder_mask; - bool once; DRM_MM_BUG_ON(range_start > range_end); @@ -533,13 +534,8 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, if (alignment <= 1) alignment = 0; - once = mode & DRM_MM_INSERT_ONCE; - mode &= ~DRM_MM_INSERT_ONCE; - remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0; - for (hole = first_hole(mm, range_start, range_end, size, mode); -hole; -hole = once ? NULL : next_hole(mm, hole, size, mode)) { + drm_mm_for_each_best_hole(hole, mm, range_start, range_end, size, mode) { u64 hole_start = __drm_mm_hole_node_start(hole); u64 hole_end = hole_start + hole->hole_size; u64 adj_start, adj_end; diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index ac33ba1b18bc..5055447697fa 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -322,6 +322,17 @@ static inline u64 __drm_mm_hole_node_end(const struct drm_mm_node *hole_node) return list_next_entry(hole_node, node_list)->start; } +struct drm_mm_node * +drm_mm_first_hole(struct drm_mm *mm, + u64 start, u64 end, u64 size, + enum drm_mm_insert_mode mode); + +struct drm_mm_node * +drm_mm_next_hole(struct drm_mm *mm, +struct drm_mm_node *node, +u64 size, +enum drm_mm_insert_mode mode); + /** * drm_mm_hole_node_end - computes the end of the hole following @node * @hole_node: drm_mm_node which implicitly tracks the following hole @@ -400,6 +411,27 @@ static inline u64 drm_mm_hole_node_end(const struct drm_mm_node *hole_node) 1 : 0; \ pos = list_next_entry(pos, hole_stack)) +/** + * drm_mm_for_each_best_hole - iterator to optimally walk over all holes >= @size + * @pos: _mm_node used internally to track progress + * @mm: _mm allocator to walk + * @range_start: start of the allowed range for the allocation + * @range_end: end of the allowed range for the 
allocation + * @size: size of the allocation + * @mode: fine-tune the allocation search + * + * This iterator walks over all holes suitable for the allocation of given + * @size in a very efficient manner. It is implemented by calling + * drm_mm_first_hole() and drm_mm_next_hole() which identify the + * appropriate holes within the given range by efficently traversing the + * rbtree associated with @mm. + */ +#define drm_mm_for_each_best_hole(pos, mm, range
[Intel-gfx] [PATCH v4 RESEND] drm/i915/gem: Don't try to map and fence large scanout buffers (v4)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Therefore, to fix this issue, we try to see if there is space to map at least two objects of a given size and return early if there isn't. This would ensure that we do not try with PIN_MAPPABLE for any objects that are too big to map thereby preventing unnecessary unbind. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. 
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. v3 (Ville): - Count how many fb objects can be fit into the available holes instead of checking for a hole twice the object size. - Take alignment constraints into account. - Limit this large scanout buffer check to >= Gen 11 platforms. v4: - Remove existing heuristic that checks just for size. (Ville) - Return early if we find space to map at-least two objects. (Tvrtko) - Slightly update the commit message. 
Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 88 ++--- drivers/gpu/drm/i915/i915_vma.c | 2 +- 2 files changed, 60 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e3a2c2a0e156..95ec972f8c8a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,6 +46,7 @@ #include "gem/i915_gem_mman.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_userptr.h" +#include "gem/i915_gem_tiling.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -876,6 +877,63 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static bool i915_gem_obj_too_big(struct drm_i915_gem_object *obj, +u64 alignment) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end, start, end; + u64 fence_size, fence_alignment; + unsigned int count = 0; + + /* +* If the required space is larger than the available +* aperture, we will not able to find a slot for the +* object and unbinding the object now will be in +* vain. Worse, doing so may cause us to ping-pong +* the object in and out of the Global GTT and +* waste a lot of cycles under the mutex. +*/ + if (obj->base.size > ggtt->mappable_end) + return true; + + if (HAS_GMCH(i915) || DISPLAY_VER(i915) < 11 || + !i915_gem_object_is_framebuffer(obj)) + return false; + + fence_size = i915_gem_fence_size(i915, obj->base.size, +i915_gem_object_get_tiling(obj), +i915_gem_object_get_stride(obj)); + fence_alignment = i915_gem_fence_alignment(i915, o
[Intel-gfx] [PATCH] drm/i915/gem: Don't try to map and fence large scanout buffers (v4)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Therefore, to fix this issue, we try to see if there is space to map at least two objects of a given size and return early if there isn't. This would ensure that we do not try with PIN_MAPPABLE for any objects that are too big to map thereby preventing unnecessary unbind. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. 
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. v3 (Ville): - Count how many fb objects can be fit into the available holes instead of checking for a hole twice the object size. - Take alignment constraints into account. - Limit this large scanout buffer check to >= Gen 11 platforms. v4: - Remove existing heuristic that checks just for size. (Ville) - Return early if we find space to map at-least two objects. (Tvrtko) - Slightly update the commit message. 
Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 87 ++--- drivers/gpu/drm/i915/i915_vma.c | 2 +- 2 files changed, 59 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d0e642c82064..287508c37a9a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -866,6 +866,63 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static bool i915_gem_obj_too_big(struct drm_i915_gem_object *obj, +u64 alignment) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = >ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end, start, end; + u64 fence_size, fence_alignment; + unsigned int count = 0; + + /* +* If the required space is larger than the available +* aperture, we will not able to find a slot for the +* object and unbinding the object now will be in +* vain. Worse, doing so may cause us to ping-pong +* the object in and out of the Global GTT and +* waste a lot of cycles under the mutex. +*/ + if (obj->base.size > ggtt->mappable_end) + return true; + + if (HAS_GMCH(i915) || DISPLAY_VER(i915) < 11 || + !i915_gem_object_is_framebuffer(obj)) + return false; + + fence_size = i915_gem_fence_size(i915, obj->base.size, +i915_gem_object_get_tiling(obj), +i915_gem_object_get_stride(obj)); + fence_alignment = i915_gem_fence_alignment(i915, obj->base.size, + i915_gem_object_get_tiling(obj), + i915_gem_object_get_stride(obj)); + alignment = max_t(u64, alignment, fence_alignment); + + /* +* Assuming this object is a large scanout buffer,
[Intel-gfx] [PATCH] drm/i915/gem: Don't try to map and fence large scanout buffers (v3)
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without the mappable flag. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. v3 (Ville): - Count how many fb objects can be fit into the available holes instead of checking for a hole twice the object size. 
- Take alignment constraints into account. - Limit this large scanout buffer check to >= Gen 12 platforms. Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 65 - drivers/gpu/drm/i915/i915_vma.c | 2 +- 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 981e383d1a5d..761dc385fbfc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -866,6 +866,61 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static bool i915_gem_obj_too_big(struct drm_i915_gem_object *obj, +u64 alignment) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = >ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end, start, end; + u64 fence_size, fence_alignment; + unsigned int count = 0; + + /* +* If the required space is larger than the available +* aperture, we will not able to find a slot for the +* object and unbinding the object now will be in +* vain. Worse, doing so may cause us to ping-pong +* the object in and out of the Global GTT and +* waste a lot of cycles under the mutex. +*/ + if (obj->base.size > ggtt->mappable_end) + return true; + + if (HAS_GMCH(i915) || DISPLAY_VER(i915) < 11 || + !i915_gem_object_is_framebuffer(obj)) + return false; + + fence_size = i915_gem_fence_size(i915, obj->base.size, +i915_gem_object_get_tiling(obj), +i915_gem_object_get_stride(obj)); + fence_alignment = i915_gem_fence_alignment(i915, obj->base.size, +i915_gem_object_get_tiling(obj), +i915_gem_object_get_stride(obj)); + alignment = max_t(u64, alignment, fence_alignment); + + /* +* Assuming this object is a large scanout buffer, we try to find +* out if there is room to map at-least two of them. There could +* be space available to map one but to be consistent, we try to +* avoid mapping/fencing any of them. 
+*/ + drm_mm_for_each_hole(hole, >vm.mm, hole_start, hole_end) { + do { + start = round_up(hole_start, alignment); + end = min_t(u64, hole_end, ggtt->mappable_end); + + i
[Intel-gfx] [PATCH] drm/i915/gem: Don't try to map and fence large scanout buffers
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without the mappable flag. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } v2: Instead of using bigjoiner checks, determine whether a scanout buffer is too big by checking to see if it is possible to map two of them into the ggtt. 
Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_gem.c | 48 ++--- drivers/gpu/drm/i915/i915_vma.c | 2 +- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 981e383d1a5d..0050c7e4bb51 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -866,6 +866,44 @@ static void discard_ggtt_vma(struct i915_vma *vma) spin_unlock(>vma.lock); } +static bool i915_gem_obj_too_big(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = >ggtt; + struct drm_mm_node *hole; + u64 hole_start, hole_end; + u64 fence_size; + + /* +* If the required space is larger than the available +* aperture, we will not able to find a slot for the +* object and unbinding the object now will be in +* vain. Worse, doing so may cause us to ping-pong +* the object in and out of the Global GTT and +* waste a lot of cycles under the mutex. +*/ + if (obj->base.size > ggtt->mappable_end) + return true; + + fence_size = i915_gem_fence_size(i915, obj->base.size, +i915_gem_object_get_tiling(obj), +i915_gem_object_get_stride(obj)); + + /* +* Assuming this object is a large scanout buffer, we try to find +* out if there is room to map at-least two of them. There could +* be space available to map one but to be consistent, we try to +* avoid mapping/fencing any of them. 
+*/ + drm_mm_for_each_hole(hole, >vm.mm, hole_start, hole_end) { + if (hole_end - hole_start > 2 * fence_size && + hole_start + 2 * fence_size < ggtt->mappable_end) + return false; + } + + return true; +} + struct i915_vma * i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, @@ -879,15 +917,7 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, if (flags & PIN_MAPPABLE && (!view || view->type == I915_GGTT_VIEW_NORMAL)) { - /* -* If the required space is larger than the available -* aperture, we will not able to find a slot for the -* object and unbinding the object now will be in -* vain. Worse, doing so may cause us to ping-pong -* the object in and out of the Global GTT and -* waste a lot of cycles unde
[Intel-gfx] [PATCH] drm/i915/gem: Don't try to map and fence 8K/bigjoiner scanout buffers
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or more framebuffers/scanout buffers results in only one that is mappable/fenceable. Therefore, pageflipping between these 2 FBs where only one is mappable/fenceable creates latencies large enough to miss alternate vblanks thereby producing less optimal framerate. This mainly happens because when i915_gem_object_pin_to_display_plane() is called to pin one of the FB objs, the associated vma is identified as misplaced and therefore i915_vma_unbind() is called which unbinds and evicts it. This misplaced vma gets subsequently pinned only when i915_gem_object_ggtt_pin_ww() is called without the mappable flag. This results in a latency of ~10ms and happens every other vblank/repaint cycle. Testcase: Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits a frame ~7ms before the next vblank, the latencies seen between atomic commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that it misses the vblank every other frame. 
Here is the ftrace snippet that shows the source of the ~10ms latency: i915_gem_object_pin_to_display_plane() { 0.102 us |i915_gem_object_set_cache_level(); i915_gem_object_ggtt_pin_ww() { 0.390 us | i915_vma_instance(); 0.178 us | i915_vma_misplaced(); i915_vma_unbind() { __i915_active_wait() { 0.082 us |i915_active_acquire_if_busy(); 0.475 us | } intel_runtime_pm_get() { 0.087 us |intel_runtime_pm_acquire(); 0.259 us | } __i915_active_wait() { 0.085 us |i915_active_acquire_if_busy(); 0.240 us | } __i915_vma_evict() { ggtt_unbind_vma() { gen8_ggtt_clear_range() { 10507.255 us |} 10507.689 us | } 10508.516 us | } Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Manasi Navare Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_fb_pin.c | 11 +-- drivers/gpu/drm/i915/display/intel_overlay.c | 11 --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 6 -- drivers/gpu/drm/i915/gem/i915_gem_object.h | 3 ++- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index 3f77f3013584..53c156d9a9f9 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -144,7 +144,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, if (!ret) { vma = i915_gem_object_pin_to_display_plane(obj, , alignment, - view, pinctl); + view, pinctl, uses_fence); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_unpin; @@ -218,9 +218,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state) INTEL_INFO(dev_priv)->display.cursor_needs_physical; if (!intel_fb_uses_dpt(fb)) { + struct intel_crtc *crtc = to_intel_crtc(plane_state->hw.crtc); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + bool uses_fence = intel_plane_uses_fence(plane_state); + bool is_bigjoiner = crtc_state->bigjoiner || + crtc_state->bigjoiner_slave; + vma = intel_pin_and_fence_fb_obj(fb, phys_cursor, _state->view.gtt, - 
intel_plane_uses_fence(plane_state), +uses_fence && !is_bigjoiner, _state->flags); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 7e3f5c6ca484..e9563b40b911 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -755,10 +755,14 @@ static u32 overlay_cmd_reg(struct drm_intel_overlay_put_image *params) return cmd; } -static struct i915_vma *intel_overlay_pin_fb(struct drm_i915_gem_object *new_bo) +static struct i915_vma *intel_overlay_pin_fb(struct drm_i915_gem_object *new_bo, +struct intel_overlay *overlay) { struct i915_gem_ww_ctx ww; struct i915_vma *vma; + const struct intel_plane_state *plane_state = + to_intel_plane_state(overlay->crtc->base.primary->state); + bool uses_fence = intel_plane_us
[Intel-gfx] [PATCH] drm/i915/dsi: Dont forget to clean up the connector on error (v2)
If an error is encountered during the DSI initialization setup, the drm connector object also needs to be cleaned up along with the encoder. The error can happen due to a missing mode in the VBT or for other reasons. v2: Rephrase the commit message to make it more clear. Cc: Jani Nikula Cc: Vandita Kulkarni Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/icl_dsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 4fec5bd64920..f93f72463df5 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1954,6 +1954,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) return; err: + drm_connector_cleanup(connector); drm_encoder_cleanup(>base); kfree(intel_dsi); kfree(intel_connector); -- 2.21.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/dsi: Dont forget to clean up the connector on error
During the DSI initialization setup, after instantiating the relevant drm connector and encoder objects, the connector also needs to be cleaned up along with the encoder if an error is encountered. The error can happen due to a missing mode in the VBT or for other reasons. Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/icl_dsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 4fec5bd64920..f93f72463df5 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1954,6 +1954,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) return; err: + drm_connector_cleanup(connector); drm_encoder_cleanup(>base); kfree(intel_dsi); kfree(intel_connector); -- 2.21.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/hotplug: Use phy to get the hpd_pin instead of the port (v5)
On some platforms such as Elkhart Lake, although we may use DDI D to drive a connector, we have to use PHY A (Combo Phy PORT A) to detect the hotplug interrupts as per the spec because there is no one-to-one mapping between DDIs and PHYs. Therefore, use the function intel_port_to_phy() which contains the logic for such mapping(s) to find the correct hpd_pin. This change should not affect other platforms as there is always a one-to-one mapping between DDIs and PHYs. v2: - Convert the case statements to use PHYs instead of PORTs (Jani) v3: - Refactor the function to reduce the number of return statements by lumping all the case statements together except PHY_F which needs special handling (Jose) v4: - Add a comment describing how the HPD pin value associated with any port can be retrieved using port or phy enum value. (Jani) v5: - Use case ranges instead of individual labels and also normalize the return statement by adding -PHY_A to the expression (Ville) Cc: Jani Nikula Cc: Matt Roper Cc: José Roberto de Souza Cc: Ville Syrjala Signed-off-by: Vivek Kasireddy Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_hotplug.c | 31 ++-- drivers/gpu/drm/i915/i915_drv.h | 7 + 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 4a6208857488..562227d54ccc 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -87,29 +87,16 @@ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, enum port port) { - switch (port) { - case PORT_A: - return HPD_PORT_A; - case PORT_B: - return HPD_PORT_B; - case PORT_C: - return HPD_PORT_C; - case PORT_D: - return HPD_PORT_D; - case PORT_E: - return HPD_PORT_E; - case PORT_F: - if (IS_CNL_WITH_PORT_F(dev_priv)) - return HPD_PORT_E; - return HPD_PORT_F; - case PORT_G: - return HPD_PORT_G; - case PORT_H: - return HPD_PORT_H; - case PORT_I: - return 
HPD_PORT_I; + enum phy phy = intel_port_to_phy(dev_priv, port); + + switch (phy) { + case PHY_F: + return IS_CNL_WITH_PORT_F(dev_priv) ? HPD_PORT_E : HPD_PORT_F; + case PHY_A ... PHY_E: + case PHY_G ... PHY_I: + return HPD_PORT_A + phy - PHY_A; default: - MISSING_CASE(port); + MISSING_CASE(phy); return HPD_NONE; } } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 123d0fadfafc..21e4c0852e23 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -114,6 +114,13 @@ struct drm_i915_gem_object; +/* + * The code assumes that the hpd_pins below have consecutive values and + * starting with HPD_PORT_A, the HPD pin associated with any port can be + * retrieved by adding the corresponding port (or phy) enum value to + * HPD_PORT_A in most cases. For example: + * HPD_PORT_C = HPD_PORT_A + PHY_C - PHY_A + */ enum hpd_pin { HPD_NONE = 0, HPD_TV = HPD_NONE, /* TV is known to be unreliable */ -- 2.21.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/hotplug: Use phy to get the hpd_pin instead of the port (v4)
On some platforms such as Elkhart Lake, although we may use DDI D to drive a connector, we have to use PHY A (Combo Phy PORT A) to detect the hotplug interrupts as per the spec because there is no one-to-one mapping between DDIs and PHYs. Therefore, use the function intel_port_to_phy() which contains the logic for such mapping(s) to find the correct hpd_pin. This change should not affect other platforms as there is always a one-to-one mapping between DDIs and PHYs. v2: - Convert the case statements to use PHYs instead of PORTs (Jani) v3: - Refactor the function to reduce the number of return statements by lumping all the case statements together except PHY_F which needs special handling (Jose) v4: - Add a comment describing how the HPD pin value associated with any port can be retrieved using port or phy enum value. (Jani) Cc: Jani Nikula Cc: Matt Roper Cc: José Roberto de Souza Signed-off-by: Vivek Kasireddy Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_hotplug.c | 37 drivers/gpu/drm/i915/i915_drv.h | 6 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 4a6208857488..e1ddccc2ce97 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -87,29 +87,22 @@ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, enum port port) { - switch (port) { - case PORT_A: - return HPD_PORT_A; - case PORT_B: - return HPD_PORT_B; - case PORT_C: - return HPD_PORT_C; - case PORT_D: - return HPD_PORT_D; - case PORT_E: - return HPD_PORT_E; - case PORT_F: - if (IS_CNL_WITH_PORT_F(dev_priv)) - return HPD_PORT_E; - return HPD_PORT_F; - case PORT_G: - return HPD_PORT_G; - case PORT_H: - return HPD_PORT_H; - case PORT_I: - return HPD_PORT_I; + enum phy phy = intel_port_to_phy(dev_priv, port); + + switch (phy) { + case PHY_F: + return IS_CNL_WITH_PORT_F(dev_priv) ? 
HPD_PORT_E : HPD_PORT_F; + case PHY_A: + case PHY_B: + case PHY_C: + case PHY_D: + case PHY_E: + case PHY_G: + case PHY_H: + case PHY_I: + return HPD_PORT_A + phy; default: - MISSING_CASE(port); + MISSING_CASE(phy); return HPD_NONE; } } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b621df933212..c9d7b9127b6e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -117,6 +117,12 @@ struct drm_i915_gem_object; +/* + * The code assumes that the hpd_pins below have consecutive values and + * starting with HPD_PORT_A, the HPD pin associated with any port can be + * retrieved by adding the corresponding port (or phy) enum value to + * HPD_PORT_A. For example, HPD_PORT_C = HPD_PORT_A + PORT_C/PHY_C. + */ enum hpd_pin { HPD_NONE = 0, HPD_TV = HPD_NONE, /* TV is known to be unreliable */ -- 2.21.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/hotplug: Use phy to get the hpd_pin instead of the port (v3)
On Fri, 31 Jan 2020 11:35:35 +0200 Jani Nikula wrote: Hi Jani, > On Thu, 30 Jan 2020, Vivek Kasireddy > wrote: > > On some platforms such as Elkhart Lake, although we may use DDI D > > to drive a connector, we have to use PHY A (Combo Phy PORT A) to > > detect the hotplug interrupts as per the spec because there is no > > one-to-one mapping between DDIs and PHYs. Therefore, use the > > function intel_port_to_phy() which contains the logic for such > > mapping(s) to find the correct hpd_pin. > > > > This change should not affect other platforms as there is always > > a one-to-one mapping between DDIs and PHYs. > > > > v2: > > - Convert the case statements to use PHYs instead of PORTs (Jani) > > > > v3: > > - Refactor the function to reduce the number of return statements by > > lumping all the case statements together except PHY_F which needs > > special handling (Jose) > > > > Cc: Jani Nikula > > Cc: Matt Roper > > Cc: José Roberto de Souza > > Signed-off-by: Vivek Kasireddy > > --- > > drivers/gpu/drm/i915/display/intel_hotplug.c | 37 > > 1 file changed, 15 insertions(+), 22 > > deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c > > b/drivers/gpu/drm/i915/display/intel_hotplug.c index > > 042d98bae1ea..27e3033278a0 100644 --- > > a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ > > b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -89,29 +89,22 @@ > > enum hpd_pin intel_hpd_pin_default(struct drm_i915_private > > *dev_priv, enum port port) > > { > > - switch (port) { > > - case PORT_A: > > - return HPD_PORT_A; > > - case PORT_B: > > - return HPD_PORT_B; > > - case PORT_C: > > - return HPD_PORT_C; > > - case PORT_D: > > - return HPD_PORT_D; > > - case PORT_E: > > - return HPD_PORT_E; > > - case PORT_F: > > - if (IS_CNL_WITH_PORT_F(dev_priv)) > > - return HPD_PORT_E; > > - return HPD_PORT_F; > > - case PORT_G: > > - return HPD_PORT_G; > > - case PORT_H: > > - return HPD_PORT_H; > > - case PORT_I: > > - return HPD_PORT_I; > > + enum phy 
phy = intel_port_to_phy(dev_priv, port); > > + > > + switch (phy) { > > + case PHY_F: > > + return IS_CNL_WITH_PORT_F(dev_priv) ? HPD_PORT_E : > > HPD_PORT_F; > > + case PHY_A: > > + case PHY_B: > > + case PHY_C: > > + case PHY_D: > > + case PHY_E: > > + case PHY_G: > > + case PHY_H: > > + case PHY_I: > > + return HPD_PORT_A + phy; > > I know José asked you to do this, but now you've tied two enum > sequences together without explaining it anywhere. Before this, > AFAICT, enum hpd_pin was just an abstract enumeration where the > actual values of the enums didn't mean a thing, apart from 0 for > HPD_NONE. > > Maybe this is what we want to do, but we should never be so casual > about it. Do you suggest that I explain this in the description associated with v3 that we now have a switch/case fallthrough in this function? Or, do you want me to send a v4 to include this in a comment? Thanks, Vivek > > > BR, > Jani. > > > > default: > > - MISSING_CASE(port); > > + MISSING_CASE(phy); > > return HPD_NONE; > > } > > } > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/ehl: Check VBT before updating the transcoder for pipe
On Tue, 4 Feb 2020 12:50:25 +0200 Jani Nikula wrote: Hi Jani, > On Mon, 03 Feb 2020, Vivek Kasireddy > wrote: > > Since the pipe->transcoder mapping is not expected to change unless > > there is either eDP or DSI connectors present, check the VBT to > > confirm their presence in addition to checking > > TRANS_DDI_FUNC_CTL(transcoder). This additional check is needed on > > platforms like Elkhart Lake because we cannot just rely on > > GOP/Firmware programmed values in TRANS_DDI_FUNC_CTL(transcoder) > > before updating the transcoder mapping. > > > > This patch is only relevant to EHL -- and a no-op on others -- > > because some of the PHYs are shared between the different DDIs and > > we rely on the VBT to present the most accurate information to the > > driver. > > > > Cc: Matt Roper > > Cc: José Roberto de Souza > > Signed-off-by: Vivek Kasireddy > > --- > > drivers/gpu/drm/i915/display/intel_display.c | 15 ++- > > 1 file changed, 14 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/i915/display/intel_display.c > > b/drivers/gpu/drm/i915/display/intel_display.c index > > c0e5002ce64c..4b38f293bd88 100644 --- > > a/drivers/gpu/drm/i915/display/intel_display.c +++ > > b/drivers/gpu/drm/i915/display/intel_display.c @@ -10805,6 > > +10805,18 @@ static void hsw_get_ddi_pll(struct drm_i915_private > > *dev_priv, enum port port, pipe_config->shared_dpll = > > intel_get_shared_dpll_by_id(dev_priv, id); } > > +static bool ehl_vbt_edp_dsi_present(struct drm_i915_private > > *dev_priv, > > + enum transcoder transcoder) > > +{ > > + bool edp_present = intel_bios_is_port_present(dev_priv, > > PORT_A); > > + bool dsi_present = intel_bios_is_dsi_present(dev_priv, > > NULL); + > > + if (IS_ELKHARTLAKE(dev_priv)) > > + return transcoder == TRANSCODER_EDP ? edp_present > > : dsi_present; + > > + return true; > > +} > > One of those things... this jumps out and immediately feels all wrong, > just like ehl_vbt_ddi_d_present() feels all wrong in > intel_combo_phy.c. 
But I don't know what would be the right thing to > do without spending time that I don't have on this. Is there a particular approach you want me to take to address this issue? All I am trying to do is address the plausible scenario(s) where the GOP/firmware may program the hardware in a certain way that seems incorrect from what i915 does based on the info in the VBT. I noticed this issue on the EHL board I am working on; therefore, I limited the fix to EHL only. Thanks, Vivek > > BR, > Jani. > > > > > + > > static bool hsw_get_transcoder_state(struct intel_crtc *crtc, > > struct intel_crtc_state > > *pipe_config, u64 *power_domain_mask, > > @@ -10844,7 +10856,8 @@ static bool hsw_get_transcoder_state(struct > > intel_crtc *crtc, > > tmp = intel_de_read(dev_priv, > > TRANS_DDI_FUNC_CTL(panel_transcoder)); > > - if (!(tmp & TRANS_DDI_FUNC_ENABLE)) > > + if (!(tmp & TRANS_DDI_FUNC_ENABLE) || > > + !ehl_vbt_edp_dsi_present(dev_priv, > > panel_transcoder)) continue; > > > > /* > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/ehl: Check VBT before updating the transcoder for pipe
Since the pipe->transcoder mapping is not expected to change unless there is either eDP or DSI connectors present, check the VBT to confirm their presence in addition to checking TRANS_DDI_FUNC_CTL(transcoder). This additional check is needed on platforms like Elkhart Lake because we cannot just rely on GOP/Firmware programmed values in TRANS_DDI_FUNC_CTL(transcoder) before updating the transcoder mapping. This patch is only relevant to EHL -- and a no-op on others -- because some of the PHYs are shared between the different DDIs and we rely on the VBT to present the most accurate information to the driver. Cc: Matt Roper Cc: José Roberto de Souza Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_display.c | 15 ++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c0e5002ce64c..4b38f293bd88 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -10805,6 +10805,18 @@ static void hsw_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id); } +static bool ehl_vbt_edp_dsi_present(struct drm_i915_private *dev_priv, + enum transcoder transcoder) +{ + bool edp_present = intel_bios_is_port_present(dev_priv, PORT_A); + bool dsi_present = intel_bios_is_dsi_present(dev_priv, NULL); + + if (IS_ELKHARTLAKE(dev_priv)) + return transcoder == TRANSCODER_EDP ? 
edp_present : dsi_present; + + return true; +} + static bool hsw_get_transcoder_state(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, u64 *power_domain_mask, @@ -10844,7 +10856,8 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, tmp = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(panel_transcoder)); - if (!(tmp & TRANS_DDI_FUNC_ENABLE)) + if (!(tmp & TRANS_DDI_FUNC_ENABLE) || + !ehl_vbt_edp_dsi_present(dev_priv, panel_transcoder)) continue; /* -- 2.21.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/hotplug: Use phy to get the hpd_pin instead of the port (v3)
On some platforms such as Elkhart Lake, although we may use DDI D to drive a connector, we have to use PHY A (Combo Phy PORT A) to detect the hotplug interrupts as per the spec because there is no one-to-one mapping between DDIs and PHYs. Therefore, use the function intel_port_to_phy() which contains the logic for such mapping(s) to find the correct hpd_pin. This change should not affect other platforms as there is always a one-to-one mapping between DDIs and PHYs. v2: - Convert the case statements to use PHYs instead of PORTs (Jani) v3: - Refactor the function to reduce the number of return statements by lumping all the case statements together except PHY_F which needs special handling (Jose) Cc: Jani Nikula Cc: Matt Roper Cc: José Roberto de Souza Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_hotplug.c | 37 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 042d98bae1ea..27e3033278a0 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -89,29 +89,22 @@ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, enum port port) { - switch (port) { - case PORT_A: - return HPD_PORT_A; - case PORT_B: - return HPD_PORT_B; - case PORT_C: - return HPD_PORT_C; - case PORT_D: - return HPD_PORT_D; - case PORT_E: - return HPD_PORT_E; - case PORT_F: - if (IS_CNL_WITH_PORT_F(dev_priv)) - return HPD_PORT_E; - return HPD_PORT_F; - case PORT_G: - return HPD_PORT_G; - case PORT_H: - return HPD_PORT_H; - case PORT_I: - return HPD_PORT_I; + enum phy phy = intel_port_to_phy(dev_priv, port); + + switch (phy) { + case PHY_F: + return IS_CNL_WITH_PORT_F(dev_priv) ? 
HPD_PORT_E : HPD_PORT_F; + case PHY_A: + case PHY_B: + case PHY_C: + case PHY_D: + case PHY_E: + case PHY_G: + case PHY_H: + case PHY_I: + return HPD_PORT_A + phy; default: - MISSING_CASE(port); + MISSING_CASE(phy); return HPD_NONE; } } -- 2.21.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/hotplug: Use phy to get the hpd_pin instead of the port (v2)
On some platforms such as Elkhart Lake, although we may use DDI D to drive a connector, we have to use PHY A (Combo Phy PORT A) to detect the hotplug interrupts as per the spec because there is no one-to-one mapping between DDIs and PHYs. Therefore, use the function intel_port_to_phy() which contains the logic for such mapping(s) to find the correct hpd_pin. This change should not affect other platforms as there is always a one-to-one mapping between DDIs and PHYs. v2: - Convert the case statements to use PHYs instead of PORTs (Jani) Cc: Jani Nikula Cc: Matt Roper Cc: José Roberto de Souza Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_hotplug.c | 24 +++- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 042d98bae1ea..2bcfa4682511 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -89,29 +89,31 @@ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, enum port port) { - switch (port) { - case PORT_A: + enum phy phy = intel_port_to_phy(dev_priv, port); + + switch (phy) { + case PHY_A: return HPD_PORT_A; - case PORT_B: + case PHY_B: return HPD_PORT_B; - case PORT_C: + case PHY_C: return HPD_PORT_C; - case PORT_D: + case PHY_D: return HPD_PORT_D; - case PORT_E: + case PHY_E: return HPD_PORT_E; - case PORT_F: + case PHY_F: if (IS_CNL_WITH_PORT_F(dev_priv)) return HPD_PORT_E; return HPD_PORT_F; - case PORT_G: + case PHY_G: return HPD_PORT_G; - case PORT_H: + case PHY_H: return HPD_PORT_H; - case PORT_I: + case PHY_I: return HPD_PORT_I; default: - MISSING_CASE(port); + MISSING_CASE(phy); return HPD_NONE; } } -- 2.21.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/hotplug: Use phy to get the hpd_pin instead of the port
On some platforms such as Elkhart Lake, although we may use DDI D to drive a connector, we have to use PHY A (Combo Phy PORT A) to detect the hotplug interrupts as per the spec because there is no one-to-one mapping between DDIs and PHYs. Therefore, use the function intel_port_to_phy() which contains the logic for such mapping(s) to find the correct hpd_pin. This change should not affect other platforms as there is always a one-to-one mapping between DDIs and PHYs. Cc: Matt Roper Cc: José Roberto de Souza Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_hotplug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 042d98bae1ea..491f6b6f920d 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -89,7 +89,8 @@ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, enum port port) { - switch (port) { + enum phy phy = intel_port_to_phy(dev_priv, port); + switch (phy) { case PORT_A: return HPD_PORT_A; case PORT_B: @@ -111,7 +112,7 @@ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, case PORT_I: return HPD_PORT_I; default: - MISSING_CASE(port); + MISSING_CASE(phy); return HPD_NONE; } } -- 2.21.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/ddi: Ensure that the value assigned to ddi_clk_needed is a bool
Currently, the value assigned to the bool variable ddi_clk_needed is a pointer -- which appears to have happened inadvertently. Therefore, add a "!!" before the expression on the right to ensure that it results in a bool. Cc: Jani Nikula Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index c96f629cddc3..6df485289bc6 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3109,7 +3109,7 @@ void icl_sanitize_encoder_pll_mapping(struct intel_encoder *encoder) } port_mask = BIT(encoder->port); - ddi_clk_needed = encoder->base.crtc; + ddi_clk_needed = !!encoder->base.crtc; if (encoder->type == INTEL_OUTPUT_DSI) { struct intel_encoder *other_encoder; -- 2.21.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/ehl: Ensure that the DDI selection MUX is programmed correctly
Perhaps in some cases the BIOS/GOP or other firmware may turn on PHY A but may not program the MUX correctly. Therefore, re-program PHY A if it is determined after reading the VBT that the value programmed for the MUX bit does not match the expected value. Cc: Matt Roper Signed-off-by: Vivek Kasireddy --- .../gpu/drm/i915/display/intel_combo_phy.c| 74 +++ 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c index 5f54aca7c36f..ec63c2657923 100644 --- a/drivers/gpu/drm/i915/display/intel_combo_phy.c +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c @@ -191,20 +191,57 @@ static bool icl_combo_phy_enabled(struct drm_i915_private *dev_priv, (I915_READ(ICL_PORT_COMP_DW0(phy)) & COMP_INIT); } +static bool ehl_vbt_ddi_d_present(struct drm_i915_private *i915) +{ + bool ddi_a_present = intel_bios_is_port_present(i915, PORT_A); + bool ddi_d_present = intel_bios_is_port_present(i915, PORT_D); + bool dsi_present = intel_bios_is_dsi_present(i915, NULL); + + /* +* VBT's 'dvo port' field for child devices references the DDI, not +* the PHY. So if combo PHY A is wired up to drive an external +* display, we should see a child device present on PORT_D and +* nothing on PORT_A and no DSI. +*/ + if (ddi_d_present && !ddi_a_present && !dsi_present) + return true; + + /* +* If we encounter a VBT that claims to have an external display on +* DDI-D _and_ an internal display on DDI-A/DSI leave an error message +* in the log and let the internal display win. +*/ + if (ddi_d_present) + DRM_ERROR("VBT claims to have both internal and external displays on PHY A. 
Configuring for internal.\n"); + + return false; +} + static bool icl_combo_phy_verify_state(struct drm_i915_private *dev_priv, enum phy phy) { bool ret; + u32 expected_val = 0; if (!icl_combo_phy_enabled(dev_priv, phy)) return false; ret = cnl_verify_procmon_ref_values(dev_priv, phy); - if (phy == PHY_A) + if (phy == PHY_A) { ret &= check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW8(phy), IREFGEN, IREFGEN); + if (IS_ELKHARTLAKE(dev_priv)) { + if (ehl_vbt_ddi_d_present(dev_priv)) + expected_val = ICL_PHY_MISC_MUX_DDID; + + ret &= check_phy_reg(dev_priv, phy, ICL_PHY_MISC(phy), +ICL_PHY_MISC_MUX_DDID, +expected_val); + } + } + ret &= check_phy_reg(dev_priv, phy, ICL_PORT_CL_DW5(phy), CL_POWER_DOWN_ENABLE, CL_POWER_DOWN_ENABLE); @@ -263,32 +300,6 @@ void intel_combo_phy_power_up_lanes(struct drm_i915_private *dev_priv, I915_WRITE(ICL_PORT_CL_DW10(phy), val); } -static u32 ehl_combo_phy_a_mux(struct drm_i915_private *i915, u32 val) -{ - bool ddi_a_present = intel_bios_is_port_present(i915, PORT_A); - bool ddi_d_present = intel_bios_is_port_present(i915, PORT_D); - bool dsi_present = intel_bios_is_dsi_present(i915, NULL); - - /* -* VBT's 'dvo port' field for child devices references the DDI, not -* the PHY. So if combo PHY A is wired up to drive an external -* display, we should see a child device present on PORT_D and -* nothing on PORT_A and no DSI. -*/ - if (ddi_d_present && !ddi_a_present && !dsi_present) - return val | ICL_PHY_MISC_MUX_DDID; - - /* -* If we encounter a VBT that claims to have an external display on -* DDI-D _and_ an internal display on DDI-A/DSI leave an error message -* in the log and let the internal display win. -*/ - if (ddi_d_present) - DRM_ERROR("VBT claims to have both internal and external displays on PHY A. 
Configuring for internal.\n"); - - return val & ~ICL_PHY_MISC_MUX_DDID; -} - static void icl_combo_phys_init(struct drm_i915_private *dev_priv) { enum phy phy; @@ -319,8 +330,13 @@ static void icl_combo_phys_init(struct drm_i915_private *dev_priv) * "internal" child devices. */ val = I915_READ(ICL_PHY_MISC(phy)); - if (IS_ELKHARTLAKE(dev_priv) && phy == PHY_A) - val = ehl_combo_phy_a_mux(dev_priv, val); + if (IS_ELKHARTLAKE(dev_priv) && phy == PHY_A) { + val &= ~ICL_PHY_MISC_MUX_DDID; + + if (ehl_vbt_ddi_d_present(dev_priv))
[Intel-gfx] [PATCH] drm/i915/dsi: Ensure that the ACPI adapter lookup overrides the bus num
Remove the i2c_bus_num >= 0 check from the adapter lookup function as this would prevent ACPI bus number override. This check was mainly there to return early if the bus number has already been found but we anyway return in the next line if the slave address does not match. Fixes: 8cbf89db2941 ("drm/i915/dsi: Parse the I2C element from the VBT MIPI sequence block (v3)") Cc: Hans de Goede Cc: Nabendu Maiti Cc: Matt Roper Cc: Bob Paauwe Cc: Jani Nikula Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 6ec35d975bd7..04f953ba8f00 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -394,8 +394,7 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) acpi_handle adapter_handle; acpi_status status; - if (intel_dsi->i2c_bus_num >= 0 || - !i2c_acpi_get_i2c_resource(ares, &sb)) + if (!i2c_acpi_get_i2c_resource(ares, &sb)) return 1; if (lookup->slave_addr != sb->slave_address) -- 2.21.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/dsi: Lookup the i2c bus from ACPI NS only if CONFIG_ACPI=y (v3)
Perform the i2c bus/adapter lookup from ACPI Namespace only if ACPI is enabled in the kernel config. If ACPI is not enabled or if the lookup fails, we'll fall back to using the VBT for identifying the i2c bus. v2: Clearly identify the commit this patch is fixing (Jani) v3: Remove the i2c_bus_num >= 0 check from the adapter lookup function as this would prevent ACPI bus number override. This check was mainly there to return early if the bus number has already been found but we anyway return in the next line if the slave address does not match. Fixes: 8cbf89db2941 ("drm/i915/dsi: Parse the I2C element from the VBT MIPI sequence block (v3)") Cc: Hans de Goede Cc: Nabendu Maiti Cc: Matt Roper Cc: Bob Paauwe Cc: Ville Syrjälä Cc: Jani Nikula Cc: Zhang Xiaoxu Reported-by: Hulk Robot Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 50 +--- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 89fb0d90b694..04f953ba8f00 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -384,6 +384,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) return data; } +#ifdef CONFIG_ACPI static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) { struct i2c_adapter_lookup *lookup = data; @@ -393,8 +394,7 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) acpi_handle adapter_handle; acpi_status status; - if (intel_dsi->i2c_bus_num >= 0 || - !i2c_acpi_get_i2c_resource(ares, &sb)) + if (!i2c_acpi_get_i2c_resource(ares, &sb)) return 1; if (lookup->slave_addr != sb->slave_address) @@ -413,14 +413,41 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) return 1; } -static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) +static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, + const u16 slave_addr) {
struct drm_device *drm_dev = intel_dsi->base.base.dev; struct device *dev = &drm_dev->pdev->dev; - struct i2c_adapter *adapter; struct acpi_device *acpi_dev; struct list_head resource_list; struct i2c_adapter_lookup lookup; + + acpi_dev = ACPI_COMPANION(dev); + if (acpi_dev) { + memset(&lookup, 0, sizeof(lookup)); + lookup.slave_addr = slave_addr; + lookup.intel_dsi = intel_dsi; + lookup.dev_handle = acpi_device_handle(acpi_dev); + + INIT_LIST_HEAD(&resource_list); + acpi_dev_get_resources(acpi_dev, &resource_list, + i2c_adapter_lookup, + &lookup); + acpi_dev_free_resource_list(&resource_list); + } +} +#else +static inline void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, +const u16 slave_addr) +{ +} +#endif + +static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) +{ + struct drm_device *drm_dev = intel_dsi->base.base.dev; + struct device *dev = &drm_dev->pdev->dev; + struct i2c_adapter *adapter; struct i2c_msg msg; int ret; u8 vbt_i2c_bus_num = *(data + 2); @@ -431,20 +458,7 @@ static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) if (intel_dsi->i2c_bus_num < 0) { intel_dsi->i2c_bus_num = vbt_i2c_bus_num; - - acpi_dev = ACPI_COMPANION(dev); - if (acpi_dev) { - memset(&lookup, 0, sizeof(lookup)); - lookup.slave_addr = slave_addr; - lookup.intel_dsi = intel_dsi; - lookup.dev_handle = acpi_device_handle(acpi_dev); - - INIT_LIST_HEAD(&resource_list); - acpi_dev_get_resources(acpi_dev, &resource_list, - i2c_adapter_lookup, - &lookup); - acpi_dev_free_resource_list(&resource_list); - } + i2c_acpi_find_adapter(intel_dsi, slave_addr); } adapter = i2c_get_adapter(intel_dsi->i2c_bus_num); -- 2.21.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/dsi: Lookup the i2c bus from ACPI NS only if CONFIG_ACPI=y (v2)
Perform the i2c bus/adapter lookup from ACPI Namespace only if ACPI is enabled in the kernel config. If ACPI is not enabled or if the lookup fails, we'll fall back to using the VBT for identifying the i2c bus. This patch Fixes: 8cbf89db2941 ("drm/i915/dsi: Parse the I2C element from the VBT MIPI sequence block (v3)") v2: Reformat the above line to clearly identify the commit this patch is fixing for CI (Jani) Cc: Hans de Goede Cc: Nabendu Maiti Cc: Matt Roper Cc: Bob Paauwe Cc: Ville Syrjälä Cc: Jani Nikula Cc: Zhang Xiaoxu Reported-by: Hulk Robot Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 47 +--- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 89fb0d90b694..6ec35d975bd7 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -384,6 +384,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) return data; } +#ifdef CONFIG_ACPI static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) { struct i2c_adapter_lookup *lookup = data; @@ -413,14 +414,41 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) return 1; } -static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) +static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, + const u16 slave_addr) { struct drm_device *drm_dev = intel_dsi->base.base.dev; struct device *dev = &drm_dev->pdev->dev; - struct i2c_adapter *adapter; struct acpi_device *acpi_dev; struct list_head resource_list; struct i2c_adapter_lookup lookup; + + acpi_dev = ACPI_COMPANION(dev); + if (acpi_dev) { + memset(&lookup, 0, sizeof(lookup)); + lookup.slave_addr = slave_addr; + lookup.intel_dsi = intel_dsi; + lookup.dev_handle = acpi_device_handle(acpi_dev); + + INIT_LIST_HEAD(&resource_list); + acpi_dev_get_resources(acpi_dev, &resource_list, + i2c_adapter_lookup, + &lookup); +
acpi_dev_free_resource_list(&resource_list); + } +} +#else +static inline void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, +const u16 slave_addr) +{ +} +#endif + +static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) +{ + struct drm_device *drm_dev = intel_dsi->base.base.dev; + struct device *dev = &drm_dev->pdev->dev; + struct i2c_adapter *adapter; struct i2c_msg msg; int ret; u8 vbt_i2c_bus_num = *(data + 2); @@ -431,20 +459,7 @@ static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) if (intel_dsi->i2c_bus_num < 0) { intel_dsi->i2c_bus_num = vbt_i2c_bus_num; - - acpi_dev = ACPI_COMPANION(dev); - if (acpi_dev) { - memset(&lookup, 0, sizeof(lookup)); - lookup.slave_addr = slave_addr; - lookup.intel_dsi = intel_dsi; - lookup.dev_handle = acpi_device_handle(acpi_dev); - - INIT_LIST_HEAD(&resource_list); - acpi_dev_get_resources(acpi_dev, &resource_list, - i2c_adapter_lookup, - &lookup); - acpi_dev_free_resource_list(&resource_list); - } + i2c_acpi_find_adapter(intel_dsi, slave_addr); } adapter = i2c_get_adapter(intel_dsi->i2c_bus_num); -- 2.21.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/dsi: Lookup the i2c bus from ACPI NS only if CONFIG_ACPI=y
Perform the i2c bus/adapter lookup from ACPI Namespace only if ACPI is enabled in the kernel config. If ACPI is not enabled or if the lookup fails, we'll fall back to using the VBT for identifying the i2c bus. This fixes commit 8cbf89db2941("drm/i915/dsi: Parse the I2C element from the VBT MIPI sequence block (v3).") Cc: Hans de Goede Cc: Nabendu Maiti Cc: Matt Roper Cc: Bob Paauwe Cc: Ville Syrjälä Cc: Jani Nikula Cc: Zhang Xiaoxu Reported-by: Hulk Robot Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 47 +--- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 89fb0d90b694..6ec35d975bd7 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -384,6 +384,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) return data; } +#ifdef CONFIG_ACPI static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) { struct i2c_adapter_lookup *lookup = data; @@ -413,14 +414,41 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) return 1; } -static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) +static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, + const u16 slave_addr) { struct drm_device *drm_dev = intel_dsi->base.base.dev; struct device *dev = &drm_dev->pdev->dev; - struct i2c_adapter *adapter; struct acpi_device *acpi_dev; struct list_head resource_list; struct i2c_adapter_lookup lookup; + + acpi_dev = ACPI_COMPANION(dev); + if (acpi_dev) { + memset(&lookup, 0, sizeof(lookup)); + lookup.slave_addr = slave_addr; + lookup.intel_dsi = intel_dsi; + lookup.dev_handle = acpi_device_handle(acpi_dev); + + INIT_LIST_HEAD(&resource_list); + acpi_dev_get_resources(acpi_dev, &resource_list, + i2c_adapter_lookup, + &lookup); + acpi_dev_free_resource_list(&resource_list); + } +} +#else +static inline void i2c_acpi_find_adapter(struct intel_dsi
*intel_dsi, +const u16 slave_addr) +{ +} +#endif + +static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) +{ + struct drm_device *drm_dev = intel_dsi->base.base.dev; + struct device *dev = &drm_dev->pdev->dev; + struct i2c_adapter *adapter; struct i2c_msg msg; int ret; u8 vbt_i2c_bus_num = *(data + 2); @@ -431,20 +459,7 @@ static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) if (intel_dsi->i2c_bus_num < 0) { intel_dsi->i2c_bus_num = vbt_i2c_bus_num; - - acpi_dev = ACPI_COMPANION(dev); - if (acpi_dev) { - memset(&lookup, 0, sizeof(lookup)); - lookup.slave_addr = slave_addr; - lookup.intel_dsi = intel_dsi; - lookup.dev_handle = acpi_device_handle(acpi_dev); - - INIT_LIST_HEAD(&resource_list); - acpi_dev_get_resources(acpi_dev, &resource_list, - i2c_adapter_lookup, - &lookup); - acpi_dev_free_resource_list(&resource_list); - } + i2c_acpi_find_adapter(intel_dsi, slave_addr); } adapter = i2c_get_adapter(intel_dsi->i2c_bus_num); -- 2.21.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/dsi: Parse the I2C element from the VBT MIPI sequence block (v3)
Parsing the i2c element is mainly done to transfer the payload from the MIPI sequence block to the relevant slave device. In some cases, the commands that are part of the payload can be used to turn on the backlight. This patch is actually a refactored version of this old patch: https://lists.freedesktop.org/archives/intel-gfx/2014-December/056897.html In addition to the refactoring, the original patch is augmented by looking up the i2c bus from ACPI NS instead of relying on the bus number provided in the VBT. This patch was tested on Aava Mobile's Inari 10 tablet. It enabled turning on the backlight by transferring the payload to the device. v2: - Add DRM_DEV_ERROR for invalid adapter and failed transfer and also drop the DRM_DEBUG that existed originally. (Hans) - Add two gotos instead of one to clean things up properly. v3: - Identify the device on which this patch was tested in the commit message (Ville) Cc: Hans de Goede Cc: Nabendu Maiti Cc: Matt Roper Cc: Bob Paauwe Cc: Ville Syrjälä Reviewed-by: Hans de Goede Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_dsi.h | 3 + drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 99 +++- 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h b/drivers/gpu/drm/i915/display/intel_dsi.h index 7481a5aa3084..6cef1356b4e6 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi.h +++ b/drivers/gpu/drm/i915/display/intel_dsi.h @@ -69,6 +69,9 @@ struct intel_dsi { /* number of DSI lanes */ unsigned int lane_count; + /* i2c bus associated with the slave device */ + int i2c_bus_num; + /* * video mode pixel format * diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 0032161e0f76..89fb0d90b694 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -86,6 +86,12 @@ static struct gpio_map vlv_gpio_table[] = { { VLV_GPIO_NC_11_PANEL1_BKLTCTL }, }; +struct
i2c_adapter_lookup { + u16 slave_addr; + struct intel_dsi *intel_dsi; + acpi_handle dev_handle; +}; + #define CHV_GPIO_IDX_START_N 0 #define CHV_GPIO_IDX_START_E 73 #define CHV_GPIO_IDX_START_SW 100 @@ -378,11 +384,98 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) return data; } +static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) +{ + struct i2c_adapter_lookup *lookup = data; + struct intel_dsi *intel_dsi = lookup->intel_dsi; + struct acpi_resource_i2c_serialbus *sb; + struct i2c_adapter *adapter; + acpi_handle adapter_handle; + acpi_status status; + + if (intel_dsi->i2c_bus_num >= 0 || + !i2c_acpi_get_i2c_resource(ares, )) + return 1; + + if (lookup->slave_addr != sb->slave_address) + return 1; + + status = acpi_get_handle(lookup->dev_handle, +sb->resource_source.string_ptr, +_handle); + if (ACPI_FAILURE(status)) + return 1; + + adapter = i2c_acpi_find_adapter_by_handle(adapter_handle); + if (adapter) + intel_dsi->i2c_bus_num = adapter->nr; + + return 1; +} + static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) { - DRM_DEBUG_KMS("Skipping I2C element execution\n"); + struct drm_device *drm_dev = intel_dsi->base.base.dev; + struct device *dev = _dev->pdev->dev; + struct i2c_adapter *adapter; + struct acpi_device *acpi_dev; + struct list_head resource_list; + struct i2c_adapter_lookup lookup; + struct i2c_msg msg; + int ret; + u8 vbt_i2c_bus_num = *(data + 2); + u16 slave_addr = *(u16 *)(data + 3); + u8 reg_offset = *(data + 5); + u8 payload_size = *(data + 6); + u8 *payload_data; + + if (intel_dsi->i2c_bus_num < 0) { + intel_dsi->i2c_bus_num = vbt_i2c_bus_num; + + acpi_dev = ACPI_COMPANION(dev); + if (acpi_dev) { + memset(, 0, sizeof(lookup)); + lookup.slave_addr = slave_addr; + lookup.intel_dsi = intel_dsi; + lookup.dev_handle = acpi_device_handle(acpi_dev); + + INIT_LIST_HEAD(_list); + acpi_dev_get_resources(acpi_dev, _list, + i2c_adapter_lookup, + ); + acpi_dev_free_resource_list(_list); 
+ } + } - return data + *(data + 6) + 7; + adapter = i2c_get_adapter(intel_dsi->i2c_bus_num); + if (!adapter) { +
[Intel-gfx] [PATCH] drm/i915/dsi: Parse the I2C element from the VBT MIPI sequence block (v2)
Parsing the i2c element is mainly done to transfer the payload from the MIPI sequence block to the relevant slave device. In some cases, the commands that are part of the payload can be used to turn on the backlight. This patch is actually a refactored version of this old patch: https://lists.freedesktop.org/archives/intel-gfx/2014-December/056897.html In addition to the refactoring, the original patch is augmented by looking up the i2c bus from ACPI NS instead of relying on the bus number provided in the VBT. v2: - Add DRM_DEV_ERROR for invalid adapter and failed transfer and also drop the DRM_DEBUG that existed originally. (Hans) - Add two gotos instead of one to clean things up properly. CC: Hans de Goede Cc: Nabendu Maiti Cc: Matt Roper Cc: Bob Paauwe Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_dsi.h | 3 + drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 99 +++- 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h b/drivers/gpu/drm/i915/display/intel_dsi.h index b15be5814599..5651bc8aa5c2 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi.h +++ b/drivers/gpu/drm/i915/display/intel_dsi.h @@ -68,6 +68,9 @@ struct intel_dsi { /* number of DSI lanes */ unsigned int lane_count; + /* i2c bus associated with the slave device */ + int i2c_bus_num; + /* * video mode pixel format * diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index f90946c912ee..35fcef7c0d70 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -83,6 +83,12 @@ static struct gpio_map vlv_gpio_table[] = { { VLV_GPIO_NC_11_PANEL1_BKLTCTL }, }; +struct i2c_adapter_lookup { + u16 slave_addr; + struct intel_dsi *intel_dsi; + acpi_handle dev_handle; +}; + #define CHV_GPIO_IDX_START_N 0 #define CHV_GPIO_IDX_START_E 73 #define CHV_GPIO_IDX_START_SW 100 @@ -375,11 +381,98 @@ static const u8 *mipi_exec_gpio(struct intel_dsi 
*intel_dsi, const u8 *data) return data; } +static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) +{ + struct i2c_adapter_lookup *lookup = data; + struct intel_dsi *intel_dsi = lookup->intel_dsi; + struct acpi_resource_i2c_serialbus *sb; + struct i2c_adapter *adapter; + acpi_handle adapter_handle; + acpi_status status; + + if (intel_dsi->i2c_bus_num >= 0 || + !i2c_acpi_get_i2c_resource(ares, )) + return 1; + + if (lookup->slave_addr != sb->slave_address) + return 1; + + status = acpi_get_handle(lookup->dev_handle, +sb->resource_source.string_ptr, +_handle); + if (ACPI_FAILURE(status)) + return 1; + + adapter = i2c_acpi_find_adapter_by_handle(adapter_handle); + if (adapter) + intel_dsi->i2c_bus_num = adapter->nr; + + return 1; +} + static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) { - DRM_DEBUG_KMS("Skipping I2C element execution\n"); + struct drm_device *drm_dev = intel_dsi->base.base.dev; + struct device *dev = _dev->pdev->dev; + struct i2c_adapter *adapter; + struct acpi_device *acpi_dev; + struct list_head resource_list; + struct i2c_adapter_lookup lookup; + struct i2c_msg msg; + int ret; + u8 vbt_i2c_bus_num = *(data + 2); + u16 slave_addr = *(u16 *)(data + 3); + u8 reg_offset = *(data + 5); + u8 payload_size = *(data + 6); + u8 *payload_data; + + if (intel_dsi->i2c_bus_num < 0) { + intel_dsi->i2c_bus_num = vbt_i2c_bus_num; + + acpi_dev = ACPI_COMPANION(dev); + if (acpi_dev) { + memset(, 0, sizeof(lookup)); + lookup.slave_addr = slave_addr; + lookup.intel_dsi = intel_dsi; + lookup.dev_handle = acpi_device_handle(acpi_dev); + + INIT_LIST_HEAD(_list); + acpi_dev_get_resources(acpi_dev, _list, + i2c_adapter_lookup, + ); + acpi_dev_free_resource_list(_list); + } + } - return data + *(data + 6) + 7; + adapter = i2c_get_adapter(intel_dsi->i2c_bus_num); + if (!adapter) { + DRM_DEV_ERROR(dev, "Cannot find a valid i2c bus for xfer\n"); + goto err_bus; + } + + payload_data = kzalloc(payload_size + 1, GFP_KERNEL); + if 
(!payload_data) + goto err_alloc; + + payload_data[0] = reg_offset; + memcpy(
Re: [Intel-gfx] [PATCH] drm/i915/dsi: Parse the I2C element from the VBT MIPI sequence block
On Fri, 3 Jan 2020 12:05:11 +0100 Hans de Goede wrote: Hi Hans, > Hi Vivek, > > On 03-01-2020 01:00, Vivek Kasireddy wrote: > > Parsing the i2c element is mainly done to transfer the payload from > > the MIPI sequence block to the relevant slave device. In some > > cases, the commands that are part of the payload can be used to > > turn on the backlight. > > > > This patch is actually a refactored version of this old patch: > > https://lists.freedesktop.org/archives/intel-gfx/2014-December/056897.html > > > > In addition to the refactoring, the old patch is augmented by > > looking up the i2c bus from ACPI NS instead of relying on the bus > > number provided in the VBT. > > > > Cc: Deepak M > > Cc: Nabendu Maiti > > Cc: Matt Roper > > Cc: Bob Paauwe > > Signed-off-by: Vivek Kasireddy > > Thank you for this patch, I have been doing a lot of work to make > DSI panels on Bay Trail and Cherry Trail devices work better, as such > I've done a lot of testing of DSI panels. But I have never seen any > MIPI sequences actually use the i2c commands. May I ask how you have > tested this? Do you have a device which actually uses the i2c > commands? Oh, they sure exist; we do have a device that uses i2c commands to turn on the backlight that we have tested this patch on. 
> > I also have some small review comments inline: > > > --- > > drivers/gpu/drm/i915/display/intel_dsi.h | 3 + > > drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 93 > > 2 files changed, 96 insertions(+) > > > > diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h > > b/drivers/gpu/drm/i915/display/intel_dsi.h index > > b15be5814599..5651bc8aa5c2 100644 --- > > a/drivers/gpu/drm/i915/display/intel_dsi.h +++ > > b/drivers/gpu/drm/i915/display/intel_dsi.h @@ -68,6 +68,9 @@ struct > > intel_dsi { /* number of DSI lanes */ > > unsigned int lane_count; > > > > + /* i2c bus associated with the slave device */ > > + int i2c_bus_num; > > + > > /* > > * video mode pixel format > > * > > diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c > > b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index > > f90946c912ee..60441a5a3dba 100644 --- > > a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ > > b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -83,6 +83,12 @@ > > static struct gpio_map vlv_gpio_table[] = { { > > VLV_GPIO_NC_11_PANEL1_BKLTCTL }, }; > > > > +struct i2c_adapter_lookup { > > + u16 slave_addr; > > + struct intel_dsi *intel_dsi; > > + acpi_handle dev_handle; > > +}; > > + > > #define CHV_GPIO_IDX_START_N 0 > > #define CHV_GPIO_IDX_START_E 73 > > #define CHV_GPIO_IDX_START_SW 100 > > @@ -375,8 +381,93 @@ static const u8 *mipi_exec_gpio(struct > > intel_dsi *intel_dsi, const u8 *data) return data; > > } > > > > +static int i2c_adapter_lookup(struct acpi_resource *ares, void > > *data) +{ > > + struct i2c_adapter_lookup *lookup = data; > > + struct intel_dsi *intel_dsi = lookup->intel_dsi; > > + struct acpi_resource_i2c_serialbus *sb; > > + struct i2c_adapter *adapter; > > + acpi_handle adapter_handle; > > + acpi_status status; > > + > > + if (intel_dsi->i2c_bus_num >= 0 || > > + !i2c_acpi_get_i2c_resource(ares, )) > > + return 1; > > + > > + if (lookup->slave_addr != sb->slave_address) > > + return 1; > > + > > + status = acpi_get_handle(lookup->dev_handle, > 
> +sb->resource_source.string_ptr, > > +_handle); > > + if (ACPI_FAILURE(status)) > > + return 1; > > + > > + adapter = i2c_acpi_find_adapter_by_handle(adapter_handle); > > + if (adapter) > > + intel_dsi->i2c_bus_num = adapter->nr; > > + > > + return 1; > > +} > > + > > static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const > > u8 *data) { > > + struct drm_device *dev = intel_dsi->base.base.dev; > > + struct i2c_adapter *adapter; > > + struct acpi_device *acpi_dev; > > + struct list_head resource_list; > > + struct i2c_adapter_lookup lookup; > > + struct i2c_msg msg; > > + int ret; > > + u8 vbt_i2c_bus_num = *(data + 2); > > + u16 slave_addr = *(u16 *)(data + 3); > > + u8 reg_offset = *(data + 5); > > + u8 payload_size
[Intel-gfx] [PATCH] drm/i915/dsi: Parse the I2C element from the VBT MIPI sequence block
Parsing the i2c element is mainly done to transfer the payload from the MIPI sequence block to the relevant slave device. In some cases, the commands that are part of the payload can be used to turn on the backlight. This patch is actually a refactored version of this old patch: https://lists.freedesktop.org/archives/intel-gfx/2014-December/056897.html In addition to the refactoring, the old patch is augmented by looking up the i2c bus from ACPI NS instead of relying on the bus number provided in the VBT. Cc: Deepak M Cc: Nabendu Maiti Cc: Matt Roper Cc: Bob Paauwe Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_dsi.h | 3 + drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 93 2 files changed, 96 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h b/drivers/gpu/drm/i915/display/intel_dsi.h index b15be5814599..5651bc8aa5c2 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi.h +++ b/drivers/gpu/drm/i915/display/intel_dsi.h @@ -68,6 +68,9 @@ struct intel_dsi { /* number of DSI lanes */ unsigned int lane_count; + /* i2c bus associated with the slave device */ + int i2c_bus_num; + /* * video mode pixel format * diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index f90946c912ee..60441a5a3dba 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -83,6 +83,12 @@ static struct gpio_map vlv_gpio_table[] = { { VLV_GPIO_NC_11_PANEL1_BKLTCTL }, }; +struct i2c_adapter_lookup { + u16 slave_addr; + struct intel_dsi *intel_dsi; + acpi_handle dev_handle; +}; + #define CHV_GPIO_IDX_START_N 0 #define CHV_GPIO_IDX_START_E 73 #define CHV_GPIO_IDX_START_SW 100 @@ -375,8 +381,93 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) return data; } +static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) +{ + struct i2c_adapter_lookup *lookup = data; + struct intel_dsi *intel_dsi = lookup->intel_dsi; + 
struct acpi_resource_i2c_serialbus *sb; + struct i2c_adapter *adapter; + acpi_handle adapter_handle; + acpi_status status; + + if (intel_dsi->i2c_bus_num >= 0 || + !i2c_acpi_get_i2c_resource(ares, )) + return 1; + + if (lookup->slave_addr != sb->slave_address) + return 1; + + status = acpi_get_handle(lookup->dev_handle, +sb->resource_source.string_ptr, +_handle); + if (ACPI_FAILURE(status)) + return 1; + + adapter = i2c_acpi_find_adapter_by_handle(adapter_handle); + if (adapter) + intel_dsi->i2c_bus_num = adapter->nr; + + return 1; +} + static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) { + struct drm_device *dev = intel_dsi->base.base.dev; + struct i2c_adapter *adapter; + struct acpi_device *acpi_dev; + struct list_head resource_list; + struct i2c_adapter_lookup lookup; + struct i2c_msg msg; + int ret; + u8 vbt_i2c_bus_num = *(data + 2); + u16 slave_addr = *(u16 *)(data + 3); + u8 reg_offset = *(data + 5); + u8 payload_size = *(data + 6); + u8 *payload_data; + + if (intel_dsi->i2c_bus_num < 0) { + intel_dsi->i2c_bus_num = vbt_i2c_bus_num; + + acpi_dev = ACPI_COMPANION(>pdev->dev); + if (acpi_dev) { + memset(, 0, sizeof(lookup)); + lookup.slave_addr = slave_addr; + lookup.intel_dsi = intel_dsi; + lookup.dev_handle = acpi_device_handle(acpi_dev); + + INIT_LIST_HEAD(_list); + acpi_dev_get_resources(acpi_dev, _list, + i2c_adapter_lookup, + ); + acpi_dev_free_resource_list(_list); + } + } + + adapter = i2c_get_adapter(intel_dsi->i2c_bus_num); + if (!adapter) + goto out; + + payload_data = kzalloc(payload_size + 1, GFP_KERNEL); + if (!payload_data) + goto out; + + payload_data[0] = reg_offset; + memcpy(_data[1], (data + 7), payload_size); + + msg.addr = slave_addr; + msg.flags = 0; + msg.len = payload_size + 1; + msg.buf = payload_data; + + ret = i2c_transfer(adapter, , 1); + if (ret < 0) + DRM_ERROR("i2c transfer failed"); + + kfree(payload_data); + i2c_put_adapter(adapter); + + return data + payload_size + 7; +out: DRM_DEBUG_KMS("Skipping I2C 
element execution\n&qu
[Intel-gfx] [PATCH] drm/i915: Correct the PCH type in irq postinstall
JasperLake PCH (JSP) has DDI HPD pin mappings similar to TGP and not MCC. Also add the correct HPD pin mappings for the MCC PCH. Cc: Matt Roper Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/i915_irq.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ef09fbb36f37..e618f4621308 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3803,8 +3803,11 @@ static void icp_irq_postinstall(struct drm_i915_private *dev_priv) if (HAS_PCH_TGP(dev_priv)) icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, TGP_TC_HPD_ENABLE_MASK); - else if (HAS_PCH_MCC(dev_priv)) + else if (HAS_PCH_JSP(dev_priv)) icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, 0); + else if (HAS_PCH_MCC(dev_priv)) + icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, + ICP_TC_HPD_ENABLE(PORT_TC1)); else icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE_MASK); -- 2.21.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3] drm/i915: Introduce Jasper Lake PCH
On Tue, 15 Oct 2019 09:28:54 -0700 Matt Roper wrote: > The Jasper Lake PCH follows ICP/TGP's south display behavior and is > identical to MCC graphics-wise except that it does not use the unusual > (port C -> TC1) pin mapping that MCC does. > > Also, it turns out the extra PCH ID that we had previously thought > was a form of MCC is actually a second ID for JSP (i.e., port C uses > the port C pins instead of the TC1 pins). > > v2: > - Also update the port masks (not just the pin table) in >mcc_hpd_irq_setup. (Vivek) > > v3: > - Break jsp_hpd_irq_setup out into its own function for clarity. >(Vivek) > > Cc: José Roberto de Souza > Cc: James Ausmus > Cc: Vivek Kasireddy > Signed-off-by: Matt Roper > Reviewed-by: Vivek Kasireddy > --- > drivers/gpu/drm/i915/i915_irq.c | 24 +++- > drivers/gpu/drm/i915/intel_pch.c | 6 +- > drivers/gpu/drm/i915/intel_pch.h | 5 - > 3 files changed, 32 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c > b/drivers/gpu/drm/i915/i915_irq.c index d20ca02d3166..448390ad2128 > 100644 --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -2248,11 +2248,18 @@ static void icp_irq_handler(struct > drm_i915_private *dev_priv, u32 pch_iir) tc_hotplug_trigger = pch_iir > & SDE_TC_MASK_TGP; tc_port_hotplug_long_detect = > tgp_tc_port_hotplug_long_detect; pins = hpd_tgp; > + } else if (HAS_PCH_JSP(dev_priv)) { > + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; > + tc_hotplug_trigger = 0; > + pins = hpd_tgp; > } else if (HAS_PCH_MCC(dev_priv)) { > ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; > tc_hotplug_trigger = pch_iir & > SDE_TC_HOTPLUG_ICP(PORT_TC1); pins = hpd_icp; > } else { > + WARN(!HAS_PCH_ICP(dev_priv), > + "Unrecognized PCH type 0x%x\n", > INTEL_PCH_TYPE(dev_priv)); + > ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; > tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; > tc_port_hotplug_long_detect = > icp_tc_port_hotplug_long_detect; @@ -3384,6 +3391,19 @@ static void > 
mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) hpd_icp); > } > > +/* > + * JSP behaves exactly the same as MCC above except that port C is > mapped to > + * the DDI-C pins instead of the TC1 pins. This means we should > follow TGP's > + * masks & tables rather than ICP's masks & tables. > + */ > +static void jsp_hpd_irq_setup(struct drm_i915_private *dev_priv) > +{ > + icp_hpd_irq_setup(dev_priv, > + SDE_DDI_MASK_TGP, 0, > + TGP_DDI_HPD_ENABLE_MASK, 0, > + hpd_tgp); > +} > + Looks good. Reviewed-by: Vivek Kasireddy > static void gen11_hpd_detection_setup(struct drm_i915_private > *dev_priv) { > u32 hotplug; > @@ -4315,7 +4335,9 @@ void intel_irq_init(struct drm_i915_private > *dev_priv) if (I915_HAS_HOTPLUG(dev_priv)) > dev_priv->display.hpd_irq_setup = > i915_hpd_irq_setup; } else { > - if (HAS_PCH_MCC(dev_priv)) > + if (HAS_PCH_JSP(dev_priv)) > + dev_priv->display.hpd_irq_setup = > jsp_hpd_irq_setup; > + else if (HAS_PCH_MCC(dev_priv)) > dev_priv->display.hpd_irq_setup = > mcc_hpd_irq_setup; else if (INTEL_GEN(dev_priv) >= 11) > dev_priv->display.hpd_irq_setup = > gen11_hpd_irq_setup; diff --git a/drivers/gpu/drm/i915/intel_pch.c > b/drivers/gpu/drm/i915/intel_pch.c index 15f8bff141f9..1035d3d46fd8 > 100644 --- a/drivers/gpu/drm/i915/intel_pch.c > +++ b/drivers/gpu/drm/i915/intel_pch.c > @@ -79,7 +79,6 @@ intel_pch_type(const struct drm_i915_private > *dev_priv, unsigned short id) WARN_ON(!IS_ICELAKE(dev_priv)); > return PCH_ICP; > case INTEL_PCH_MCC_DEVICE_ID_TYPE: > - case INTEL_PCH_MCC2_DEVICE_ID_TYPE: > DRM_DEBUG_KMS("Found Mule Creek Canyon PCH\n"); > WARN_ON(!IS_ELKHARTLAKE(dev_priv)); > return PCH_MCC; > @@ -87,6 +86,11 @@ intel_pch_type(const struct drm_i915_private > *dev_priv, unsigned short id) DRM_DEBUG_KMS("Found Tiger Lake LP > PCH\n"); WARN_ON(!IS_TIGERLAKE(dev_priv)); > return PCH_TGP; > + case INTEL_PCH_JSP_DEVICE_ID_TYPE: > + case INTEL_PCH_JSP2_DEVICE_ID_TYPE: > + DRM_DEBUG_KMS("Found Jasper Lake PCH\n"); > + 
WARN_ON(!IS_ELKHARTLAKE(dev_priv)); > + return PCH_JSP; > default: >
Re: [Intel-gfx] [PATCH] drm/i915/ehl: Don't forget to set TC long detect function
On Tue, 15 Oct 2019 09:11:31 -0700 Matt Roper wrote: > Since EHL's MCC PCH reuses one of the TC pins we need to supply a TC > long detect function when handling the interrupts. > > Fixes: 53448aed7b80 ("drm/i915/ehl: Port C's hotplug interrupt is > associated with TC1 bits") Reported-by: kbuild test robot > Reported-by: Dan Carpenter > Cc: Vivek Kasireddy > Signed-off-by: Matt Roper > --- > drivers/gpu/drm/i915/i915_irq.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c > b/drivers/gpu/drm/i915/i915_irq.c index a7c968b01af3..af7426cd8de9 > 100644 --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -2251,6 +2251,7 @@ static void icp_irq_handler(struct > drm_i915_private *dev_priv, u32 pch_iir) } else if > (HAS_PCH_MCC(dev_priv)) { ddi_hotplug_trigger = pch_iir & > SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & > SDE_TC_HOTPLUG_ICP(PORT_TC1); > + tc_port_hotplug_long_detect = > icp_tc_port_hotplug_long_detect; pins = hpd_icp; Reviewed-by: Vivek Kasireddy > } else { > ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2] drm/i915: Introduce Jasper Lake PCH
On Mon, 14 Oct 2019 15:43:41 -0700 Matt Roper wrote: > The Jasper Lake PCH follows ICP/TGP's south display behavior and is > identical to MCC graphics-wise except that it does not use the unusual > (port C -> TC1) pin mapping that MCC does. > > Also, it turns out the extra PCH ID that we had previously thought > was a form of MCC is actually a second ID for JSP (i.e., port C uses > the port C pins instead of the TC1 pins). > > v2: > - Also update the port masks (not just the pin table) in >mcc_hpd_irq_setup. (Vivek) > > Cc: José Roberto de Souza > Cc: James Ausmus > Cc: Vivek Kasireddy > Signed-off-by: Matt Roper > --- > drivers/gpu/drm/i915/i915_irq.c | 31 +-- > drivers/gpu/drm/i915/intel_pch.c | 6 +- > drivers/gpu/drm/i915/intel_pch.h | 5 - > 3 files changed, 34 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c > b/drivers/gpu/drm/i915/i915_irq.c index d20ca02d3166..81e9ed48ce9f > 100644 --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -2248,11 +2248,18 @@ static void icp_irq_handler(struct > drm_i915_private *dev_priv, u32 pch_iir) tc_hotplug_trigger = pch_iir > & SDE_TC_MASK_TGP; tc_port_hotplug_long_detect = > tgp_tc_port_hotplug_long_detect; pins = hpd_tgp; > + } else if (HAS_PCH_JSP(dev_priv)) { > + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; > + tc_hotplug_trigger = 0; > + pins = hpd_tgp; > } else if (HAS_PCH_MCC(dev_priv)) { > ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; > tc_hotplug_trigger = pch_iir & > SDE_TC_HOTPLUG_ICP(PORT_TC1); pins = hpd_icp; > } else { > + WARN(!HAS_PCH_ICP(dev_priv), > + "Unrecognized PCH type 0x%x\n", > INTEL_PCH_TYPE(dev_priv)); + > ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; > tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; > tc_port_hotplug_long_detect = > icp_tc_port_hotplug_long_detect; @@ -3373,15 +3380,27 @@ static void > icp_hpd_irq_setup(struct drm_i915_private *dev_priv, } > > /* > - * EHL doesn't need most of gen11_hpd_irq_setup, it's 
handling only > the > + * EHL/JSL don't need most of gen11_hpd_irq_setup, they're handling > only the > * equivalent of SDE. > + * > + * Note that MCC and JSP have different port C pin mappings, hence > the use of > + * ICP's masks & tables (hpd C on TC1) vs TGP's masks & tables (hpd > C on DDIC) > + * depending on platform. > */ > static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) > { > - icp_hpd_irq_setup(dev_priv, > - SDE_DDI_MASK_ICP, > SDE_TC_HOTPLUG_ICP(PORT_TC1), > - ICP_DDI_HPD_ENABLE_MASK, > ICP_TC_HPD_ENABLE(PORT_TC1), > - hpd_icp); > + if (HAS_PCH_JSP(dev_priv)) > + icp_hpd_irq_setup(dev_priv, > + SDE_DDI_MASK_TGP, 0, > + TGP_DDI_HPD_ENABLE_MASK, 0, > + hpd_tgp); > + else > + icp_hpd_irq_setup(dev_priv, > + SDE_DDI_MASK_ICP, > + SDE_TC_HOTPLUG_ICP(PORT_TC1), > + ICP_DDI_HPD_ENABLE_MASK, > + ICP_TC_HPD_ENABLE(PORT_TC1), > + hpd_icp); Although MCC and JSL PCH are similar, wouldn't it be a bit cleaner if we had a separate function for JSP? Something like jsp_hpd_irq_setup()... Regardless, this patch is Reviewed-by: Vivek Kasireddy Thanks, Vivek > } > > static void gen11_hpd_detection_setup(struct drm_i915_private > *dev_priv) @@ -4315,7 +4334,7 @@ void intel_irq_init(struct > drm_i915_private *dev_priv) if (I915_HAS_HOTPLUG(dev_priv)) > dev_priv->display.hpd_irq_setup = > i915_hpd_irq_setup; } else { > - if (HAS_PCH_MCC(dev_priv)) > + if (HAS_PCH_MCC(dev_priv) || HAS_PCH_JSP(dev_priv)) > dev_priv->display.hpd_irq_setup = > mcc_hpd_irq_setup; else if (INTEL_GEN(dev_priv) >= 11) > dev_priv->display.hpd_irq_setup = > gen11_hpd_irq_setup; diff --git a/drivers/gpu/drm/i915/intel_pch.c > b/drivers/gpu/drm/i915/intel_pch.c index 15f8bff141f9..1035d3d46fd8 > 100644 --- a/drivers/gpu/drm/i915/intel_pch.c > +++ b/drivers/gpu/drm/i915/intel_pch.c > @@ -79,7 +79,6 @@ intel_pch_type(const struct drm_i915_private > *dev_priv, unsigned short id) WARN_ON(!IS_ICELAKE(dev_priv)); > return PCH_ICP; > case INTEL_PCH_MCC
Re: [Intel-gfx] [PATCH] drm/i915: Introduce Jasper Lake PCH
On Mon, 14 Oct 2019 14:24:31 -0700 Matt Roper wrote: > The Jasper Lake PCH follows ICP/TGP's south display behavior and is > identical to MCC graphics-wise except that it does not use the unusual > (port C -> TC1) pin mapping that MCC does. > > Also, it turns out the extra PCH ID that we had previously thought > was a form of MCC is actually a second ID for JSP (i.e., port C uses > the port C pins instead of the TC1 pins). > > Cc: José Roberto de Souza > Cc: James Ausmus > Cc: Vivek Kasireddy > Signed-off-by: Matt Roper > --- > drivers/gpu/drm/i915/i915_irq.c | 17 ++--- > drivers/gpu/drm/i915/intel_pch.c | 6 +- > drivers/gpu/drm/i915/intel_pch.h | 5 - > 3 files changed, 23 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c > b/drivers/gpu/drm/i915/i915_irq.c index d20ca02d3166..de16576bb5fa > 100644 --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -2248,11 +2248,18 @@ static void icp_irq_handler(struct > drm_i915_private *dev_priv, u32 pch_iir) tc_hotplug_trigger = pch_iir > & SDE_TC_MASK_TGP; tc_port_hotplug_long_detect = > tgp_tc_port_hotplug_long_detect; pins = hpd_tgp; > + } else if (HAS_PCH_JSP(dev_priv)) { > + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; > + tc_hotplug_trigger = 0; > + pins = hpd_tgp; > } else if (HAS_PCH_MCC(dev_priv)) { > ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; > tc_hotplug_trigger = pch_iir & > SDE_TC_HOTPLUG_ICP(PORT_TC1); pins = hpd_icp; > } else { > + WARN(!HAS_PCH_ICP(dev_priv), > + "Unrecognized PCH type 0x%x\n", > INTEL_PCH_TYPE(dev_priv)); + > ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; > tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; > tc_port_hotplug_long_detect = > icp_tc_port_hotplug_long_detect; @@ -3373,15 +3380,19 @@ static void > icp_hpd_irq_setup(struct drm_i915_private *dev_priv, } > > /* > - * EHL doesn't need most of gen11_hpd_irq_setup, it's handling only > the > + * EHL/JSL don't need most of gen11_hpd_irq_setup, they're handling > only 
the > * equivalent of SDE. > + * > + * Note that MCC and JSP have different port C pin mappings, hence > the use of > + * ICP's table (hpd C on TC1) vs TGP's table (hpd C on DDIC) > depending on > + * platform. > */ > static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) > { > icp_hpd_irq_setup(dev_priv, > SDE_DDI_MASK_ICP, > SDE_TC_HOTPLUG_ICP(PORT_TC1), ICP_DDI_HPD_ENABLE_MASK, > ICP_TC_HPD_ENABLE(PORT_TC1), > - hpd_icp); > + HAS_PCH_JSP(dev_priv) ? hpd_tgp : hpd_icp); Unless I am misreading this, shouldn't you change the ddi_mask and ddi_enable_mask to _TGP as well? Thanks, Vivek > } > > static void gen11_hpd_detection_setup(struct drm_i915_private > *dev_priv) @@ -4315,7 +4326,7 @@ void intel_irq_init(struct > drm_i915_private *dev_priv) if (I915_HAS_HOTPLUG(dev_priv)) > dev_priv->display.hpd_irq_setup = > i915_hpd_irq_setup; } else { > - if (HAS_PCH_MCC(dev_priv)) > + if (HAS_PCH_MCC(dev_priv) || HAS_PCH_JSP(dev_priv)) > dev_priv->display.hpd_irq_setup = > mcc_hpd_irq_setup; else if (INTEL_GEN(dev_priv) >= 11) > dev_priv->display.hpd_irq_setup = > gen11_hpd_irq_setup; diff --git a/drivers/gpu/drm/i915/intel_pch.c > b/drivers/gpu/drm/i915/intel_pch.c index 15f8bff141f9..1035d3d46fd8 > 100644 --- a/drivers/gpu/drm/i915/intel_pch.c > +++ b/drivers/gpu/drm/i915/intel_pch.c > @@ -79,7 +79,6 @@ intel_pch_type(const struct drm_i915_private > *dev_priv, unsigned short id) WARN_ON(!IS_ICELAKE(dev_priv)); > return PCH_ICP; > case INTEL_PCH_MCC_DEVICE_ID_TYPE: > - case INTEL_PCH_MCC2_DEVICE_ID_TYPE: > DRM_DEBUG_KMS("Found Mule Creek Canyon PCH\n"); > WARN_ON(!IS_ELKHARTLAKE(dev_priv)); > return PCH_MCC; > @@ -87,6 +86,11 @@ intel_pch_type(const struct drm_i915_private > *dev_priv, unsigned short id) DRM_DEBUG_KMS("Found Tiger Lake LP > PCH\n"); WARN_ON(!IS_TIGERLAKE(dev_priv)); > return PCH_TGP; > + case INTEL_PCH_JSP_DEVICE_ID_TYPE: > + case INTEL_PCH_JSP2_DEVICE_ID_TYPE: > + DRM_DEBUG_KMS("Found Jasper Lake PCH\n"); > + WARN_ON(!IS_ELKHARTLAKE(dev_priv)); > 
+ return PCH_JSP; > default: > return PCH_NONE; > } > diff --git a/drivers/gpu/drm/i915/intel_pc
[Intel-gfx] [PATCH] drm/i915/ehl: Port C's hotplug interrupt is associated with TC1 bits
On some platforms that have the MCC PCH, Port C's hotplug interrupt bits are mapped to TC1 bits. Suggested-by: Matt Roper Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_dp.c | 3 +++ drivers/gpu/drm/i915/i915_irq.c | 8 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 0e45c61d7331..6594f2af1257 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5282,6 +5282,9 @@ static bool icl_combo_port_connected(struct drm_i915_private *dev_priv, { enum port port = intel_dig_port->base.port; + if (HAS_PCH_MCC(dev_priv) && port == PORT_C) + return I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1); + return I915_READ(SDEISR) & SDE_DDI_HOTPLUG_ICP(port); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3af7f7914c40..a7c968b01af3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2249,8 +2249,8 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect; pins = hpd_tgp; } else if (HAS_PCH_MCC(dev_priv)) { - ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; - tc_hotplug_trigger = 0; + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; + tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1); pins = hpd_icp; } else { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; @@ -3377,8 +3377,8 @@ static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv, static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) { icp_hpd_irq_setup(dev_priv, - SDE_DDI_MASK_TGP, 0, - TGP_DDI_HPD_ENABLE_MASK, 0, + SDE_DDI_MASK_ICP, SDE_TC_HOTPLUG_ICP(PORT_TC1), + ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1), hpd_icp); } -- 2.21.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/ehl: Use an id of 4 while accessing DPLL4's CR0 and CR1
Although, DPLL4 enable and disable is associated with MGPLL1_ENABLE register, we can use ICL_DPLL_CFGCR0/CR1 macros to access this dpll's CR0 and CR1 registers by passing an id of 4 to these macros. Reported-by: Ville Syrjälä Cc: Ville Syrjälä Cc: José Roberto de Souza Cc: Matt Roper Cc: Imre Deak Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 18 ++ 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 319a26a1ec10..f9bdf8514a53 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -3127,8 +3127,13 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, hw_state->cfgcr0 = I915_READ(TGL_DPLL_CFGCR0(id)); hw_state->cfgcr1 = I915_READ(TGL_DPLL_CFGCR1(id)); } else { - hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); - hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); + if (IS_ELKHARTLAKE(dev_priv) && id == DPLL_ID_EHL_DPLL4) { + hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(4)); + hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(4)); + } else { + hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); + hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); + } } ret = true; @@ -3169,8 +3174,13 @@ static void icl_dpll_write(struct drm_i915_private *dev_priv, cfgcr0_reg = TGL_DPLL_CFGCR0(id); cfgcr1_reg = TGL_DPLL_CFGCR1(id); } else { - cfgcr0_reg = ICL_DPLL_CFGCR0(id); - cfgcr1_reg = ICL_DPLL_CFGCR1(id); + if (IS_ELKHARTLAKE(dev_priv) && id == DPLL_ID_EHL_DPLL4) { + cfgcr0_reg = ICL_DPLL_CFGCR0(4); + cfgcr1_reg = ICL_DPLL_CFGCR1(4); + } else { + cfgcr0_reg = ICL_DPLL_CFGCR0(id); + cfgcr1_reg = ICL_DPLL_CFGCR1(id); + } } I915_WRITE(cfgcr0_reg, hw_state->cfgcr0); -- 2.21.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v10)
On Wed, 10 Jul 2019 21:47:52 +0300 Ville Syrjälä wrote: Hi Ville, > On Wed, Jul 03, 2019 at 04:03:53PM -0700, Vivek Kasireddy wrote: > > This patch adds support for DPLL4 on EHL that include the > > following restrictions: > > > > - DPLL4 cannot be used with DDIA (combo port A internal eDP usage). > > DPLL4 can be used with other DDIs, including DDID > > (combo port A external usage). > > > > - DPLL4 cannot be enabled when DC5 or DC6 are enabled. > > > > - The DPLL4 enable, lock, power enabled, and power state are > > connected to the MGPLL1_ENABLE register. > > > > v2: (suggestions from Bob Paauwe) > > - Rework ehl_get_dpll() function to call intel_find_shared_dpll() > > and iterate twice: once for Combo plls and once for MG plls. > > > > - Use MG pll funcs for DPLL4 instead of creating new ones and modify > > mg_pll_enable to include the restrictions for EHL. > > > > v3: Fix compilation error > > > > v4: (suggestions from Lucas and Ville) > > - Treat DPLL4 as a combo phy PLL and not as MG PLL > > - Disable DC states when this DPLL is being enabled > > - Reuse icl_get_dpll instead of creating a separate one for EHL > > > > v5: (suggestion from Ville) > > - Refcount the DC OFF power domains during the enabling and > > disabling of this DPLL. > > > > v6: rebase > > > > v7: (suggestion from Imre) > > - Add a new power domain instead of iterating over the domains > > assoicated with DC OFF power well. > > > > v8: (Ville and Imre) > > - Rename POWER_DOMAIN_DPLL4 TO POWER_DOMAIN_DPLL_DC_OFF > > - Grab a reference in intel_modeset_setup_hw_state() if this > > DPLL was already enabled perhaps by BIOS. > > - Check for the port type instead of the encoder > > > > v9: (Ville) > > - Move the block of code that grabs a reference to the power domain > > POWER_DOMAIN_DPLL_DC_OFF to intel_modeset_readout_hw_state() to > > ensure that there is a reference present before this DPLL might get > > disabled. 
> > > > v10: rebase > > > > Cc: José Roberto de Souza > > Cc: Ville Syrjälä > > Cc: Matt Roper > > Cc: Imre Deak > > Signed-off-by: Vivek Kasireddy > > --- > > drivers/gpu/drm/i915/display/intel_display.c | 7 +++ > > .../drm/i915/display/intel_display_power.c| 3 ++ > > .../drm/i915/display/intel_display_power.h| 1 + > > drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 47 > > +-- drivers/gpu/drm/i915/display/intel_dpll_mgr.h > > | 6 +++ 5 files changed, 60 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/display/intel_display.c > > b/drivers/gpu/drm/i915/display/intel_display.c index > > 919f5ac844c8..557462208462 100644 --- > > a/drivers/gpu/drm/i915/display/intel_display.c +++ > > b/drivers/gpu/drm/i915/display/intel_display.c @@ -16653,6 > > +16653,13 @@ static void intel_modeset_readout_hw_state(struct > > drm_device *dev) pll->on = pll->info->funcs->get_hw_state(dev_priv, > > pll, >state.hw_state); > > + > > + if (IS_ELKHARTLAKE(dev_priv) && pll->on && > > + pll->info->id == DPLL_ID_EHL_DPLL4) { > > + pll->wakeref = > > intel_display_power_get(dev_priv, > > + > > POWER_DOMAIN_DPLL_DC_OFF); > > + } > > + > > pll->state.crtc_mask = 0; > > for_each_intel_crtc(dev, crtc) { > > struct intel_crtc_state *crtc_state = > > diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c > > b/drivers/gpu/drm/i915/display/intel_display_power.c index > > c19b958461ca..7437fc71d289 100644 --- > > a/drivers/gpu/drm/i915/display/intel_display_power.c +++ > > b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -118,6 > > +118,8 @@ intel_display_power_domain_str(enum > > intel_display_power_domain domain) return "MODESET"; case > > POWER_DOMAIN_GT_IRQ: return "GT_IRQ"; > > + case POWER_DOMAIN_DPLL_DC_OFF: > > + return "DPLL_DC_OFF"; > > default: > > MISSING_CASE(domain); > > return "?"; > > @@ -2455,6 +2457,7 @@ void intel_display_power_put(struct > > drm_i915_private *dev_priv, ICL_PW_2_POWER_DOMAINS > > | \ BIT_ULL(POWER_DOMAIN_MODESET) > > | \ 
BIT_ULL(POWER_DOMAIN_AUX_A) > > | \ > > + BIT_ULL(POWER_DOMAIN_DPLL_DC_OFF) | > > \ BIT_ULL(POWER_DOMAIN_INIT)) >
[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v10)
This patch adds support for DPLL4 on EHL that includes the following restrictions: - DPLL4 cannot be used with DDIA (combo port A internal eDP usage). DPLL4 can be used with other DDIs, including DDID (combo port A external usage). - DPLL4 cannot be enabled when DC5 or DC6 are enabled. - The DPLL4 enable, lock, power enabled, and power state are connected to the MGPLL1_ENABLE register. v2: (suggestions from Bob Paauwe) - Rework ehl_get_dpll() function to call intel_find_shared_dpll() and iterate twice: once for Combo plls and once for MG plls. - Use MG pll funcs for DPLL4 instead of creating new ones and modify mg_pll_enable to include the restrictions for EHL. v3: Fix compilation error v4: (suggestions from Lucas and Ville) - Treat DPLL4 as a combo phy PLL and not as MG PLL - Disable DC states when this DPLL is being enabled - Reuse icl_get_dpll instead of creating a separate one for EHL v5: (suggestion from Ville) - Refcount the DC OFF power domains during the enabling and disabling of this DPLL. v6: rebase v7: (suggestion from Imre) - Add a new power domain instead of iterating over the domains associated with DC OFF power well. v8: (Ville and Imre) - Rename POWER_DOMAIN_DPLL4 TO POWER_DOMAIN_DPLL_DC_OFF - Grab a reference in intel_modeset_setup_hw_state() if this DPLL was already enabled perhaps by BIOS. - Check for the port type instead of the encoder v9: (Ville) - Move the block of code that grabs a reference to the power domain POWER_DOMAIN_DPLL_DC_OFF to intel_modeset_readout_hw_state() to ensure that there is a reference present before this DPLL might get disabled. 
v10: rebase Cc: José Roberto de Souza Cc: Ville Syrjälä Cc: Matt Roper Cc: Imre Deak Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_display.c | 7 +++ .../drm/i915/display/intel_display_power.c| 3 ++ .../drm/i915/display/intel_display_power.h| 1 + drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 47 +-- drivers/gpu/drm/i915/display/intel_dpll_mgr.h | 6 +++ 5 files changed, 60 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 919f5ac844c8..557462208462 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -16653,6 +16653,13 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) pll->on = pll->info->funcs->get_hw_state(dev_priv, pll, >state.hw_state); + + if (IS_ELKHARTLAKE(dev_priv) && pll->on && + pll->info->id == DPLL_ID_EHL_DPLL4) { + pll->wakeref = intel_display_power_get(dev_priv, + POWER_DOMAIN_DPLL_DC_OFF); + } + pll->state.crtc_mask = 0; for_each_intel_crtc(dev, crtc) { struct intel_crtc_state *crtc_state = diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index c19b958461ca..7437fc71d289 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -118,6 +118,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "MODESET"; case POWER_DOMAIN_GT_IRQ: return "GT_IRQ"; + case POWER_DOMAIN_DPLL_DC_OFF: + return "DPLL_DC_OFF"; default: MISSING_CASE(domain); return "?"; @@ -2455,6 +2457,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, ICL_PW_2_POWER_DOMAINS |\ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_DPLL_DC_OFF) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define ICL_DDI_IO_A_POWER_DOMAINS ( \ diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h 
b/drivers/gpu/drm/i915/display/intel_display_power.h index ff57b0a7fe59..8f43f7051a16 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -59,6 +59,7 @@ enum intel_display_power_domain { POWER_DOMAIN_GMBUS, POWER_DOMAIN_MODESET, POWER_DOMAIN_GT_IRQ, + POWER_DOMAIN_DPLL_DC_OFF, POWER_DOMAIN_INIT, POWER_DOMAIN_NUM, diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index f953971e7c3b..67cfe836286e 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2905,6 +2905,9 @@ static bool icl_get_combo_phy_dpll(st
[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v9)
This patch adds support for DPLL4 on EHL that includes the following restrictions: - DPLL4 cannot be used with DDIA (combo port A internal eDP usage). DPLL4 can be used with other DDIs, including DDID (combo port A external usage). - DPLL4 cannot be enabled when DC5 or DC6 are enabled. - The DPLL4 enable, lock, power enabled, and power state are connected to the MGPLL1_ENABLE register. v2: (suggestions from Bob Paauwe) - Rework ehl_get_dpll() function to call intel_find_shared_dpll() and iterate twice: once for Combo plls and once for MG plls. - Use MG pll funcs for DPLL4 instead of creating new ones and modify mg_pll_enable to include the restrictions for EHL. v3: Fix compilation error v4: (suggestions from Lucas and Ville) - Treat DPLL4 as a combo phy PLL and not as MG PLL - Disable DC states when this DPLL is being enabled - Reuse icl_get_dpll instead of creating a separate one for EHL v5: (suggestion from Ville) - Refcount the DC OFF power domains during the enabling and disabling of this DPLL. v6: rebase v7: (suggestion from Imre) - Add a new power domain instead of iterating over the domains associated with DC OFF power well. v8: (Ville and Imre) - Rename POWER_DOMAIN_DPLL4 TO POWER_DOMAIN_DPLL_DC_OFF - Grab a reference in intel_modeset_setup_hw_state() if this DPLL was already enabled perhaps by BIOS. - Check for the port type instead of the encoder v9: (Ville) - Move the block of code that grabs a reference to the power domain POWER_DOMAIN_DPLL_DC_OFF to intel_modeset_readout_hw_state() to ensure that there is a reference present before this DPLL might get disabled. 
Cc: José Roberto de Souza Cc: Ville Syrjälä Cc: Matt Roper Cc: Imre Deak Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_display.c | 7 .../drm/i915/display/intel_display_power.c| 3 ++ .../drm/i915/display/intel_display_power.h| 1 + drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 42 +-- drivers/gpu/drm/i915/display/intel_dpll_mgr.h | 6 +++ 5 files changed, 56 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index e55bd75528c1..3f1ff3bb5e36 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -16569,6 +16569,13 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) pll->on = pll->info->funcs->get_hw_state(dev_priv, pll, >state.hw_state); + + if (IS_ELKHARTLAKE(dev_priv) && pll->on && + pll->info->id == DPLL_ID_EHL_DPLL4) { + pll->wakeref = intel_display_power_get(dev_priv, + POWER_DOMAIN_DPLL_DC_OFF); + } + pll->state.crtc_mask = 0; for_each_intel_crtc(dev, crtc) { struct intel_crtc_state *crtc_state = diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index c93ad512014c..1c101a842331 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -117,6 +117,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "MODESET"; case POWER_DOMAIN_GT_IRQ: return "GT_IRQ"; + case POWER_DOMAIN_DPLL_DC_OFF: + return "DPLL_DC_OFF"; default: MISSING_CASE(domain); return "?"; @@ -2361,6 +2363,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, ICL_PW_2_POWER_DOMAINS |\ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_DPLL_DC_OFF) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define ICL_DDI_IO_A_POWER_DOMAINS ( \ diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h 
b/drivers/gpu/drm/i915/display/intel_display_power.h index ff57b0a7fe59..8f43f7051a16 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -59,6 +59,7 @@ enum intel_display_power_domain { POWER_DOMAIN_GMBUS, POWER_DOMAIN_MODESET, POWER_DOMAIN_GT_IRQ, + POWER_DOMAIN_DPLL_DC_OFF, POWER_DOMAIN_INIT, POWER_DOMAIN_NUM, diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 2d4e7b9a7b9d..81e1443cb583 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2806,6 +2806,10 @@ icl_get_dpll(struct intel_crtc_state
[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v8)
This patch adds support for DPLL4 on EHL that includes the following restrictions: - DPLL4 cannot be used with DDIA (combo port A internal eDP usage). DPLL4 can be used with other DDIs, including DDID (combo port A external usage). - DPLL4 cannot be enabled when DC5 or DC6 are enabled. - The DPLL4 enable, lock, power enabled, and power state are connected to the MGPLL1_ENABLE register. v2: (suggestions from Bob Paauwe) - Rework ehl_get_dpll() function to call intel_find_shared_dpll() and iterate twice: once for Combo plls and once for MG plls. - Use MG pll funcs for DPLL4 instead of creating new ones and modify mg_pll_enable to include the restrictions for EHL. v3: Fix compilation error v4: (suggestions from Lucas and Ville) - Treat DPLL4 as a combo phy PLL and not as MG PLL - Disable DC states when this DPLL is being enabled - Reuse icl_get_dpll instead of creating a separate one for EHL v5: (suggestion from Ville) - Refcount the DC OFF power domains during the enabling and disabling of this DPLL. v6: rebase v7: (suggestion from Imre) - Add a new power domain instead of iterating over the domains associated with DC OFF power well. v8: (Ville and Imre) - Rename POWER_DOMAIN_DPLL4 TO POWER_DOMAIN_DPLL_DC_OFF - Grab a reference in intel_modeset_setup_hw_state() if this DPLL was already enabled perhaps by BIOS. 
- Check for the port type instead of the encoder Cc: José Roberto de Souza Cc: Ville Syrjälä Cc: Matt Roper Cc: Imre Deak Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/display/intel_display.c | 6 +++ .../drm/i915/display/intel_display_power.c| 3 ++ .../drm/i915/display/intel_display_power.h| 1 + drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 42 +-- drivers/gpu/drm/i915/display/intel_dpll_mgr.h | 6 +++ 5 files changed, 55 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 8592a7d422de..a5f387e486ee 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -16778,6 +16778,12 @@ intel_modeset_setup_hw_state(struct drm_device *dev, for (i = 0; i < dev_priv->num_shared_dpll; i++) { struct intel_shared_dpll *pll = _priv->shared_dplls[i]; + if (IS_ELKHARTLAKE(dev_priv) && pll->on && + pll->info->id == DPLL_ID_EHL_DPLL4) { + pll->wakeref = intel_display_power_get(dev_priv, + POWER_DOMAIN_DPLL_DC_OFF); + } + if (!pll->on || pll->active_mask) continue; diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index c93ad512014c..1c101a842331 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -117,6 +117,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "MODESET"; case POWER_DOMAIN_GT_IRQ: return "GT_IRQ"; + case POWER_DOMAIN_DPLL_DC_OFF: + return "DPLL_DC_OFF"; default: MISSING_CASE(domain); return "?"; @@ -2361,6 +2363,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, ICL_PW_2_POWER_DOMAINS |\ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_DPLL_DC_OFF) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define ICL_DDI_IO_A_POWER_DOMAINS ( \ diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h 
b/drivers/gpu/drm/i915/display/intel_display_power.h index ff57b0a7fe59..8f43f7051a16 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -59,6 +59,7 @@ enum intel_display_power_domain { POWER_DOMAIN_GMBUS, POWER_DOMAIN_MODESET, POWER_DOMAIN_GT_IRQ, + POWER_DOMAIN_DPLL_DC_OFF, POWER_DOMAIN_INIT, POWER_DOMAIN_NUM, diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 2d4e7b9a7b9d..81e1443cb583 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2806,6 +2806,10 @@ icl_get_dpll(struct intel_crtc_state *crtc_state, if (intel_port_is_combophy(dev_priv, port)) { min = DPLL_ID_ICL_DPLL0; max = DPLL_ID_ICL_DPLL1; + + if (IS_ELKHARTLAKE(dev_priv) && port != PORT_A) + max = DPLL_ID_EHL_DPLL4; + ret = icl_calc_dpll_state(crtc_state, encoder
[Intel-gfx] [PATCH] drm/i915/ehl: Add power wells support for Elkhart Lake
The number of power wells and the relevant sequences are common between ICL and EHL since they both are Gen 11. The only significant differences are that EHL does not have DDI E and DDI D and type C/TBT ports. Cc: Clint Taylor Cc: José Roberto de Souza Cc: Matt Roper Cc: Imre Deak Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/intel_display_power.c | 210 - 1 file changed, 209 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display_power.c b/drivers/gpu/drm/i915/intel_display_power.c index c672c8080a93..e3ed77b843d2 100644 --- a/drivers/gpu/drm/i915/intel_display_power.c +++ b/drivers/gpu/drm/i915/intel_display_power.c @@ -2397,6 +2397,66 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, #define ICL_AUX_TBT4_IO_POWER_DOMAINS (\ BIT_ULL(POWER_DOMAIN_AUX_TBT4)) +#define EHL_PW_4_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + /* VDSC/joining */ +#define EHL_PW_3_POWER_DOMAINS ( \ + EHL_PW_4_POWER_DOMAINS |\ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |\ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |\ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |\ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |\ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |\ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |\ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + /* +* - transcoder WD +* - KVMR (HW control) +*/ +#define EHL_PW_2_POWER_DOMAINS ( \ + EHL_PW_3_POWER_DOMAINS |\ + BIT_ULL(POWER_DOMAIN_TRANSCODER_EDP_VDSC) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + /* +* - KVMR (HW control) +*/ +#define EHL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ + 
EHL_PW_2_POWER_DOMAINS |\ + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +#define EHL_DDI_IO_A_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO)) +#define EHL_DDI_IO_B_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO)) +#define EHL_DDI_IO_C_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO)) +#define EHL_DDI_IO_D_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO)) + +#define EHL_AUX_A_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_IO_A) |\ + BIT_ULL(POWER_DOMAIN_AUX_A)) +#define EHL_AUX_B_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_B)) +#define EHL_AUX_C_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_C)) +#define EHL_AUX_D_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_D)) + static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_power_well_sync_hw_noop, .enable = i9xx_always_on_power_well_noop, @@ -3354,6 +3414,152 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }; +static const struct i915_power_well_desc ehl_power_wells[] = { + { + .name = "always-on", + .always_on = true, + .domains = POWER_DOMAIN_MASK, + .ops = _always_on_power_well_ops, + .id = DISP_PW_ID_NONE, + }, + { + .name = "power well 1", + /* Handled by the DMC firmware */ + .always_on = true, + .domains = 0, + .ops = _power_well_ops, + .id = SKL_DISP_PW_1, + { + .hsw.regs = _power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_1, + .hsw.has_fuses = true, + }, + }, + { + .name = "DC off", + .domains = EHL_DISPLAY_DC_OFF_POWER_DOMAINS, + .ops = _dc_off_power_well_ops, + .id = DISP_PW_ID_NONE, + }, + { + .name = "power well 2", +
[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v7)
This patch adds support for DPLL4 on EHL that includes the following restrictions: - DPLL4 cannot be used with DDIA (combo port A internal eDP usage). DPLL4 can be used with other DDIs, including DDID (combo port A external usage). - DPLL4 cannot be enabled when DC5 or DC6 are enabled. - The DPLL4 enable, lock, power enabled, and power state are connected to the MGPLL1_ENABLE register. v2: (suggestions from Bob Paauwe) - Rework ehl_get_dpll() function to call intel_find_shared_dpll() and iterate twice: once for Combo plls and once for MG plls. - Use MG pll funcs for DPLL4 instead of creating new ones and modify mg_pll_enable to include the restrictions for EHL. v3: Fix compilation error v4: (suggestions from Lucas and Ville) - Treat DPLL4 as a combo phy PLL and not as MG PLL - Disable DC states when this DPLL is being enabled - Reuse icl_get_dpll instead of creating a separate one for EHL v5: (suggestion from Ville) - Refcount the DC OFF power domains during the enabling and disabling of this DPLL. v6: rebase v7: (suggestion from Imre) - Add a new power domain instead of iterating over the domains associated with DC OFF power well. 
Cc: Lucas De Marchi Cc: José Roberto de Souza Cc: Ville Syrjälä Cc: Matt Roper Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/intel_display_power.c | 3 ++ drivers/gpu/drm/i915/intel_display_power.h | 1 + drivers/gpu/drm/i915/intel_dpll_mgr.c | 44 -- drivers/gpu/drm/i915/intel_dpll_mgr.h | 6 +++ 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display_power.c b/drivers/gpu/drm/i915/intel_display_power.c index 278a7edc94f5..2134d8b43f58 100644 --- a/drivers/gpu/drm/i915/intel_display_power.c +++ b/drivers/gpu/drm/i915/intel_display_power.c @@ -116,6 +116,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "MODESET"; case POWER_DOMAIN_GT_IRQ: return "GT_IRQ"; + case POWER_DOMAIN_DPLL4: + return "DPLL4"; default: MISSING_CASE(domain); return "?"; @@ -2357,6 +2359,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, ICL_PW_2_POWER_DOMAINS |\ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_DPLL4) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define ICL_DDI_IO_A_POWER_DOMAINS ( \ diff --git a/drivers/gpu/drm/i915/intel_display_power.h b/drivers/gpu/drm/i915/intel_display_power.h index ff57b0a7fe59..47266279 100644 --- a/drivers/gpu/drm/i915/intel_display_power.h +++ b/drivers/gpu/drm/i915/intel_display_power.h @@ -59,6 +59,7 @@ enum intel_display_power_domain { POWER_DOMAIN_GMBUS, POWER_DOMAIN_MODESET, POWER_DOMAIN_GT_IRQ, + POWER_DOMAIN_DPLL4, POWER_DOMAIN_INIT, POWER_DOMAIN_NUM, diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 69787f259677..3d712f54dc56 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2806,6 +2806,12 @@ icl_get_dpll(struct intel_crtc_state *crtc_state, if (intel_port_is_combophy(dev_priv, port)) { min = DPLL_ID_ICL_DPLL0; max = DPLL_ID_ICL_DPLL1; + + if (IS_ELKHARTLAKE(dev_priv)) { + if (encoder->type != INTEL_OUTPUT_EDP) + max = 
DPLL_ID_EHL_DPLL4; + } + ret = icl_calc_dpll_state(crtc_state, encoder); } else if (intel_port_is_tc(dev_priv, port)) { if (encoder->type == INTEL_OUTPUT_DP_MST) { @@ -2945,8 +2951,14 @@ static bool combo_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - return icl_pll_get_hw_state(dev_priv, pll, hw_state, - CNL_DPLL_ENABLE(pll->info->id)); + i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + + if (IS_ELKHARTLAKE(dev_priv) && + pll->info->id == DPLL_ID_EHL_DPLL4) { + enable_reg = MG_PLL_ENABLE(0); + } + + return icl_pll_get_hw_state(dev_priv, pll, hw_state, enable_reg); } static bool tbt_pll_get_hw_state(struct drm_i915_private *dev_priv, @@ -3057,6 +3069,19 @@ static void combo_pll_enable(struct drm_i915_private *dev_priv, { i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + if (IS_ELKHARTLAKE(dev_priv) && + pll->info->id == DPLL_ID_EHL_DPLL4) { + enable_reg = MG_PLL_ENABLE(0); + + /* +* We need t
[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v6)
This patch adds support for DPLL4 on EHL that includes the following restrictions: - DPLL4 cannot be used with DDIA (combo port A internal eDP usage). DPLL4 can be used with other DDIs, including DDID (combo port A external usage). - DPLL4 cannot be enabled when DC5 or DC6 are enabled. - The DPLL4 enable, lock, power enabled, and power state are connected to the MGPLL1_ENABLE register. v2: (suggestions from Bob Paauwe) - Rework ehl_get_dpll() function to call intel_find_shared_dpll() and iterate twice: once for Combo plls and once for MG plls. - Use MG pll funcs for DPLL4 instead of creating new ones and modify mg_pll_enable to include the restrictions for EHL. v3: Fix compilation error v4: (suggestions from Lucas and Ville) - Treat DPLL4 as a combo phy PLL and not as MG PLL - Disable DC states when this DPLL is being enabled - Reuse icl_get_dpll instead of creating a separate one for EHL v5: (suggestion from Ville) - Refcount the DC OFF power domains during the enabling and disabling of this DPLL. 
v6: rebase Cc: Lucas De Marchi Cc: José Roberto de Souza Cc: Ville Syrjälä Cc: Matt Roper Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/intel_display_power.c | 20 +++ drivers/gpu/drm/i915/intel_display_power.h | 6 drivers/gpu/drm/i915/intel_dpll_mgr.c | 40 +++--- drivers/gpu/drm/i915/intel_dpll_mgr.h | 5 +++ 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display_power.c b/drivers/gpu/drm/i915/intel_display_power.c index 278a7edc94f5..fd6d0d6a285a 100644 --- a/drivers/gpu/drm/i915/intel_display_power.c +++ b/drivers/gpu/drm/i915/intel_display_power.c @@ -4524,6 +4524,26 @@ void intel_power_domains_resume(struct drm_i915_private *i915) intel_power_domains_verify_state(i915); } +void icl_disable_dc_states(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + enum intel_display_power_domain domain; + + for_each_power_domain(domain, ICL_DISPLAY_DC_OFF_POWER_DOMAINS) + pll->wakerefs[domain] = intel_display_power_get(dev_priv, + domain); +} + +void icl_enable_dc_states(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + enum intel_display_power_domain domain; + + for_each_power_domain(domain, ICL_DISPLAY_DC_OFF_POWER_DOMAINS) + intel_display_power_put(dev_priv, domain, + pll->wakerefs[domain]); +} + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) static void intel_power_domains_dump_info(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/intel_display_power.h b/drivers/gpu/drm/i915/intel_display_power.h index ff57b0a7fe59..2abaa3806ec6 100644 --- a/drivers/gpu/drm/i915/intel_display_power.h +++ b/drivers/gpu/drm/i915/intel_display_power.h @@ -12,6 +12,7 @@ struct drm_i915_private; struct intel_encoder; +struct intel_shared_dpll; enum intel_display_power_domain { POWER_DOMAIN_DISPLAY_CORE, @@ -285,4 +286,9 @@ void chv_phy_powergate_lanes(struct intel_encoder *encoder, bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy, enum dpio_channel 
ch, bool override); +void icl_disable_dc_states(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll); +void icl_enable_dc_states(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll); + #endif /* __INTEL_DISPLAY_POWER_H__ */ diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 69787f259677..2829b37e2909 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -22,8 +22,8 @@ */ #include "intel_dpio_phy.h" -#include "intel_dpll_mgr.h" #include "intel_drv.h" +#include "intel_dpll_mgr.h" /** * DOC: Display PLLs @@ -2806,6 +2806,12 @@ icl_get_dpll(struct intel_crtc_state *crtc_state, if (intel_port_is_combophy(dev_priv, port)) { min = DPLL_ID_ICL_DPLL0; max = DPLL_ID_ICL_DPLL1; + + if (IS_ELKHARTLAKE(dev_priv)) { + if (encoder->type != INTEL_OUTPUT_EDP) + max = DPLL_ID_EHL_DPLL4; + } + ret = icl_calc_dpll_state(crtc_state, encoder); } else if (intel_port_is_tc(dev_priv, port)) { if (encoder->type == INTEL_OUTPUT_DP_MST) { @@ -2945,8 +2951,14 @@ static bool combo_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - return icl_pll_get_hw_state(dev_priv, pll, hw_state, -
[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v5)
This patch adds support for DPLL4 on EHL that includes the following restrictions: - DPLL4 cannot be used with DDIA (combo port A internal eDP usage). DPLL4 can be used with other DDIs, including DDID (combo port A external usage). - DPLL4 cannot be enabled when DC5 or DC6 are enabled. - The DPLL4 enable, lock, power enabled, and power state are connected to the MGPLL1_ENABLE register. v2: (suggestions from Bob Paauwe) - Rework ehl_get_dpll() function to call intel_find_shared_dpll() and iterate twice: once for Combo plls and once for MG plls. - Use MG pll funcs for DPLL4 instead of creating new ones and modify mg_pll_enable to include the restrictions for EHL. v3: Fix compilation error v4: (suggestions from Lucas and Ville) - Treat DPLL4 as a combo phy PLL and not as MG PLL - Disable DC states when this DPLL is being enabled - Reuse icl_get_dpll instead of creating a separate one for EHL v5: (suggestion from Ville) - Refcount the DC OFF power domains during the enabling and disabling of this DPLL. 
Cc: Lucas De Marchi Cc: José Roberto de Souza Cc: Ville Syrjälä Cc: Matt Roper Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 40 ++--- drivers/gpu/drm/i915/intel_dpll_mgr.h | 5 drivers/gpu/drm/i915/intel_runtime_pm.c | 21 + drivers/gpu/drm/i915/intel_runtime_pm.h | 5 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 897d93537414..6d89d231b33d 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -22,8 +22,8 @@ */ #include "intel_dpio_phy.h" -#include "intel_dpll_mgr.h" #include "intel_drv.h" +#include "intel_dpll_mgr.h" /** * DOC: Display PLLs @@ -2806,6 +2806,12 @@ icl_get_dpll(struct intel_crtc_state *crtc_state, if (intel_port_is_combophy(dev_priv, port)) { min = DPLL_ID_ICL_DPLL0; max = DPLL_ID_ICL_DPLL1; + + if (IS_ELKHARTLAKE(dev_priv)) { + if (encoder->type != INTEL_OUTPUT_EDP) + max = DPLL_ID_EHL_DPLL4; + } + ret = icl_calc_dpll_state(crtc_state, encoder); } else if (intel_port_is_tc(dev_priv, port)) { if (encoder->type == INTEL_OUTPUT_DP_MST) { @@ -2945,8 +2951,14 @@ static bool combo_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - return icl_pll_get_hw_state(dev_priv, pll, hw_state, - CNL_DPLL_ENABLE(pll->info->id)); + i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + + if (IS_ELKHARTLAKE(dev_priv) && + pll->info->id == DPLL_ID_EHL_DPLL4) { + enable_reg = MG_PLL_ENABLE(0); + } + + return icl_pll_get_hw_state(dev_priv, pll, hw_state, enable_reg); } static bool tbt_pll_get_hw_state(struct drm_i915_private *dev_priv, @@ -3057,6 +3069,14 @@ static void combo_pll_enable(struct drm_i915_private *dev_priv, { i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + if (IS_ELKHARTLAKE(dev_priv) && + pll->info->id == DPLL_ID_EHL_DPLL4) { + enable_reg = MG_PLL_ENABLE(0); + + /* Need to disable DC states when this 
DPLL is enabled. */ + icl_disable_dc_states(dev_priv, pll); + } + icl_pll_power_enable(dev_priv, pll, enable_reg); icl_dpll_write(dev_priv, pll); @@ -3152,7 +3172,18 @@ static void icl_pll_disable(struct drm_i915_private *dev_priv, static void combo_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - icl_pll_disable(dev_priv, pll, CNL_DPLL_ENABLE(pll->info->id)); + i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + + if (IS_ELKHARTLAKE(dev_priv) && + pll->info->id == DPLL_ID_EHL_DPLL4) { + enable_reg = MG_PLL_ENABLE(0); + icl_pll_disable(dev_priv, pll, enable_reg); + + icl_enable_dc_states(dev_priv, pll); + return; + } + + icl_pll_disable(dev_priv, pll, enable_reg); } static void tbt_pll_disable(struct drm_i915_private *dev_priv, @@ -3230,6 +3261,7 @@ static const struct intel_dpll_mgr icl_pll_mgr = { static const struct dpll_info ehl_plls[] = { { "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, { "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, + { "DPLL 4", &combo_pll_funcs, DPLL_ID_EHL_DPLL4, 0 }, { }, }; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index 8835dd20f1d2.
Re: [Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v4)
On Wed, 17 Apr 2019 16:06:11 +0300 Ville Syrjälä wrote: Hi Ville, > On Thu, Apr 11, 2019 at 04:36:00PM -0700, Vivek Kasireddy wrote: > > This patch adds support for DPLL4 on EHL that include the > > following restrictions: > > > > - DPLL4 cannot be used with DDIA (combo port A internal eDP usage). > > DPLL4 can be used with other DDIs, including DDID > > (combo port A external usage). > > > > - DPLL4 cannot be enabled when DC5 or DC6 are enabled. > > > > - The DPLL4 enable, lock, power enabled, and power state are > > connected to the MGPLL1_ENABLE register. > > > > v2: (suggestions from Bob Paauwe) > > - Rework ehl_get_dpll() function to call intel_find_shared_dpll() > > and iterate twice: once for Combo plls and once for MG plls. > > > > - Use MG pll funcs for DPLL4 instead of creating new ones and modify > > mg_pll_enable to include the restrictions for EHL. > > > > v3: Fix compilation error > > > > v4: (suggestions from Lucas and Ville) > > - Treat DPLL4 as a combo phy PLL and not as MG PLL > > - Disable DC states when this DPLL is being enabled > > - Reuse icl_get_dpll instead of creating a separate one for EHL > > > > Cc: Lucas De Marchi > > Cc: José Roberto de Souza > > Cc: Bob Paauwe > > Signed-off-by: Vivek Kasireddy > > --- > > drivers/gpu/drm/i915/intel_dpll_mgr.c | 35 > > --- > > drivers/gpu/drm/i915/intel_dpll_mgr.h | 4 2 files changed, 36 > > insertions(+), 3 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c > > b/drivers/gpu/drm/i915/intel_dpll_mgr.c index > > e01c057ce50b..207af4af4978 100644 --- > > a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ > > b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2825,6 +2825,12 @@ > > icl_get_dpll(struct intel_crtc_state *crtc_state, if > > (intel_port_is_combophy(dev_priv, port)) { min = DPLL_ID_ICL_DPLL0; > > max = DPLL_ID_ICL_DPLL1; > > + > > + if (IS_ELKHARTLAKE(dev_priv)) { > > + if (encoder->type != INTEL_OUTPUT_EDP) > > + max = DPLL_ID_EHL_DPLL4; > > + } > > + > > ret = 
icl_calc_dpll_state(crtc_state, encoder); > > } else if (intel_port_is_tc(dev_priv, port)) { > > if (encoder->type == INTEL_OUTPUT_DP_MST) { > > @@ -2964,8 +2970,14 @@ static bool combo_pll_get_hw_state(struct > > drm_i915_private *dev_priv, struct intel_shared_dpll *pll, > >struct intel_dpll_hw_state > > *hw_state) { > > - return icl_pll_get_hw_state(dev_priv, pll, hw_state, > > - > > CNL_DPLL_ENABLE(pll->info->id)); > > + i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); > > + > > + if (IS_ELKHARTLAKE(dev_priv) && > > + pll->info->id == DPLL_ID_EHL_DPLL4) { > > + enable_reg = MG_PLL_ENABLE(0); > > + } > > + > > + return icl_pll_get_hw_state(dev_priv, pll, hw_state, > > enable_reg); } > > > > static bool tbt_pll_get_hw_state(struct drm_i915_private *dev_priv, > > @@ -3076,6 +3088,14 @@ static void combo_pll_enable(struct > > drm_i915_private *dev_priv, { > > i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); > > > > + if (IS_ELKHARTLAKE(dev_priv) && > > + pll->info->id == DPLL_ID_EHL_DPLL4) { > > + enable_reg = MG_PLL_ENABLE(0); > > + > > + /* Need to disable DC states when this DPLL is > > enabled. */ > > + bxt_disable_dc9(dev_priv); > > You can't simply call that from random places. It needs to be handled > by the power domain stuff. The only other places in the driver where the functions bxt_disable/enable_dc9 are called are intel_runtime_suspend/resume and i915_drm_suspend_late/resume_early. Are you suggesting that I call one of these functions instead? Or, do you simply want me to pair bxt_*able_dc9 with intel_power_domains_suspend/resume and/or other functions similar to what the above-mentioned functions do? 
Thanks, Vivek > > > + } > > + > > icl_pll_power_enable(dev_priv, pll, enable_reg); > > > > icl_dpll_write(dev_priv, pll); > > @@ -3171,7 +3191,15 @@ static void icl_pll_disable(struct > > drm_i915_private *dev_priv, static void combo_pll_disable(struct > > drm_i915_private *dev_priv, struct intel_shared_dpll *pll) > > { > > - icl_pll_disable(dev_priv, pll, > > CNL_DPLL_ENABLE(pll->info->id)); > > + i915_reg_t enable_reg = CNL_DPLL_ENABLE(
[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v4)
This patch adds support for DPLL4 on EHL that include the following restrictions: - DPLL4 cannot be used with DDIA (combo port A internal eDP usage). DPLL4 can be used with other DDIs, including DDID (combo port A external usage). - DPLL4 cannot be enabled when DC5 or DC6 are enabled. - The DPLL4 enable, lock, power enabled, and power state are connected to the MGPLL1_ENABLE register. v2: (suggestions from Bob Paauwe) - Rework ehl_get_dpll() function to call intel_find_shared_dpll() and iterate twice: once for Combo plls and once for MG plls. - Use MG pll funcs for DPLL4 instead of creating new ones and modify mg_pll_enable to include the restrictions for EHL. v3: Fix compilation error v4: (suggestions from Lucas and Ville) - Treat DPLL4 as a combo phy PLL and not as MG PLL - Disable DC states when this DPLL is being enabled - Reuse icl_get_dpll instead of creating a separate one for EHL Cc: Lucas De Marchi Cc: José Roberto de Souza Cc: Bob Paauwe Signed-off-by: Vivek Kasireddy --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 35 --- drivers/gpu/drm/i915/intel_dpll_mgr.h | 4 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index e01c057ce50b..207af4af4978 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2825,6 +2825,12 @@ icl_get_dpll(struct intel_crtc_state *crtc_state, if (intel_port_is_combophy(dev_priv, port)) { min = DPLL_ID_ICL_DPLL0; max = DPLL_ID_ICL_DPLL1; + + if (IS_ELKHARTLAKE(dev_priv)) { + if (encoder->type != INTEL_OUTPUT_EDP) + max = DPLL_ID_EHL_DPLL4; + } + ret = icl_calc_dpll_state(crtc_state, encoder); } else if (intel_port_is_tc(dev_priv, port)) { if (encoder->type == INTEL_OUTPUT_DP_MST) { @@ -2964,8 +2970,14 @@ static bool combo_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - return icl_pll_get_hw_state(dev_priv, pll, hw_state, - 
CNL_DPLL_ENABLE(pll->info->id)); + i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + + if (IS_ELKHARTLAKE(dev_priv) && + pll->info->id == DPLL_ID_EHL_DPLL4) { + enable_reg = MG_PLL_ENABLE(0); + } + + return icl_pll_get_hw_state(dev_priv, pll, hw_state, enable_reg); } static bool tbt_pll_get_hw_state(struct drm_i915_private *dev_priv, @@ -3076,6 +3088,14 @@ static void combo_pll_enable(struct drm_i915_private *dev_priv, { i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + if (IS_ELKHARTLAKE(dev_priv) && + pll->info->id == DPLL_ID_EHL_DPLL4) { + enable_reg = MG_PLL_ENABLE(0); + + /* Need to disable DC states when this DPLL is enabled. */ + bxt_disable_dc9(dev_priv); + } + icl_pll_power_enable(dev_priv, pll, enable_reg); icl_dpll_write(dev_priv, pll); @@ -3171,7 +3191,15 @@ static void icl_pll_disable(struct drm_i915_private *dev_priv, static void combo_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - icl_pll_disable(dev_priv, pll, CNL_DPLL_ENABLE(pll->info->id)); + i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + + if (IS_ELKHARTLAKE(dev_priv) && + pll->info->id == DPLL_ID_EHL_DPLL4) { + enable_reg = MG_PLL_ENABLE(0); + bxt_enable_dc9(dev_priv); + } + + icl_pll_disable(dev_priv, pll, enable_reg); } static void tbt_pll_disable(struct drm_i915_private *dev_priv, @@ -3249,6 +3277,7 @@ static const struct intel_dpll_mgr icl_pll_mgr = { static const struct dpll_info ehl_plls[] = { { "DPLL 0", _pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, { "DPLL 1", _pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, + { "DPLL 4", _pll_funcs, DPLL_ID_EHL_DPLL4, 0 }, { }, }; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index bd8124cc81ed..f3f99929cee8 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -113,6 +113,10 @@ enum intel_dpll_id { * @DPLL_ID_ICL_DPLL1: ICL combo PHY DPLL1 */ DPLL_ID_ICL_DPLL1 = 1, + /** +* @DPLL_ID_EHL_DPLL4: EHL combo PHY DPLL4 +*/ + 
DPLL_ID_EHL_DPLL4 = 2, /** * @DPLL_ID_ICL_TBTPLL: ICL TBT PLL */ -- 2.14.5 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v3)
On Mon, 8 Apr 2019 12:11:15 +0300 Ville Syrjälä wrote: Hi, > On Fri, Apr 05, 2019 at 04:33:30PM -0700, Vivek Kasireddy wrote: > > On Fri, 5 Apr 2019 21:39:11 +0300 > > Ville Syrjälä wrote: > > Hi Ville, > > > > > On Fri, Apr 05, 2019 at 09:33:56PM +0300, Ville Syrjälä wrote: > > > > On Fri, Apr 05, 2019 at 10:59:53AM -0700, Vivek Kasireddy > > > > wrote: > > > > > This patch adds support for DPLL4 on EHL that include the > > > > > following restrictions: > > > > > > > > > > - DPLL4 cannot be used with DDIA (combo port A internal eDP > > > > > usage). DPLL4 can be used with other DDIs, including DDID > > > > > (combo port A external usage). > > > > > > > > > > - DPLL4 cannot be enabled when DC5 or DC6 are enabled. > > > > > > > > > > - The DPLL4 enable, lock, power enabled, and power state are > > > > > connected to the MGPLL1_ENABLE register. > > > > > > > > > > v2: (suggestions from Bob Paauwe) > > > > > - Rework ehl_get_dpll() function to call > > > > > intel_find_shared_dpll() and iterate twice: once for Combo > > > > > plls and once for MG plls. > > > > > > > > > > - Use MG pll funcs for DPLL4 instead of creating new ones and > > > > > modify mg_pll_enable to include the restrictions for EHL. 
> > > > > > > > > > v3: Fix compilation error > > > > > > > > > > Cc: Lucas De Marchi > > > > > Signed-off-by: Vivek Kasireddy > > > > > Reviewed-by: Bob Paauwe > > > > > --- > > > > > drivers/gpu/drm/i915/intel_dpll_mgr.c | 60 > > > > > ++- 1 file changed, 59 > > > > > insertions(+), 1 deletion(-) > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c > > > > > b/drivers/gpu/drm/i915/intel_dpll_mgr.c index > > > > > e01c057ce50b..c3f0b9720c54 100644 --- > > > > > a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ > > > > > b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2870,6 +2870,56 @@ > > > > > icl_get_dpll(struct intel_crtc_state *crtc_state, return pll; > > > > > } > > > > > > > > > > +static struct intel_shared_dpll * > > > > > +ehl_get_dpll(struct intel_crtc_state *crtc_state, > > > > > + struct intel_encoder *encoder) > > > > > +{ > > > > > + struct drm_i915_private *dev_priv = > > > > > to_i915(crtc_state->base.crtc->dev); > > > > > + struct intel_shared_dpll *pll; > > > > > + enum port port = encoder->port; > > > > > + enum intel_dpll_id min, max; > > > > > + bool ret; > > > > > + > > > > > + if (!intel_port_is_combophy(dev_priv, port)) { > > > > > + MISSING_CASE(port); > > > > > + return NULL; > > > > > + } > > > > > + > > > > > + min = DPLL_ID_ICL_DPLL0; > > > > > + max = DPLL_ID_ICL_DPLL1; > > > > > + ret = icl_calc_dpll_state(crtc_state, encoder); > > > > > + if (ret) { > > > > > + pll = intel_find_shared_dpll(crtc_state, min, > > > > > max); > > > > > + if (pll) { > > > > > + intel_reference_shared_dpll(pll, > > > > > crtc_state); > > > > > + return pll; > > > > > + } > > > > > + } else { > > > > > + DRM_DEBUG_KMS("Could not calculate PLL > > > > > state.\n"); > > > > > + } > > > > > + > > > > > + if (encoder->type == INTEL_OUTPUT_EDP) { > > > > > + DRM_DEBUG_KMS("Cannot use DPLL4 with > > > > > EDP.\n"); > > > > > + return NULL; > > > > > + } > > > > > + > > > > > + min = max = DPLL_ID_ICL_MGPLL1; > > > > > + ret = 
icl_calc_mg_pll_state(crtc_state); > > > > > + if (!ret) { > > > > > + DRM_DEBUG_KMS("Could not calculate PLL > > > > > state.\n"); > > > > > + return NULL; > > > >
Re: [Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v3)
On Fri, 5 Apr 2019 17:46:38 -0700 Lucas De Marchi wrote: Hi, > On Fri, Apr 05, 2019 at 10:59:53AM -0700, Vivek Kasireddy wrote: > >This patch adds support for DPLL4 on EHL that include the > >following restrictions: > > > >- DPLL4 cannot be used with DDIA (combo port A internal eDP usage). > > DPLL4 can be used with other DDIs, including DDID > > (combo port A external usage). > > > >- DPLL4 cannot be enabled when DC5 or DC6 are enabled. > > > >- The DPLL4 enable, lock, power enabled, and power state are > >connected > > to the MGPLL1_ENABLE register. > > ok > > > > >v2: (suggestions from Bob Paauwe) > >- Rework ehl_get_dpll() function to call intel_find_shared_dpll() and > > iterate twice: once for Combo plls and once for MG plls. > > > >- Use MG pll funcs for DPLL4 instead of creating new ones and modify > > mg_pll_enable to include the restrictions for EHL. > > these 2 don't match spec. > > "3rd PLL for use with combo PHY (DPLL4) and 3rd combo PHY DDI clocks > (DDIC clock)" > > This is a combophy pll, not a mg phy pll. The only thing that is > hooked to mg registers is the enable. So my understanding is that > what you need: > > - use the dpll calculations > - make sure intel_find_shared_dpll doesn't this if it's for eDP > - setup the enable/disable to use MG_ENABLE register Looks like my interpretation of the spec is different from yours but your comments make sense. Should I create a new ID for this DPLL or just re-use DPLL_ID_ICL_MGPLL1? 
> > > > >v3: Fix compilation error > > > >Cc: Lucas De Marchi > >Signed-off-by: Vivek Kasireddy > >Reviewed-by: Bob Paauwe > >--- > > drivers/gpu/drm/i915/intel_dpll_mgr.c | 60 > > ++- 1 file changed, 59 > > insertions(+), 1 deletion(-) > > > >diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c > >b/drivers/gpu/drm/i915/intel_dpll_mgr.c index > >e01c057ce50b..c3f0b9720c54 100644 --- > >a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ > >b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2870,6 +2870,56 @@ > >icl_get_dpll(struct intel_crtc_state *crtc_state, > > return pll; > > } > > > >+static struct intel_shared_dpll * > >+ehl_get_dpll(struct intel_crtc_state *crtc_state, > >+ struct intel_encoder *encoder) > >+{ > >+struct drm_i915_private *dev_priv = > >to_i915(crtc_state->base.crtc->dev); > >+struct intel_shared_dpll *pll; > >+enum port port = encoder->port; > >+enum intel_dpll_id min, max; > >+bool ret; > >+ > >+if (!intel_port_is_combophy(dev_priv, port)) { > >+MISSING_CASE(port); > >+return NULL; > >+} > >+ > >+min = DPLL_ID_ICL_DPLL0; > >+max = DPLL_ID_ICL_DPLL1; > >+ret = icl_calc_dpll_state(crtc_state, encoder); > >+if (ret) { > >+pll = intel_find_shared_dpll(crtc_state, min, max); > >+if (pll) { > >+intel_reference_shared_dpll(pll, > >crtc_state); > >+return pll; > >+} > >+} else { > >+DRM_DEBUG_KMS("Could not calculate PLL state.\n"); > > the check for ret is swapped and you are missing a return here. Unless I am reading it utterly wrong, icl_get_dpll has this: if (!ret) { DRM_DEBUG_KMS("Could not calculate PLL state.\n"); return NULL; > > But given the comments above, I think it would be better to reuse > icl_get_dpll() rather than what you are doing here. I could have used icl_get_dpll() but thought it would be much cleaner to have a separate function for EHL; otherwise, I guess I need to sprinkle icl_get_dpll with many if(EHL) statements. 
> > >+} > >+ > >+if (encoder->type == INTEL_OUTPUT_EDP) { > >+DRM_DEBUG_KMS("Cannot use DPLL4 with EDP.\n"); > >+return NULL; > >+} > > this is already too late The idea was that if eDP is being used, we first try to find out whether one of the combo PHY DPLLs is available. If none is, then we come here and return, as we cannot use this one either. > > >+ > >+min = max = DPLL_ID_ICL_MGPLL1; > >+ret = icl_calc_mg_pll_state(crtc_state); > >+if (!ret) { > >+DRM_DEBUG_KMS("Could not calculate PLL state.\n"); > >+return NULL; > > again... ret == 0 is success, not otherwise I'll send out a v4 with your suggestions soon. Thanks, Vivek >
Re: [Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v3)
On Fri, 5 Apr 2019 21:39:11 +0300 Ville Syrjälä wrote: Hi Ville, > On Fri, Apr 05, 2019 at 09:33:56PM +0300, Ville Syrjälä wrote: > > On Fri, Apr 05, 2019 at 10:59:53AM -0700, Vivek Kasireddy wrote: > > > This patch adds support for DPLL4 on EHL that include the > > > following restrictions: > > > > > > - DPLL4 cannot be used with DDIA (combo port A internal eDP > > > usage). DPLL4 can be used with other DDIs, including DDID > > > (combo port A external usage). > > > > > > - DPLL4 cannot be enabled when DC5 or DC6 are enabled. > > > > > > - The DPLL4 enable, lock, power enabled, and power state are > > > connected to the MGPLL1_ENABLE register. > > > > > > v2: (suggestions from Bob Paauwe) > > > - Rework ehl_get_dpll() function to call intel_find_shared_dpll() > > > and iterate twice: once for Combo plls and once for MG plls. > > > > > > - Use MG pll funcs for DPLL4 instead of creating new ones and > > > modify mg_pll_enable to include the restrictions for EHL. > > > > > > v3: Fix compilation error > > > > > > Cc: Lucas De Marchi > > > Signed-off-by: Vivek Kasireddy > > > Reviewed-by: Bob Paauwe > > > --- > > > drivers/gpu/drm/i915/intel_dpll_mgr.c | 60 > > > ++- 1 file changed, 59 > > > insertions(+), 1 deletion(-) > > > > > > diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c > > > b/drivers/gpu/drm/i915/intel_dpll_mgr.c index > > > e01c057ce50b..c3f0b9720c54 100644 --- > > > a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ > > > b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2870,6 +2870,56 @@ > > > icl_get_dpll(struct intel_crtc_state *crtc_state, return pll; > > > } > > > > > > +static struct intel_shared_dpll * > > > +ehl_get_dpll(struct intel_crtc_state *crtc_state, > > > + struct intel_encoder *encoder) > > > +{ > > > + struct drm_i915_private *dev_priv = > > > to_i915(crtc_state->base.crtc->dev); > > > + struct intel_shared_dpll *pll; > > > + enum port port = encoder->port; > > > + enum intel_dpll_id min, max; > > > + bool ret; > > > + > > > + if 
(!intel_port_is_combophy(dev_priv, port)) { > > > + MISSING_CASE(port); > > > + return NULL; > > > + } > > > + > > > + min = DPLL_ID_ICL_DPLL0; > > > + max = DPLL_ID_ICL_DPLL1; > > > + ret = icl_calc_dpll_state(crtc_state, encoder); > > > + if (ret) { > > > + pll = intel_find_shared_dpll(crtc_state, min, > > > max); > > > + if (pll) { > > > + intel_reference_shared_dpll(pll, > > > crtc_state); > > > + return pll; > > > + } > > > + } else { > > > + DRM_DEBUG_KMS("Could not calculate PLL > > > state.\n"); > > > + } > > > + > > > + if (encoder->type == INTEL_OUTPUT_EDP) { > > > + DRM_DEBUG_KMS("Cannot use DPLL4 with EDP.\n"); > > > + return NULL; > > > + } > > > + > > > + min = max = DPLL_ID_ICL_MGPLL1; > > > + ret = icl_calc_mg_pll_state(crtc_state); > > > + if (!ret) { > > > + DRM_DEBUG_KMS("Could not calculate PLL > > > state.\n"); > > > + return NULL; > > > + } > > > + > > > + pll = intel_find_shared_dpll(crtc_state, min, max); > > > + if (!pll) { > > > + DRM_DEBUG_KMS("No PLL selected\n"); > > > + return NULL; > > > + } > > > + > > > + intel_reference_shared_dpll(pll, crtc_state); > > > + return pll; > > > +} > > > + > > > static bool mg_pll_get_hw_state(struct drm_i915_private > > > *dev_priv, struct intel_shared_dpll *pll, > > > struct intel_dpll_hw_state > > > *hw_state) @@ -3115,6 +3165,13 @@ static void > > > mg_pll_enable(struct drm_i915_private *dev_priv, i915_reg_t > > > enable_reg = MG_PLL_ENABLE(icl_pll_id_to_tc_port(pll->info->id)); > > > > > > + if (IS_ELKHARTLAKE(dev_priv) && > > > +(I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5 || > > > + I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC6)) { > > > + DRM_ERROR("Cant enable DPLL4 when DC5 or DC6 are > > > enabled\n"); > > > + return; > > &g
[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v3)
This patch adds support for DPLL4 on EHL that include the following restrictions: - DPLL4 cannot be used with DDIA (combo port A internal eDP usage). DPLL4 can be used with other DDIs, including DDID (combo port A external usage). - DPLL4 cannot be enabled when DC5 or DC6 are enabled. - The DPLL4 enable, lock, power enabled, and power state are connected to the MGPLL1_ENABLE register. v2: (suggestions from Bob Paauwe) - Rework ehl_get_dpll() function to call intel_find_shared_dpll() and iterate twice: once for Combo plls and once for MG plls. - Use MG pll funcs for DPLL4 instead of creating new ones and modify mg_pll_enable to include the restrictions for EHL. v3: Fix compilation error Cc: Lucas De Marchi Signed-off-by: Vivek Kasireddy Reviewed-by: Bob Paauwe --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 60 ++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index e01c057ce50b..c3f0b9720c54 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2870,6 +2870,56 @@ icl_get_dpll(struct intel_crtc_state *crtc_state, return pll; } +static struct intel_shared_dpll * +ehl_get_dpll(struct intel_crtc_state *crtc_state, +struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_shared_dpll *pll; + enum port port = encoder->port; + enum intel_dpll_id min, max; + bool ret; + + if (!intel_port_is_combophy(dev_priv, port)) { + MISSING_CASE(port); + return NULL; + } + + min = DPLL_ID_ICL_DPLL0; + max = DPLL_ID_ICL_DPLL1; + ret = icl_calc_dpll_state(crtc_state, encoder); + if (ret) { + pll = intel_find_shared_dpll(crtc_state, min, max); + if (pll) { + intel_reference_shared_dpll(pll, crtc_state); + return pll; + } + } else { + DRM_DEBUG_KMS("Could not calculate PLL state.\n"); + } + + if (encoder->type == INTEL_OUTPUT_EDP) { + DRM_DEBUG_KMS("Cannot use DPLL4 with EDP.\n"); + return 
NULL; + } + + min = max = DPLL_ID_ICL_MGPLL1; + ret = icl_calc_mg_pll_state(crtc_state); + if (!ret) { + DRM_DEBUG_KMS("Could not calculate PLL state.\n"); + return NULL; + } + + pll = intel_find_shared_dpll(crtc_state, min, max); + if (!pll) { + DRM_DEBUG_KMS("No PLL selected\n"); + return NULL; + } + + intel_reference_shared_dpll(pll, crtc_state); + return pll; +} + static bool mg_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) @@ -3115,6 +3165,13 @@ static void mg_pll_enable(struct drm_i915_private *dev_priv, i915_reg_t enable_reg = MG_PLL_ENABLE(icl_pll_id_to_tc_port(pll->info->id)); + if (IS_ELKHARTLAKE(dev_priv) && + (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5 || + I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC6)) { + DRM_ERROR("Cant enable DPLL4 when DC5 or DC6 are enabled\n"); + return; + } + icl_pll_power_enable(dev_priv, pll, enable_reg); icl_mg_pll_write(dev_priv, pll); @@ -3249,12 +3306,13 @@ static const struct intel_dpll_mgr icl_pll_mgr = { static const struct dpll_info ehl_plls[] = { { "DPLL 0", _pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, { "DPLL 1", _pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, + { "DPLL 4", _pll_funcs, DPLL_ID_ICL_MGPLL1,0 }, { }, }; static const struct intel_dpll_mgr ehl_pll_mgr = { .dpll_info = ehl_plls, - .get_dpll = icl_get_dpll, + .get_dpll = ehl_get_dpll, .dump_hw_state = icl_dump_hw_state, }; -- 2.14.5 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v2)
This patch adds support for DPLL4 on EHL that include the following restrictions: - DPLL4 cannot be used with DDIA (combo port A internal eDP usage). DPLL4 can be used with other DDIs, including DDID (combo port A external usage). - DPLL4 cannot be enabled when DC5 or DC6 are enabled. - The DPLL4 enable, lock, power enabled, and power state are connected to the MGPLL1_ENABLE register. v2: (suggestions from Bob Paauwe) - Rework ehl_get_dpll() function to call intel_find_shared_dpll() and iterate twice: once for Combo plls and once for MG plls. - Use MG pll funcs for DPLL4 instead of creating new ones and modify mg_pll_enable to include the restrictions for EHL. Cc: Lucas De Marchi Signed-off-by: Vivek Kasireddy Reviewed-by: Bob Paauwe --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 60 ++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index e01c057ce50b..cb756acedc94 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2870,6 +2870,56 @@ icl_get_dpll(struct intel_crtc_state *crtc_state, return pll; } +static struct intel_shared_dpll * +ehl_get_dpll(struct intel_crtc_state *crtc_state, +struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_shared_dpll *pll; + enum port port = encoder->port; + enum intel_dpll_id min, max; + bool ret; + + if (!intel_port_is_combophy(dev_priv, port)) { + MISSING_CASE(port); + return NULL; + } + + min = DPLL_ID_ICL_DPLL0; + max = DPLL_ID_ICL_DPLL1; + ret = icl_calc_dpll_state(crtc_state, encoder); + if (ret) { + pll = intel_find_shared_dpll(crtc_state, min, max); + if (pll) { + intel_reference_shared_dpll(pll, crtc_state); + return pll; + } + } else { + DRM_DEBUG_KMS("Could not calculate PLL state.\n"); + } + + if (encoder->type == INTEL_OUTPUT_EDP) { + DRM_DEBUG_KMS("Cannot use DPLL4 with EDP.\n"); + return NULL; + } + + min = max = 
DPLL_ID_ICL_MGPLL1; + ret = icl_calc_mg_pll_state(crtc_state, false); + if (!ret) { + DRM_DEBUG_KMS("Could not calculate PLL state.\n"); + return NULL; + } + + pll = intel_find_shared_dpll(crtc_state, min, max); + if (!pll) { + DRM_DEBUG_KMS("No PLL selected\n"); + return NULL; + } + + intel_reference_shared_dpll(pll, crtc_state); + return pll; +} + static bool mg_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) @@ -3115,6 +3165,13 @@ static void mg_pll_enable(struct drm_i915_private *dev_priv, i915_reg_t enable_reg = MG_PLL_ENABLE(icl_pll_id_to_tc_port(pll->info->id)); + if (IS_ELKHARTLAKE(dev_priv) && + (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5 || + I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC6)) { + DRM_ERROR("Cant enable DPLL4 when DC5 or DC6 are enabled\n"); + return; + } + icl_pll_power_enable(dev_priv, pll, enable_reg); icl_mg_pll_write(dev_priv, pll); @@ -3249,12 +3306,13 @@ static const struct intel_dpll_mgr icl_pll_mgr = { static const struct dpll_info ehl_plls[] = { { "DPLL 0", _pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, { "DPLL 1", _pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, + { "DPLL 4", _pll_funcs, DPLL_ID_ICL_MGPLL1,0 }, { }, }; static const struct intel_dpll_mgr ehl_pll_mgr = { .dpll_info = ehl_plls, - .get_dpll = icl_get_dpll, + .get_dpll = ehl_get_dpll, .dump_hw_state = icl_dump_hw_state, }; -- 2.14.5 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH i-g-t v2] lib/igt_kms: Introduce get_first_connected_output macro
In some cases, we just need one valid (connected) output to perform a test. This macro can help in these situations by not having to put the test code inside a for loop that iterates over all the outputs. v2: Added a brief documentation for this macro. Suggested-by: Matt Roper Cc: Thomas Wood <thomas.w...@intel.com> Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com> --- lib/igt_kms.h | 12 1 file changed, 12 insertions(+) diff --git a/lib/igt_kms.h b/lib/igt_kms.h index 965c47c..a0bb066 100644 --- a/lib/igt_kms.h +++ b/lib/igt_kms.h @@ -279,6 +279,18 @@ void igt_wait_for_vblank(int drm_fd, enum pipe pipe); for (int i__ = 0; (plane) = &(display)->pipes[(pipe)].planes[i__], \ i__ < (display)->pipes[(pipe)].n_planes; i__++) +/** + * get_first_connected_output: + * @display: Initialized igt_display_t type object + * @output: igt_output_t type object + * + * Returns: First valid (connected) output. + */ +#define get_first_connected_output(display, output)\ + for (int i__ = 0; i__ < (display)->n_outputs; i__++) \ + if ((output = &(display)->outputs[i__]), output->valid) \ + break + #define IGT_FIXED(i,f) ((i) << 16 | (f)) void igt_enable_connectors(void); -- 2.4.3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH i-g-t] tests/kms_rotation_crc: Use get_first_connected_output macro
In some cases, the only connected connector might not occupy the first slot and hence output[0] might be empty. Therefore, use the get_first_connected_output macro to find the output object associated with the connected connector. Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com> --- tests/kms_rotation_crc.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/kms_rotation_crc.c b/tests/kms_rotation_crc.c index c3241cf..94b4486 100644 --- a/tests/kms_rotation_crc.c +++ b/tests/kms_rotation_crc.c @@ -323,14 +323,15 @@ static void test_plane_rotation_ytiled_obj(data_t *data, enum igt_plane plane_ty int bpp = igt_drm_format_to_bpp(format); enum igt_commit_style commit = COMMIT_LEGACY; int fd = data->gfx_fd; - igt_output_t *output = &display->outputs[0]; + igt_output_t *output = NULL; igt_plane_t *plane; drmModeModeInfo *mode; unsigned int stride, size, w, h; uint32_t gem_handle; int ret; - igt_require(output != NULL && output->valid == true); + get_first_connected_output(display, output); + igt_require(output != NULL); plane = igt_output_get_plane(output, plane_type); igt_require(igt_plane_supports_rotation(plane)); @@ -385,7 +386,7 @@ static void test_plane_rotation_exhaust_fences(data_t *data, enum igt_plane plan int bpp = igt_drm_format_to_bpp(format); enum igt_commit_style commit = COMMIT_LEGACY; int fd = data->gfx_fd; - igt_output_t *output = &display->outputs[0]; + igt_output_t *output = NULL; igt_plane_t *plane; drmModeModeInfo *mode; data_t data2[MAX_FENCES+1] = {}; @@ -394,7 +395,8 @@ static void test_plane_rotation_exhaust_fences(data_t *data, enum igt_plane plan uint64_t total_aperture_size, total_fbs_size; int i, ret; - igt_require(output != NULL && output->valid == true); + get_first_connected_output(display, output); + igt_require(output != NULL); plane = igt_output_get_plane(output, plane_type); igt_require(igt_plane_supports_rotation(plane)); -- 2.4.3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] igt/igt_kms: Introduce get_first_connected_output (v2)
On Fri, 13 Nov 2015 15:59:21 + Thomas Wood <thomas.w...@intel.com> wrote: > On 5 November 2015 at 01:34, Vivek Kasireddy > <vivek.kasire...@intel.com> wrote: > > In some cases, we just need one valid (connected) output to perform > > a test. This macro can help in these situations by not having to > > put the test code inside a for loop that iterates over all the > > outputs. > > > > v2: Added a brief documentation for this macro. > > The new macro is no longer being used anywhere. Is there a new patch > that uses the macro? > Hi Thomas, I wanted to have this patch merged before I updated the tests to use the macro. > > Also, if re-sending the patch, please make sure it is tagged correctly > as described in: > > http://lists.freedesktop.org/archives/intel-gfx/2015-November/079712.html > > This also explains how to manage the version tag in the subject line. Thanks for the link; I wasn't aware of it. Do you want me to resend the patch in this format? Thanks and Regards, Vivek > > > > > > Suggested-by: Matt Roper > > Cc: Thomas Wood <thomas.w...@intel.com> > > Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com> > > --- > > lib/igt_kms.h | 12 > > 1 file changed, 12 insertions(+) > > > > diff --git a/lib/igt_kms.h b/lib/igt_kms.h > > index 09c08aa..91fa206 100644 > > --- a/lib/igt_kms.h > > +++ b/lib/igt_kms.h > > @@ -278,6 +278,18 @@ void igt_wait_for_vblank(int drm_fd, enum pipe > > pipe); for (int i__ = 0; (plane) = > > &(display)->pipes[(pipe)].planes[i__], \ i__ < > > (display)->pipes[(pipe)].n_planes; i__++) > > > > +/** > > + * get_first_connected_output: > > + * @display: Initialized igt_display_t type object > > + * @output: igt_output_t type object > > + * > > + * Returns: First valid (connected) output. 
> > + */ > > +#define get_first_connected_output(display, output)\ > > + for (int i__ = 0; i__ < (display)->n_outputs; i__++) \ > > + if ((output = &(display)->outputs[i__]), > > output->valid) \ > > + break > > + > > /* > > * Can be used with igt_output_set_pipe() to mean we don't care > > about the pipe > > * that should drive this output > > -- > > 2.4.3 > > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] igt/kms_rotation_crc: Add a new subtest to exhaustively test for fence leaks (v3)
In this subtest, as a first step, MAX_FENCES+1 number of framebuffers are created backed up by objects that have multiple GGTT views (normal and rotated). Next, we have the i915 driver instantiate a normal view followed by a rotated view. We continue doing the above MAX_FENCES + 1 times. v2: - Add an igt_require() to check if there is enough GTT space left for MAX_FENCES+1 framebuffers. (Tvrtko) - Make data2 local to test_plane_rotation_exhaust_fences(). (Tvrtko) - If there is a failure, deallocate all the previously allocated framebuffers before asserting. v3: Close the gem handle if set_tiling or addfb fails. (Tvrtko) Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com> Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com> --- tests/kms_rotation_crc.c | 108 +++ 1 file changed, 108 insertions(+) diff --git a/tests/kms_rotation_crc.c b/tests/kms_rotation_crc.c index ed6eeef..7e18b1e 100644 --- a/tests/kms_rotation_crc.c +++ b/tests/kms_rotation_crc.c @@ -25,6 +25,7 @@ #include "igt.h" #include +#define MAX_FENCES 32 typedef struct { int gfx_fd; @@ -376,6 +377,108 @@ static void test_plane_rotation_ytiled_obj(data_t *data, enum igt_plane plane_ty igt_assert(ret == 0); } +static void test_plane_rotation_exhaust_fences(data_t *data, enum igt_plane plane_type) +{ + igt_display_t *display = >display; + uint64_t tiling = LOCAL_I915_FORMAT_MOD_Y_TILED; + uint32_t format = DRM_FORMAT_XRGB; + int bpp = igt_drm_format_to_bpp(format); + enum igt_commit_style commit = COMMIT_LEGACY; + int fd = data->gfx_fd; + igt_output_t *output = >outputs[0]; + igt_plane_t *plane; + drmModeModeInfo *mode; + data_t data2[MAX_FENCES+1] = {}; + unsigned int stride, size, w, h; + uint32_t gem_handle; + uint64_t total_aperture_size, total_fbs_size; + int i, ret; + + igt_require(output != NULL && output->valid == true); + + plane = igt_output_get_plane(output, plane_type); + igt_require(igt_plane_supports_rotation(plane)); + + if (plane_type == IGT_PLANE_PRIMARY || plane_type == IGT_PLANE_CURSOR) { + 
igt_require(data->display.has_universal_planes); + commit = COMMIT_UNIVERSAL; + } + + mode = igt_output_get_mode(output); + w = mode->hdisplay; + h = mode->vdisplay; + + for (stride = 512; stride < (w * bpp / 8); stride *= 2) + ; + for (size = 1024*1024; size < stride * h; size *= 2) + ; + + /* +* Make sure there is atleast 90% of the available GTT space left +* for creating (MAX_FENCES+1) framebuffers. +*/ + total_fbs_size = size * (MAX_FENCES + 1); + total_aperture_size = gem_available_aperture_size(fd); + igt_require(total_fbs_size < total_aperture_size * 0.9); + + igt_plane_set_fb(plane, NULL); + igt_display_commit(display); + + for (i = 0; i < MAX_FENCES + 1; i++) { + gem_handle = gem_create(fd, size); + ret = __gem_set_tiling(fd, gem_handle, I915_TILING_Y, stride); + if (ret) { + igt_warn("failed to set tiling\n"); + goto err_alloc; + } + + ret = (__kms_addfb(fd, gem_handle, w, h, stride, + format, tiling, LOCAL_DRM_MODE_FB_MODIFIERS, + [i].fb.fb_id)); + if (ret) { + igt_warn("failed to create framebuffer\n"); + goto err_alloc; + } + + data2[i].fb.width = w; + data2[i].fb.height = h; + data2[i].fb.gem_handle = gem_handle; + + igt_plane_set_fb(plane, [i].fb); + igt_plane_set_rotation(plane, IGT_ROTATION_0); + + ret = igt_display_try_commit2(display, commit); + if (ret) { + igt_warn("failed to commit unrotated fb\n"); + goto err_commit; + } + + igt_plane_set_rotation(plane, IGT_ROTATION_90); + + drmModeObjectSetProperty(fd, plane->drm_plane->plane_id, +DRM_MODE_OBJECT_PLANE, +plane->rotation_property, +plane->rotation); + ret = igt_display_try_commit2(display, commit); + if (ret) { + igt_warn("failed to commit hardware rotated fb\n"); + goto err_commit; + } + } + +err_alloc: + if (ret) + gem_close(fd, gem_handle); + + i--; +err_commit: + for (; i >= 0; i--) + igt_remove_fb(fd, [i].fb); + + kmstest_rest
[Intel-gfx] [PATCH] igt/igt_kms: Introduce get_first_connected_output (v2)
In some cases, we just need one valid (connected) output to perform a test. This macro helps in these situations by removing the need to put the test code inside a for loop that iterates over all the outputs. v2: Added brief documentation for this macro. Suggested-by: Matt Roper Cc: Thomas Wood <thomas.w...@intel.com> Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com> --- lib/igt_kms.h | 12 1 file changed, 12 insertions(+) diff --git a/lib/igt_kms.h b/lib/igt_kms.h index 09c08aa..91fa206 100644 --- a/lib/igt_kms.h +++ b/lib/igt_kms.h @@ -278,6 +278,18 @@ void igt_wait_for_vblank(int drm_fd, enum pipe pipe); for (int i__ = 0; (plane) = &(display)->pipes[(pipe)].planes[i__], \ i__ < (display)->pipes[(pipe)].n_planes; i__++) +/** + * get_first_connected_output: + * @display: Initialized igt_display_t type object + * @output: igt_output_t type object + * + * Returns: First valid (connected) output. + */ +#define get_first_connected_output(display, output)\ + for (int i__ = 0; i__ < (display)->n_outputs; i__++) \ + if ((output = &(display)->outputs[i__]), output->valid) \ + break + /* * Can be used with igt_output_set_pipe() to mean we don't care about the pipe * that should drive this output -- 2.4.3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] gbm: Add a flag to enable creation of rotated scanout buffers (v2)
For certain platforms that support rotated scanout buffers, currently, there is no way to create them with the GBM DRI interface. This flag will instruct the DRI driver to create the buffer by setting additional requirements such as tiling mode. v2: Reserve a bit per angle. (Ville and Michel) Cc: Michel Danzer <mic...@daenzer.net> Cc: Ville Syrjala <ville.syrj...@linux.intel.com> Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com> --- include/GL/internal/dri_interface.h | 3 +++ src/gbm/backends/dri/gbm_dri.c | 19 +-- src/gbm/main/gbm.h | 7 +++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index 6bbd3fa..cd1bf62 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -1100,6 +1100,9 @@ struct __DRIdri2ExtensionRec { #define __DRI_IMAGE_USE_SCANOUT0x0002 #define __DRI_IMAGE_USE_CURSOR 0x0004 /* Depricated */ #define __DRI_IMAGE_USE_LINEAR 0x0008 +#define __DRI_IMAGE_USE_ROTATION_900x0010 +#define __DRI_IMAGE_USE_ROTATION_180 0x0020 +#define __DRI_IMAGE_USE_ROTATION_270 0x0040 /** diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c index 57cdeac..a997946 100644 --- a/src/gbm/backends/dri/gbm_dri.c +++ b/src/gbm/backends/dri/gbm_dri.c @@ -539,7 +539,8 @@ gbm_dri_is_format_supported(struct gbm_device *gbm, break; case GBM_BO_FORMAT_ARGB: case GBM_FORMAT_ARGB: - if (usage & GBM_BO_USE_SCANOUT) + if (usage & (GBM_BO_USE_SCANOUT | GBM_BO_USE_ROTATION_90 | + GBM_BO_USE_ROTATION_180 | GBM_BO_USE_ROTATION_270)) return 0; break; default: @@ -748,6 +749,12 @@ gbm_dri_bo_import(struct gbm_device *gbm, if (usage & GBM_BO_USE_SCANOUT) dri_use |= __DRI_IMAGE_USE_SCANOUT; + if (usage & GBM_BO_USE_ROTATION_90) + dri_use |= __DRI_IMAGE_USE_ROTATION_90; + if (usage & GBM_BO_USE_ROTATION_180) + dri_use |= __DRI_IMAGE_USE_ROTATION_180; + if (usage & GBM_BO_USE_ROTATION_270) + dri_use |= __DRI_IMAGE_USE_ROTATION_270; if (usage & 
GBM_BO_USE_CURSOR) dri_use |= __DRI_IMAGE_USE_CURSOR; if (dri->image->base.version >= 2 && @@ -786,7 +793,9 @@ create_dumb(struct gbm_device *gbm, is_cursor = (usage & GBM_BO_USE_CURSOR) != 0 && format == GBM_FORMAT_ARGB; - is_scanout = (usage & GBM_BO_USE_SCANOUT) != 0 && + is_scanout = (usage & (GBM_BO_USE_SCANOUT | + GBM_BO_USE_ROTATION_90 | GBM_BO_USE_ROTATION_180 | + GBM_BO_USE_ROTATION_270)) != 0 && format == GBM_FORMAT_XRGB; if (!is_cursor && !is_scanout) { errno = EINVAL; @@ -880,6 +889,12 @@ gbm_dri_bo_create(struct gbm_device *gbm, if (usage & GBM_BO_USE_SCANOUT) dri_use |= __DRI_IMAGE_USE_SCANOUT; + if (usage & GBM_BO_USE_ROTATION_90) + dri_use |= __DRI_IMAGE_USE_ROTATION_90; + if (usage & GBM_BO_USE_ROTATION_180) + dri_use |= __DRI_IMAGE_USE_ROTATION_180; + if (usage & GBM_BO_USE_ROTATION_270) + dri_use |= __DRI_IMAGE_USE_ROTATION_270; if (usage & GBM_BO_USE_CURSOR) dri_use |= __DRI_IMAGE_USE_CURSOR; if (usage & GBM_BO_USE_LINEAR) diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h index 8db2153..b200ca6 100644 --- a/src/gbm/main/gbm.h +++ b/src/gbm/main/gbm.h @@ -214,6 +214,13 @@ enum gbm_bo_flags { * Buffer is linear, i.e. not tiled. */ GBM_BO_USE_LINEAR = (1 << 4), + /** +* Buffer would be rotated and some platforms have additional tiling +* requirements for rotated scanout buffers. +*/ + GBM_BO_USE_ROTATION_90 = (1 << 5), + GBM_BO_USE_ROTATION_180 = (1 << 6), + GBM_BO_USE_ROTATION_270 = (1 << 7), }; int -- 2.4.3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] igt/kms_rotation_crc: Add a new subtest to exhaustively test for fence leaks (v2)
In this subtest, as a first step, MAX_FENCES+1 number of framebuffers are created backed up by objects that have multiple GGTT views (normal and rotated). Next, we have the i915 driver instantiate a normal view followed by a rotated view. We continue doing the above MAX_FENCES + 1 times. v2: - Add an igt_require() to check if there is enough GTT space left for MAX_FENCES+1 framebuffers. (Tvrtko) - Make data2 local to test_plane_rotation_exhaust_fences(). (Tvrtko) - If there is a failure, deallocate all the previously allocated framebuffers before asserting. Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com> Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com> --- tests/kms_rotation_crc.c | 106 +++ 1 file changed, 106 insertions(+) diff --git a/tests/kms_rotation_crc.c b/tests/kms_rotation_crc.c index ed6eeef..154c6a1 100644 --- a/tests/kms_rotation_crc.c +++ b/tests/kms_rotation_crc.c @@ -25,6 +25,7 @@ #include "igt.h" #include +#define MAX_FENCES 32 typedef struct { int gfx_fd; @@ -376,6 +377,106 @@ static void test_plane_rotation_ytiled_obj(data_t *data, enum igt_plane plane_ty igt_assert(ret == 0); } +static void test_plane_rotation_exhaust_fences(data_t *data, enum igt_plane plane_type) +{ + igt_display_t *display = >display; + uint64_t tiling = LOCAL_I915_FORMAT_MOD_Y_TILED; + uint32_t format = DRM_FORMAT_XRGB; + int bpp = igt_drm_format_to_bpp(format); + enum igt_commit_style commit = COMMIT_LEGACY; + int fd = data->gfx_fd; + igt_output_t *output = >outputs[0]; + igt_plane_t *plane; + drmModeModeInfo *mode; + data_t data2[MAX_FENCES+1] = {}; + unsigned int stride, size, w, h; + uint32_t gem_handle; + uint64_t total_aperture_size, total_fbs_size; + int i, ret; + + igt_require(output != NULL && output->valid == true); + + plane = igt_output_get_plane(output, plane_type); + igt_require(igt_plane_supports_rotation(plane)); + + if (plane_type == IGT_PLANE_PRIMARY || plane_type == IGT_PLANE_CURSOR) { + igt_require(data->display.has_universal_planes); + commit = 
COMMIT_UNIVERSAL; + } + + mode = igt_output_get_mode(output); + w = mode->hdisplay; + h = mode->vdisplay; + + for (stride = 512; stride < (w * bpp / 8); stride *= 2) + ; + for (size = 1024*1024; size < stride * h; size *= 2) + ; + + /* +* Make sure there is atleast 90% of the available GTT space left +* for creating (MAX_FENCES+1) framebuffers. +*/ + total_fbs_size = size * (MAX_FENCES + 1); + total_aperture_size = gem_available_aperture_size(fd); + igt_require(total_fbs_size < total_aperture_size * 0.9); + + igt_plane_set_fb(plane, NULL); + igt_display_commit(display); + + for (i = 0; i < MAX_FENCES + 1; i++) { + gem_handle = gem_create(fd, size); + ret = __gem_set_tiling(fd, gem_handle, I915_TILING_Y, stride); + if (ret) { + igt_warn("failed to set tiling\n"); + goto err_alloc; + } + + ret = (__kms_addfb(fd, gem_handle, w, h, stride, + format, tiling, LOCAL_DRM_MODE_FB_MODIFIERS, + [i].fb.fb_id)); + if (ret) { + igt_warn("failed to create framebuffer\n"); + goto err_alloc; + } + + data2[i].fb.width = w; + data2[i].fb.height = h; + data2[i].fb.gem_handle = gem_handle; + + igt_plane_set_fb(plane, [i].fb); + igt_plane_set_rotation(plane, IGT_ROTATION_0); + + ret = igt_display_try_commit2(display, commit); + if (ret) { + igt_warn("failed to commit unrotated fb\n"); + goto err_commit; + } + + igt_plane_set_rotation(plane, IGT_ROTATION_90); + + drmModeObjectSetProperty(fd, plane->drm_plane->plane_id, +DRM_MODE_OBJECT_PLANE, +plane->rotation_property, +plane->rotation); + ret = igt_display_try_commit2(display, commit); + if (ret) { + igt_warn("failed to commit hardware rotated fb\n"); + goto err_commit; + } + } + +err_alloc: + i--; +err_commit: + kmstest_restore_vt_mode(); + + for (; i >= 0; i--) + igt_remove_fb(fd, [i].fb); + + igt_assert(ret == 0); +} + igt_main { data_t data = {}; @@ -471,6 +572,11 @@ igt_main
[Intel-gfx] [PATCH] igt/kms_rotation_crc: Add a new subtest to exhaustively test for fence leaks
In this subtest, as a first step, MAX_FENCES+1 number of framebuffers are created backed up by objects that have multiple GGTT views (normal and rotated). Next, we have the i915 driver instantiate a normal view followed by a rotated view. We continue doing the above MAX_FENCES + 1 times. Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com> Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com> --- tests/kms_rotation_crc.c | 79 1 file changed, 79 insertions(+) diff --git a/tests/kms_rotation_crc.c b/tests/kms_rotation_crc.c index ed6eeef..44691d1 100644 --- a/tests/kms_rotation_crc.c +++ b/tests/kms_rotation_crc.c @@ -25,6 +25,7 @@ #include "igt.h" #include +#define MAX_FENCES 32 typedef struct { int gfx_fd; @@ -376,6 +377,78 @@ static void test_plane_rotation_ytiled_obj(data_t *data, enum igt_plane plane_ty igt_assert(ret == 0); } +static void test_plane_rotation_exhaust_fences(data_t *data, data_t *data2, + enum igt_plane plane_type) +{ + igt_display_t *display = >display; + uint64_t tiling = LOCAL_I915_FORMAT_MOD_Y_TILED; + uint32_t format = DRM_FORMAT_XRGB; + int bpp = igt_drm_format_to_bpp(format); + enum igt_commit_style commit = COMMIT_LEGACY; + int fd = data->gfx_fd; + igt_output_t *output = >outputs[0]; + igt_plane_t *plane; + drmModeModeInfo *mode; + unsigned int stride, size, w, h; + uint32_t gem_handle; + int i, ret; + + igt_require(output != NULL && output->valid == true); + + plane = igt_output_get_plane(output, plane_type); + igt_require(igt_plane_supports_rotation(plane)); + + if (plane_type == IGT_PLANE_PRIMARY || plane_type == IGT_PLANE_CURSOR) { + igt_require(data->display.has_universal_planes); + commit = COMMIT_UNIVERSAL; + } + + mode = igt_output_get_mode(output); + w = mode->hdisplay; + h = mode->vdisplay; + + for (stride = 512; stride < (w * bpp / 8); stride *= 2) + ; + for (size = 1024*1024; size < stride * h; size *= 2) + ; + + igt_plane_set_fb(plane, NULL); + igt_display_commit(display); + + for (i = 0; i < MAX_FENCES + 1; i++) { + gem_handle 
= gem_create(fd, size); + ret = __gem_set_tiling(fd, gem_handle, I915_TILING_Y, stride); + igt_assert(ret == 0); + + do_or_die(__kms_addfb(fd, gem_handle, w, h, stride, + format, tiling, LOCAL_DRM_MODE_FB_MODIFIERS, + [i].fb.fb_id)); + data2[i].fb.width = w; + data2[i].fb.height = h; + data2[i].fb.gem_handle = gem_handle; + + igt_plane_set_fb(plane, [i].fb); + igt_plane_set_rotation(plane, IGT_ROTATION_0); + + ret = igt_display_try_commit2(display, commit); + igt_assert(ret == 0); + + igt_plane_set_rotation(plane, IGT_ROTATION_90); + + drmModeObjectSetProperty(fd, plane->drm_plane->plane_id, +DRM_MODE_OBJECT_PLANE, +plane->rotation_property, +plane->rotation); + ret = igt_display_try_commit2(display, commit); + igt_assert(ret == 0); + } + + kmstest_restore_vt_mode(); + + for (i = 0; i < MAX_FENCES + 1; i++) + igt_remove_fb(fd, [i].fb); +} + igt_main { data_t data = {}; @@ -471,6 +544,12 @@ igt_main test_plane_rotation_ytiled_obj(, IGT_PLANE_PRIMARY); } + igt_subtest_f("exhaust-fences") { + data_t data2[MAX_FENCES+1] = {}; + igt_require(gen >= 9); + test_plane_rotation_exhaust_fences(, data2, IGT_PLANE_PRIMARY); + } + igt_fixture { igt_display_fini(); } -- 2.4.3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] igt/kms_rotation_crc: Add a subtest to validate Y-tiled obj + Y fb modifier (v5)
Hi Tvrtko, On Fri, 30 Oct 2015 10:22:08 + Tvrtko Ursulin <tvrtko.ursu...@linux.intel.com> wrote: > > On 30/10/15 01:44, Vivek Kasireddy wrote: > > The main goal of this subtest is to trigger the following warning in > > the function i915_gem_object_get_fence(): > > if (WARN_ON(!obj->map_and_fenceable)) > > > > To trigger this warning, the subtest first creates a Y-tiled object > > and an associated framebuffer with the Y-fb modifier. Furthermore, > > to prevent the map_and_fenceable from being set, we make sure that > > the object does not have a normal VMA by refraining from rendering > > to the object and by setting the rotation property upfront before > > calling commit. > > > > v2: Do not call paint_squares and just use one output. > > > > v3: Convert an if condition to igt_require and move the plane > > rotation requirement further up before the fb allocation. > > > > v4: After setting rotation to 90 and committing, change the > > rotation to 0 and commit once more. This is to test if the i915 > > driver hits any warnings while pinning and unpinning an object that > > has both normal and rotated views. > > > > v5: > > - Add another subtest to toggle the order of rotation > > - Exhaustively test the i915 driver's pinning and unpinning code > > paths for any fence leaks by iterating until MAX available fences. 
> > > > Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com> > > Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com> > > --- > > tests/kms_rotation_crc.c | 84 > > 1 file changed, 84 > > insertions(+) > > > > diff --git a/tests/kms_rotation_crc.c b/tests/kms_rotation_crc.c > > index cc9847e..34f8150 100644 > > --- a/tests/kms_rotation_crc.c > > +++ b/tests/kms_rotation_crc.c > > @@ -264,6 +264,80 @@ static void test_plane_rotation(data_t *data, > > enum igt_plane plane_type) igt_require_f(valid_tests, "no valid > > crtc/connector combinations found\n"); } > > > > +static void test_plane_rotation_ytiled_obj(data_t *data, enum > > igt_plane plane_type, > > + int toggle) > > +{ > > + igt_display_t *display = >display; > > + uint64_t tiling = LOCAL_I915_FORMAT_MOD_Y_TILED; > > + uint32_t format = DRM_FORMAT_XRGB; > > + int bpp = igt_drm_format_to_bpp(format); > > + enum igt_commit_style commit = COMMIT_LEGACY; > > + int fd = data->gfx_fd; > > + igt_output_t *output = >outputs[0]; > > + igt_plane_t *plane; > > + drmModeModeInfo *mode; > > + unsigned int stride, size, w, h; > > + uint32_t gem_handle; > > + int num_fences = gem_available_fences(fd); > > + int i, ret; > > + > > + igt_require(output != NULL && output->valid == true); > > + > > + plane = igt_output_get_plane(output, plane_type); > > + igt_require(igt_plane_supports_rotation(plane)); > > + > > + if (plane_type == IGT_PLANE_PRIMARY || plane_type == > > IGT_PLANE_CURSOR) { > > + igt_require(data->display.has_universal_planes); > > + commit = COMMIT_UNIVERSAL; > > + } > > + > > + mode = igt_output_get_mode(output); > > + w = mode->hdisplay; > > + h = mode->vdisplay; > > + > > + for (stride = 512; stride < (w * bpp / 8); stride *= 2) > > + ; > > + for (size = 1024*1024; size < stride * h; size *= 2) > > + ; > > + > > + gem_handle = gem_create(fd, size); > > + ret = __gem_set_tiling(fd, gem_handle, I915_TILING_Y, > > stride); > > + igt_assert(ret == 0); > > + > > + do_or_die(__kms_addfb(fd, gem_handle, w, h, 
stride, > > + format, tiling, LOCAL_DRM_MODE_FB_MODIFIERS, > > + >fb.fb_id)); > > + data->fb.width = w; > > + data->fb.height = h; > > + data->fb.gem_handle = gem_handle; > > + > > + igt_plane_set_fb(plane, NULL); > > + igt_display_commit(display); > > + > > + igt_plane_set_fb(plane, >fb); > > + > > + for (i = 0; i < num_fences + 1; i++) { > > + igt_plane_set_rotation(plane, toggle ? > > IGT_ROTATION_0 : IGT_ROTATION_90); > > + drmModeObjectSetProperty(fd, > > plane->drm_plane->plane_id, > > +DRM_MODE_OBJECT_PLANE, > > +plane->rotation_property, > > +plane->rotation); > &
[Intel-gfx] [PATCH] igt/kms_rotation_crc: Add a subtest to validate Y-tiled obj + Y fb modifier (v5)
The main goal of this subtest is to trigger the following warning in the function i915_gem_object_get_fence(): if (WARN_ON(!obj->map_and_fenceable)) To trigger this warning, the subtest first creates a Y-tiled object and an associated framebuffer with the Y-fb modifier. Furthermore, to prevent the map_and_fenceable from being set, we make sure that the object does not have a normal VMA by refraining from rendering to the object and by setting the rotation property upfront before calling commit. v2: Do not call paint_squares and just use one output. v3: Convert an if condition to igt_require and move the plane rotation requirement further up before the fb allocation. v4: After setting rotation to 90 and committing, change the rotation to 0 and commit once more. This is to test if the i915 driver hits any warnings while pinning and unpinning an object that has both normal and rotated views. v5: - Add another subtest to toggle the order of rotation - Exhaustively test the i915 driver's pinning and unpinning code paths for any fence leaks by iterating until MAX available fences. 
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com> Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com> --- tests/kms_rotation_crc.c | 84 1 file changed, 84 insertions(+) diff --git a/tests/kms_rotation_crc.c b/tests/kms_rotation_crc.c index cc9847e..34f8150 100644 --- a/tests/kms_rotation_crc.c +++ b/tests/kms_rotation_crc.c @@ -264,6 +264,80 @@ static void test_plane_rotation(data_t *data, enum igt_plane plane_type) igt_require_f(valid_tests, "no valid crtc/connector combinations found\n"); } +static void test_plane_rotation_ytiled_obj(data_t *data, enum igt_plane plane_type, + int toggle) +{ + igt_display_t *display = >display; + uint64_t tiling = LOCAL_I915_FORMAT_MOD_Y_TILED; + uint32_t format = DRM_FORMAT_XRGB; + int bpp = igt_drm_format_to_bpp(format); + enum igt_commit_style commit = COMMIT_LEGACY; + int fd = data->gfx_fd; + igt_output_t *output = >outputs[0]; + igt_plane_t *plane; + drmModeModeInfo *mode; + unsigned int stride, size, w, h; + uint32_t gem_handle; + int num_fences = gem_available_fences(fd); + int i, ret; + + igt_require(output != NULL && output->valid == true); + + plane = igt_output_get_plane(output, plane_type); + igt_require(igt_plane_supports_rotation(plane)); + + if (plane_type == IGT_PLANE_PRIMARY || plane_type == IGT_PLANE_CURSOR) { + igt_require(data->display.has_universal_planes); + commit = COMMIT_UNIVERSAL; + } + + mode = igt_output_get_mode(output); + w = mode->hdisplay; + h = mode->vdisplay; + + for (stride = 512; stride < (w * bpp / 8); stride *= 2) + ; + for (size = 1024*1024; size < stride * h; size *= 2) + ; + + gem_handle = gem_create(fd, size); + ret = __gem_set_tiling(fd, gem_handle, I915_TILING_Y, stride); + igt_assert(ret == 0); + + do_or_die(__kms_addfb(fd, gem_handle, w, h, stride, + format, tiling, LOCAL_DRM_MODE_FB_MODIFIERS, + >fb.fb_id)); + data->fb.width = w; + data->fb.height = h; + data->fb.gem_handle = gem_handle; + + igt_plane_set_fb(plane, NULL); + igt_display_commit(display); + + igt_plane_set_fb(plane, 
>fb); + + for (i = 0; i < num_fences + 1; i++) { + igt_plane_set_rotation(plane, toggle ? IGT_ROTATION_0 : IGT_ROTATION_90); + drmModeObjectSetProperty(fd, plane->drm_plane->plane_id, +DRM_MODE_OBJECT_PLANE, +plane->rotation_property, +plane->rotation); + ret = igt_display_try_commit2(display, commit); + igt_assert(ret == 0); + + igt_plane_set_rotation(plane, toggle ? IGT_ROTATION_90 : IGT_ROTATION_0); + drmModeObjectSetProperty(fd, plane->drm_plane->plane_id, +DRM_MODE_OBJECT_PLANE, +plane->rotation_property, +plane->rotation); + ret = igt_display_try_commit2(display, commit); + igt_assert(ret == 0); + } + + kmstest_restore_vt_mode(); + igt_remove_fb(fd, >fb); +} + igt_main { data_t data = {}; @@ -345,6 +419,16 @@ igt_main test_plane_rotation(, IGT_PLANE_PRIMARY); } + igt_subtest_f("primary-rotation-90-to-0-Y-tiled") { + igt_require(gen >= 9); + test_plane_rotation_ytiled_obj(, IGT_PLANE_PRI
[Intel-gfx] [PATCH] drm/i915: Skip fence installation for objects with rotated views (v4)
While pinning an fb object to the display plane, only install a fence if the object is using a normal view. This corresponds with the behavior found in i915_gem_object_do_pin() where the fenceability criterion is determined only for objects with normal views. v2: Look at the object's map_and_fenceable flag to determine whether to install a fence or not (Chris). v3: Pin and unpin a fence only if the current view type is normal. v4: Extend the "view type is normal" check for pin_fence as well. Cc: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com> Cc: Ville Syrjala <ville.syrj...@linux.intel.com> Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com> --- drivers/gpu/drm/i915/intel_display.c | 36 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2fdfca1..9c80968 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2419,22 +2419,24 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane, * framebuffer compression. For simplicity, we always install * a fence as the cost is not that onerous. */ - ret = i915_gem_object_get_fence(obj); - if (ret == -EDEADLK) { - /* -* -EDEADLK means there are no free fences -* no pending flips. -* -* This is propagated to atomic, but it uses -* -EDEADLK to force a locking recovery, so -* change the returned error to -EBUSY. -*/ - ret = -EBUSY; - goto err_unpin; - } else if (ret) - goto err_unpin; + if (view.type == I915_GGTT_VIEW_NORMAL) { + ret = i915_gem_object_get_fence(obj); + if (ret == -EDEADLK) { + /* +* -EDEADLK means there are no free fences +* no pending flips. +* +* This is propagated to atomic, but it uses +* -EDEADLK to force a locking recovery, so +* change the returned error to -EBUSY. 
+*/ + ret = -EBUSY; + goto err_unpin; + } else if (ret) + goto err_unpin; - i915_gem_object_pin_fence(obj); + i915_gem_object_pin_fence(obj); + } dev_priv->mm.interruptible = true; intel_runtime_pm_put(dev_priv); @@ -2460,7 +2462,9 @@ static void intel_unpin_fb_obj(struct drm_framebuffer *fb, ret = intel_fill_fb_ggtt_view(, fb, plane_state); WARN_ONCE(ret, "Couldn't get view from plane state!"); - i915_gem_object_unpin_fence(obj); + if (view.type == I915_GGTT_VIEW_NORMAL) + i915_gem_object_unpin_fence(obj); + i915_gem_object_unpin_from_display_plane(obj, ); } -- 2.4.3 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx