from:"Vivek Kasireddy"

[RFC v1 3/3] drm/i915: Apply border adjustments and enable scaler on the crtc

2024-02-21 Thread Vivek Kasireddy

If the userspace has enabled the border property on a given
connector, then relevant adjustments to position and size are made
in addition to enabling the scaler on the associated crtc.

Similar to how the panel fitter is implemented, the visible area
of the crtc is tracked using a struct drm_rect object that is
part of the crtc_state. This object is added to the state checker
and support for hardware readout is also included.

Cc: Ville Syrjälä 
Cc: Matt Roper 
Signed-off-by: Vivek Kasireddy 
---
 .../gpu/drm/i915/display/intel_connector.c| 29 +++
 .../gpu/drm/i915/display/intel_connector.h|  2 ++
 drivers/gpu/drm/i915/display/intel_display.c  | 17 ---
 .../drm/i915/display/intel_display_types.h|  5 
 drivers/gpu/drm/i915/display/intel_dp.c   |  9 ++
 drivers/gpu/drm/i915/display/intel_hdmi.c |  9 ++
 drivers/gpu/drm/i915/display/skl_scaler.c | 20 ++---
 7 files changed, 83 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_connector.c 
b/drivers/gpu/drm/i915/display/intel_connector.c
index 05185db6635e..8c5dfbb98811 100644
--- a/drivers/gpu/drm/i915/display/intel_connector.c
+++ b/drivers/gpu/drm/i915/display/intel_connector.c
@@ -219,6 +219,35 @@ static const struct drm_prop_enum_list force_audio_names[] 
= {
{ HDMI_AUDIO_ON, "on" },
 };
 
+int intel_connector_apply_border(struct intel_crtc_state *crtc_state,
+void *border_data)
+{
+   const struct drm_display_mode *adjusted_mode =
+   _state->hw.adjusted_mode;
+   int width = adjusted_mode->crtc_hdisplay;
+   int height = adjusted_mode->crtc_vdisplay;
+   struct drm_rect *border = border_data;
+   int left = border->x1;
+   int top = border->y1;
+   int right = border->x2;
+   int bottom = border->y2;
+
+   if (left < 0 || top < 0 || right < 0 || bottom < 0)
+   return -EINVAL;
+
+   if (left + right >= width || top + bottom >= height)
+   return -EINVAL;
+
+   width -= (left + right);
+   height -= (top + bottom);
+
+   drm_rect_init(_state->border.dst,
+ left, top, width, height);
+   crtc_state->border.enabled = true;
+
+   return 0;
+}
+
 void
 intel_attach_force_audio_property(struct drm_connector *connector)
 {
diff --git a/drivers/gpu/drm/i915/display/intel_connector.h 
b/drivers/gpu/drm/i915/display/intel_connector.h
index ab88b57d475b..93106d855452 100644
--- a/drivers/gpu/drm/i915/display/intel_connector.h
+++ b/drivers/gpu/drm/i915/display/intel_connector.h
@@ -26,6 +26,8 @@ bool intel_connector_get_hw_state(struct intel_connector 
*connector);
 enum pipe intel_connector_get_pipe(struct intel_connector *connector);
 int intel_connector_update_modes(struct drm_connector *connector,
 const struct drm_edid *drm_edid);
+int intel_connector_apply_border(struct intel_crtc_state *crtc_state,
+void *border_data);
 int intel_ddc_get_modes(struct drm_connector *c, struct i2c_adapter *ddc);
 void intel_attach_force_audio_property(struct drm_connector *connector);
 void intel_attach_broadcast_rgb_property(struct drm_connector *connector);
diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 0ea62c278948..af615e576fe7 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -1679,9 +1679,12 @@ static void hsw_crtc_enable(struct intel_atomic_state 
*state,
glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true);
 
if (DISPLAY_VER(dev_priv) >= 9) {
-   const struct drm_rect *dst = _crtc_state->pch_pfit.dst;
+   const struct drm_rect *dst = new_crtc_state->pch_pfit.enabled ?
+_crtc_state->pch_pfit.dst :
+_crtc_state->border.dst;
 
-   if (new_crtc_state->pch_pfit.enabled)
+   if (new_crtc_state->pch_pfit.enabled ||
+   new_crtc_state->border.enabled)
skl_program_crtc_scaler(new_crtc_state, dst);
} else {
ilk_pfit_enable(new_crtc_state);
@@ -5196,6 +5199,9 @@ intel_pipe_config_compare(const struct intel_crtc_state 
*current_config,
PIPE_CONF_CHECK_BOOL(pch_pfit.enabled);
PIPE_CONF_CHECK_RECT(pch_pfit.dst);
 
+   PIPE_CONF_CHECK_BOOL(border.enabled);
+   PIPE_CONF_CHECK_RECT(border.dst);
+
PIPE_CONF_CHECK_I(scaler_state.scaler_id);
PIPE_CONF_CHECK_I(pixel_rate);
 
@@ -6564,9 +6570,12 @@ static void intel_pipe_fastset(const struct 
intel_crtc_state *old_crtc_state,
 
/* on skylake this is done by detaching scalers */
if (DISPLAY

[RFC v1 1/3] drm/i915: Rename skl_pfit_enable() to skl_program_crtc_scaler()

2024-02-21 Thread Vivek Kasireddy

Given that skl_pfit_enable() mostly enables (or programs) the scaler
at the crtc level, it makes sense to change its name to
skl_program_crtc_scaler(). Also, the rename and the addition of a
struct drm_rect * parameter help if we'd like to use this
function to enable the scaler at the crtc level for features other
than panel fitting.

Cc: Ville Syrjälä 
Cc: Matt Roper 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_display.c | 14 ++
 drivers/gpu/drm/i915/display/skl_scaler.c|  7 ++-
 drivers/gpu/drm/i915/display/skl_scaler.h|  3 ++-
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index c5de4561f458..0ea62c278948 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -1678,10 +1678,14 @@ static void hsw_crtc_enable(struct intel_atomic_state 
*state,
if (psl_clkgate_wa)
glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true);
 
-   if (DISPLAY_VER(dev_priv) >= 9)
-   skl_pfit_enable(new_crtc_state);
-   else
+   if (DISPLAY_VER(dev_priv) >= 9) {
+   const struct drm_rect *dst = _crtc_state->pch_pfit.dst;
+
+   if (new_crtc_state->pch_pfit.enabled)
+   skl_program_crtc_scaler(new_crtc_state, dst);
+   } else {
ilk_pfit_enable(new_crtc_state);
+   }
 
/*
 * On ILK+ LUT must be loaded before the pipe is running but with
@@ -6560,8 +6564,10 @@ static void intel_pipe_fastset(const struct 
intel_crtc_state *old_crtc_state,
 
/* on skylake this is done by detaching scalers */
if (DISPLAY_VER(dev_priv) >= 9) {
+   const struct drm_rect *dst = _crtc_state->pch_pfit.dst;
+
if (new_crtc_state->pch_pfit.enabled)
-   skl_pfit_enable(new_crtc_state);
+   skl_program_crtc_scaler(new_crtc_state, dst);
} else if (HAS_PCH_SPLIT(dev_priv)) {
if (new_crtc_state->pch_pfit.enabled)
ilk_pfit_enable(new_crtc_state);
diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c 
b/drivers/gpu/drm/i915/display/skl_scaler.c
index 8a934bada624..67a87cc0411a 100644
--- a/drivers/gpu/drm/i915/display/skl_scaler.c
+++ b/drivers/gpu/drm/i915/display/skl_scaler.c
@@ -704,13 +704,13 @@ static void skl_scaler_setup_filter(struct 
drm_i915_private *dev_priv, enum pipe
}
 }
 
-void skl_pfit_enable(const struct intel_crtc_state *crtc_state)
+void skl_program_crtc_scaler(const struct intel_crtc_state *crtc_state,
+const struct drm_rect *dst)
 {
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
const struct intel_crtc_scaler_state *scaler_state =
_state->scaler_state;
-   const struct drm_rect *dst = _state->pch_pfit.dst;
u16 uv_rgb_hphase, uv_rgb_vphase;
enum pipe pipe = crtc->pipe;
int width = drm_rect_width(dst);
@@ -722,9 +722,6 @@ void skl_pfit_enable(const struct intel_crtc_state 
*crtc_state)
int id;
u32 ps_ctrl;
 
-   if (!crtc_state->pch_pfit.enabled)
-   return;
-
if (drm_WARN_ON(_priv->drm,
crtc_state->scaler_state.scaler_id < 0))
return;
diff --git a/drivers/gpu/drm/i915/display/skl_scaler.h 
b/drivers/gpu/drm/i915/display/skl_scaler.h
index 63f93ca03c89..45b9ac3ec779 100644
--- a/drivers/gpu/drm/i915/display/skl_scaler.h
+++ b/drivers/gpu/drm/i915/display/skl_scaler.h
@@ -24,7 +24,8 @@ int intel_atomic_setup_scalers(struct drm_i915_private 
*dev_priv,
   struct intel_crtc *intel_crtc,
   struct intel_crtc_state *crtc_state);
 
-void skl_pfit_enable(const struct intel_crtc_state *crtc_state);
+void skl_program_crtc_scaler(const struct intel_crtc_state *crtc_state,
+const struct drm_rect *dst);
 
 void skl_program_plane_scaler(struct intel_plane *plane,
  const struct intel_crtc_state *crtc_state,
-- 
2.43.0

[RFC v1 2/3] drm/i915: Attach the Border property to DP and HDMI connectors

2024-02-21 Thread Vivek Kasireddy

The Border property is created as a blob if it doesn't exist and
then attached to DP and HDMI connectors. When userspace wants
to populate this blob, it is expected that it provides data of
size sizeof(struct drm_rect).

Cc: Ville Syrjälä 
Cc: Matt Roper 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_atomic.c   | 29 ++-
 .../gpu/drm/i915/display/intel_connector.c| 20 +
 .../gpu/drm/i915/display/intel_connector.h|  1 +
 .../gpu/drm/i915/display/intel_display_core.h |  1 +
 .../drm/i915/display/intel_display_types.h|  1 +
 drivers/gpu/drm/i915/display/intel_dp.c   |  2 ++
 drivers/gpu/drm/i915/display/intel_hdmi.c |  2 ++
 7 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c 
b/drivers/gpu/drm/i915/display/intel_atomic.c
index ec0d5168b503..76cdcad175cc 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -67,6 +67,9 @@ int intel_digital_connector_atomic_get_property(struct 
drm_connector *connector,
*val = intel_conn_state->force_audio;
else if (property == dev_priv->display.properties.broadcast_rgb)
*val = intel_conn_state->broadcast_rgb;
+   else if (property == dev_priv->display.properties.border)
+   *val = (intel_conn_state->border) ?
+  intel_conn_state->border->base.id : 0;
else {
drm_dbg_atomic(_priv->drm,
   "Unknown property [PROP:%d:%s]\n",
@@ -95,6 +98,8 @@ int intel_digital_connector_atomic_set_property(struct 
drm_connector *connector,
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_digital_connector_state *intel_conn_state =
to_intel_digital_connector_state(state);
+   bool replaced;
+   int ret;
 
if (property == dev_priv->display.properties.force_audio) {
intel_conn_state->force_audio = val;
@@ -106,11 +111,32 @@ int intel_digital_connector_atomic_set_property(struct 
drm_connector *connector,
return 0;
}
 
+   if (property == dev_priv->display.properties.border) {
+   ret = drm_property_replace_blob_from_id(dev,
+   _conn_state->border,
+   val,
+   sizeof(struct drm_rect), -1,
+   );
+   return ret;
+   }
+
drm_dbg_atomic(_priv->drm, "Unknown property [PROP:%d:%s]\n",
   property->base.id, property->name);
return -EINVAL;
 }
 
+static bool intel_connector_blob_equal(struct drm_property_blob *old_blob,
+  struct drm_property_blob *new_blob)
+{
+   if (!old_blob || !new_blob)
+   return false;
+
+   if (old_blob->length != new_blob->length)
+   return false;
+
+   return !memcmp(old_blob->data, new_blob->data, old_blob->length);
+}
+
 int intel_digital_connector_atomic_check(struct drm_connector *conn,
 struct drm_atomic_state *state)
 {
@@ -142,7 +168,8 @@ int intel_digital_connector_atomic_check(struct 
drm_connector *conn,
new_conn_state->base.content_type != 
old_conn_state->base.content_type ||
new_conn_state->base.scaling_mode != 
old_conn_state->base.scaling_mode ||
new_conn_state->base.privacy_screen_sw_state != 
old_conn_state->base.privacy_screen_sw_state ||
-   !drm_connector_atomic_hdr_metadata_equal(old_state, new_state))
+   !drm_connector_atomic_hdr_metadata_equal(old_state, new_state) ||
+   !intel_connector_blob_equal(old_conn_state->border, 
new_conn_state->border))
crtc_state->mode_changed = true;
 
return 0;
diff --git a/drivers/gpu/drm/i915/display/intel_connector.c 
b/drivers/gpu/drm/i915/display/intel_connector.c
index c65887870ddc..05185db6635e 100644
--- a/drivers/gpu/drm/i915/display/intel_connector.c
+++ b/drivers/gpu/drm/i915/display/intel_connector.c
@@ -308,3 +308,23 @@ intel_attach_scaling_mode_property(struct drm_connector 
*connector)
 
connector->state->scaling_mode = DRM_MODE_SCALE_ASPECT;
 }
+
+void
+intel_attach_border_property(struct drm_connector *connector)
+{
+   struct drm_device *dev = connector->dev;
+   struct drm_i915_private *dev_priv = to_i915(dev);
+   struct drm_property *prop;
+
+   prop = dev_priv->display.properties.border;
+   if (prop == NULL) {
+   prop = drm_property_create(dev, DRM_MODE_PROP_BLOB,
+  "Border", 0);
+   if (prop == NULL)
+   return;
+
+

[RFC v1 0/3] drm/i915: Add support for XRandR Border property

2024-02-21 Thread Vivek Kasireddy

Some customers and users have expressed interest in adding borders
(or margins) to certain displays in their multi-display configurations.
To address this need, this patchset implements the XRandR Border
property as defined here:
https://cgit.freedesktop.org/xorg/proto/randrproto/tree/randrproto.txt#n2032

---

Patchset overview:

Patch 1: Create skl_program_crtc_scaler() to program scaler for crtc
Patch 2: Create and attach the Border property to DP and HDMI
Patch 3: Implement Border property by enabling crtc scaler

This series is tested using following method:
- Run the following xrandr command with different parameters:
xrandr --output HDMI-3 --pos 1920x0 --mode 1280x1024 --fb 3840x2160 --scale 
2.11x2.11 --set "Border" 150,0,150,0

The following patch was also added to the modesetting driver to
implement the Border property:
https://gitlab.freedesktop.org/Vivek/xserver/-/commit/62abfc438f0d17fe7f88bf2826c9784c2b36443b

Cc: Ville Syrjälä 
Cc: Matt Roper 
Cc: Dongwon Kim 

Vivek Kasireddy (3):
  drm/i915: Rename skl_pfit_enable() to skl_program_crtc_scaler()
  drm/i915: Attach the Border property to DP and HDMI connectors
  drm/i915: Apply border adjustments and enable scaler on the crtc

 drivers/gpu/drm/i915/display/intel_atomic.c   | 29 ++-
 .../gpu/drm/i915/display/intel_connector.c| 49 +++
 .../gpu/drm/i915/display/intel_connector.h|  3 ++
 drivers/gpu/drm/i915/display/intel_display.c  | 25 --
 .../gpu/drm/i915/display/intel_display_core.h |  1 +
 .../drm/i915/display/intel_display_types.h|  6 +++
 drivers/gpu/drm/i915/display/intel_dp.c   | 11 +
 drivers/gpu/drm/i915/display/intel_hdmi.c | 11 +
 drivers/gpu/drm/i915/display/skl_scaler.c | 27 ++
 drivers/gpu/drm/i915/display/skl_scaler.h |  3 +-
 10 files changed, 149 insertions(+), 16 deletions(-)

-- 
2.43.0

[Intel-gfx] [PATCH v2 1/1] drm/i915/tc: Don't default disconnected legacy Type-C ports to TBT mode (v2)

2022-05-25 Thread Vivek Kasireddy

Commit 30e114ef4b16 ("drm/i915/tc: Check for DP-alt, legacy sinks before
taking PHY ownership") defaults any disconnected Type-C ports to TBT-alt
mode which presents a problem (which could most likely result in a system
hang) when userspace forces a modeset on a Type-C port that is wired for
legacy HDMI. The following warning is seen when Weston forces a modeset
on a disconnected legacy Type-C port (HDMI) on a TGL based Gigabyte system:
(https://www.gigabyte.com/Mini-PcBarebone/GB-BSi3-1115G4-rev-10#ov)

Missing case (clock == 173000)
WARNING: CPU: 1 PID: 438 at drivers/gpu/drm/i915/display/intel_ddi.c:245
icl_ddi_tc_enable_clock.cold+0x16a/0x1cf [i915]
CPU: 1 PID: 438 Comm: kworker/u8:3 Tainted: G U  W   E
5.18.0-rc5-drm-tip+ #20
Hardware name: GIGABYTE GB-BSi3-1115G4/GB-BSi3-1115G4, BIOS F9
10/16/2021
Workqueue: i915_modeset intel_atomic_commit_work [i915]
RIP: 0010:icl_ddi_tc_enable_clock.cold+0x16a/0x1cf [i915]
Code: 74 6c 7f 10 81 fd d0 78 02 00 74 6d 81 fd b0 1e 04 00 74 70 48 63
d5 48 c7 c6 c0 7b ab c0 48 c7 c7 20 75 ab c0 e8 b8 b5 c1 f0 <0f> 0b 45
31 ed e9 fb fe ff ff 49 63 d5
 48 c7 c6 80 7b ab c0 48 c7
RSP: 0018:8882522c78f0 EFLAGS: 00010282
RAX:  RBX: 0003 RCX: 
RDX: 0027 RSI: 0004 RDI: ed104a458f10
RBP: 00011558 R08: b078de4e R09: 888269ca748b
R10: ed104d394e91 R11:  R12: 888255a318f8
R13: 0002 R14: 888255a3 R15: 88823ef00348
FS:  () GS:888269c8()
knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7fd7afa42000 CR3: 000255c02004 CR4: 007706e0
PKRU: 5554
Call Trace:

intel_ddi_pre_enable.cold+0x96/0x5bf [i915]
intel_encoders_pre_enable+0x10e/0x140 [i915]
hsw_crtc_enable+0x207/0x99d [i915]
? ilk_crtc_enable.cold+0x2a/0x2a [i915]
? prepare_to_wait_exclusive+0x120/0x120
intel_enable_crtc+0x9a/0xf0 [i915]
skl_commit_modeset_enables+0x466/0x820 [i915]
? intel_commit_modeset_enables+0xd0/0xd0 [i915]
? intel_mbus_dbox_update+0x1ed/0x250 [i915]
intel_atomic_commit_tail+0xf2d/0x3040 [i915]
_raw_spin_lock_irqsave+0x87/0xe0
_raw_read_unlock_irqrestore+0x40/0x40
__update_load_avg_cfs_rq+0x70/0x5c0
__i915_sw_fence_complete+0x85/0x3b0 [i915]
? intel_get_crtc_new_encoder+0x190/0x190 [i915]
? sysvec_irq_work+0x13/0x90
? asm_sysvec_irq_work+0x12/0x20
? _raw_spin_lock_irq+0x82/0xd0
? read_word_at_a_time+0xe/0x20
? process_one_work+0x393/0x690
process_one_work+0x393/0x690
worker_thread+0x2b7/0x620
? process_one_work+0x690/0x690
kthread+0x15a/0x190
? kthread_complete_and_exit+0x20/0x20
ret_from_fork+0x1f/0x30

Continuing with the modeset without setting the DDI clock results in
more warnings and eventually a system hang. This does not seem to
happen with disconnected legacy or DP-alt DP ports because the clock
rate defaults to 162000 (which is a valid TBT clock) during the link
training process. Therefore, to fix this issue, this patch avoids
setting disconnected Type-C legacy ports to TBT-alt mode which prevents
the selection of TBT PLL when a modeset is forced.

v2: (Imre)
- Retain the check for legacy hotplug live status to account for
incorrect VBTs.

Cc: Imre Deak 
Cc: José Roberto de Souza 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_tc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_tc.c 
b/drivers/gpu/drm/i915/display/intel_tc.c
index b8b822ea3755..6773840f6cc7 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.c
+++ b/drivers/gpu/drm/i915/display/intel_tc.c
@@ -494,7 +494,8 @@ static void icl_tc_phy_connect(struct intel_digital_port 
*dig_port,
}
 
live_status_mask = tc_port_live_status_mask(dig_port);
-   if (!(live_status_mask & (BIT(TC_PORT_DP_ALT) | BIT(TC_PORT_LEGACY {
+   if (!(live_status_mask & (BIT(TC_PORT_DP_ALT) | BIT(TC_PORT_LEGACY))) &&
+   !dig_port->tc_legacy_port) {
drm_dbg_kms(>drm, "Port %s: PHY ownership not required 
(live status %02x)\n",
dig_port->tc_port_name, live_status_mask);
goto out_set_tbt_alt_mode;
-- 
2.35.1

[Intel-gfx] [PATCH v2 0/1] drm/i915/tc: Prevent system hang when modesetting disconnected Type-C ports (v2)

2022-05-25 Thread Vivek Kasireddy

The following patch tries to prevent a system hang when a modeset
is forced by userspace (Weston) on legacy Type-C ports that are
disconnected. This issue was accidentally discovered while trying
to modeset one of the HDMI ports on the TGL based Gigabyte system
(https://www.gigabyte.com/Mini-PcBarebone/GB-BSi3-1115G4-rev-10#ov)
using the following Weston settings (configured via weston.ini):

[output]
name=HDMI-A-3
mode=173.00 1920 2048 2248 2576  1080 1083 1088 1120 -hsync +vsync
force-on=true

Entering the name of the HDMI connector incorrectly above (for example
HDMI-A-3 (disconnected) instead of HDMI-A-2 (connected)) led to
warnings in the log followed by a system hang. To fix this issue,
the following patch prevents the selection of TBT PLL for legacy Type-C
ports.

v2: Drop the second patch (that rejects modesets on disconnected tc
ports) from this series.

Cc: Imre Deak 
Cc: José Roberto de Souza 
Cc: Ville Syrjälä 

Vivek Kasireddy (1):
  drm/i915/tc: Don't default disconnected legacy Type-C ports to TBT
mode (v2)

 drivers/gpu/drm/i915/display/intel_tc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

-- 
2.35.1

[Intel-gfx] [PATCH v1 1/2] drm/i915/tc: Don't default disconnected legacy Type-C ports to TBT mode

2022-05-16 Thread Vivek Kasireddy

Commit 30e114ef4b16 ("drm/i915/tc: Check for DP-alt, legacy sinks before
taking PHY ownership") defaults any disconnected Type-C ports to TBT-alt
mode which presents a problem (which could most likely result in a system
hang) when userspace forces a modeset on a Type-C port that is wired for
legacy HDMI. The following warning is seen when Weston forces a modeset
on a disconnected legacy Type-C port (HDMI) on a TGL based Gigabyte system:
(https://www.gigabyte.com/Mini-PcBarebone/GB-BSi3-1115G4-rev-10#ov)

Missing case (clock == 173000)
WARNING: CPU: 1 PID: 438 at drivers/gpu/drm/i915/display/intel_ddi.c:245
icl_ddi_tc_enable_clock.cold+0x16a/0x1cf [i915]
CPU: 1 PID: 438 Comm: kworker/u8:3 Tainted: G U  W   E
5.18.0-rc5-drm-tip+ #20
Hardware name: GIGABYTE GB-BSi3-1115G4/GB-BSi3-1115G4, BIOS F9
10/16/2021
Workqueue: i915_modeset intel_atomic_commit_work [i915]
RIP: 0010:icl_ddi_tc_enable_clock.cold+0x16a/0x1cf [i915]
Code: 74 6c 7f 10 81 fd d0 78 02 00 74 6d 81 fd b0 1e 04 00 74 70 48 63
d5 48 c7 c6 c0 7b ab c0 48 c7 c7 20 75 ab c0 e8 b8 b5 c1 f0 <0f> 0b 45
31 ed e9 fb fe ff ff 49 63 d5
 48 c7 c6 80 7b ab c0 48 c7
RSP: 0018:8882522c78f0 EFLAGS: 00010282
RAX:  RBX: 0003 RCX: 
RDX: 0027 RSI: 0004 RDI: ed104a458f10
RBP: 00011558 R08: b078de4e R09: 888269ca748b
R10: ed104d394e91 R11:  R12: 888255a318f8
R13: 0002 R14: 888255a3 R15: 88823ef00348
FS:  () GS:888269c8()
knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7fd7afa42000 CR3: 000255c02004 CR4: 007706e0
PKRU: 5554
Call Trace:

intel_ddi_pre_enable.cold+0x96/0x5bf [i915]
intel_encoders_pre_enable+0x10e/0x140 [i915]
hsw_crtc_enable+0x207/0x99d [i915]
? ilk_crtc_enable.cold+0x2a/0x2a [i915]
? prepare_to_wait_exclusive+0x120/0x120
intel_enable_crtc+0x9a/0xf0 [i915]
skl_commit_modeset_enables+0x466/0x820 [i915]
? intel_commit_modeset_enables+0xd0/0xd0 [i915]
? intel_mbus_dbox_update+0x1ed/0x250 [i915]
intel_atomic_commit_tail+0xf2d/0x3040 [i915]
_raw_spin_lock_irqsave+0x87/0xe0
_raw_read_unlock_irqrestore+0x40/0x40
__update_load_avg_cfs_rq+0x70/0x5c0
__i915_sw_fence_complete+0x85/0x3b0 [i915]
? intel_get_crtc_new_encoder+0x190/0x190 [i915]
? sysvec_irq_work+0x13/0x90
? asm_sysvec_irq_work+0x12/0x20
? _raw_spin_lock_irq+0x82/0xd0
? read_word_at_a_time+0xe/0x20
? process_one_work+0x393/0x690
process_one_work+0x393/0x690
worker_thread+0x2b7/0x620
? process_one_work+0x690/0x690
kthread+0x15a/0x190
? kthread_complete_and_exit+0x20/0x20
ret_from_fork+0x1f/0x30

Continuing with the modeset without setting the DDI clock results in
more warnings and eventually a system hang. This does not seem to
happen with disconnected legacy or DP-alt DP ports because the clock
rate defaults to 162000 (which is a valid TBT clock) during the link
training process. Therefore, to fix this issue, this patch avoids
setting disconnected Type-C legacy ports to TBT-alt mode which prevents
the selection of TBT PLL when a modeset is forced.

Cc: Imre Deak 
Cc: José Roberto de Souza 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_tc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_tc.c 
b/drivers/gpu/drm/i915/display/intel_tc.c
index b8b822ea3755..0c3304be0602 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.c
+++ b/drivers/gpu/drm/i915/display/intel_tc.c
@@ -494,7 +494,8 @@ static void icl_tc_phy_connect(struct intel_digital_port 
*dig_port,
}
 
live_status_mask = tc_port_live_status_mask(dig_port);
-   if (!(live_status_mask & (BIT(TC_PORT_DP_ALT) | BIT(TC_PORT_LEGACY {
+   if (!(live_status_mask & BIT(TC_PORT_DP_ALT)) &&
+   !dig_port->tc_legacy_port) {
drm_dbg_kms(>drm, "Port %s: PHY ownership not required 
(live status %02x)\n",
dig_port->tc_port_name, live_status_mask);
goto out_set_tbt_alt_mode;
-- 
2.35.1

[Intel-gfx] [PATCH v1 2/2] drm/i915: Reject the atomic modeset if an associated Type-C port is disconnected

2022-05-16 Thread Vivek Kasireddy

Although doing a modeset on any disconnected connector might be futile,
it can be particularly problematic if the connector is a Type-C port
without a sink. While the spec says "Display software must not use
a disconnected port" when referring to the Type-C DDI sequence, it does
not spell out what happens if such an attempt is made. Experimental results
have shown that this can lead to serious issues including a system hang.
Therefore, reject the atomic modeset if we detect that the Type-C port
is not connected.

Cc: Ville Syrjälä 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_atomic.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c 
b/drivers/gpu/drm/i915/display/intel_atomic.c
index 40da7910f845..40576964b8c1 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -114,6 +114,8 @@ int intel_digital_connector_atomic_set_property(struct 
drm_connector *connector,
 int intel_digital_connector_atomic_check(struct drm_connector *conn,
 struct drm_atomic_state *state)
 {
+   struct drm_device *dev = conn->dev;
+   struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_connector_state *new_state =
drm_atomic_get_new_connector_state(state, conn);
struct intel_digital_connector_state *new_conn_state =
@@ -122,6 +124,10 @@ int intel_digital_connector_atomic_check(struct 
drm_connector *conn,
drm_atomic_get_old_connector_state(state, conn);
struct intel_digital_connector_state *old_conn_state =
to_intel_digital_connector_state(old_state);
+   struct intel_encoder *encoder =
+   intel_attached_encoder(to_intel_connector(conn));
+   struct intel_digital_port *dig_port =
+   encoder ? enc_to_dig_port(encoder) : NULL;
struct drm_crtc_state *crtc_state;
 
intel_hdcp_atomic_check(conn, old_state, new_state);
@@ -131,6 +137,20 @@ int intel_digital_connector_atomic_check(struct 
drm_connector *conn,
 
crtc_state = drm_atomic_get_new_crtc_state(state, new_state->crtc);
 
+   /*
+* The spec says that it is not safe to use a disconnected Type-C port.
+* Therefore, check to see if this connector is connected and reject
+* the modeset if there is no sink detected.
+*/
+   if (dig_port && !dig_port->connected(encoder) &&
+   intel_phy_is_tc(dev_priv,
+   intel_port_to_phy(dev_priv, encoder->port))) {
+   drm_dbg_atomic(_priv->drm,
+  "[CONNECTOR:%d:%s] is not connected; rejecting 
the modeset\n",
+  conn->base.id, conn->name);
+   return -EINVAL;
+   }
+
/*
 * These properties are handled by fastset, and might not end
 * up in a modeset.
-- 
2.35.1

[Intel-gfx] [PATCH v1 0/2] drm/i915/tc: Prevent system hang when modesetting disconnected Type-C ports

2022-05-16 Thread Vivek Kasireddy

The following two patches try to prevent a system hang when a modeset
is forced by userspace (Weston) on legacy Type-C ports that are
disconnected. This issue was accidentally discovered while trying
to modeset one of the HDMI ports on the TGL based Gigabyte system
(https://www.gigabyte.com/Mini-PcBarebone/GB-BSi3-1115G4-rev-10#ov)
using the following Weston settings (configured via weston.ini):

[output]
name=HDMI-A-3
mode=173.00 1920 2048 2248 2576  1080 1083 1088 1120 -hsync +vsync
force-on=true

Entering the name of the HDMI connector incorrectly above (for example
HDMI-A-3 (disconnected) instead of HDMI-A-2 (connected)) led to
warnings in the log followed by a system hang. To fix this issue,
the first patch prevents the selection of TBT PLL for legacy Type-C
ports and the second one rejects any attempts to modeset disconnected
Type-C ports.

Cc: Imre Deak 
Cc: José Roberto de Souza 
Cc: Ville Syrjälä 

Vivek Kasireddy (2):
  drm/i915/tc: Don't default disconnected legacy Type-C ports to TBT
mode
  drm/i915: Reject the atomic modeset if an associated Type-C port is
disconnected

 drivers/gpu/drm/i915/display/intel_atomic.c | 20 
 drivers/gpu/drm/i915/display/intel_tc.c |  3 ++-
 2 files changed, 22 insertions(+), 1 deletion(-)

-- 
2.35.1

[Intel-gfx] [PATCH v2] drm/i915/gem: Don't evict unmappable VMAs when pinning with PIN_MAPPABLE (v2)

2022-03-20 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced -- because there is no space for it in the aperture --
and therefore i915_vma_unbind() is called which unbinds and evicts it.
This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This whole
thing results in a latency of ~10ms and happens every other repaint cycle.
Therefore, to fix this issue, we just ensure that the misplaced VMA
does not get evicted when we try to pin it with PIN_MAPPABLE -- by
returning early if the mappable/fenceable flag is not set.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with a 8K@60 mode results in only ~40 FPS (compared to ~59 FPS with
this patch). Since upstream Weston submits a frame ~7ms before the
next vblank, the latencies seen between atomic commit and flip event
are 7, 24 (7 + 16.66), 7, 24, ..., suggesting that it misses the
vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2:
- Expand the code comments to describe the ping-pong issue.

Cc: Tvrtko Ursulin 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9747924cc57b..44741f842852 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -939,8 +939,19 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object 
*obj,
if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
return ERR_PTR(-ENOSPC);
 
+   /*
+* If this misplaced vma is too big (i.e, at-least
+* half the size of aperture) or hasn't been pinned
+* mappable before, we ignore the misplacement when
+* PIN_NONBLOCK is set in order to avoid the ping-pong
+* issue described above. In other words, we try to
+* avoid the costly operation of unbinding this vma
+* from the GGTT and rebinding it back because there
+* may not be enough space for this vma in the aperture.
+*/
if (flags & PIN_MAPPABLE &&
-   vma->fence_size > ggtt->mappable_end / 2)
+   (vma->fence_size > ggtt->mappable_end / 2 ||
+   !i915_vma_is_map_and_fenceable(vma)))
return ERR_PTR(-ENOSPC);
}
 
-- 
2.35.1

[Intel-gfx] [PATCH v1] drm/i915/gem: Don't evict unmappable VMAs when pinning with PIN_MAPPABLE

2022-03-17 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced -- because there is no space for it in the aperture --
and therefore i915_vma_unbind() is called which unbinds and evicts it.
This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This whole
thing results in a latency of ~10ms and happens every other repaint cycle.
Therefore, to fix this issue, we just ensure that the misplaced VMA
does not get evicted when we try to pin it with PIN_MAPPABLE -- by
returning early if the mappable/fenceable flag is not set.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with an 8K@60 mode results in only ~40 FPS (compared to ~59 FPS with
this patch). Since upstream Weston submits a frame ~7ms before the
next vblank, the latencies seen between atomic commit and flip event
are 7, 24 (7 + 16.66), 7, 24 ms, suggesting that it misses the
vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

Cc: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9747924cc57b..7307c5de1c58 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -939,8 +939,14 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object 
*obj,
if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
return ERR_PTR(-ENOSPC);
 
+   /*
+* If this misplaced vma is too big (i.e, at-least
+* half the size of aperture) or just unmappable,
+* we would not be able to pin with PIN_MAPPABLE.
+*/
if (flags & PIN_MAPPABLE &&
-   vma->fence_size > ggtt->mappable_end / 2)
+   (vma->fence_size > ggtt->mappable_end / 2 ||
+   !i915_vma_is_map_and_fenceable(vma)))
return ERR_PTR(-ENOSPC);
}
 
-- 
2.35.1

[Intel-gfx] [PATCH v6 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

2022-03-07 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map, thereby preventing unnecessary unbinds.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24 ms, suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at-least two objects. (Tvrtko)
- Slightly update the commit message.

v5: (Tvrtko)
- Rename the function to indicate that the object may be too big to
  map into the aperture.
- Account for guard pages while calculating the total size required
  for the object.
- Do not subject all objects to the heuristic check and instead
  consider objects only of a certain size.
- Do the hole walk using the rbtree.
- Preserve the existing PIN_NONBLOCK logic.
- Drop the PIN_MAPPABLE check while pinning the VMA.

v6: (Tvrtko)
- Return 0 on success and the specific error code on failure to
  preserve the existing behavior.

v7: (Ville)
- Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and
  size < ggtt->mappable_end / 4 checks.
- Drop the redundant check that is based on previous heuristic.

v8:
- Make sure that we are holding the mutex associated with ggtt vm
  as we traverse the hole nodes.

v9: (Tvrtko)
- Use mutex_lock_interruptible_nested() instead of mutex_lock().

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 1 file changed, 94 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9747924cc57b..e0d731b3f215 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,6 +49,7 @@
 #include "gem/i915_gem_pm.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
+#include "gem/i915_gem_tiling.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -882,6 +883,96 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(>vma.lock);
 }
 
+static int
+i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj,
+u64 alignment, u64 flags)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+

[Intel-gfx] [PATCH v6 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v6)

2022-03-07 Thread Vivek Kasireddy

This iterator relies on the __drm_mm_first_hole() and __drm_mm_next_hole()
functions to identify suitable holes for an allocation of a given
size by efficiently traversing the rbtree associated with the given
allocator.

It replaces the for loop in drm_mm_insert_node_in_range() and can
also be used by drm drivers to quickly identify holes of a certain
size within a given range.

v2: (Tvrtko)
- Prepend a double underscore for the newly exported first/next_hole
- s/each_best_hole/each_suitable_hole/g
- Mask out DRM_MM_INSERT_ONCE from the mode before calling
  first/next_hole and elsewhere.

v3: (Tvrtko)
- Reduce the number of hunks by retaining the "mode" variable name

v4:
- Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos

v5: (Tvrtko)
- Fixed another typo: should pass caller_mode instead of mode to
  the iterator in drm_mm_insert_node_in_range().

v6: (Tvrtko)
- Fix the checkpatch warning that warns about precedence issues.

Reviewed-by: Tvrtko Ursulin 
Acked-by: Christian König 
Suggested-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 32 +++-
 include/drm/drm_mm.h | 36 
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8257f9d4f619..6ff98a0e4df3 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm 
*mm, u64 addr, u64 size)
return node;
 }
 
-static struct drm_mm_node *
-first_hole(struct drm_mm *mm,
-  u64 start, u64 end, u64 size,
-  enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm,
hole_stack);
}
 }
+EXPORT_SYMBOL(__drm_mm_first_hole);
 
 /**
  * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions
@@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node 
*entry, u64 size)  \
 DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right)
 DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left)
 
-static struct drm_mm_node *
-next_hole(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm,
return >hole_stack == >hole_stack ? NULL : node;
}
 }
+EXPORT_SYMBOL(__drm_mm_next_hole);
 
 /**
  * drm_mm_reserve_node - insert an pre-initialized node
@@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
u64 size, u64 alignment,
unsigned long color,
u64 range_start, u64 range_end,
-   enum drm_mm_insert_mode mode)
+   enum drm_mm_insert_mode caller_mode)
 {
struct drm_mm_node *hole;
u64 remainder_mask;
-   bool once;
+   enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE;
 
DRM_MM_BUG_ON(range_start > range_end);
 
@@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (alignment <= 1)
alignment = 0;
 
-   once = mode & DRM_MM_INSERT_ONCE;
-   mode &= ~DRM_MM_INSERT_ONCE;
-
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-   for (hole = first_hole(mm, range_start, range_end, size, mode);
-hole;
-hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+   drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end,
+ size, caller_mode) {
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ac33ba1b18bc..896754fa6d69 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct 
drm_mm_node *hole_node)
 1 : 0; \
 pos = list_next_entry(pos, hole_stack))
 
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode);
+
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+

[Intel-gfx] [PATCH v6 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation

2022-03-07 Thread Vivek Kasireddy

The first patch is a drm core patch that replaces the for loop in
drm_mm_insert_node_in_range() with the iterator and does not
cause any functional changes. The second patch is an i915 driver
specific patch that also uses the iterator but solves a different
problem.

v2:
- Added a new patch to this series to fix a potential NULL
  dereference.
- Fixed a typo associated with the iterator introduced in the
  drm core patch.
- Added locking around the snippet in the i915 patch that
  traverses the GGTT hole nodes.

v3: (Tvrtko)
- Replaced mutex_lock with mutex_lock_interruptible_nested() in
  the i915 patch.

v4: (Tvrtko)
- Dropped the patch added in v2 as it was deemed unnecessary.

v5: (Tvrtko)
- Fixed yet another typo in the drm core patch: should have
  passed caller_mode instead of mode to the iterator.

v6: (Tvrtko)
- Fixed the checkpatch warning that warns about precedence issues.

Cc: Tvrtko Ursulin 
Cc: Nirmoy Das 
Cc: Christian König 

Vivek Kasireddy (2):
  drm/mm: Add an iterator to optimally walk over holes for an allocation
(v6)
  drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

 drivers/gpu/drm/drm_mm.c|  32 
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 include/drm/drm_mm.h|  36 +
 3 files changed, 145 insertions(+), 51 deletions(-)

-- 
2.35.1

[Intel-gfx] [CI 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v5)

2022-03-05 Thread Vivek Kasireddy

This iterator relies on the __drm_mm_first_hole() and __drm_mm_next_hole()
functions to identify suitable holes for an allocation of a given
size by efficiently traversing the rbtree associated with the given
allocator.

It replaces the for loop in drm_mm_insert_node_in_range() and can
also be used by drm drivers to quickly identify holes of a certain
size within a given range.

v2: (Tvrtko)
- Prepend a double underscore for the newly exported first/next_hole
- s/each_best_hole/each_suitable_hole/g
- Mask out DRM_MM_INSERT_ONCE from the mode before calling
  first/next_hole and elsewhere.

v3: (Tvrtko)
- Reduce the number of hunks by retaining the "mode" variable name

v4:
- Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos

v5: (Tvrtko)
- Fixed another typo: should pass caller_mode instead of mode to
  the iterator in drm_mm_insert_node_in_range().

Reviewed-by: Tvrtko Ursulin 
Acked-by: Christian König 
Suggested-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 32 +++-
 include/drm/drm_mm.h | 36 
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8257f9d4f619..6ff98a0e4df3 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm 
*mm, u64 addr, u64 size)
return node;
 }
 
-static struct drm_mm_node *
-first_hole(struct drm_mm *mm,
-  u64 start, u64 end, u64 size,
-  enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm,
hole_stack);
}
 }
+EXPORT_SYMBOL(__drm_mm_first_hole);
 
 /**
  * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions
@@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node 
*entry, u64 size)  \
 DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right)
 DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left)
 
-static struct drm_mm_node *
-next_hole(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm,
return >hole_stack == >hole_stack ? NULL : node;
}
 }
+EXPORT_SYMBOL(__drm_mm_next_hole);
 
 /**
  * drm_mm_reserve_node - insert an pre-initialized node
@@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
u64 size, u64 alignment,
unsigned long color,
u64 range_start, u64 range_end,
-   enum drm_mm_insert_mode mode)
+   enum drm_mm_insert_mode caller_mode)
 {
struct drm_mm_node *hole;
u64 remainder_mask;
-   bool once;
+   enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE;
 
DRM_MM_BUG_ON(range_start > range_end);
 
@@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (alignment <= 1)
alignment = 0;
 
-   once = mode & DRM_MM_INSERT_ONCE;
-   mode &= ~DRM_MM_INSERT_ONCE;
-
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-   for (hole = first_hole(mm, range_start, range_end, size, mode);
-hole;
-hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+   drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end,
+ size, caller_mode) {
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ac33ba1b18bc..dff6db627807 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct 
drm_mm_node *hole_node)
 1 : 0; \
 pos = list_next_entry(pos, hole_stack))
 
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode);
+
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode);
+
+/**
+ * drm_mm_for_each_suitable_hole

[Intel-gfx] [CI 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation

2022-03-05 Thread Vivek Kasireddy

The first patch is a drm core patch that replaces the for loop in
drm_mm_insert_node_in_range() with the iterator and does not
cause any functional changes. The second patch is an i915 driver
specific patch that also uses the iterator but solves a different
problem.

v2:
- Added a new patch to this series to fix a potential NULL
  dereference.
- Fixed a typo associated with the iterator introduced in the
  drm core patch.
- Added locking around the snippet in the i915 patch that
  traverses the GGTT hole nodes.

v3: (Tvrtko)
- Replaced mutex_lock with mutex_lock_interruptible_nested() in
  the i915 patch.

v4: (Tvrtko)
- Dropped the patch added in v2 as it was deemed unnecessary.

v5: (Tvrtko)
- Fixed yet another typo in the drm core patch: should have
  passed caller_mode instead of mode to the iterator.

Cc: Tvrtko Ursulin 
Cc: Nirmoy Das 
Cc: Christian König 

Vivek Kasireddy (2):
  drm/mm: Add an iterator to optimally walk over holes for an allocation
(v5)
  drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

 drivers/gpu/drm/drm_mm.c|  32 
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 include/drm/drm_mm.h|  36 +
 3 files changed, 145 insertions(+), 51 deletions(-)

-- 
2.35.1

[Intel-gfx] [CI 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

2022-03-05 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map, thereby preventing unnecessary unbinds.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24 ms, suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at-least two objects. (Tvrtko)
- Slightly update the commit message.

v5: (Tvrtko)
- Rename the function to indicate that the object may be too big to
  map into the aperture.
- Account for guard pages while calculating the total size required
  for the object.
- Do not subject all objects to the heuristic check and instead
  consider objects only of a certain size.
- Do the hole walk using the rbtree.
- Preserve the existing PIN_NONBLOCK logic.
- Drop the PIN_MAPPABLE check while pinning the VMA.

v6: (Tvrtko)
- Return 0 on success and the specific error code on failure to
  preserve the existing behavior.

v7: (Ville)
- Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and
  size < ggtt->mappable_end / 4 checks.
- Drop the redundant check that is based on previous heuristic.

v8:
- Make sure that we are holding the mutex associated with ggtt vm
  as we traverse the hole nodes.

v9: (Tvrtko)
- Use mutex_lock_interruptible_nested() instead of mutex_lock().

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 1 file changed, 94 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2e10187cd0a0..4bef9eaa8b2e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,6 +49,7 @@
 #include "gem/i915_gem_pm.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
+#include "gem/i915_gem_tiling.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -879,6 +880,96 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(>vma.lock);
 }
 
+static int
+i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj,
+u64 alignment, u64 flags)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+

[Intel-gfx] [CI 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation

2022-02-28 Thread Vivek Kasireddy

The first patch is a drm core patch that replaces the for loop in
drm_mm_insert_node_in_range() with the iterator and does not
cause any functional changes. The second patch is an i915 driver
specific patch that also uses the iterator but solves a different
problem.

v2:
- Added a new patch to this series to fix a potential NULL
  dereference.
- Fixed a typo associated with the iterator introduced in the
  drm core patch.
- Added locking around the snippet in the i915 patch that
  traverses the GGTT hole nodes.

v3: (Tvrtko)
- Replaced mutex_lock with mutex_lock_interruptible_nested() in
  the i915 patch.

v4: (Tvrtko)
- Dropped the patch added in v2 as it was deemed unnecessary.

v5: (Tvrtko)
- Fixed yet another typo in the drm core patch: should have
  passed caller_mode instead of mode to the iterator.

Cc: Tvrtko Ursulin 
Cc: Nirmoy Das 
Cc: Christian König 

Vivek Kasireddy (2):
  drm/mm: Add an iterator to optimally walk over holes for an allocation
(v5)
  drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

 drivers/gpu/drm/drm_mm.c|  32 
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 include/drm/drm_mm.h|  36 +
 3 files changed, 145 insertions(+), 51 deletions(-)

-- 
2.34.1

[Intel-gfx] [CI 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

2022-02-28 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map, thereby preventing unnecessary unbinds.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24 ms, suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at-least two objects. (Tvrtko)
- Slightly update the commit message.

v5: (Tvrtko)
- Rename the function to indicate that the object may be too big to
  map into the aperture.
- Account for guard pages while calculating the total size required
  for the object.
- Do not subject all objects to the heuristic check and instead
  consider objects only of a certain size.
- Do the hole walk using the rbtree.
- Preserve the existing PIN_NONBLOCK logic.
- Drop the PIN_MAPPABLE check while pinning the VMA.

v6: (Tvrtko)
- Return 0 on success and the specific error code on failure to
  preserve the existing behavior.

v7: (Ville)
- Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and
  size < ggtt->mappable_end / 4 checks.
- Drop the redundant check that is based on previous heuristic.

v8:
- Make sure that we are holding the mutex associated with ggtt vm
  as we traverse the hole nodes.

v9: (Tvrtko)
- Use mutex_lock_interruptible_nested() instead of mutex_lock().

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 1 file changed, 94 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2e10187cd0a0..4bef9eaa8b2e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,6 +49,7 @@
 #include "gem/i915_gem_pm.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
+#include "gem/i915_gem_tiling.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -879,6 +880,96 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(>vma.lock);
 }
 
+static int
+i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj,
+u64 alignment, u64 flags)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+

[Intel-gfx] [CI 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v5)

2022-02-28 Thread Vivek Kasireddy

This iterator relies on the __drm_mm_first_hole() and __drm_mm_next_hole()
functions to identify suitable holes for an allocation of a given
size by efficiently traversing the rbtree associated with the given
allocator.

It replaces the for loop in drm_mm_insert_node_in_range() and can
also be used by drm drivers to quickly identify holes of a certain
size within a given range.

v2: (Tvrtko)
- Prepend a double underscore for the newly exported first/next_hole
- s/each_best_hole/each_suitable_hole/g
- Mask out DRM_MM_INSERT_ONCE from the mode before calling
  first/next_hole and elsewhere.

v3: (Tvrtko)
- Reduce the number of hunks by retaining the "mode" variable name

v4:
- Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos

v5: (Tvrtko)
- Fixed another typo: should pass caller_mode instead of mode to
  the iterator in drm_mm_insert_node_in_range().

Reviewed-by: Tvrtko Ursulin 
Acked-by: Christian König 
Suggested-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 32 +++-
 include/drm/drm_mm.h | 36 
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8257f9d4f619..6ff98a0e4df3 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm 
*mm, u64 addr, u64 size)
return node;
 }
 
-static struct drm_mm_node *
-first_hole(struct drm_mm *mm,
-  u64 start, u64 end, u64 size,
-  enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm,
hole_stack);
}
 }
+EXPORT_SYMBOL(__drm_mm_first_hole);
 
 /**
  * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions
@@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node 
*entry, u64 size)  \
 DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right)
 DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left)
 
-static struct drm_mm_node *
-next_hole(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm,
return &node->hole_stack == &mm->hole_stack ? NULL : node;
}
 }
+EXPORT_SYMBOL(__drm_mm_next_hole);
 
 /**
  * drm_mm_reserve_node - insert an pre-initialized node
@@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
u64 size, u64 alignment,
unsigned long color,
u64 range_start, u64 range_end,
-   enum drm_mm_insert_mode mode)
+   enum drm_mm_insert_mode caller_mode)
 {
struct drm_mm_node *hole;
u64 remainder_mask;
-   bool once;
+   enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE;
 
DRM_MM_BUG_ON(range_start > range_end);
 
@@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (alignment <= 1)
alignment = 0;
 
-   once = mode & DRM_MM_INSERT_ONCE;
-   mode &= ~DRM_MM_INSERT_ONCE;
-
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-   for (hole = first_hole(mm, range_start, range_end, size, mode);
-hole;
-hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+   drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end,
+ size, caller_mode) {
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ac33ba1b18bc..dff6db627807 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct 
drm_mm_node *hole_node)
 1 : 0; \
 pos = list_next_entry(pos, hole_stack))
 
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode);
+
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode);
+
+/**
+ * drm_mm_for_each_suitable_hole

[Intel-gfx] [CI 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation

2022-02-27 Thread Vivek Kasireddy

The first patch is a drm core patch that replaces the for loop in
drm_mm_insert_node_in_range() with the iterator and would not
cause any functional changes. The second patch is an i915
driver-specific patch that also uses the iterator but solves a
different problem.

v2:
- Added a new patch to this series to fix a potential NULL
  dereference.
- Fixed a typo associated with the iterator introduced in the
  drm core patch.
- Added locking around the snippet in the i915 patch that
  traverses the GGTT hole nodes.

v3: (Tvrtko)
- Replaced mutex_lock with mutex_lock_interruptible_nested() in
  the i915 patch.

v4: (Tvrtko)
- Dropped the patch added in v2 as it was deemed unnecessary.

Cc: Tvrtko Ursulin 
Cc: Nirmoy Das 
Cc: Christian König 

Vivek Kasireddy (2):
  drm/mm: Add an iterator to optimally walk over holes for an allocation
(v4)
  drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

 drivers/gpu/drm/drm_mm.c|  32 
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 include/drm/drm_mm.h|  36 +
 3 files changed, 145 insertions(+), 51 deletions(-)

-- 
2.34.1

[Intel-gfx] [CI 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

2022-02-27 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map thereby preventing unnecessary unbind.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24 ms, suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at-least two objects. (Tvrtko)
- Slightly update the commit message.

v5: (Tvrtko)
- Rename the function to indicate that the object may be too big to
  map into the aperture.
- Account for guard pages while calculating the total size required
  for the object.
- Do not subject all objects to the heuristic check and instead
  consider objects only of a certain size.
- Do the hole walk using the rbtree.
- Preserve the existing PIN_NONBLOCK logic.
- Drop the PIN_MAPPABLE check while pinning the VMA.

v6: (Tvrtko)
- Return 0 on success and the specific error code on failure to
  preserve the existing behavior.

v7: (Ville)
- Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and
  size < ggtt->mappable_end / 4 checks.
- Drop the redundant check that is based on previous heuristic.

v8:
- Make sure that we are holding the mutex associated with ggtt vm
  as we traverse the hole nodes.

v9: (Tvrtko)
- Use mutex_lock_interruptible_nested() instead of mutex_lock().

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 1 file changed, 94 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2e10187cd0a0..4bef9eaa8b2e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,6 +49,7 @@
 #include "gem/i915_gem_pm.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
+#include "gem/i915_gem_tiling.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -879,6 +880,96 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
 }
 
+static int
+i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj,
+u64 alignment, u64 flags)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+

[Intel-gfx] [CI 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v4)

2022-02-27 Thread Vivek Kasireddy

This iterator relies on drm_mm_first_hole() and drm_mm_next_hole()
functions to identify suitable holes for an allocation of a given
size by efficiently traversing the rbtree associated with the given
allocator.

It replaces the for loop in drm_mm_insert_node_in_range() and can
also be used by drm drivers to quickly identify holes of a certain
size within a given range.

v2: (Tvrtko)
- Prepend a double underscore for the newly exported first/next_hole
- s/each_best_hole/each_suitable_hole/g
- Mask out DRM_MM_INSERT_ONCE from the mode before calling
  first/next_hole and elsewhere.

v3: (Tvrtko)
- Reduce the number of hunks by retaining the "mode" variable name

v4:
- Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos

Reviewed-by: Tvrtko Ursulin 
Acked-by: Christian König 
Suggested-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 32 +++-
 include/drm/drm_mm.h | 36 
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8257f9d4f619..8efea548ae9f 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm 
*mm, u64 addr, u64 size)
return node;
 }
 
-static struct drm_mm_node *
-first_hole(struct drm_mm *mm,
-  u64 start, u64 end, u64 size,
-  enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm,
hole_stack);
}
 }
+EXPORT_SYMBOL(__drm_mm_first_hole);
 
 /**
  * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions
@@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node 
*entry, u64 size)  \
 DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right)
 DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left)
 
-static struct drm_mm_node *
-next_hole(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm,
return &node->hole_stack == &mm->hole_stack ? NULL : node;
}
 }
+EXPORT_SYMBOL(__drm_mm_next_hole);
 
 /**
  * drm_mm_reserve_node - insert an pre-initialized node
@@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
u64 size, u64 alignment,
unsigned long color,
u64 range_start, u64 range_end,
-   enum drm_mm_insert_mode mode)
+   enum drm_mm_insert_mode caller_mode)
 {
struct drm_mm_node *hole;
u64 remainder_mask;
-   bool once;
+   enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE;
 
DRM_MM_BUG_ON(range_start > range_end);
 
@@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (alignment <= 1)
alignment = 0;
 
-   once = mode & DRM_MM_INSERT_ONCE;
-   mode &= ~DRM_MM_INSERT_ONCE;
-
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-   for (hole = first_hole(mm, range_start, range_end, size, mode);
-hole;
-hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+   drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end,
+ size, mode) {
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ac33ba1b18bc..dff6db627807 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct 
drm_mm_node *hole_node)
 1 : 0; \
 pos = list_next_entry(pos, hole_stack))
 
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode);
+
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode);
+
+/**
+ * drm_mm_for_each_suitable_hole - iterator to optimally walk over all
+ * holes that can fit an allocation of the given @size.
+ * @pos: _mm_node used internally to track pr

[Intel-gfx] [CI 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

2022-02-23 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map thereby preventing unnecessary unbind.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24 ms, suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at-least two objects. (Tvrtko)
- Slightly update the commit message.

v5: (Tvrtko)
- Rename the function to indicate that the object may be too big to
  map into the aperture.
- Account for guard pages while calculating the total size required
  for the object.
- Do not subject all objects to the heuristic check and instead
  consider objects only of a certain size.
- Do the hole walk using the rbtree.
- Preserve the existing PIN_NONBLOCK logic.
- Drop the PIN_MAPPABLE check while pinning the VMA.

v6: (Tvrtko)
- Return 0 on success and the specific error code on failure to
  preserve the existing behavior.

v7: (Ville)
- Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and
  size < ggtt->mappable_end / 4 checks.
- Drop the redundant check that is based on previous heuristic.

v8:
- Make sure that we are holding the mutex associated with ggtt vm
  as we traverse the hole nodes.

v9: (Tvrtko)
- Use mutex_lock_interruptible_nested() instead of mutex_lock().

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 1 file changed, 94 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2e10187cd0a0..4bef9eaa8b2e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,6 +49,7 @@
 #include "gem/i915_gem_pm.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
+#include "gem/i915_gem_tiling.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -879,6 +880,96 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
 }
 
+static int
+i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj,
+u64 alignment, u64 flags)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+

[Intel-gfx] [CI 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v4)

2022-02-23 Thread Vivek Kasireddy

This iterator relies on drm_mm_first_hole() and drm_mm_next_hole()
functions to identify suitable holes for an allocation of a given
size by efficiently traversing the rbtree associated with the given
allocator.

It replaces the for loop in drm_mm_insert_node_in_range() and can
also be used by drm drivers to quickly identify holes of a certain
size within a given range.

v2: (Tvrtko)
- Prepend a double underscore for the newly exported first/next_hole
- s/each_best_hole/each_suitable_hole/g
- Mask out DRM_MM_INSERT_ONCE from the mode before calling
  first/next_hole and elsewhere.

v3: (Tvrtko)
- Reduce the number of hunks by retaining the "mode" variable name

v4:
- Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos

Reviewed-by: Tvrtko Ursulin 
Acked-by: Christian König 
Suggested-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 32 +++-
 include/drm/drm_mm.h | 36 
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8257f9d4f619..8efea548ae9f 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm 
*mm, u64 addr, u64 size)
return node;
 }
 
-static struct drm_mm_node *
-first_hole(struct drm_mm *mm,
-  u64 start, u64 end, u64 size,
-  enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm,
hole_stack);
}
 }
+EXPORT_SYMBOL(__drm_mm_first_hole);
 
 /**
  * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions
@@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node 
*entry, u64 size)  \
 DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right)
 DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left)
 
-static struct drm_mm_node *
-next_hole(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm,
return &node->hole_stack == &mm->hole_stack ? NULL : node;
}
 }
+EXPORT_SYMBOL(__drm_mm_next_hole);
 
 /**
  * drm_mm_reserve_node - insert an pre-initialized node
@@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
u64 size, u64 alignment,
unsigned long color,
u64 range_start, u64 range_end,
-   enum drm_mm_insert_mode mode)
+   enum drm_mm_insert_mode caller_mode)
 {
struct drm_mm_node *hole;
u64 remainder_mask;
-   bool once;
+   enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE;
 
DRM_MM_BUG_ON(range_start > range_end);
 
@@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (alignment <= 1)
alignment = 0;
 
-   once = mode & DRM_MM_INSERT_ONCE;
-   mode &= ~DRM_MM_INSERT_ONCE;
-
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-   for (hole = first_hole(mm, range_start, range_end, size, mode);
-hole;
-hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+   drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end,
+ size, mode) {
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ac33ba1b18bc..dff6db627807 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct 
drm_mm_node *hole_node)
 1 : 0; \
 pos = list_next_entry(pos, hole_stack))
 
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode);
+
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode);
+
+/**
+ * drm_mm_for_each_suitable_hole - iterator to optimally walk over all
+ * holes that can fit an allocation of the given @size.
+ * @pos: _mm_node used internally to track pr

[Intel-gfx] [CI 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation

2022-02-23 Thread Vivek Kasireddy

The first patch is a drm core patch that replaces the for loop in
drm_mm_insert_node_in_range() with the iterator and would not
cause any functional changes. The second patch is an i915
driver-specific patch that also uses the iterator but solves a
different problem.

v2:
- Added a new patch to this series to fix a potential NULL
  dereference.
- Fixed a typo associated with the iterator introduced in the
  drm core patch.
- Added locking around the snippet in the i915 patch that
  traverses the GGTT hole nodes.

v3: (Tvrtko)
- Replaced mutex_lock with mutex_lock_interruptible_nested() in
  the i915 patch.

v4: (Tvrtko)
- Dropped the patch added in v2 as it was deemed unnecessary.

Cc: Tvrtko Ursulin 
Cc: Nirmoy Das 
Cc: Christian König 

Vivek Kasireddy (2):
  drm/mm: Add an iterator to optimally walk over holes for an allocation
(v4)
  drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

 drivers/gpu/drm/drm_mm.c|  32 
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 include/drm/drm_mm.h|  36 +
 3 files changed, 145 insertions(+), 51 deletions(-)

-- 
2.34.1

[Intel-gfx] [PATCH v3 3/3] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

2022-02-19 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map thereby preventing unnecessary unbind.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24 ms, suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at-least two objects. (Tvrtko)
- Slightly update the commit message.

v5: (Tvrtko)
- Rename the function to indicate that the object may be too big to
  map into the aperture.
- Account for guard pages while calculating the total size required
  for the object.
- Do not subject all objects to the heuristic check and instead
  consider objects only of a certain size.
- Do the hole walk using the rbtree.
- Preserve the existing PIN_NONBLOCK logic.
- Drop the PIN_MAPPABLE check while pinning the VMA.

v6: (Tvrtko)
- Return 0 on success and the specific error code on failure to
  preserve the existing behavior.

v7: (Ville)
- Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and
  size < ggtt->mappable_end / 4 checks.
- Drop the redundant check that is based on previous heuristic.

v8:
- Make sure that we are holding the mutex associated with ggtt vm
  as we traverse the hole nodes.

v9: (Tvrtko)
- Use mutex_lock_interruptible_nested() instead of mutex_lock().

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 1 file changed, 94 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2e10187cd0a0..4bef9eaa8b2e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,6 +49,7 @@
 #include "gem/i915_gem_pm.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
+#include "gem/i915_gem_tiling.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -879,6 +880,96 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
 }
 
+static int
+i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj,
+u64 alignment, u64 flags)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+

[Intel-gfx] [PATCH v3 1/3] drm/mm: Ensure that the entry is not NULL before extracting rb_node

2022-02-19 Thread Vivek Kasireddy

While looking for the next hole suitable for an allocation, make sure
that the DECLARE_NEXT_HOLE_ADDR macro is using a valid node before it
extracts the rb_node from it, although a NULL entry is highly unlikely.

Cc: Tvrtko Ursulin 
Cc: Christian König 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8257f9d4f619..499d8874e4ed 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -389,11 +389,12 @@ first_hole(struct drm_mm *mm,
 #define DECLARE_NEXT_HOLE_ADDR(name, first, last)  \
 static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size)   \
 {  \
-   struct rb_node *parent, *node = &entry->rb_hole_addr;   \
+   struct rb_node *parent, *node;  \
\
-   if (!entry || RB_EMPTY_NODE(node))  \
+   if (!entry || RB_EMPTY_NODE(&entry->rb_hole_addr))  \
return NULL;\
\
+   node = &entry->rb_hole_addr;\
if (usable_hole_addr(node->first, size)) {  \
node = node->first; \
while (usable_hole_addr(node->last, size))  \
-- 
2.34.1

[Intel-gfx] [PATCH v3 2/3] drm/mm: Add an iterator to optimally walk over holes for an allocation (v4)

2022-02-19 Thread Vivek Kasireddy

This iterator relies on drm_mm_first_hole() and drm_mm_next_hole()
functions to identify suitable holes for an allocation of a given
size by efficiently traversing the rbtree associated with the given
allocator.

It replaces the for loop in drm_mm_insert_node_in_range() and can
also be used by drm drivers to quickly identify holes of a certain
size within a given range.

v2: (Tvrtko)
- Prepend a double underscore for the newly exported first/next_hole
- s/each_best_hole/each_suitable_hole/g
- Mask out DRM_MM_INSERT_ONCE from the mode before calling
  first/next_hole and elsewhere.

v3: (Tvrtko)
- Reduce the number of hunks by retaining the "mode" variable name

v4:
- Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos

Reviewed-by: Tvrtko Ursulin 
Acked-by: Christian König 
Suggested-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 32 +++-
 include/drm/drm_mm.h | 36 
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 499d8874e4ed..f5339610361c 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm 
*mm, u64 addr, u64 size)
return node;
 }
 
-static struct drm_mm_node *
-first_hole(struct drm_mm *mm,
-  u64 start, u64 end, u64 size,
-  enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm,
hole_stack);
}
 }
+EXPORT_SYMBOL(__drm_mm_first_hole);
 
 /**
  * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions
@@ -411,11 +412,11 @@ static struct drm_mm_node *name(struct drm_mm_node 
*entry, u64 size)  \
 DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right)
 DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left)
 
-static struct drm_mm_node *
-next_hole(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -433,6 +434,7 @@ next_hole(struct drm_mm *mm,
return &node->hole_stack == &mm->hole_stack ? NULL : node;
}
 }
+EXPORT_SYMBOL(__drm_mm_next_hole);
 
 /**
  * drm_mm_reserve_node - insert an pre-initialized node
@@ -517,11 +519,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
u64 size, u64 alignment,
unsigned long color,
u64 range_start, u64 range_end,
-   enum drm_mm_insert_mode mode)
+   enum drm_mm_insert_mode caller_mode)
 {
struct drm_mm_node *hole;
u64 remainder_mask;
-   bool once;
+   enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE;
 
DRM_MM_BUG_ON(range_start > range_end);
 
@@ -534,13 +536,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (alignment <= 1)
alignment = 0;
 
-   once = mode & DRM_MM_INSERT_ONCE;
-   mode &= ~DRM_MM_INSERT_ONCE;
-
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-   for (hole = first_hole(mm, range_start, range_end, size, mode);
-hole;
-hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+   drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end,
+ size, mode) {
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ac33ba1b18bc..dff6db627807 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct 
drm_mm_node *hole_node)
 1 : 0; \
 pos = list_next_entry(pos, hole_stack))
 
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode);
+
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode);
+
+/**
+ * drm_mm_for_each_suitable_hole - iterator to optimally walk over all
+ * holes that can fit an allocation of the given @size.
+ * @pos: _mm_node used internally to track pr

[Intel-gfx] [PATCH v3 0/3] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation

2022-02-19 Thread Vivek Kasireddy

The first patch fixes a potential NULL dereference in the drm core
hole-walking code. The second patch is a drm core patch that replaces
the for loop in drm_mm_insert_node_in_range() with the iterator and
would not cause any functional changes. The third patch is an i915
driver-specific patch that also uses the iterator but solves a
different problem.

v2:
- Added a new patch to this series to fix a potential NULL
  dereference.
- Fixed a typo associated with the iterator introduced in the
  drm core patch.
- Added locking around the snippet in the i915 patch that
  traverses the GGTT hole nodes.

v3: (Tvrtko)
- Replaced mutex_lock with mutex_lock_interruptible_nested() in
  the i915 patch.

Cc: Tvrtko Ursulin 
Cc: Nirmoy Das 
Cc: Christian König 

Vivek Kasireddy (3):
  drm/mm: Ensure that the entry is not NULL before extracting rb_node
  drm/mm: Add an iterator to optimally walk over holes for an allocation
(v4)
  drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

 drivers/gpu/drm/drm_mm.c|  37 +
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 include/drm/drm_mm.h|  36 +
 3 files changed, 148 insertions(+), 53 deletions(-)

-- 
2.34.1

[Intel-gfx] [PATCH v2 3/3] drm/i915/gem: Don't try to map and fence large scanout buffers (v8)

2022-02-17 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one of them being
mappable/fenceable. Therefore, pageflipping between these 2 FBs, where
only one is mappable/fenceable, creates latencies large enough to miss
alternate vblanks, thereby producing a suboptimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called, which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map, thereby preventing an unnecessary unbind.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at-least two objects. (Tvrtko)
- Slightly update the commit message.

v5: (Tvrtko)
- Rename the function to indicate that the object may be too big to
  map into the aperture.
- Account for guard pages while calculating the total size required
  for the object.
- Do not subject all objects to the heuristic check and instead
  consider objects only of a certain size.
- Do the hole walk using the rbtree.
- Preserve the existing PIN_NONBLOCK logic.
- Drop the PIN_MAPPABLE check while pinning the VMA.

v6: (Tvrtko)
- Return 0 on success and the specific error code on failure to
  preserve the existing behavior.

v7: (Ville)
- Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and
  size < ggtt->mappable_end / 4 checks.
- Drop the redundant check that is based on previous heuristic.

v8:
- Make sure that we are holding the mutex associated with ggtt vm
  as we traverse the hole nodes.

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 124 +++-
 1 file changed, 90 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2e10187cd0a0..db00e71ce328 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,6 +49,7 @@
 #include "gem/i915_gem_pm.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
+#include "gem/i915_gem_tiling.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -879,6 +880,92 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
 }
 
+static int
+i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj,
+u64 alignment, u64 flags)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+   u64 fence_size, fence_alignment;
+   unsigned int count = 0;
+
+   /*
+* If the requi

[Intel-gfx] [PATCH v2 2/3] drm/mm: Add an iterator to optimally walk over holes for an allocation (v4)

2022-02-17 Thread Vivek Kasireddy

This iterator relies on the __drm_mm_first_hole() and
__drm_mm_next_hole() functions to identify suitable holes for an
allocation of a given size by efficiently traversing the rbtree
associated with the given allocator.

It replaces the for loop in drm_mm_insert_node_in_range() and can
also be used by drm drivers to quickly identify holes of a certain
size within a given range.

v2: (Tvrtko)
- Prepend a double underscore for the newly exported first/next_hole
- s/each_best_hole/each_suitable_hole/g
- Mask out DRM_MM_INSERT_ONCE from the mode before calling
  first/next_hole and elsewhere.

v3: (Tvrtko)
- Reduce the number of hunks by retaining the "mode" variable name

v4:
- Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos

Reviewed-by: Tvrtko Ursulin 
Acked-by: Christian König 
Suggested-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 32 +++-
 include/drm/drm_mm.h | 36 
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 499d8874e4ed..f5339610361c 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm 
*mm, u64 addr, u64 size)
return node;
 }
 
-static struct drm_mm_node *
-first_hole(struct drm_mm *mm,
-  u64 start, u64 end, u64 size,
-  enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm,
hole_stack);
}
 }
+EXPORT_SYMBOL(__drm_mm_first_hole);
 
 /**
  * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions
@@ -411,11 +412,11 @@ static struct drm_mm_node *name(struct drm_mm_node 
*entry, u64 size)  \
 DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right)
 DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left)
 
-static struct drm_mm_node *
-next_hole(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -433,6 +434,7 @@ next_hole(struct drm_mm *mm,
return &node->hole_stack == &mm->hole_stack ? NULL : node;
}
 }
+EXPORT_SYMBOL(__drm_mm_next_hole);
 
 /**
  * drm_mm_reserve_node - insert an pre-initialized node
@@ -517,11 +519,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
u64 size, u64 alignment,
unsigned long color,
u64 range_start, u64 range_end,
-   enum drm_mm_insert_mode mode)
+   enum drm_mm_insert_mode caller_mode)
 {
struct drm_mm_node *hole;
u64 remainder_mask;
-   bool once;
+   enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE;
 
DRM_MM_BUG_ON(range_start > range_end);
 
@@ -534,13 +536,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (alignment <= 1)
alignment = 0;
 
-   once = mode & DRM_MM_INSERT_ONCE;
-   mode &= ~DRM_MM_INSERT_ONCE;
-
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-   for (hole = first_hole(mm, range_start, range_end, size, mode);
-hole;
-hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+   drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end,
+ size, mode) {
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ac33ba1b18bc..dff6db627807 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct 
drm_mm_node *hole_node)
 1 : 0; \
 pos = list_next_entry(pos, hole_stack))
 
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode);
+
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode);
+
+/**
+ * drm_mm_for_each_suitable_hole - iterator to optimally walk over all
+ * holes that can fit an allocation of the given @size.
+ * @pos: _mm_node used internally to track pr

[Intel-gfx] [PATCH v2 1/3] drm/mm: Ensure that the entry is not NULL before extracting rb_node

2022-02-17 Thread Vivek Kasireddy

While looking for the next hole suitable for an allocation, make sure
that the DECLARE_NEXT_HOLE_ADDR macro is using a valid (non-NULL) node
before it extracts the rb_node from it, even though a NULL node is
highly unlikely.

Cc: Tvrtko Ursulin 
Cc: Christian König 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8257f9d4f619..499d8874e4ed 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -389,11 +389,12 @@ first_hole(struct drm_mm *mm,
 #define DECLARE_NEXT_HOLE_ADDR(name, first, last)  \
 static struct drm_mm_node *name(struct drm_mm_node *entry, u64 size)   \
 {  \
-   struct rb_node *parent, *node = &entry->rb_hole_addr;   \
+   struct rb_node *parent, *node;  \
\
-   if (!entry || RB_EMPTY_NODE(node))  \
+   if (!entry || RB_EMPTY_NODE(&entry->rb_hole_addr))  \
return NULL;\
\
+   node = &entry->rb_hole_addr;\
if (usable_hole_addr(node->first, size)) {  \
node = node->first; \
while (usable_hole_addr(node->last, size))  \
-- 
2.34.1

[Intel-gfx] [PATCH v2 0/3] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation

2022-02-17 Thread Vivek Kasireddy

The first patch fixes a potential NULL dereference in the drm core
hole-walking code. The second patch is a drm core patch that replaces
the for loop in drm_mm_insert_node_in_range() with the iterator and
would not cause any functional changes. The third patch is an i915
driver-specific patch that also uses the iterator but solves a
different problem.

v2:
- Added a new patch to this series to fix a potential NULL
  dereference.
- Fixed a typo associated with the iterator introduced in the
  drm core patch.
- Added locking around the snippet in the i915 patch that
  traverses the GGTT hole nodes.

Cc: Tvrtko Ursulin 
Cc: Nirmoy Das 
Cc: Christian König 

Vivek Kasireddy (3):
  drm/mm: Ensure that the entry is not NULL before extracting rb_node
  drm/mm: Add an iterator to optimally walk over holes for an allocation
(v4)
  drm/i915/gem: Don't try to map and fence large scanout buffers (v8)

 drivers/gpu/drm/drm_mm.c|  37 +-
 drivers/gpu/drm/i915/i915_gem.c | 124 +++-
 include/drm/drm_mm.h|  36 ++
 3 files changed, 144 insertions(+), 53 deletions(-)

-- 
2.34.1

[Intel-gfx] [PATCH 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v3)

2022-02-15 Thread Vivek Kasireddy

This iterator relies on the __drm_mm_first_hole() and
__drm_mm_next_hole() functions to identify suitable holes for an
allocation of a given size by efficiently traversing the rbtree
associated with the given allocator.

It replaces the for loop in drm_mm_insert_node_in_range() and can
also be used by drm drivers to quickly identify holes of a certain
size within a given range.

v2: (Tvrtko)
- Prepend a double underscore for the newly exported first/next_hole
- s/each_best_hole/each_suitable_hole/g
- Mask out DRM_MM_INSERT_ONCE from the mode before calling
  first/next_hole and elsewhere.

v3: (Tvrtko)
- Reduce the number of hunks by retaining the "mode" variable name

Cc: Christian König 
Reviewed-by: Tvrtko Ursulin 
Suggested-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 32 +++-
 include/drm/drm_mm.h | 36 
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8257f9d4f619..8efea548ae9f 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm 
*mm, u64 addr, u64 size)
return node;
 }
 
-static struct drm_mm_node *
-first_hole(struct drm_mm *mm,
-  u64 start, u64 end, u64 size,
-  enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm,
hole_stack);
}
 }
+EXPORT_SYMBOL(__drm_mm_first_hole);
 
 /**
  * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions
@@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node 
*entry, u64 size)  \
 DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right)
 DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left)
 
-static struct drm_mm_node *
-next_hole(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm,
return &node->hole_stack == &mm->hole_stack ? NULL : node;
}
 }
+EXPORT_SYMBOL(__drm_mm_next_hole);
 
 /**
  * drm_mm_reserve_node - insert an pre-initialized node
@@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
u64 size, u64 alignment,
unsigned long color,
u64 range_start, u64 range_end,
-   enum drm_mm_insert_mode mode)
+   enum drm_mm_insert_mode caller_mode)
 {
struct drm_mm_node *hole;
u64 remainder_mask;
-   bool once;
+   enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE;
 
DRM_MM_BUG_ON(range_start > range_end);
 
@@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (alignment <= 1)
alignment = 0;
 
-   once = mode & DRM_MM_INSERT_ONCE;
-   mode &= ~DRM_MM_INSERT_ONCE;
-
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-   for (hole = first_hole(mm, range_start, range_end, size, mode);
-hole;
-hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+   drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end,
+ size, mode) {
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ac33ba1b18bc..777f659f9692 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct 
drm_mm_node *hole_node)
 1 : 0; \
 pos = list_next_entry(pos, hole_stack))
 
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode);
+
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode);
+
+/**
+ * drm_mm_for_each_suitable_hole - iterator to optimally walk over all
+ * holes that can fit an allocation of the given @size.
+ * @pos: &drm_mm_node used internally to track progress
+ * @mm: &drm_mm allocator to walk
+ * @range_start: start of

[Intel-gfx] [PATCH 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v7)

2022-02-14 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one of them being
mappable/fenceable. Therefore, pageflipping between these 2 FBs, where
only one is mappable/fenceable, creates latencies large enough to miss
alternate vblanks, thereby producing a suboptimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called, which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map, thereby preventing an unnecessary unbind.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at-least two objects. (Tvrtko)
- Slightly update the commit message.

v5: (Tvrtko)
- Rename the function to indicate that the object may be too big to
  map into the aperture.
- Account for guard pages while calculating the total size required
  for the object.
- Do not subject all objects to the heuristic check and instead
  consider objects only of a certain size.
- Do the hole walk using the rbtree.
- Preserve the existing PIN_NONBLOCK logic.
- Drop the PIN_MAPPABLE check while pinning the VMA.

v6: (Tvrtko)
- Return 0 on success and the specific error code on failure to
  preserve the existing behavior.

v7: (Ville)
- Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and
  size < ggtt->mappable_end / 4 checks.
- Drop the redundant check that is based on previous heuristic.

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 120 +++-
 1 file changed, 86 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2e10187cd0a0..260cd3961ca1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,6 +49,7 @@
 #include "gem/i915_gem_pm.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
+#include "gem/i915_gem_tiling.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -879,6 +880,88 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
 }
 
+static int
+i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj,
+u64 alignment, u64 flags)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+   u64 fence_size, fence_alignment;
+   unsigned int count = 0;
+
+   /*
+* If the required space is larger than the available
+* aperture, we will not able to find a slot for the
+

[Intel-gfx] [PATCH 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v3)

2022-02-14 Thread Vivek Kasireddy

This iterator relies on the __drm_mm_first_hole() and
__drm_mm_next_hole() functions to identify suitable holes for an
allocation of a given size by efficiently traversing the rbtree
associated with the given allocator.

It replaces the for loop in drm_mm_insert_node_in_range() and can
also be used by drm drivers to quickly identify holes of a certain
size within a given range.

v2: (Tvrtko)
- Prepend a double underscore for the newly exported first/next_hole
- s/each_best_hole/each_suitable_hole/g
- Mask out DRM_MM_INSERT_ONCE from the mode before calling
  first/next_hole and elsewhere.

v3: (Tvrtko)
- Reduce the number of hunks by retaining the "mode" variable name

Reviewed-by: Tvrtko Ursulin 
Suggested-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 32 +++-
 include/drm/drm_mm.h | 36 
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8257f9d4f619..8efea548ae9f 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm 
*mm, u64 addr, u64 size)
return node;
 }
 
-static struct drm_mm_node *
-first_hole(struct drm_mm *mm,
-  u64 start, u64 end, u64 size,
-  enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm,
hole_stack);
}
 }
+EXPORT_SYMBOL(__drm_mm_first_hole);
 
 /**
  * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions
@@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node 
*entry, u64 size)  \
 DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right)
 DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left)
 
-static struct drm_mm_node *
-next_hole(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm,
return &node->hole_stack == &mm->hole_stack ? NULL : node;
}
 }
+EXPORT_SYMBOL(__drm_mm_next_hole);
 
 /**
  * drm_mm_reserve_node - insert an pre-initialized node
@@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
u64 size, u64 alignment,
unsigned long color,
u64 range_start, u64 range_end,
-   enum drm_mm_insert_mode mode)
+   enum drm_mm_insert_mode caller_mode)
 {
struct drm_mm_node *hole;
u64 remainder_mask;
-   bool once;
+   enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE;
 
DRM_MM_BUG_ON(range_start > range_end);
 
@@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (alignment <= 1)
alignment = 0;
 
-   once = mode & DRM_MM_INSERT_ONCE;
-   mode &= ~DRM_MM_INSERT_ONCE;
-
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-   for (hole = first_hole(mm, range_start, range_end, size, mode);
-hole;
-hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+   drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end,
+ size, mode) {
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ac33ba1b18bc..777f659f9692 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct 
drm_mm_node *hole_node)
 1 : 0; \
 pos = list_next_entry(pos, hole_stack))
 
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode);
+
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode);
+
+/**
+ * drm_mm_for_each_suitable_hole - iterator to optimally walk over all
+ * holes that can fit an allocation of the given @size.
+ * @pos: &drm_mm_node used internally to track progress
+ * @mm: &drm_mm allocator to walk
+ * @range_start: start of the allowed range for the allocati

[Intel-gfx] [PATCH 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation

2022-02-14 Thread Vivek Kasireddy

The first patch is a drm core patch that replaces the for loop in
drm_mm_insert_node_in_range() with the iterator and would not
cause any functional changes. The second patch is an i915
driver-specific patch that also uses the iterator but solves a
different problem.

Cc: Tvrtko Ursulin 
Cc: Nirmoy Das 
Cc: Christian König 

Vivek Kasireddy (2):
  drm/mm: Add an iterator to optimally walk over holes for an allocation
(v3)
  drm/i915/gem: Don't try to map and fence large scanout buffers (v7)

 drivers/gpu/drm/drm_mm.c|  32 -
 drivers/gpu/drm/i915/i915_gem.c | 120 +++-
 include/drm/drm_mm.h|  36 ++
 3 files changed, 137 insertions(+), 51 deletions(-)

-- 
2.34.1

[Intel-gfx] [PATCH 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v6)

2022-02-03 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one of them being
mappable/fenceable. Therefore, pageflipping between these 2 FBs, where
only one is mappable/fenceable, creates latencies large enough to miss
alternate vblanks, thereby producing a suboptimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called, which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map, thereby preventing an unnecessary unbind.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at-least two objects. (Tvrtko)
- Slightly update the commit message.

v5: (Tvrtko)
- Rename the function to indicate that the object may be too big to
  map into the aperture.
- Account for guard pages while calculating the total size required
  for the object.
- Do not subject all objects to the heuristic check and instead
  consider objects only of a certain size.
- Do the hole walk using the rbtree.
- Preserve the existing PIN_NONBLOCK logic.
- Drop the PIN_MAPPABLE check while pinning the VMA.

v6: (Tvrtko)
- Return 0 on success and the specific error code on failure to
  preserve the existing behavior.

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 120 
 1 file changed, 90 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e3a2c2a0e156..39f0d17550c3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,6 +46,7 @@
 #include "gem/i915_gem_mman.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
+#include "gem/i915_gem_tiling.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -876,6 +877,92 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
 }
 
+static int
+i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj,
+u64 alignment, u64 flags)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+   u64 fence_size, fence_alignment;
+   unsigned int count = 0;
+
+   /*
+* If the required space is larger than the available
+* aperture, we will not able to find a slot for the
+* object and unbinding the object now will be in
+* vain. Worse, doing so may cause us to ping-pong
+* the object in and out of the Global GTT and
+* waste a lot of c

[Intel-gfx] [PATCH 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v2)

2022-02-03 Thread Vivek Kasireddy

This iterator relies on drm_mm_first_hole() and drm_mm_next_hole()
functions to identify suitable holes for an allocation of a given
size by efficiently traversing the rbtree associated with the given
allocator.

It replaces the for loop in drm_mm_insert_node_in_range() and can
also be used by drm drivers to quickly identify holes of a certain
size within a given range.

v2: (Tvrtko)
- Prepend a double underscore for the newly exported first/next_hole
- s/each_best_hole/each_suitable_hole/g
- Mask out DRM_MM_INSERT_ONCE from the mode before calling
  first/next_hole and elsewhere.

Suggested-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 38 ++
 include/drm/drm_mm.h | 36 
 2 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8257f9d4f619..b6da1dffcfcb 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm 
*mm, u64 addr, u64 size)
return node;
 }
 
-static struct drm_mm_node *
-first_hole(struct drm_mm *mm,
-  u64 start, u64 end, u64 size,
-  enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm,
hole_stack);
}
 }
+EXPORT_SYMBOL(__drm_mm_first_hole);
 
 /**
  * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions
@@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node 
*entry, u64 size)  \
 DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right)
 DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left)
 
-static struct drm_mm_node *
-next_hole(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm,
return &node->hole_stack == &mm->hole_stack ? NULL : node;
}
 }
+EXPORT_SYMBOL(__drm_mm_next_hole);
 
 /**
  * drm_mm_reserve_node - insert an pre-initialized node
@@ -520,7 +522,6 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 {
struct drm_mm_node *hole;
u64 remainder_mask;
-   bool once;
 
DRM_MM_BUG_ON(range_start > range_end);
 
@@ -533,22 +534,19 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (alignment <= 1)
alignment = 0;
 
-   once = mode & DRM_MM_INSERT_ONCE;
-   mode &= ~DRM_MM_INSERT_ONCE;
-
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-   for (hole = first_hole(mm, range_start, range_end, size, mode);
-hole;
-hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+   drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end,
+ size, mode) {
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
u64 col_start, col_end;
+   enum drm_mm_insert_mode placement = mode & ~DRM_MM_INSERT_ONCE;
 
-   if (mode == DRM_MM_INSERT_LOW && hole_start >= range_end)
+   if (placement == DRM_MM_INSERT_LOW && hole_start >= range_end)
break;
 
-   if (mode == DRM_MM_INSERT_HIGH && hole_end <= range_start)
+   if (placement == DRM_MM_INSERT_HIGH && hole_end <= range_start)
break;
 
col_start = hole_start;
@@ -562,7 +560,7 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (adj_end <= adj_start || adj_end - adj_start < size)
continue;
 
-   if (mode == DRM_MM_INSERT_HIGH)
+   if (placement == DRM_MM_INSERT_HIGH)
adj_start = adj_end - size;
 
if (alignment) {
@@ -574,7 +572,7 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
div64_u64_rem(adj_start, alignment, &rem);
if (rem) {
adj_start -= rem;
-   if (mode != DRM_MM_INSERT_HIGH)
+   if (placement != DRM_MM_INSERT_HIGH)
adj_start += alignment;
 
if (adj_start < max(col

[Intel-gfx] [PATCH 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v5)

2022-02-01 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at-least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map thereby preventing unnecessary unbind.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with a 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24. suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at-least two objects. (Tvrtko)
- Slightly update the commit message.

v5: (Tvrtko)
- Rename the function to indicate that the object may be too big to
  map into the aperture.
- Account for guard pages while calculating the total size required
  for the object.
- Do not subject all objects to the heuristic check and instead
  consider objects only of a certain size.
- Do the hole walk using the rbtree.
- Preserve the existing PIN_NONBLOCK logic.
- Drop the PIN_MAPPABLE check while pinning the VMA.

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 117 
 1 file changed, 88 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e3a2c2a0e156..752fec2b4c60 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,6 +46,7 @@
 #include "gem/i915_gem_mman.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
+#include "gem/i915_gem_tiling.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -876,6 +877,92 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
 }
 
+static bool
+i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj,
+u64 alignment, u64 flags)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+   u64 fence_size, fence_alignment;
+   unsigned int count = 0;
+
+   /*
+* If the required space is larger than the available
+* aperture, we will not able to find a slot for the
+* object and unbinding the object now will be in
+* vain. Worse, doing so may cause us to ping-pong
+* the object in and out of the Global GTT and
+* waste a lot of cycles under the mutex.
+*/
+   if (obj->base.size > ggtt->mappable_end)
+   return true;
+
+

[Intel-gfx] [PATCH 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation

2022-02-01 Thread Vivek Kasireddy

This iterator relies on drm_mm_first_hole() and drm_mm_next_hole()
functions to identify suitable holes for an allocation of a given
size by efficiently traversing the rbtree associated with the given
allocator.

It replaces the for loop in drm_mm_insert_node_in_range() and can
also be used by drm drivers to quickly identify holes of a certain
size within a given range.

Suggested-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 28 
 include/drm/drm_mm.h | 32 
 2 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8257f9d4f619..416c849c10e5 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm 
*mm, u64 addr, u64 size)
return node;
 }
 
-static struct drm_mm_node *
-first_hole(struct drm_mm *mm,
-  u64 start, u64 end, u64 size,
-  enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+drm_mm_first_hole(struct drm_mm *mm,
+ u64 start, u64 end, u64 size,
+ enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm,
hole_stack);
}
 }
+EXPORT_SYMBOL(drm_mm_first_hole);
 
 /**
  * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions
@@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node 
*entry, u64 size)  \
 DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right)
 DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left)
 
-static struct drm_mm_node *
-next_hole(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+drm_mm_next_hole(struct drm_mm *mm,
+struct drm_mm_node *node,
+u64 size,
+enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm,
return &node->hole_stack == &mm->hole_stack ? NULL : node;
}
 }
+EXPORT_SYMBOL(drm_mm_next_hole);
 
 /**
  * drm_mm_reserve_node - insert an pre-initialized node
@@ -520,7 +522,6 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 {
struct drm_mm_node *hole;
u64 remainder_mask;
-   bool once;
 
DRM_MM_BUG_ON(range_start > range_end);
 
@@ -533,13 +534,8 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (alignment <= 1)
alignment = 0;
 
-   once = mode & DRM_MM_INSERT_ONCE;
-   mode &= ~DRM_MM_INSERT_ONCE;
-
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-   for (hole = first_hole(mm, range_start, range_end, size, mode);
-hole;
-hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+   drm_mm_for_each_best_hole(hole, mm, range_start, range_end, size, mode) 
{
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ac33ba1b18bc..5055447697fa 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -322,6 +322,17 @@ static inline u64 __drm_mm_hole_node_end(const struct 
drm_mm_node *hole_node)
return list_next_entry(hole_node, node_list)->start;
 }
 
+struct drm_mm_node *
+drm_mm_first_hole(struct drm_mm *mm,
+ u64 start, u64 end, u64 size,
+ enum drm_mm_insert_mode mode);
+
+struct drm_mm_node *
+drm_mm_next_hole(struct drm_mm *mm,
+struct drm_mm_node *node,
+u64 size,
+enum drm_mm_insert_mode mode);
+
 /**
  * drm_mm_hole_node_end - computes the end of the hole following @node
  * @hole_node: drm_mm_node which implicitly tracks the following hole
@@ -400,6 +411,27 @@ static inline u64 drm_mm_hole_node_end(const struct 
drm_mm_node *hole_node)
 1 : 0; \
 pos = list_next_entry(pos, hole_stack))
 
+/**
+ * drm_mm_for_each_best_hole - iterator to optimally walk over all holes >= 
@size
+ * @pos: &drm_mm_node used internally to track progress
+ * @mm: &drm_mm allocator to walk
+ * @range_start: start of the allowed range for the allocation
+ * @range_end: end of the allowed range for the allocation
+ * @size: size of the allocation
+ * @mode: fine-tune the allocation search
+ *
+ * This iterator walks over all holes suitable for the allocation of given
+ * @size in a very efficient manner. It is implemented by calling
+ * drm_mm_first_hole() and drm_mm_next_hole() which identify the
+ * appropriate holes within the given range by efficiently traversing the
+ * rbtree associated with @mm.
+ */
+#define drm_mm_for_each_best_hole(pos, mm, range

[Intel-gfx] [PATCH v4 RESEND] drm/i915/gem: Don't try to map and fence large scanout buffers (v4)

2022-01-19 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at-least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map thereby preventing unnecessary unbind.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with a 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24. suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at-least two objects. (Tvrtko)
- Slightly update the commit message.

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 88 ++---
 drivers/gpu/drm/i915/i915_vma.c |  2 +-
 2 files changed, 60 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e3a2c2a0e156..95ec972f8c8a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,6 +46,7 @@
 #include "gem/i915_gem_mman.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
+#include "gem/i915_gem_tiling.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -876,6 +877,63 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
 }
 
+static bool i915_gem_obj_too_big(struct drm_i915_gem_object *obj,
+u64 alignment)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+   u64 fence_size, fence_alignment;
+   unsigned int count = 0;
+
+   /*
+* If the required space is larger than the available
+* aperture, we will not able to find a slot for the
+* object and unbinding the object now will be in
+* vain. Worse, doing so may cause us to ping-pong
+* the object in and out of the Global GTT and
+* waste a lot of cycles under the mutex.
+*/
+   if (obj->base.size > ggtt->mappable_end)
+   return true;
+
+   if (HAS_GMCH(i915) || DISPLAY_VER(i915) < 11 ||
+   !i915_gem_object_is_framebuffer(obj))
+   return false;
+
+   fence_size = i915_gem_fence_size(i915, obj->base.size,
+i915_gem_object_get_tiling(obj),
+i915_gem_object_get_stride(obj));
+   fence_alignment = i915_gem_fence_alignment(i915, o

[Intel-gfx] [PATCH] drm/i915/gem: Don't try to map and fence large scanout buffers (v4)

2021-11-02 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at-least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map thereby preventing unnecessary unbind.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with a 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24. suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at-least two objects. (Tvrtko)
- Slightly update the commit message.

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 87 ++---
 drivers/gpu/drm/i915/i915_vma.c |  2 +-
 2 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d0e642c82064..287508c37a9a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -866,6 +866,63 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
 }
 
+static bool i915_gem_obj_too_big(struct drm_i915_gem_object *obj,
+u64 alignment)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = &i915->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+   u64 fence_size, fence_alignment;
+   unsigned int count = 0;
+
+   /*
+* If the required space is larger than the available
+* aperture, we will not able to find a slot for the
+* object and unbinding the object now will be in
+* vain. Worse, doing so may cause us to ping-pong
+* the object in and out of the Global GTT and
+* waste a lot of cycles under the mutex.
+*/
+   if (obj->base.size > ggtt->mappable_end)
+   return true;
+
+   if (HAS_GMCH(i915) || DISPLAY_VER(i915) < 11 ||
+   !i915_gem_object_is_framebuffer(obj))
+   return false;
+
+   fence_size = i915_gem_fence_size(i915, obj->base.size,
+i915_gem_object_get_tiling(obj),
+i915_gem_object_get_stride(obj));
+   fence_alignment = i915_gem_fence_alignment(i915, obj->base.size,
+  
i915_gem_object_get_tiling(obj),
+  
i915_gem_object_get_stride(obj));
+   alignment = max_t(u64, alignment, fence_alignment);
+
+   /*
+* Assuming this object is a large scanout buffer,

[Intel-gfx] [PATCH] drm/i915/gem: Don't try to map and fence large scanout buffers (v3)

2021-10-29 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without the mappable flag. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with a 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24. suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 12 platforms.

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 65 -
 drivers/gpu/drm/i915/i915_vma.c |  2 +-
 2 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 981e383d1a5d..761dc385fbfc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -866,6 +866,61 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
 }
 
+static bool i915_gem_obj_too_big(struct drm_i915_gem_object *obj,
+u64 alignment)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = &i915->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+   u64 fence_size, fence_alignment;
+   unsigned int count = 0;
+
+   /*
+* If the required space is larger than the available
+* aperture, we will not able to find a slot for the
+* object and unbinding the object now will be in
+* vain. Worse, doing so may cause us to ping-pong
+* the object in and out of the Global GTT and
+* waste a lot of cycles under the mutex.
+*/
+   if (obj->base.size > ggtt->mappable_end)
+   return true;
+
+   if (HAS_GMCH(i915) || DISPLAY_VER(i915) < 11 ||
+   !i915_gem_object_is_framebuffer(obj))
+   return false;
+
+   fence_size = i915_gem_fence_size(i915, obj->base.size,
+i915_gem_object_get_tiling(obj),
+i915_gem_object_get_stride(obj));
+   fence_alignment = i915_gem_fence_alignment(i915, obj->base.size,
+i915_gem_object_get_tiling(obj),
+i915_gem_object_get_stride(obj));
+   alignment = max_t(u64, alignment, fence_alignment);
+
+   /*
+* Assuming this object is a large scanout buffer, we try to find
+* out if there is room to map at-least two of them. There could
+* be space available to map one but to be consistent, we try to
+* avoid mapping/fencing any of them.
+*/
+   drm_mm_for_each_hole(hole, &ggtt->vm.mm, hole_start, hole_end) {
+   do {
+   start = round_up(hole_start, alignment);
+   end = min_t(u64, hole_end, ggtt->mappable_end);
+
+   i

[Intel-gfx] [PATCH] drm/i915/gem: Don't try to map and fence large scanout buffers

2021-10-28 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without the mappable flag. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with a 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24. suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 48 ++---
 drivers/gpu/drm/i915/i915_vma.c |  2 +-
 2 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 981e383d1a5d..0050c7e4bb51 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -866,6 +866,44 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
 }
 
+static bool i915_gem_obj_too_big(struct drm_i915_gem_object *obj)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = &i915->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end;
+   u64 fence_size;
+
+   /*
+* If the required space is larger than the available
+* aperture, we will not able to find a slot for the
+* object and unbinding the object now will be in
+* vain. Worse, doing so may cause us to ping-pong
+* the object in and out of the Global GTT and
+* waste a lot of cycles under the mutex.
+*/
+   if (obj->base.size > ggtt->mappable_end)
+   return true;
+
+   fence_size = i915_gem_fence_size(i915, obj->base.size,
+i915_gem_object_get_tiling(obj),
+i915_gem_object_get_stride(obj));
+
+   /*
+* Assuming this object is a large scanout buffer, we try to find
+* out if there is room to map at-least two of them. There could
+* be space available to map one but to be consistent, we try to
+* avoid mapping/fencing any of them.
+*/
+   drm_mm_for_each_hole(hole, &ggtt->vm.mm, hole_start, hole_end) {
+   if (hole_end - hole_start > 2 * fence_size &&
+   hole_start + 2 * fence_size < ggtt->mappable_end)
+   return false;
+   }
+
+   return true;
+}
+
 struct i915_vma *
 i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
struct i915_gem_ww_ctx *ww,
@@ -879,15 +917,7 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object 
*obj,
 
if (flags & PIN_MAPPABLE &&
(!view || view->type == I915_GGTT_VIEW_NORMAL)) {
-   /*
-* If the required space is larger than the available
-* aperture, we will not able to find a slot for the
-* object and unbinding the object now will be in
-* vain. Worse, doing so may cause us to ping-pong
-* the object in and out of the Global GTT and
-* waste a lot of cycles unde

[Intel-gfx] [PATCH] drm/i915/gem: Don't try to map and fence 8K/bigjoiner scanout buffers

2021-10-26 Thread Vivek Kasireddy

On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without the mappable flag. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with an 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24 ms, suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_fb_pin.c  | 11 +--
 drivers/gpu/drm/i915/display/intel_overlay.c | 11 ---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c   |  6 --
 drivers/gpu/drm/i915/gem/i915_gem_object.h   |  3 ++-
 4 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c 
b/drivers/gpu/drm/i915/display/intel_fb_pin.c
index 3f77f3013584..53c156d9a9f9 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
@@ -144,7 +144,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 
if (!ret) {
vma = i915_gem_object_pin_to_display_plane(obj, , alignment,
-  view, pinctl);
+  view, pinctl, 
uses_fence);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err_unpin;
@@ -218,9 +218,16 @@ int intel_plane_pin_fb(struct intel_plane_state 
*plane_state)
INTEL_INFO(dev_priv)->display.cursor_needs_physical;
 
if (!intel_fb_uses_dpt(fb)) {
+   struct intel_crtc *crtc = to_intel_crtc(plane_state->hw.crtc);
+   struct intel_crtc_state *crtc_state =
+   to_intel_crtc_state(crtc->base.state);
+   bool uses_fence = intel_plane_uses_fence(plane_state);
+   bool is_bigjoiner = crtc_state->bigjoiner ||
+   crtc_state->bigjoiner_slave;
+
vma = intel_pin_and_fence_fb_obj(fb, phys_cursor,
 _state->view.gtt,
-
intel_plane_uses_fence(plane_state),
+uses_fence && !is_bigjoiner,
 _state->flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c 
b/drivers/gpu/drm/i915/display/intel_overlay.c
index 7e3f5c6ca484..e9563b40b911 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -755,10 +755,14 @@ static u32 overlay_cmd_reg(struct 
drm_intel_overlay_put_image *params)
return cmd;
 }
 
-static struct i915_vma *intel_overlay_pin_fb(struct drm_i915_gem_object 
*new_bo)
+static struct i915_vma *intel_overlay_pin_fb(struct drm_i915_gem_object 
*new_bo,
+struct intel_overlay *overlay)
 {
struct i915_gem_ww_ctx ww;
struct i915_vma *vma;
+   const struct intel_plane_state *plane_state =
+   to_intel_plane_state(overlay->crtc->base.primary->state);
+   bool uses_fence = intel_plane_us

[Intel-gfx] [PATCH] drm/i915/dsi: Dont forget to clean up the connector on error (v2)

2020-05-22 Thread Vivek Kasireddy

If an error is encountered during the DSI initialization setup, the
drm connector object also needs to be cleaned up along with the encoder.
The error can happen due to a missing mode in the VBT or for other
reasons.

v2: Rephrase the commit message to make it more clear.

Cc: Jani Nikula 
Cc: Vandita Kulkarni 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/icl_dsi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c 
b/drivers/gpu/drm/i915/display/icl_dsi.c
index 4fec5bd64920..f93f72463df5 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1954,6 +1954,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv)
return;
 
 err:
+   drm_connector_cleanup(connector);
drm_encoder_cleanup(&encoder->base);
kfree(intel_dsi);
kfree(intel_connector);
-- 
2.21.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/dsi: Dont forget to clean up the connector on error

2020-05-06 Thread Vivek Kasireddy

During the DSI initialization setup, after instantiating the relevant
drm connector and encoder objects, the connector also needs to be
cleaned up along with the encoder if an error is encountered. The error
can happen due to a missing mode in the VBT or for other reasons.

Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/icl_dsi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c 
b/drivers/gpu/drm/i915/display/icl_dsi.c
index 4fec5bd64920..f93f72463df5 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1954,6 +1954,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv)
return;
 
 err:
+   drm_connector_cleanup(connector);
drm_encoder_cleanup(&encoder->base);
kfree(intel_dsi);
kfree(intel_connector);
-- 
2.21.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/hotplug: Use phy to get the hpd_pin instead of the port (v5)

2020-03-04 Thread Vivek Kasireddy

On some platforms such as Elkhart Lake, although we may use DDI D
to drive a connector, we have to use PHY A (Combo Phy PORT A) to
detect the hotplug interrupts as per the spec because there is no
one-to-one mapping between DDIs and PHYs. Therefore, use the
function intel_port_to_phy() which contains the logic for such
mapping(s) to find the correct hpd_pin.

This change should not affect other platforms as there is always
a one-to-one mapping between DDIs and PHYs.

v2:
- Convert the case statements to use PHYs instead of PORTs (Jani)

v3:
- Refactor the function to reduce the number of return statements by
  lumping all the case statements together except PHY_F which needs
  special handling (Jose)

v4:
- Add a comment describing how the HPD pin value associated with any
  port can be retrieved using port or phy enum value. (Jani)

v5:
- Use case ranges instead of individual labels and also normalize the
  return statement by adding -PHY_A to the expression (Ville)

Cc: Jani Nikula 
Cc: Matt Roper 
Cc: José Roberto de Souza 
Cc: Ville Syrjala 
Signed-off-by: Vivek Kasireddy 
Reviewed-by: José Roberto de Souza 
---
 drivers/gpu/drm/i915/display/intel_hotplug.c | 31 ++--
 drivers/gpu/drm/i915/i915_drv.h  |  7 +
 2 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c 
b/drivers/gpu/drm/i915/display/intel_hotplug.c
index 4a6208857488..562227d54ccc 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -87,29 +87,16 @@
 enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv,
   enum port port)
 {
-   switch (port) {
-   case PORT_A:
-   return HPD_PORT_A;
-   case PORT_B:
-   return HPD_PORT_B;
-   case PORT_C:
-   return HPD_PORT_C;
-   case PORT_D:
-   return HPD_PORT_D;
-   case PORT_E:
-   return HPD_PORT_E;
-   case PORT_F:
-   if (IS_CNL_WITH_PORT_F(dev_priv))
-   return HPD_PORT_E;
-   return HPD_PORT_F;
-   case PORT_G:
-   return HPD_PORT_G;
-   case PORT_H:
-   return HPD_PORT_H;
-   case PORT_I:
-   return HPD_PORT_I;
+   enum phy phy = intel_port_to_phy(dev_priv, port);
+
+   switch (phy) {
+   case PHY_F:
+   return IS_CNL_WITH_PORT_F(dev_priv) ? HPD_PORT_E : HPD_PORT_F;
+   case PHY_A ... PHY_E:
+   case PHY_G ... PHY_I:
+   return HPD_PORT_A + phy - PHY_A;
default:
-   MISSING_CASE(port);
+   MISSING_CASE(phy);
return HPD_NONE;
}
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 123d0fadfafc..21e4c0852e23 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -114,6 +114,13 @@
 
 struct drm_i915_gem_object;
 
+/*
+ * The code assumes that the hpd_pins below have consecutive values and
+ * starting with HPD_PORT_A, the HPD pin associated with any port can be
+ * retrieved by adding the corresponding port (or phy) enum value to
+ * HPD_PORT_A in most cases. For example:
+ * HPD_PORT_C = HPD_PORT_A + PHY_C - PHY_A
+ */
 enum hpd_pin {
HPD_NONE = 0,
HPD_TV = HPD_NONE, /* TV is known to be unreliable */
-- 
2.21.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/hotplug: Use phy to get the hpd_pin instead of the port (v4)

2020-02-28 Thread Vivek Kasireddy

On some platforms such as Elkhart Lake, although we may use DDI D
to drive a connector, we have to use PHY A (Combo Phy PORT A) to
detect the hotplug interrupts as per the spec because there is no
one-to-one mapping between DDIs and PHYs. Therefore, use the
function intel_port_to_phy() which contains the logic for such
mapping(s) to find the correct hpd_pin.

This change should not affect other platforms as there is always
a one-to-one mapping between DDIs and PHYs.

v2:
- Convert the case statements to use PHYs instead of PORTs (Jani)

v3:
- Refactor the function to reduce the number of return statements by
  lumping all the case statements together except PHY_F which needs
  special handling (Jose)

v4:
- Add a comment describing how the HPD pin value associated with any
  port can be retrieved using port or phy enum value. (Jani)

Cc: Jani Nikula 
Cc: Matt Roper 
Cc: José Roberto de Souza 
Signed-off-by: Vivek Kasireddy 
Reviewed-by: José Roberto de Souza 
---
 drivers/gpu/drm/i915/display/intel_hotplug.c | 37 
 drivers/gpu/drm/i915/i915_drv.h  |  6 
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c 
b/drivers/gpu/drm/i915/display/intel_hotplug.c
index 4a6208857488..e1ddccc2ce97 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -87,29 +87,22 @@
 enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv,
   enum port port)
 {
-   switch (port) {
-   case PORT_A:
-   return HPD_PORT_A;
-   case PORT_B:
-   return HPD_PORT_B;
-   case PORT_C:
-   return HPD_PORT_C;
-   case PORT_D:
-   return HPD_PORT_D;
-   case PORT_E:
-   return HPD_PORT_E;
-   case PORT_F:
-   if (IS_CNL_WITH_PORT_F(dev_priv))
-   return HPD_PORT_E;
-   return HPD_PORT_F;
-   case PORT_G:
-   return HPD_PORT_G;
-   case PORT_H:
-   return HPD_PORT_H;
-   case PORT_I:
-   return HPD_PORT_I;
+   enum phy phy = intel_port_to_phy(dev_priv, port);
+
+   switch (phy) {
+   case PHY_F:
+   return IS_CNL_WITH_PORT_F(dev_priv) ? HPD_PORT_E : HPD_PORT_F;
+   case PHY_A:
+   case PHY_B:
+   case PHY_C:
+   case PHY_D:
+   case PHY_E:
+   case PHY_G:
+   case PHY_H:
+   case PHY_I:
+   return HPD_PORT_A + phy;
default:
-   MISSING_CASE(port);
+   MISSING_CASE(phy);
return HPD_NONE;
}
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b621df933212..c9d7b9127b6e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -117,6 +117,12 @@
 
 struct drm_i915_gem_object;
 
+/*
+ * The code assumes that the hpd_pins below have consecutive values and
+ * starting with HPD_PORT_A, the HPD pin associated with any port can be
+ * retrieved by adding the corresponding port (or phy) enum value to
+ * HPD_PORT_A. For example, HPD_PORT_C = HPD_PORT_A + PORT_C/PHY_C.
+ */
 enum hpd_pin {
HPD_NONE = 0,
HPD_TV = HPD_NONE, /* TV is known to be unreliable */
-- 
2.21.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915/hotplug: Use phy to get the hpd_pin instead of the port (v3)

2020-02-04 Thread Vivek Kasireddy

On Fri, 31 Jan 2020 11:35:35 +0200
Jani Nikula  wrote:
Hi Jani,

> On Thu, 30 Jan 2020, Vivek Kasireddy 
> wrote:
> > On some platforms such as Elkhart Lake, although we may use DDI D
> > to drive a connector, we have to use PHY A (Combo Phy PORT A) to
> > detect the hotplug interrupts as per the spec because there is no
> > one-to-one mapping between DDIs and PHYs. Therefore, use the
> > function intel_port_to_phy() which contains the logic for such
> > mapping(s) to find the correct hpd_pin.
> >
> > This change should not affect other platforms as there is always
> > a one-to-one mapping between DDIs and PHYs.
> >
> > v2:
> > - Convert the case statements to use PHYs instead of PORTs (Jani)
> >
> > v3:
> > - Refactor the function to reduce the number of return statements by
> >   lumping all the case statements together except PHY_F which needs
> >   special handling (Jose)
> >
> > Cc: Jani Nikula 
> > Cc: Matt Roper 
> > Cc: José Roberto de Souza 
> > Signed-off-by: Vivek Kasireddy 
> > ---
> >  drivers/gpu/drm/i915/display/intel_hotplug.c | 37
> >  1 file changed, 15 insertions(+), 22
> > deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c
> > b/drivers/gpu/drm/i915/display/intel_hotplug.c index
> > 042d98bae1ea..27e3033278a0 100644 ---
> > a/drivers/gpu/drm/i915/display/intel_hotplug.c +++
> > b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -89,29 +89,22 @@
> >  enum hpd_pin intel_hpd_pin_default(struct drm_i915_private
> > *dev_priv, enum port port)
> >  {
> > -   switch (port) {
> > -   case PORT_A:
> > -   return HPD_PORT_A;
> > -   case PORT_B:
> > -   return HPD_PORT_B;
> > -   case PORT_C:
> > -   return HPD_PORT_C;
> > -   case PORT_D:
> > -   return HPD_PORT_D;
> > -   case PORT_E:
> > -   return HPD_PORT_E;
> > -   case PORT_F:
> > -   if (IS_CNL_WITH_PORT_F(dev_priv))
> > -   return HPD_PORT_E;
> > -   return HPD_PORT_F;
> > -   case PORT_G:
> > -   return HPD_PORT_G;
> > -   case PORT_H:
> > -   return HPD_PORT_H;
> > -   case PORT_I:
> > -   return HPD_PORT_I;
> > +   enum phy phy = intel_port_to_phy(dev_priv, port);
> > +
> > +   switch (phy) {
> > +   case PHY_F:
> > +   return IS_CNL_WITH_PORT_F(dev_priv) ? HPD_PORT_E :
> > HPD_PORT_F;
> > +   case PHY_A:
> > +   case PHY_B:
> > +   case PHY_C:
> > +   case PHY_D:
> > +   case PHY_E:
> > +   case PHY_G:
> > +   case PHY_H:
> > +   case PHY_I:
> > +   return HPD_PORT_A + phy;  
> 
> I know José asked you to do this, but now you've tied two enum
> sequences together without explaining it anywhere. Before this,
> AFAICT, enum hpd_pin was just an abstract enumeration where the
> actual values of the enums didn't mean a thing, apart from 0 for
> HPD_NONE.
> 
> Maybe this is what we want to do, but we should never be so casual
> about it.
Do you suggest that I explain this in the description associated
with v3 that we now have a switch/case fallthrough in this function?
Or, do you want me to send a v4 to include this in a comment?

Thanks,
Vivek

> 
> 
> BR,
> Jani.
> 
> 
> > default:
> > -   MISSING_CASE(port);
> > +   MISSING_CASE(phy);
> > return HPD_NONE;
> > }
> >  }  
> 

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915/ehl: Check VBT before updating the transcoder for pipe

2020-02-04 Thread Vivek Kasireddy

On Tue, 4 Feb 2020 12:50:25 +0200
Jani Nikula  wrote:
Hi Jani,

> On Mon, 03 Feb 2020, Vivek Kasireddy 
> wrote:
> > Since the pipe->transcoder mapping is not expected to change unless
> > there is either eDP or DSI connectors present, check the VBT to
> > confirm their presence in addition to checking
> > TRANS_DDI_FUNC_CTL(transcoder). This additional check is needed on
> > platforms like Elkhart Lake because we cannot just rely on
> > GOP/Firmware programmed values in TRANS_DDI_FUNC_CTL(transcoder)
> > before updating the transcoder mapping.
> >
> > This patch is only relevant to EHL -- and a no-op on others --
> > because some of the PHYs are shared between the different DDIs and
> > we rely on the VBT to present the most accurate information to the
> > driver.
> >
> > Cc: Matt Roper 
> > Cc: José Roberto de Souza 
> > Signed-off-by: Vivek Kasireddy 
> > ---
> >  drivers/gpu/drm/i915/display/intel_display.c | 15 ++-
> >  1 file changed, 14 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/i915/display/intel_display.c
> > b/drivers/gpu/drm/i915/display/intel_display.c index
> > c0e5002ce64c..4b38f293bd88 100644 ---
> > a/drivers/gpu/drm/i915/display/intel_display.c +++
> > b/drivers/gpu/drm/i915/display/intel_display.c @@ -10805,6
> > +10805,18 @@ static void hsw_get_ddi_pll(struct drm_i915_private
> > *dev_priv, enum port port, pipe_config->shared_dpll =
> > intel_get_shared_dpll_by_id(dev_priv, id); } 
> > +static bool ehl_vbt_edp_dsi_present(struct drm_i915_private
> > *dev_priv,
> > +   enum transcoder transcoder)
> > +{
> > +   bool edp_present = intel_bios_is_port_present(dev_priv,
> > PORT_A);
> > +   bool dsi_present = intel_bios_is_dsi_present(dev_priv,
> > NULL); +
> > +   if (IS_ELKHARTLAKE(dev_priv))
> > +   return transcoder == TRANSCODER_EDP ? edp_present
> > : dsi_present; +
> > +   return true;
> > +}  
> 
> One of those things... this jumps out and immediately feels all wrong,
> just like ehl_vbt_ddi_d_present() feels all wrong in
> intel_combo_phy.c. But I don't know what would be the right thing to
> do without spending time that I don't have on this.

Is there a particular approach you want me to take to address this
issue? All I am trying to do is address the plausible scenario(s) where
the GOP/firmware may program the hardware in a certain way that seems
incorrect from what i915 does based on the info in the VBT. I noticed 
this issue on the EHL board I am working on; therefore, I limited the
fix to EHL only.

Thanks,
Vivek 

> 
> BR,
> Jani.
> 
> 
> 
> > +
> >  static bool hsw_get_transcoder_state(struct intel_crtc *crtc,
> >  struct intel_crtc_state
> > *pipe_config, u64 *power_domain_mask,
> > @@ -10844,7 +10856,8 @@ static bool hsw_get_transcoder_state(struct
> > intel_crtc *crtc, 
> > tmp = intel_de_read(dev_priv,
> > TRANS_DDI_FUNC_CTL(panel_transcoder));
> > -   if (!(tmp & TRANS_DDI_FUNC_ENABLE))
> > +   if (!(tmp & TRANS_DDI_FUNC_ENABLE) ||
> > +   !ehl_vbt_edp_dsi_present(dev_priv,
> > panel_transcoder)) continue;
> >  
> > /*  
> 

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/ehl: Check VBT before updating the transcoder for pipe

2020-02-03 Thread Vivek Kasireddy

Since the pipe->transcoder mapping is not expected to change unless
there are either eDP or DSI connectors present, check the VBT to confirm
their presence in addition to checking TRANS_DDI_FUNC_CTL(transcoder).
This additional check is needed on platforms like Elkhart Lake because
we cannot just rely on GOP/Firmware programmed values in
TRANS_DDI_FUNC_CTL(transcoder) before updating the transcoder mapping.

This patch is only relevant to EHL -- and a no-op on others --
because some of the PHYs are shared between the different DDIs and
we rely on the VBT to present the most accurate information to the
driver.

Cc: Matt Roper 
Cc: José Roberto de Souza 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_display.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index c0e5002ce64c..4b38f293bd88 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -10805,6 +10805,18 @@ static void hsw_get_ddi_pll(struct drm_i915_private 
*dev_priv, enum port port,
pipe_config->shared_dpll = intel_get_shared_dpll_by_id(dev_priv, id);
 }
 
+static bool ehl_vbt_edp_dsi_present(struct drm_i915_private *dev_priv,
+   enum transcoder transcoder)
+{
+   bool edp_present = intel_bios_is_port_present(dev_priv, PORT_A);
+   bool dsi_present = intel_bios_is_dsi_present(dev_priv, NULL);
+
+   if (IS_ELKHARTLAKE(dev_priv))
+   return transcoder == TRANSCODER_EDP ? edp_present : dsi_present;
+
+   return true;
+}
+
 static bool hsw_get_transcoder_state(struct intel_crtc *crtc,
 struct intel_crtc_state *pipe_config,
 u64 *power_domain_mask,
@@ -10844,7 +10856,8 @@ static bool hsw_get_transcoder_state(struct intel_crtc 
*crtc,
 
tmp = intel_de_read(dev_priv,
TRANS_DDI_FUNC_CTL(panel_transcoder));
-   if (!(tmp & TRANS_DDI_FUNC_ENABLE))
+   if (!(tmp & TRANS_DDI_FUNC_ENABLE) ||
+   !ehl_vbt_edp_dsi_present(dev_priv, panel_transcoder))
continue;
 
/*
-- 
2.21.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/hotplug: Use phy to get the hpd_pin instead of the port (v3)

2020-01-30 Thread Vivek Kasireddy

On some platforms such as Elkhart Lake, although we may use DDI D
to drive a connector, we have to use PHY A (Combo Phy PORT A) to
detect the hotplug interrupts as per the spec because there is no
one-to-one mapping between DDIs and PHYs. Therefore, use the
function intel_port_to_phy() which contains the logic for such
mapping(s) to find the correct hpd_pin.

This change should not affect other platforms as there is always
a one-to-one mapping between DDIs and PHYs.

v2:
- Convert the case statements to use PHYs instead of PORTs (Jani)

v3:
- Refactor the function to reduce the number of return statements by
  lumping all the case statements together except PHY_F which needs
  special handling (Jose)

Cc: Jani Nikula 
Cc: Matt Roper 
Cc: José Roberto de Souza 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_hotplug.c | 37 
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c 
b/drivers/gpu/drm/i915/display/intel_hotplug.c
index 042d98bae1ea..27e3033278a0 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -89,29 +89,22 @@
 enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv,
   enum port port)
 {
-   switch (port) {
-   case PORT_A:
-   return HPD_PORT_A;
-   case PORT_B:
-   return HPD_PORT_B;
-   case PORT_C:
-   return HPD_PORT_C;
-   case PORT_D:
-   return HPD_PORT_D;
-   case PORT_E:
-   return HPD_PORT_E;
-   case PORT_F:
-   if (IS_CNL_WITH_PORT_F(dev_priv))
-   return HPD_PORT_E;
-   return HPD_PORT_F;
-   case PORT_G:
-   return HPD_PORT_G;
-   case PORT_H:
-   return HPD_PORT_H;
-   case PORT_I:
-   return HPD_PORT_I;
+   enum phy phy = intel_port_to_phy(dev_priv, port);
+
+   switch (phy) {
+   case PHY_F:
+   return IS_CNL_WITH_PORT_F(dev_priv) ? HPD_PORT_E : HPD_PORT_F;
+   case PHY_A:
+   case PHY_B:
+   case PHY_C:
+   case PHY_D:
+   case PHY_E:
+   case PHY_G:
+   case PHY_H:
+   case PHY_I:
+   return HPD_PORT_A + phy;
default:
-   MISSING_CASE(port);
+   MISSING_CASE(phy);
return HPD_NONE;
}
 }
-- 
2.21.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/hotplug: Use phy to get the hpd_pin instead of the port (v2)

2020-01-30 Thread Vivek Kasireddy

On some platforms such as Elkhart Lake, although we may use DDI D
to drive a connector, we have to use PHY A (Combo Phy PORT A) to
detect the hotplug interrupts as per the spec because there is no
one-to-one mapping between DDIs and PHYs. Therefore, use the
function intel_port_to_phy() which contains the logic for such
mapping(s) to find the correct hpd_pin.

This change should not affect other platforms as there is always
a one-to-one mapping between DDIs and PHYs.

v2:
- Convert the case statements to use PHYs instead of PORTs (Jani)

Cc: Jani Nikula 
Cc: Matt Roper 
Cc: José Roberto de Souza 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_hotplug.c | 24 +++-
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c 
b/drivers/gpu/drm/i915/display/intel_hotplug.c
index 042d98bae1ea..2bcfa4682511 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -89,29 +89,31 @@
 enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv,
   enum port port)
 {
-   switch (port) {
-   case PORT_A:
+   enum phy phy = intel_port_to_phy(dev_priv, port);
+
+   switch (phy) {
+   case PHY_A:
return HPD_PORT_A;
-   case PORT_B:
+   case PHY_B:
return HPD_PORT_B;
-   case PORT_C:
+   case PHY_C:
return HPD_PORT_C;
-   case PORT_D:
+   case PHY_D:
return HPD_PORT_D;
-   case PORT_E:
+   case PHY_E:
return HPD_PORT_E;
-   case PORT_F:
+   case PHY_F:
if (IS_CNL_WITH_PORT_F(dev_priv))
return HPD_PORT_E;
return HPD_PORT_F;
-   case PORT_G:
+   case PHY_G:
return HPD_PORT_G;
-   case PORT_H:
+   case PHY_H:
return HPD_PORT_H;
-   case PORT_I:
+   case PHY_I:
return HPD_PORT_I;
default:
-   MISSING_CASE(port);
+   MISSING_CASE(phy);
return HPD_NONE;
}
 }
-- 
2.21.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/hotplug: Use phy to get the hpd_pin instead of the port

2020-01-29 Thread Vivek Kasireddy

On some platforms such as Elkhart Lake, although we may use DDI D
to drive a connector, we have to use PHY A (Combo Phy PORT A) to
detect the hotplug interrupts as per the spec because there is no
one-to-one mapping between DDIs and PHYs. Therefore, use the
function intel_port_to_phy() which contains the logic for such
mapping(s) to find the correct hpd_pin.

This change should not affect other platforms as there is always
a one-to-one mapping between DDIs and PHYs.

Cc: Matt Roper 
Cc: José Roberto de Souza 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_hotplug.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c 
b/drivers/gpu/drm/i915/display/intel_hotplug.c
index 042d98bae1ea..491f6b6f920d 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -89,7 +89,8 @@
 enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv,
   enum port port)
 {
-   switch (port) {
+   enum phy phy = intel_port_to_phy(dev_priv, port);
+   switch (phy) {
case PORT_A:
return HPD_PORT_A;
case PORT_B:
@@ -111,7 +112,7 @@ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private 
*dev_priv,
case PORT_I:
return HPD_PORT_I;
default:
-   MISSING_CASE(port);
+   MISSING_CASE(phy);
return HPD_NONE;
}
 }
-- 
2.21.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/ddi: Ensure that the value assigned to ddi_clk_needed is a bool

2020-01-29 Thread Vivek Kasireddy

Currently, the value assigned to the bool variable ddi_clk_needed
is a pointer -- which appears to have happened inadvertently. Therefore,
add a "!!" before the expression on the right to ensure that it results
in a bool.

Cc: Jani Nikula 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_ddi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c 
b/drivers/gpu/drm/i915/display/intel_ddi.c
index c96f629cddc3..6df485289bc6 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3109,7 +3109,7 @@ void icl_sanitize_encoder_pll_mapping(struct 
intel_encoder *encoder)
}
 
port_mask = BIT(encoder->port);
-   ddi_clk_needed = encoder->base.crtc;
+   ddi_clk_needed = !!encoder->base.crtc;
 
if (encoder->type == INTEL_OUTPUT_DSI) {
struct intel_encoder *other_encoder;
-- 
2.21.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/ehl: Ensure that the DDI selection MUX is programmed correctly

2020-01-21 Thread Vivek Kasireddy

Perhaps in some cases the BIOS/GOP or other firmware may turn on
PHY A but may not program the MUX correctly. Therefore, re-program
PHY A if it is determined after reading the VBT that the value
programmed for the MUX bit does not match the expected value.

Cc: Matt Roper 
Signed-off-by: Vivek Kasireddy 
---
 .../gpu/drm/i915/display/intel_combo_phy.c| 74 +++
 1 file changed, 45 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c 
b/drivers/gpu/drm/i915/display/intel_combo_phy.c
index 5f54aca7c36f..ec63c2657923 100644
--- a/drivers/gpu/drm/i915/display/intel_combo_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c
@@ -191,20 +191,57 @@ static bool icl_combo_phy_enabled(struct drm_i915_private 
*dev_priv,
(I915_READ(ICL_PORT_COMP_DW0(phy)) & COMP_INIT);
 }
 
+static bool ehl_vbt_ddi_d_present(struct drm_i915_private *i915)
+{
+   bool ddi_a_present = intel_bios_is_port_present(i915, PORT_A);
+   bool ddi_d_present = intel_bios_is_port_present(i915, PORT_D);
+   bool dsi_present = intel_bios_is_dsi_present(i915, NULL);
+
+   /*
+* VBT's 'dvo port' field for child devices references the DDI, not
+* the PHY.  So if combo PHY A is wired up to drive an external
+* display, we should see a child device present on PORT_D and
+* nothing on PORT_A and no DSI.
+*/
+   if (ddi_d_present && !ddi_a_present && !dsi_present)
+   return true;
+
+   /*
+* If we encounter a VBT that claims to have an external display on
+* DDI-D _and_ an internal display on DDI-A/DSI leave an error message
+* in the log and let the internal display win.
+*/
+   if (ddi_d_present)
+   DRM_ERROR("VBT claims to have both internal and external 
displays on PHY A.  Configuring for internal.\n");
+
+   return false;
+}
+
 static bool icl_combo_phy_verify_state(struct drm_i915_private *dev_priv,
   enum phy phy)
 {
bool ret;
+   u32 expected_val = 0;
 
if (!icl_combo_phy_enabled(dev_priv, phy))
return false;
 
ret = cnl_verify_procmon_ref_values(dev_priv, phy);
 
-   if (phy == PHY_A)
+   if (phy == PHY_A) {
ret &= check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW8(phy),
 IREFGEN, IREFGEN);
 
+   if (IS_ELKHARTLAKE(dev_priv)) {
+   if (ehl_vbt_ddi_d_present(dev_priv))
+   expected_val = ICL_PHY_MISC_MUX_DDID;
+
+   ret &= check_phy_reg(dev_priv, phy, ICL_PHY_MISC(phy),
+ICL_PHY_MISC_MUX_DDID,
+expected_val);
+   }
+   }
+
ret &= check_phy_reg(dev_priv, phy, ICL_PORT_CL_DW5(phy),
 CL_POWER_DOWN_ENABLE, CL_POWER_DOWN_ENABLE);
 
@@ -263,32 +300,6 @@ void intel_combo_phy_power_up_lanes(struct 
drm_i915_private *dev_priv,
I915_WRITE(ICL_PORT_CL_DW10(phy), val);
 }
 
-static u32 ehl_combo_phy_a_mux(struct drm_i915_private *i915, u32 val)
-{
-   bool ddi_a_present = intel_bios_is_port_present(i915, PORT_A);
-   bool ddi_d_present = intel_bios_is_port_present(i915, PORT_D);
-   bool dsi_present = intel_bios_is_dsi_present(i915, NULL);
-
-   /*
-* VBT's 'dvo port' field for child devices references the DDI, not
-* the PHY.  So if combo PHY A is wired up to drive an external
-* display, we should see a child device present on PORT_D and
-* nothing on PORT_A and no DSI.
-*/
-   if (ddi_d_present && !ddi_a_present && !dsi_present)
-   return val | ICL_PHY_MISC_MUX_DDID;
-
-   /*
-* If we encounter a VBT that claims to have an external display on
-* DDI-D _and_ an internal display on DDI-A/DSI leave an error message
-* in the log and let the internal display win.
-*/
-   if (ddi_d_present)
-   DRM_ERROR("VBT claims to have both internal and external 
displays on PHY A.  Configuring for internal.\n");
-
-   return val & ~ICL_PHY_MISC_MUX_DDID;
-}
-
 static void icl_combo_phys_init(struct drm_i915_private *dev_priv)
 {
enum phy phy;
@@ -319,8 +330,13 @@ static void icl_combo_phys_init(struct drm_i915_private 
*dev_priv)
 * "internal" child devices.
 */
val = I915_READ(ICL_PHY_MISC(phy));
-   if (IS_ELKHARTLAKE(dev_priv) && phy == PHY_A)
-   val = ehl_combo_phy_a_mux(dev_priv, val);
+   if (IS_ELKHARTLAKE(dev_priv) && phy == PHY_A) {
+   val &= ~ICL_PHY_MISC_MUX_DDID;
+
+   if (ehl_vbt_ddi_d_present(dev_priv))

[Intel-gfx] [PATCH] drm/i915/dsi: Ensure that the ACPI adapter lookup overrides the bus num

2020-01-17 Thread Vivek Kasireddy

Remove the i2c_bus_num >= 0 check from the adapter lookup function
as this would prevent ACPI bus number override. This check was mainly
there to return early if the bus number has already been found, but we
return on the next line anyway if the slave address does not match.

Fixes: 8cbf89db2941 ("drm/i915/dsi: Parse the I2C element from the VBT MIPI 
sequence block (v3)")
Cc: Hans de Goede 
Cc: Nabendu Maiti 
Cc: Matt Roper 
Cc: Bob Paauwe 
Cc: Jani Nikula 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c 
b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 6ec35d975bd7..04f953ba8f00 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -394,8 +394,7 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, 
void *data)
acpi_handle adapter_handle;
acpi_status status;
 
-   if (intel_dsi->i2c_bus_num >= 0 ||
-   !i2c_acpi_get_i2c_resource(ares, ))
+   if (!i2c_acpi_get_i2c_resource(ares, ))
return 1;
 
if (lookup->slave_addr != sb->slave_address)
-- 
2.21.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/dsi: Lookup the i2c bus from ACPI NS only if CONFIG_ACPI=y (v3)

2020-01-15 Thread Vivek Kasireddy

Perform the i2c bus/adapter lookup from ACPI Namespace only if
ACPI is enabled in the kernel config. If ACPI is not enabled or if
the lookup fails, we'll fall back to using the VBT for identifying
the i2c bus.

v2: Clearly identify the commit this patch is fixing (Jani)

v3: Remove the i2c_bus_num >= 0 check from the adapter lookup function
as this would prevent ACPI bus number override. This check was mainly
there to return early if the bus number has already been found, but we
return on the next line anyway if the slave address does not match.

Fixes: 8cbf89db2941 ("drm/i915/dsi: Parse the I2C element from the VBT MIPI 
sequence block (v3)")
Cc: Hans de Goede 
Cc: Nabendu Maiti 
Cc: Matt Roper 
Cc: Bob Paauwe 
Cc: Ville Syrjälä 
Cc: Jani Nikula 
Cc: Zhang Xiaoxu 
Reported-by: Hulk Robot 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 50 +---
 1 file changed, 32 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c 
b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 89fb0d90b694..04f953ba8f00 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -384,6 +384,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi 
*intel_dsi, const u8 *data)
return data;
 }
 
+#ifdef CONFIG_ACPI
 static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
 {
struct i2c_adapter_lookup *lookup = data;
@@ -393,8 +394,7 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, 
void *data)
acpi_handle adapter_handle;
acpi_status status;
 
-   if (intel_dsi->i2c_bus_num >= 0 ||
-   !i2c_acpi_get_i2c_resource(ares, ))
+   if (!i2c_acpi_get_i2c_resource(ares, ))
return 1;
 
if (lookup->slave_addr != sb->slave_address)
@@ -413,14 +413,41 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, 
void *data)
return 1;
 }
 
-static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
+static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi,
+ const u16 slave_addr)
 {
struct drm_device *drm_dev = intel_dsi->base.base.dev;
struct device *dev = _dev->pdev->dev;
-   struct i2c_adapter *adapter;
struct acpi_device *acpi_dev;
struct list_head resource_list;
struct i2c_adapter_lookup lookup;
+
+   acpi_dev = ACPI_COMPANION(dev);
+   if (acpi_dev) {
+   memset(, 0, sizeof(lookup));
+   lookup.slave_addr = slave_addr;
+   lookup.intel_dsi = intel_dsi;
+   lookup.dev_handle = acpi_device_handle(acpi_dev);
+
+   INIT_LIST_HEAD(_list);
+   acpi_dev_get_resources(acpi_dev, _list,
+  i2c_adapter_lookup,
+  );
+   acpi_dev_free_resource_list(_list);
+   }
+}
+#else
+static inline void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi,
+const u16 slave_addr)
+{
+}
+#endif
+
+static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
+{
+   struct drm_device *drm_dev = intel_dsi->base.base.dev;
+   struct device *dev = _dev->pdev->dev;
+   struct i2c_adapter *adapter;
struct i2c_msg msg;
int ret;
u8 vbt_i2c_bus_num = *(data + 2);
@@ -431,20 +458,7 @@ static const u8 *mipi_exec_i2c(struct intel_dsi 
*intel_dsi, const u8 *data)
 
if (intel_dsi->i2c_bus_num < 0) {
intel_dsi->i2c_bus_num = vbt_i2c_bus_num;
-
-   acpi_dev = ACPI_COMPANION(dev);
-   if (acpi_dev) {
-   memset(, 0, sizeof(lookup));
-   lookup.slave_addr = slave_addr;
-   lookup.intel_dsi = intel_dsi;
-   lookup.dev_handle = acpi_device_handle(acpi_dev);
-
-   INIT_LIST_HEAD(_list);
-   acpi_dev_get_resources(acpi_dev, _list,
-  i2c_adapter_lookup,
-  );
-   acpi_dev_free_resource_list(_list);
-   }
+   i2c_acpi_find_adapter(intel_dsi, slave_addr);
}
 
adapter = i2c_get_adapter(intel_dsi->i2c_bus_num);
-- 
2.21.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/dsi: Lookup the i2c bus from ACPI NS only if CONFIG_ACPI=y (v2)

2020-01-14 Thread Vivek Kasireddy

Perform the i2c bus/adapter lookup from ACPI Namespace only if
ACPI is enabled in the kernel config. If ACPI is not enabled or if
the lookup fails, we'll fall back to using the VBT for identifying
the i2c bus.

This patch
Fixes: 8cbf89db2941 ("drm/i915/dsi: Parse the I2C element from the VBT
MIPI sequence block (v3)")

v2: Reformat the above line to clearly identify the commit this patch is
fixing for CI (Jani)

Cc: Hans de Goede 
Cc: Nabendu Maiti 
Cc: Matt Roper 
Cc: Bob Paauwe 
Cc: Ville Syrjälä 
Cc: Jani Nikula 
Cc: Zhang Xiaoxu 
Reported-by: Hulk Robot 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 47 +---
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c 
b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 89fb0d90b694..6ec35d975bd7 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -384,6 +384,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi 
*intel_dsi, const u8 *data)
return data;
 }
 
+#ifdef CONFIG_ACPI
 static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
 {
struct i2c_adapter_lookup *lookup = data;
@@ -413,14 +414,41 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, 
void *data)
return 1;
 }
 
-static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
+static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi,
+ const u16 slave_addr)
 {
struct drm_device *drm_dev = intel_dsi->base.base.dev;
struct device *dev = _dev->pdev->dev;
-   struct i2c_adapter *adapter;
struct acpi_device *acpi_dev;
struct list_head resource_list;
struct i2c_adapter_lookup lookup;
+
+   acpi_dev = ACPI_COMPANION(dev);
+   if (acpi_dev) {
+   memset(, 0, sizeof(lookup));
+   lookup.slave_addr = slave_addr;
+   lookup.intel_dsi = intel_dsi;
+   lookup.dev_handle = acpi_device_handle(acpi_dev);
+
+   INIT_LIST_HEAD(_list);
+   acpi_dev_get_resources(acpi_dev, _list,
+  i2c_adapter_lookup,
+  );
+   acpi_dev_free_resource_list(_list);
+   }
+}
+#else
+static inline void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi,
+const u16 slave_addr)
+{
+}
+#endif
+
+static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
+{
+   struct drm_device *drm_dev = intel_dsi->base.base.dev;
+   struct device *dev = _dev->pdev->dev;
+   struct i2c_adapter *adapter;
struct i2c_msg msg;
int ret;
u8 vbt_i2c_bus_num = *(data + 2);
@@ -431,20 +459,7 @@ static const u8 *mipi_exec_i2c(struct intel_dsi 
*intel_dsi, const u8 *data)
 
if (intel_dsi->i2c_bus_num < 0) {
intel_dsi->i2c_bus_num = vbt_i2c_bus_num;
-
-   acpi_dev = ACPI_COMPANION(dev);
-   if (acpi_dev) {
-   memset(, 0, sizeof(lookup));
-   lookup.slave_addr = slave_addr;
-   lookup.intel_dsi = intel_dsi;
-   lookup.dev_handle = acpi_device_handle(acpi_dev);
-
-   INIT_LIST_HEAD(_list);
-   acpi_dev_get_resources(acpi_dev, _list,
-  i2c_adapter_lookup,
-  );
-   acpi_dev_free_resource_list(_list);
-   }
+   i2c_acpi_find_adapter(intel_dsi, slave_addr);
}
 
adapter = i2c_get_adapter(intel_dsi->i2c_bus_num);
-- 
2.21.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/dsi: Lookup the i2c bus from ACPI NS only if CONFIG_ACPI=y

2020-01-13 Thread Vivek Kasireddy

Perform the i2c bus/adapter lookup from ACPI Namespace only if
ACPI is enabled in the kernel config. If ACPI is not enabled or if
the lookup fails, we'll fall back to using the VBT for identifying
the i2c bus.

This fixes commit 8cbf89db2941("drm/i915/dsi: Parse the I2C element
from the VBT MIPI sequence block (v3).")

Cc: Hans de Goede 
Cc: Nabendu Maiti 
Cc: Matt Roper 
Cc: Bob Paauwe 
Cc: Ville Syrjälä 
Cc: Jani Nikula 
Cc: Zhang Xiaoxu 
Reported-by: Hulk Robot 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 47 +---
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c 
b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 89fb0d90b694..6ec35d975bd7 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -384,6 +384,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi 
*intel_dsi, const u8 *data)
return data;
 }
 
+#ifdef CONFIG_ACPI
 static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
 {
struct i2c_adapter_lookup *lookup = data;
@@ -413,14 +414,41 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, 
void *data)
return 1;
 }
 
-static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
+static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi,
+ const u16 slave_addr)
 {
struct drm_device *drm_dev = intel_dsi->base.base.dev;
struct device *dev = _dev->pdev->dev;
-   struct i2c_adapter *adapter;
struct acpi_device *acpi_dev;
struct list_head resource_list;
struct i2c_adapter_lookup lookup;
+
+   acpi_dev = ACPI_COMPANION(dev);
+   if (acpi_dev) {
+   memset(, 0, sizeof(lookup));
+   lookup.slave_addr = slave_addr;
+   lookup.intel_dsi = intel_dsi;
+   lookup.dev_handle = acpi_device_handle(acpi_dev);
+
+   INIT_LIST_HEAD(_list);
+   acpi_dev_get_resources(acpi_dev, _list,
+  i2c_adapter_lookup,
+  );
+   acpi_dev_free_resource_list(_list);
+   }
+}
+#else
+static inline void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi,
+const u16 slave_addr)
+{
+}
+#endif
+
+static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
+{
+   struct drm_device *drm_dev = intel_dsi->base.base.dev;
+   struct device *dev = _dev->pdev->dev;
+   struct i2c_adapter *adapter;
struct i2c_msg msg;
int ret;
u8 vbt_i2c_bus_num = *(data + 2);
@@ -431,20 +459,7 @@ static const u8 *mipi_exec_i2c(struct intel_dsi 
*intel_dsi, const u8 *data)
 
if (intel_dsi->i2c_bus_num < 0) {
intel_dsi->i2c_bus_num = vbt_i2c_bus_num;
-
-   acpi_dev = ACPI_COMPANION(dev);
-   if (acpi_dev) {
-   memset(, 0, sizeof(lookup));
-   lookup.slave_addr = slave_addr;
-   lookup.intel_dsi = intel_dsi;
-   lookup.dev_handle = acpi_device_handle(acpi_dev);
-
-   INIT_LIST_HEAD(_list);
-   acpi_dev_get_resources(acpi_dev, _list,
-  i2c_adapter_lookup,
-  );
-   acpi_dev_free_resource_list(_list);
-   }
+   i2c_acpi_find_adapter(intel_dsi, slave_addr);
}
 
adapter = i2c_get_adapter(intel_dsi->i2c_bus_num);
-- 
2.21.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/dsi: Parse the I2C element from the VBT MIPI sequence block (v3)

2020-01-10 Thread Vivek Kasireddy

Parsing the i2c element is mainly done to transfer the payload from the
MIPI sequence block to the relevant slave device. In some cases, the
commands that are part of the payload can be used to turn on the backlight.

This patch is actually a refactored version of this old patch:
https://lists.freedesktop.org/archives/intel-gfx/2014-December/056897.html

In addition to the refactoring, the original patch is augmented by looking up
the i2c bus from ACPI NS instead of relying on the bus number provided
in the VBT.

This patch was tested on Aava Mobile's Inari 10 tablet. It enabled
turning on the backlight by transferring the payload to the device.

v2:
- Add DRM_DEV_ERROR for invalid adapter and failed transfer and also
  drop the DRM_DEBUG that existed originally. (Hans)
- Add two gotos instead of one to clean things up properly.

v3:
- Identify the device on which this patch was tested in the commit
  message (Ville)

Cc: Hans de Goede 
Cc: Nabendu Maiti 
Cc: Matt Roper 
Cc: Bob Paauwe 
Cc: Ville Syrjälä 
Reviewed-by: Hans de Goede 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_dsi.h |  3 +
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 99 +++-
 2 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h 
b/drivers/gpu/drm/i915/display/intel_dsi.h
index 7481a5aa3084..6cef1356b4e6 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi.h
+++ b/drivers/gpu/drm/i915/display/intel_dsi.h
@@ -69,6 +69,9 @@ struct intel_dsi {
/* number of DSI lanes */
unsigned int lane_count;
 
+   /* i2c bus associated with the slave device */
+   int i2c_bus_num;
+
/*
 * video mode pixel format
 *
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c 
b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 0032161e0f76..89fb0d90b694 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -86,6 +86,12 @@ static struct gpio_map vlv_gpio_table[] = {
{ VLV_GPIO_NC_11_PANEL1_BKLTCTL },
 };
 
+struct i2c_adapter_lookup {
+   u16 slave_addr;
+   struct intel_dsi *intel_dsi;
+   acpi_handle dev_handle;
+};
+
 #define CHV_GPIO_IDX_START_N   0
 #define CHV_GPIO_IDX_START_E   73
 #define CHV_GPIO_IDX_START_SW  100
@@ -378,11 +384,98 @@ static const u8 *mipi_exec_gpio(struct intel_dsi 
*intel_dsi, const u8 *data)
return data;
 }
 
+static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
+{
+   struct i2c_adapter_lookup *lookup = data;
+   struct intel_dsi *intel_dsi = lookup->intel_dsi;
+   struct acpi_resource_i2c_serialbus *sb;
+   struct i2c_adapter *adapter;
+   acpi_handle adapter_handle;
+   acpi_status status;
+
+   if (intel_dsi->i2c_bus_num >= 0 ||
+   !i2c_acpi_get_i2c_resource(ares, ))
+   return 1;
+
+   if (lookup->slave_addr != sb->slave_address)
+   return 1;
+
+   status = acpi_get_handle(lookup->dev_handle,
+sb->resource_source.string_ptr,
+_handle);
+   if (ACPI_FAILURE(status))
+   return 1;
+
+   adapter = i2c_acpi_find_adapter_by_handle(adapter_handle);
+   if (adapter)
+   intel_dsi->i2c_bus_num = adapter->nr;
+
+   return 1;
+}
+
 static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
 {
-   DRM_DEBUG_KMS("Skipping I2C element execution\n");
+   struct drm_device *drm_dev = intel_dsi->base.base.dev;
+   struct device *dev = _dev->pdev->dev;
+   struct i2c_adapter *adapter;
+   struct acpi_device *acpi_dev;
+   struct list_head resource_list;
+   struct i2c_adapter_lookup lookup;
+   struct i2c_msg msg;
+   int ret;
+   u8 vbt_i2c_bus_num = *(data + 2);
+   u16 slave_addr = *(u16 *)(data + 3);
+   u8 reg_offset = *(data + 5);
+   u8 payload_size = *(data + 6);
+   u8 *payload_data;
+
+   if (intel_dsi->i2c_bus_num < 0) {
+   intel_dsi->i2c_bus_num = vbt_i2c_bus_num;
+
+   acpi_dev = ACPI_COMPANION(dev);
+   if (acpi_dev) {
+   memset(, 0, sizeof(lookup));
+   lookup.slave_addr = slave_addr;
+   lookup.intel_dsi = intel_dsi;
+   lookup.dev_handle = acpi_device_handle(acpi_dev);
+
+   INIT_LIST_HEAD(_list);
+   acpi_dev_get_resources(acpi_dev, _list,
+  i2c_adapter_lookup,
+  );
+   acpi_dev_free_resource_list(_list);
+   }
+   }
 
-   return data + *(data + 6) + 7;
+   adapter = i2c_get_adapter(intel_dsi->i2c_bus_num);
+   if (!adapter) {
+

[Intel-gfx] [PATCH] drm/i915/dsi: Parse the I2C element from the VBT MIPI sequence block (v2)

2020-01-03 Thread Vivek Kasireddy

Parsing the i2c element is mainly done to transfer the payload from the
MIPI sequence block to the relevant slave device. In some cases, the
commands that are part of the payload can be used to turn on the backlight.

This patch is actually a refactored version of this old patch:
https://lists.freedesktop.org/archives/intel-gfx/2014-December/056897.html

In addition to the refactoring, the original patch is augmented by looking up
the i2c bus from ACPI NS instead of relying on the bus number provided
in the VBT.

v2:
- Add DRM_DEV_ERROR for invalid adapter and failed transfer and also
  drop the DRM_DEBUG that existed originally. (Hans)
- Add two gotos instead of one to clean things up properly.

CC: Hans de Goede 
Cc: Nabendu Maiti 
Cc: Matt Roper 
Cc: Bob Paauwe 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_dsi.h |  3 +
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 99 +++-
 2 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h 
b/drivers/gpu/drm/i915/display/intel_dsi.h
index b15be5814599..5651bc8aa5c2 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi.h
+++ b/drivers/gpu/drm/i915/display/intel_dsi.h
@@ -68,6 +68,9 @@ struct intel_dsi {
/* number of DSI lanes */
unsigned int lane_count;
 
+   /* i2c bus associated with the slave device */
+   int i2c_bus_num;
+
/*
 * video mode pixel format
 *
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c 
b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index f90946c912ee..35fcef7c0d70 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -83,6 +83,12 @@ static struct gpio_map vlv_gpio_table[] = {
{ VLV_GPIO_NC_11_PANEL1_BKLTCTL },
 };
 
+struct i2c_adapter_lookup {
+   u16 slave_addr;
+   struct intel_dsi *intel_dsi;
+   acpi_handle dev_handle;
+};
+
 #define CHV_GPIO_IDX_START_N   0
 #define CHV_GPIO_IDX_START_E   73
 #define CHV_GPIO_IDX_START_SW  100
@@ -375,11 +381,98 @@ static const u8 *mipi_exec_gpio(struct intel_dsi 
*intel_dsi, const u8 *data)
return data;
 }
 
+static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
+{
+   struct i2c_adapter_lookup *lookup = data;
+   struct intel_dsi *intel_dsi = lookup->intel_dsi;
+   struct acpi_resource_i2c_serialbus *sb;
+   struct i2c_adapter *adapter;
+   acpi_handle adapter_handle;
+   acpi_status status;
+
+   if (intel_dsi->i2c_bus_num >= 0 ||
+   !i2c_acpi_get_i2c_resource(ares, ))
+   return 1;
+
+   if (lookup->slave_addr != sb->slave_address)
+   return 1;
+
+   status = acpi_get_handle(lookup->dev_handle,
+sb->resource_source.string_ptr,
+_handle);
+   if (ACPI_FAILURE(status))
+   return 1;
+
+   adapter = i2c_acpi_find_adapter_by_handle(adapter_handle);
+   if (adapter)
+   intel_dsi->i2c_bus_num = adapter->nr;
+
+   return 1;
+}
+
 static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
 {
-   DRM_DEBUG_KMS("Skipping I2C element execution\n");
+   struct drm_device *drm_dev = intel_dsi->base.base.dev;
+   struct device *dev = _dev->pdev->dev;
+   struct i2c_adapter *adapter;
+   struct acpi_device *acpi_dev;
+   struct list_head resource_list;
+   struct i2c_adapter_lookup lookup;
+   struct i2c_msg msg;
+   int ret;
+   u8 vbt_i2c_bus_num = *(data + 2);
+   u16 slave_addr = *(u16 *)(data + 3);
+   u8 reg_offset = *(data + 5);
+   u8 payload_size = *(data + 6);
+   u8 *payload_data;
+
+   if (intel_dsi->i2c_bus_num < 0) {
+   intel_dsi->i2c_bus_num = vbt_i2c_bus_num;
+
+   acpi_dev = ACPI_COMPANION(dev);
+   if (acpi_dev) {
+   memset(, 0, sizeof(lookup));
+   lookup.slave_addr = slave_addr;
+   lookup.intel_dsi = intel_dsi;
+   lookup.dev_handle = acpi_device_handle(acpi_dev);
+
+   INIT_LIST_HEAD(_list);
+   acpi_dev_get_resources(acpi_dev, _list,
+  i2c_adapter_lookup,
+  );
+   acpi_dev_free_resource_list(_list);
+   }
+   }
 
-   return data + *(data + 6) + 7;
+   adapter = i2c_get_adapter(intel_dsi->i2c_bus_num);
+   if (!adapter) {
+   DRM_DEV_ERROR(dev, "Cannot find a valid i2c bus for xfer\n");
+   goto err_bus;
+   }
+
+   payload_data = kzalloc(payload_size + 1, GFP_KERNEL);
+   if (!payload_data)
+   goto err_alloc;
+
+   payload_data[0] = reg_offset;
+   memcpy(

Re: [Intel-gfx] [PATCH] drm/i915/dsi: Parse the I2C element from the VBT MIPI sequence block

2020-01-03 Thread Vivek Kasireddy

On Fri, 3 Jan 2020 12:05:11 +0100
Hans de Goede  wrote:
Hi Hans,

> Hi Vivek,
> 
> On 03-01-2020 01:00, Vivek Kasireddy wrote:
> > Parsing the i2c element is mainly done to transfer the payload from
> > the MIPI sequence block to the relevant slave device. In some
> > cases, the commands that are part of the payload can be used to
> > turn on the backlight.
> > 
> > This patch is actually a refactored version of this old patch:
> > https://lists.freedesktop.org/archives/intel-gfx/2014-December/056897.html
> > 
> > In addition to the refactoring, the old patch is augmented by
> > looking up the i2c bus from ACPI NS instead of relying on the bus
> > number provided in the VBT.
> > 
> > Cc: Deepak M 
> > Cc: Nabendu Maiti 
> > Cc: Matt Roper 
> > Cc: Bob Paauwe 
> > Signed-off-by: Vivek Kasireddy   
> 
> Thank you for this patch, I have been doing a lot of work to make
> DSI panels on Bay Trail and Cherry Trail devices work better, as such
> I've done a lot of testing of DSI panels. But I have never seen any
> MIPI sequences actually use the i2c commands. May I ask how you have
> tested this? Do you have a device which actually uses the i2c
> commands?
Oh, they sure exist; we do have a device that uses i2c commands to turn
on the backlight that we have tested this patch on. 

> 
> I also have some small review comments inline:
> 
> > ---
> >   drivers/gpu/drm/i915/display/intel_dsi.h |  3 +
> >   drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 93
> >  2 files changed, 96 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h
> > b/drivers/gpu/drm/i915/display/intel_dsi.h index
> > b15be5814599..5651bc8aa5c2 100644 ---
> > a/drivers/gpu/drm/i915/display/intel_dsi.h +++
> > b/drivers/gpu/drm/i915/display/intel_dsi.h @@ -68,6 +68,9 @@ struct
> > intel_dsi { /* number of DSI lanes */
> > unsigned int lane_count;
> >   
> > +   /* i2c bus associated with the slave device */
> > +   int i2c_bus_num;
> > +
> > /*
> >  * video mode pixel format
> >  *
> > diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
> > b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index
> > f90946c912ee..60441a5a3dba 100644 ---
> > a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++
> > b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -83,6 +83,12 @@
> > static struct gpio_map vlv_gpio_table[] = { {
> > VLV_GPIO_NC_11_PANEL1_BKLTCTL }, };
> >   
> > +struct i2c_adapter_lookup {
> > +   u16 slave_addr;
> > +   struct intel_dsi *intel_dsi;
> > +   acpi_handle dev_handle;
> > +};
> > +
> >   #define CHV_GPIO_IDX_START_N  0
> >   #define CHV_GPIO_IDX_START_E  73
> >   #define CHV_GPIO_IDX_START_SW 100
> > @@ -375,8 +381,93 @@ static const u8 *mipi_exec_gpio(struct
> > intel_dsi *intel_dsi, const u8 *data) return data;
> >   }
> >   
> > +static int i2c_adapter_lookup(struct acpi_resource *ares, void
> > *data) +{
> > +   struct i2c_adapter_lookup *lookup = data;
> > +   struct intel_dsi *intel_dsi = lookup->intel_dsi;
> > +   struct acpi_resource_i2c_serialbus *sb;
> > +   struct i2c_adapter *adapter;
> > +   acpi_handle adapter_handle;
> > +   acpi_status status;
> > +
> > +   if (intel_dsi->i2c_bus_num >= 0 ||
> > +   !i2c_acpi_get_i2c_resource(ares, ))
> > +   return 1;
> > +
> > +   if (lookup->slave_addr != sb->slave_address)
> > +   return 1;
> > +
> > +   status = acpi_get_handle(lookup->dev_handle,
> > +sb->resource_source.string_ptr,
> > +_handle);
> > +   if (ACPI_FAILURE(status))
> > +   return 1;
> > +
> > +   adapter = i2c_acpi_find_adapter_by_handle(adapter_handle);
> > +   if (adapter)
> > +   intel_dsi->i2c_bus_num = adapter->nr;
> > +
> > +   return 1;
> > +}
> > +
> >   static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const
> > u8 *data) {
> > +   struct drm_device *dev = intel_dsi->base.base.dev;
> > +   struct i2c_adapter *adapter;
> > +   struct acpi_device *acpi_dev;
> > +   struct list_head resource_list;
> > +   struct i2c_adapter_lookup lookup;
> > +   struct i2c_msg msg;
> > +   int ret;
> > +   u8 vbt_i2c_bus_num = *(data + 2);
> > +   u16 slave_addr = *(u16 *)(data + 3);
> > +   u8 reg_offset = *(data + 5);
> > +   u8 payload_size

[Intel-gfx] [PATCH] drm/i915/dsi: Parse the I2C element from the VBT MIPI sequence block

2020-01-02 Thread Vivek Kasireddy

Parsing the i2c element is mainly done to transfer the payload from the
MIPI sequence block to the relevant slave device. In some cases, the
commands that are part of the payload can be used to turn on the backlight.

This patch is actually a refactored version of this old patch:
https://lists.freedesktop.org/archives/intel-gfx/2014-December/056897.html

In addition to the refactoring, the old patch is augmented by looking up
the i2c bus from ACPI NS instead of relying on the bus number provided
in the VBT.

Cc: Deepak M 
Cc: Nabendu Maiti 
Cc: Matt Roper 
Cc: Bob Paauwe 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_dsi.h |  3 +
 drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 93 
 2 files changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h 
b/drivers/gpu/drm/i915/display/intel_dsi.h
index b15be5814599..5651bc8aa5c2 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi.h
+++ b/drivers/gpu/drm/i915/display/intel_dsi.h
@@ -68,6 +68,9 @@ struct intel_dsi {
/* number of DSI lanes */
unsigned int lane_count;
 
+   /* i2c bus associated with the slave device */
+   int i2c_bus_num;
+
/*
 * video mode pixel format
 *
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c 
b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index f90946c912ee..60441a5a3dba 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -83,6 +83,12 @@ static struct gpio_map vlv_gpio_table[] = {
{ VLV_GPIO_NC_11_PANEL1_BKLTCTL },
 };
 
+struct i2c_adapter_lookup {
+   u16 slave_addr;
+   struct intel_dsi *intel_dsi;
+   acpi_handle dev_handle;
+};
+
 #define CHV_GPIO_IDX_START_N   0
 #define CHV_GPIO_IDX_START_E   73
 #define CHV_GPIO_IDX_START_SW  100
@@ -375,8 +381,93 @@ static const u8 *mipi_exec_gpio(struct intel_dsi 
*intel_dsi, const u8 *data)
return data;
 }
 
+static int i2c_adapter_lookup(struct acpi_resource *ares, void *data)
+{
+   struct i2c_adapter_lookup *lookup = data;
+   struct intel_dsi *intel_dsi = lookup->intel_dsi;
+   struct acpi_resource_i2c_serialbus *sb;
+   struct i2c_adapter *adapter;
+   acpi_handle adapter_handle;
+   acpi_status status;
+
+   if (intel_dsi->i2c_bus_num >= 0 ||
+   !i2c_acpi_get_i2c_resource(ares, ))
+   return 1;
+
+   if (lookup->slave_addr != sb->slave_address)
+   return 1;
+
+   status = acpi_get_handle(lookup->dev_handle,
+sb->resource_source.string_ptr,
+_handle);
+   if (ACPI_FAILURE(status))
+   return 1;
+
+   adapter = i2c_acpi_find_adapter_by_handle(adapter_handle);
+   if (adapter)
+   intel_dsi->i2c_bus_num = adapter->nr;
+
+   return 1;
+}
+
 static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data)
 {
+   struct drm_device *dev = intel_dsi->base.base.dev;
+   struct i2c_adapter *adapter;
+   struct acpi_device *acpi_dev;
+   struct list_head resource_list;
+   struct i2c_adapter_lookup lookup;
+   struct i2c_msg msg;
+   int ret;
+   u8 vbt_i2c_bus_num = *(data + 2);
+   u16 slave_addr = *(u16 *)(data + 3);
+   u8 reg_offset = *(data + 5);
+   u8 payload_size = *(data + 6);
+   u8 *payload_data;
+
+   if (intel_dsi->i2c_bus_num < 0) {
+   intel_dsi->i2c_bus_num = vbt_i2c_bus_num;
+
+   acpi_dev = ACPI_COMPANION(>pdev->dev);
+   if (acpi_dev) {
+   memset(, 0, sizeof(lookup));
+   lookup.slave_addr = slave_addr;
+   lookup.intel_dsi = intel_dsi;
+   lookup.dev_handle = acpi_device_handle(acpi_dev);
+
+   INIT_LIST_HEAD(_list);
+   acpi_dev_get_resources(acpi_dev, _list,
+  i2c_adapter_lookup,
+  );
+   acpi_dev_free_resource_list(_list);
+   }
+   }
+
+   adapter = i2c_get_adapter(intel_dsi->i2c_bus_num);
+   if (!adapter)
+   goto out;
+
+   payload_data = kzalloc(payload_size + 1, GFP_KERNEL);
+   if (!payload_data)
+   goto out;
+
+   payload_data[0] = reg_offset;
+   memcpy(_data[1], (data + 7), payload_size);
+
+   msg.addr = slave_addr;
+   msg.flags = 0;
+   msg.len = payload_size + 1;
+   msg.buf = payload_data;
+
+   ret = i2c_transfer(adapter, , 1);
+   if (ret < 0)
+   DRM_ERROR("i2c transfer failed");
+
+   kfree(payload_data);
+   i2c_put_adapter(adapter);
+
+   return data + payload_size + 7;
+out:
DRM_DEBUG_KMS("Skipping I2C element execution\n&qu

[Intel-gfx] [PATCH] drm/i915: Correct the PCH type in irq postinstall

2019-10-16 Thread Vivek Kasireddy

JasperLake PCH (JSP) has DDI HPD pin mappings similar to TGP and not
MCC. Also add the correct HPD pin mappings for the MCC PCH.

Cc: Matt Roper 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_irq.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index ef09fbb36f37..e618f4621308 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3803,8 +3803,11 @@ static void icp_irq_postinstall(struct drm_i915_private 
*dev_priv)
if (HAS_PCH_TGP(dev_priv))
icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK,
TGP_TC_HPD_ENABLE_MASK);
-   else if (HAS_PCH_MCC(dev_priv))
+   else if (HAS_PCH_JSP(dev_priv))
icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, 0);
+   else if (HAS_PCH_MCC(dev_priv))
+   icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK,
+   ICP_TC_HPD_ENABLE(PORT_TC1));
else
icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK,
ICP_TC_HPD_ENABLE_MASK);
-- 
2.21.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v3] drm/i915: Introduce Jasper Lake PCH

2019-10-15 Thread Vivek Kasireddy

On Tue, 15 Oct 2019 09:28:54 -0700
Matt Roper  wrote:

> The Jasper Lake PCH follows ICP/TGP's south display behavior and is
> identical to MCC graphics-wise except that it does not use the unusual
> (port C -> TC1) pin mapping that MCC does.
> 
> Also, it turns out the extra PCH ID that we had previously thought
> was a form of MCC is actually a second ID for JSP (i.e., port C uses
> the port C pins instead of the TC1 pins).
> 
> v2:
>  - Also update the port masks (not just the pin table) in
>mcc_hpd_irq_setup.  (Vivek)
> 
> v3:
>  - Break jsp_hpd_irq_setup out into its own function for clarity.
>(Vivek)
> 
> Cc: José Roberto de Souza 
> Cc: James Ausmus 
> Cc: Vivek Kasireddy 
> Signed-off-by: Matt Roper 
> Reviewed-by: Vivek Kasireddy 
> ---
>  drivers/gpu/drm/i915/i915_irq.c  | 24 +++-
>  drivers/gpu/drm/i915/intel_pch.c |  6 +-
>  drivers/gpu/drm/i915/intel_pch.h |  5 -
>  3 files changed, 32 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c
> b/drivers/gpu/drm/i915/i915_irq.c index d20ca02d3166..448390ad2128
> 100644 --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2248,11 +2248,18 @@ static void icp_irq_handler(struct
> drm_i915_private *dev_priv, u32 pch_iir) tc_hotplug_trigger = pch_iir
> & SDE_TC_MASK_TGP; tc_port_hotplug_long_detect =
> tgp_tc_port_hotplug_long_detect; pins = hpd_tgp;
> + } else if (HAS_PCH_JSP(dev_priv)) {
> + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP;
> + tc_hotplug_trigger = 0;
> + pins = hpd_tgp;
>   } else if (HAS_PCH_MCC(dev_priv)) {
>   ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
>   tc_hotplug_trigger = pch_iir &
> SDE_TC_HOTPLUG_ICP(PORT_TC1); pins = hpd_icp;
>   } else {
> + WARN(!HAS_PCH_ICP(dev_priv),
> +  "Unrecognized PCH type 0x%x\n",
> INTEL_PCH_TYPE(dev_priv)); +
>   ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
>   tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP;
>   tc_port_hotplug_long_detect =
> icp_tc_port_hotplug_long_detect; @@ -3384,6 +3391,19 @@ static void
> mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) hpd_icp);
>  }
>  
> +/*
> + * JSP behaves exactly the same as MCC above except that port C is
> mapped to
> + * the DDI-C pins instead of the TC1 pins.  This means we should
> follow TGP's
> + * masks & tables rather than ICP's masks & tables.
> + */
> +static void jsp_hpd_irq_setup(struct drm_i915_private *dev_priv)
> +{
> + icp_hpd_irq_setup(dev_priv,
> +   SDE_DDI_MASK_TGP, 0,
> +   TGP_DDI_HPD_ENABLE_MASK, 0,
> +   hpd_tgp);
> +}
> +
Looks good.
Reviewed-by: Vivek Kasireddy 

>  static void gen11_hpd_detection_setup(struct drm_i915_private
> *dev_priv) {
>   u32 hotplug;
> @@ -4315,7 +4335,9 @@ void intel_irq_init(struct drm_i915_private
> *dev_priv) if (I915_HAS_HOTPLUG(dev_priv))
>   dev_priv->display.hpd_irq_setup =
> i915_hpd_irq_setup; } else {
> - if (HAS_PCH_MCC(dev_priv))
> + if (HAS_PCH_JSP(dev_priv))
> + dev_priv->display.hpd_irq_setup =
> jsp_hpd_irq_setup;
> + else if (HAS_PCH_MCC(dev_priv))
>   dev_priv->display.hpd_irq_setup =
> mcc_hpd_irq_setup; else if (INTEL_GEN(dev_priv) >= 11)
>   dev_priv->display.hpd_irq_setup =
> gen11_hpd_irq_setup; diff --git a/drivers/gpu/drm/i915/intel_pch.c
> b/drivers/gpu/drm/i915/intel_pch.c index 15f8bff141f9..1035d3d46fd8
> 100644 --- a/drivers/gpu/drm/i915/intel_pch.c
> +++ b/drivers/gpu/drm/i915/intel_pch.c
> @@ -79,7 +79,6 @@ intel_pch_type(const struct drm_i915_private
> *dev_priv, unsigned short id) WARN_ON(!IS_ICELAKE(dev_priv));
>   return PCH_ICP;
>   case INTEL_PCH_MCC_DEVICE_ID_TYPE:
> - case INTEL_PCH_MCC2_DEVICE_ID_TYPE:
>   DRM_DEBUG_KMS("Found Mule Creek Canyon PCH\n");
>   WARN_ON(!IS_ELKHARTLAKE(dev_priv));
>   return PCH_MCC;
> @@ -87,6 +86,11 @@ intel_pch_type(const struct drm_i915_private
> *dev_priv, unsigned short id) DRM_DEBUG_KMS("Found Tiger Lake LP
> PCH\n"); WARN_ON(!IS_TIGERLAKE(dev_priv));
>   return PCH_TGP;
> + case INTEL_PCH_JSP_DEVICE_ID_TYPE:
> + case INTEL_PCH_JSP2_DEVICE_ID_TYPE:
> + DRM_DEBUG_KMS("Found Jasper Lake PCH\n");
> + WARN_ON(!IS_ELKHARTLAKE(dev_priv));
> + return PCH_JSP;
>   default:
>

Re: [Intel-gfx] [PATCH] drm/i915/ehl: Don't forget to set TC long detect function

2019-10-15 Thread Vivek Kasireddy

On Tue, 15 Oct 2019 09:11:31 -0700
Matt Roper  wrote:

> Since EHL's MCC PCH reuses one of the TC pins we need to supply a TC
> long detect function when handling the interrupts.
> 
> Fixes: 53448aed7b80 ("drm/i915/ehl: Port C's hotplug interrupt is
> associated with TC1 bits") Reported-by: kbuild test robot
>  Reported-by: Dan Carpenter 
> Cc: Vivek Kasireddy 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c
> b/drivers/gpu/drm/i915/i915_irq.c index a7c968b01af3..af7426cd8de9
> 100644 --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2251,6 +2251,7 @@ static void icp_irq_handler(struct
> drm_i915_private *dev_priv, u32 pch_iir) } else if
> (HAS_PCH_MCC(dev_priv)) { ddi_hotplug_trigger = pch_iir &
> SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir &
> SDE_TC_HOTPLUG_ICP(PORT_TC1);
> + tc_port_hotplug_long_detect =
> icp_tc_port_hotplug_long_detect; pins = hpd_icp;

Reviewed-by: Vivek Kasireddy 

>   } else {
>   ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v2] drm/i915: Introduce Jasper Lake PCH

2019-10-14 Thread Vivek Kasireddy

On Mon, 14 Oct 2019 15:43:41 -0700
Matt Roper  wrote:

> The Jasper Lake PCH follows ICP/TGP's south display behavior and is
> identical to MCC graphics-wise except that it does not use the unusual
> (port C -> TC1) pin mapping that MCC does.
> 
> Also, it turns out the extra PCH ID that we had previously thought
> was a form of MCC is actually a second ID for JSP (i.e., port C uses
> the port C pins instead of the TC1 pins).
> 
> v2:
>  - Also update the port masks (not just the pin table) in
>mcc_hpd_irq_setup.  (Vivek)
> 
> Cc: José Roberto de Souza 
> Cc: James Ausmus 
> Cc: Vivek Kasireddy 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/i915_irq.c  | 31 +--
>  drivers/gpu/drm/i915/intel_pch.c |  6 +-
>  drivers/gpu/drm/i915/intel_pch.h |  5 -
>  3 files changed, 34 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c
> b/drivers/gpu/drm/i915/i915_irq.c index d20ca02d3166..81e9ed48ce9f
> 100644 --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2248,11 +2248,18 @@ static void icp_irq_handler(struct
> drm_i915_private *dev_priv, u32 pch_iir) tc_hotplug_trigger = pch_iir
> & SDE_TC_MASK_TGP; tc_port_hotplug_long_detect =
> tgp_tc_port_hotplug_long_detect; pins = hpd_tgp;
> + } else if (HAS_PCH_JSP(dev_priv)) {
> + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP;
> + tc_hotplug_trigger = 0;
> + pins = hpd_tgp;
>   } else if (HAS_PCH_MCC(dev_priv)) {
>   ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
>   tc_hotplug_trigger = pch_iir &
> SDE_TC_HOTPLUG_ICP(PORT_TC1); pins = hpd_icp;
>   } else {
> + WARN(!HAS_PCH_ICP(dev_priv),
> +  "Unrecognized PCH type 0x%x\n",
> INTEL_PCH_TYPE(dev_priv)); +
>   ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
>   tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP;
>   tc_port_hotplug_long_detect =
> icp_tc_port_hotplug_long_detect; @@ -3373,15 +3380,27 @@ static void
> icp_hpd_irq_setup(struct drm_i915_private *dev_priv, }
>  
>  /*
> - * EHL doesn't need most of gen11_hpd_irq_setup, it's handling only
> the
> + * EHL/JSL don't need most of gen11_hpd_irq_setup, they're handling
> only the
>   * equivalent of SDE.
> + *
> + * Note that MCC and JSP have different port C pin mappings, hence
> the use of
> + * ICP's masks & tables (hpd C on TC1) vs TGP's masks & tables (hpd
> C on DDIC)
> + * depending on platform.
>   */
>  static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv)
>  {
> - icp_hpd_irq_setup(dev_priv,
> -   SDE_DDI_MASK_ICP,
> SDE_TC_HOTPLUG_ICP(PORT_TC1),
> -   ICP_DDI_HPD_ENABLE_MASK,
> ICP_TC_HPD_ENABLE(PORT_TC1),
> -   hpd_icp);
> + if (HAS_PCH_JSP(dev_priv))
> + icp_hpd_irq_setup(dev_priv,
> +   SDE_DDI_MASK_TGP, 0,
> +   TGP_DDI_HPD_ENABLE_MASK, 0,
> +   hpd_tgp);
> + else
> + icp_hpd_irq_setup(dev_priv,
> +   SDE_DDI_MASK_ICP,
> +   SDE_TC_HOTPLUG_ICP(PORT_TC1),
> +   ICP_DDI_HPD_ENABLE_MASK,
> +       ICP_TC_HPD_ENABLE(PORT_TC1),
> +   hpd_icp);

Although MCC and JSL PCH are similar, wouldn't it be a bit cleaner if we
had a separate function for JSP? Something like jsp_hpd_irq_setup()...

Regardless, this patch is
Reviewed-by: Vivek Kasireddy 

Thanks,
Vivek

>  }
>  
>  static void gen11_hpd_detection_setup(struct drm_i915_private
> *dev_priv) @@ -4315,7 +4334,7 @@ void intel_irq_init(struct
> drm_i915_private *dev_priv) if (I915_HAS_HOTPLUG(dev_priv))
>   dev_priv->display.hpd_irq_setup =
> i915_hpd_irq_setup; } else {
> - if (HAS_PCH_MCC(dev_priv))
> + if (HAS_PCH_MCC(dev_priv) || HAS_PCH_JSP(dev_priv))
>   dev_priv->display.hpd_irq_setup =
> mcc_hpd_irq_setup; else if (INTEL_GEN(dev_priv) >= 11)
>   dev_priv->display.hpd_irq_setup =
> gen11_hpd_irq_setup; diff --git a/drivers/gpu/drm/i915/intel_pch.c
> b/drivers/gpu/drm/i915/intel_pch.c index 15f8bff141f9..1035d3d46fd8
> 100644 --- a/drivers/gpu/drm/i915/intel_pch.c
> +++ b/drivers/gpu/drm/i915/intel_pch.c
> @@ -79,7 +79,6 @@ intel_pch_type(const struct drm_i915_private
> *dev_priv, unsigned short id) WARN_ON(!IS_ICELAKE(dev_priv));
>   return PCH_ICP;
>   case INTEL_PCH_MCC

Re: [Intel-gfx] [PATCH] drm/i915: Introduce Jasper Lake PCH

2019-10-14 Thread Vivek Kasireddy

On Mon, 14 Oct 2019 14:24:31 -0700
Matt Roper  wrote:

> The Jasper Lake PCH follows ICP/TGP's south display behavior and is
> identical to MCC graphics-wise except that it does not use the unusual
> (port C -> TC1) pin mapping that MCC does.
> 
> Also, it turns out the extra PCH ID that we had previously thought
> was a form of MCC is actually a second ID for JSP (i.e., port C uses
> the port C pins instead of the TC1 pins).
> 
> Cc: José Roberto de Souza 
> Cc: James Ausmus 
> Cc: Vivek Kasireddy 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/i915_irq.c  | 17 ++---
>  drivers/gpu/drm/i915/intel_pch.c |  6 +-
>  drivers/gpu/drm/i915/intel_pch.h |  5 -
>  3 files changed, 23 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c
> b/drivers/gpu/drm/i915/i915_irq.c index d20ca02d3166..de16576bb5fa
> 100644 --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2248,11 +2248,18 @@ static void icp_irq_handler(struct
> drm_i915_private *dev_priv, u32 pch_iir) tc_hotplug_trigger = pch_iir
> & SDE_TC_MASK_TGP; tc_port_hotplug_long_detect =
> tgp_tc_port_hotplug_long_detect; pins = hpd_tgp;
> + } else if (HAS_PCH_JSP(dev_priv)) {
> + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP;
> + tc_hotplug_trigger = 0;
> + pins = hpd_tgp;
>   } else if (HAS_PCH_MCC(dev_priv)) {
>   ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
>   tc_hotplug_trigger = pch_iir &
> SDE_TC_HOTPLUG_ICP(PORT_TC1); pins = hpd_icp;
>   } else {
> + WARN(!HAS_PCH_ICP(dev_priv),
> +  "Unrecognized PCH type 0x%x\n",
> INTEL_PCH_TYPE(dev_priv)); +
>   ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
>   tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP;
>   tc_port_hotplug_long_detect =
> icp_tc_port_hotplug_long_detect; @@ -3373,15 +3380,19 @@ static void
> icp_hpd_irq_setup(struct drm_i915_private *dev_priv, }
>  
>  /*
> - * EHL doesn't need most of gen11_hpd_irq_setup, it's handling only
> the
> + * EHL/JSL don't need most of gen11_hpd_irq_setup, they're handling
> only the
>   * equivalent of SDE.
> + *
> + * Note that MCC and JSP have different port C pin mappings, hence
> the use of
> + * ICP's table (hpd C on TC1) vs TGP's table (hpd C on DDIC)
> depending on
> + * platform.
>   */
>  static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv)
>  {
>   icp_hpd_irq_setup(dev_priv,
> SDE_DDI_MASK_ICP,
> SDE_TC_HOTPLUG_ICP(PORT_TC1), ICP_DDI_HPD_ENABLE_MASK,
> ICP_TC_HPD_ENABLE(PORT_TC1),
> -   hpd_icp);
> +   HAS_PCH_JSP(dev_priv) ? hpd_tgp : hpd_icp);
Unless I am misreading this, shouldn't you change the ddi_mask and
ddi_enable_mask to _TGP as well?

Thanks,
Vivek
>  }
>  
>  static void gen11_hpd_detection_setup(struct drm_i915_private
> *dev_priv) @@ -4315,7 +4326,7 @@ void intel_irq_init(struct
> drm_i915_private *dev_priv) if (I915_HAS_HOTPLUG(dev_priv))
>   dev_priv->display.hpd_irq_setup =
> i915_hpd_irq_setup; } else {
> - if (HAS_PCH_MCC(dev_priv))
> + if (HAS_PCH_MCC(dev_priv) || HAS_PCH_JSP(dev_priv))
>   dev_priv->display.hpd_irq_setup =
> mcc_hpd_irq_setup; else if (INTEL_GEN(dev_priv) >= 11)
>   dev_priv->display.hpd_irq_setup =
> gen11_hpd_irq_setup; diff --git a/drivers/gpu/drm/i915/intel_pch.c
> b/drivers/gpu/drm/i915/intel_pch.c index 15f8bff141f9..1035d3d46fd8
> 100644 --- a/drivers/gpu/drm/i915/intel_pch.c
> +++ b/drivers/gpu/drm/i915/intel_pch.c
> @@ -79,7 +79,6 @@ intel_pch_type(const struct drm_i915_private
> *dev_priv, unsigned short id) WARN_ON(!IS_ICELAKE(dev_priv));
>   return PCH_ICP;
>   case INTEL_PCH_MCC_DEVICE_ID_TYPE:
> - case INTEL_PCH_MCC2_DEVICE_ID_TYPE:
>   DRM_DEBUG_KMS("Found Mule Creek Canyon PCH\n");
>   WARN_ON(!IS_ELKHARTLAKE(dev_priv));
>   return PCH_MCC;
> @@ -87,6 +86,11 @@ intel_pch_type(const struct drm_i915_private
> *dev_priv, unsigned short id) DRM_DEBUG_KMS("Found Tiger Lake LP
> PCH\n"); WARN_ON(!IS_TIGERLAKE(dev_priv));
>   return PCH_TGP;
> + case INTEL_PCH_JSP_DEVICE_ID_TYPE:
> + case INTEL_PCH_JSP2_DEVICE_ID_TYPE:
> + DRM_DEBUG_KMS("Found Jasper Lake PCH\n");
> + WARN_ON(!IS_ELKHARTLAKE(dev_priv));
> + return PCH_JSP;
>   default:
>   return PCH_NONE;
>   }
> diff --git a/drivers/gpu/drm/i915/intel_pc

[Intel-gfx] [PATCH] drm/i915/ehl: Port C's hotplug interrupt is associated with TC1 bits

2019-10-10 Thread Vivek Kasireddy

On some platforms that have the MCC PCH, Port C's hotplug interrupt
bits are mapped to TC1 bits.

Suggested-by: Matt Roper 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_dp.c | 3 +++
 drivers/gpu/drm/i915/i915_irq.c | 8 
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c 
b/drivers/gpu/drm/i915/display/intel_dp.c
index 0e45c61d7331..6594f2af1257 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -5282,6 +5282,9 @@ static bool icl_combo_port_connected(struct 
drm_i915_private *dev_priv,
 {
enum port port = intel_dig_port->base.port;
 
+   if (HAS_PCH_MCC(dev_priv) && port == PORT_C)
+   return I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1);
+
return I915_READ(SDEISR) & SDE_DDI_HOTPLUG_ICP(port);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3af7f7914c40..a7c968b01af3 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2249,8 +2249,8 @@ static void icp_irq_handler(struct drm_i915_private 
*dev_priv, u32 pch_iir)
tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect;
pins = hpd_tgp;
} else if (HAS_PCH_MCC(dev_priv)) {
-   ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP;
-   tc_hotplug_trigger = 0;
+   ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
+   tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1);
pins = hpd_icp;
} else {
ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
@@ -3377,8 +3377,8 @@ static void icp_hpd_irq_setup(struct drm_i915_private 
*dev_priv,
 static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv)
 {
icp_hpd_irq_setup(dev_priv,
- SDE_DDI_MASK_TGP, 0,
- TGP_DDI_HPD_ENABLE_MASK, 0,
+ SDE_DDI_MASK_ICP, SDE_TC_HOTPLUG_ICP(PORT_TC1),
+ ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1),
  hpd_icp);
 }
 
-- 
2.21.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/ehl: Use an id of 4 while accessing DPLL4's CR0 and CR1

2019-07-16 Thread Vivek Kasireddy

Although DPLL4 enable and disable are associated with the MGPLL1_ENABLE
register, we can use the ICL_DPLL_CFGCR0/CR1 macros to access this DPLL's
CR0 and CR1 registers by passing an id of 4 to these macros.

Reported-by: Ville Syrjälä 
Cc: Ville Syrjälä 
Cc: José Roberto de Souza 
Cc: Matt Roper 
Cc: Imre Deak 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 319a26a1ec10..f9bdf8514a53 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -3127,8 +3127,13 @@ static bool icl_pll_get_hw_state(struct drm_i915_private 
*dev_priv,
hw_state->cfgcr0 = I915_READ(TGL_DPLL_CFGCR0(id));
hw_state->cfgcr1 = I915_READ(TGL_DPLL_CFGCR1(id));
} else {
-   hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id));
-   hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id));
+   if (IS_ELKHARTLAKE(dev_priv) && id == DPLL_ID_EHL_DPLL4) {
+   hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(4));
+   hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(4));
+   } else {
+   hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id));
+   hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id));
+   }
}
 
ret = true;
@@ -3169,8 +3174,13 @@ static void icl_dpll_write(struct drm_i915_private 
*dev_priv,
cfgcr0_reg = TGL_DPLL_CFGCR0(id);
cfgcr1_reg = TGL_DPLL_CFGCR1(id);
} else {
-   cfgcr0_reg = ICL_DPLL_CFGCR0(id);
-   cfgcr1_reg = ICL_DPLL_CFGCR1(id);
+   if (IS_ELKHARTLAKE(dev_priv) && id == DPLL_ID_EHL_DPLL4) {
+   cfgcr0_reg = ICL_DPLL_CFGCR0(4);
+   cfgcr1_reg = ICL_DPLL_CFGCR1(4);
+   } else {
+   cfgcr0_reg = ICL_DPLL_CFGCR0(id);
+   cfgcr1_reg = ICL_DPLL_CFGCR1(id);
+   }
}
 
I915_WRITE(cfgcr0_reg, hw_state->cfgcr0);
-- 
2.21.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v10)

2019-07-16 Thread Vivek Kasireddy

On Wed, 10 Jul 2019 21:47:52 +0300
Ville Syrjälä  wrote:
Hi Ville,

> On Wed, Jul 03, 2019 at 04:03:53PM -0700, Vivek Kasireddy wrote:
> > This patch adds support for DPLL4 on EHL, including the
> > following restrictions:
> > 
> > - DPLL4 cannot be used with DDIA (combo port A internal eDP usage).
> >   DPLL4 can be used with other DDIs, including DDID
> >   (combo port A external usage).
> > 
> > - DPLL4 cannot be enabled when DC5 or DC6 are enabled.
> > 
> > - The DPLL4 enable, lock, power enabled, and power state are
> > connected to the MGPLL1_ENABLE register.
> > 
> > v2: (suggestions from Bob Paauwe)
> > - Rework ehl_get_dpll() function to call intel_find_shared_dpll()
> > and iterate twice: once for Combo plls and once for MG plls.
> > 
> > - Use MG pll funcs for DPLL4 instead of creating new ones and modify
> >   mg_pll_enable to include the restrictions for EHL.
> > 
> > v3: Fix compilation error
> > 
> > v4: (suggestions from Lucas and Ville)
> > - Treat DPLL4 as a combo phy PLL and not as MG PLL
> > - Disable DC states when this DPLL is being enabled
> > - Reuse icl_get_dpll instead of creating a separate one for EHL
> > 
> > v5: (suggestion from Ville)
> > - Refcount the DC OFF power domains during the enabling and
> > disabling of this DPLL.
> > 
> > v6: rebase
> > 
> > v7: (suggestion from Imre)
> > - Add a new power domain instead of iterating over the domains
> >   associated with the DC OFF power well.
> > 
> > v8: (Ville and Imre)
> > - Rename POWER_DOMAIN_DPLL4 TO POWER_DOMAIN_DPLL_DC_OFF
> > - Grab a reference in intel_modeset_setup_hw_state() if this
> >   DPLL was already enabled perhaps by BIOS.
> > - Check for the port type instead of the encoder
> > 
> > v9: (Ville)
> > - Move the block of code that grabs a reference to the power domain
> >   POWER_DOMAIN_DPLL_DC_OFF to intel_modeset_readout_hw_state() to
> > ensure that there is a reference present before this DPLL might get
> > disabled.
> > 
> > v10: rebase
> > 
> > Cc: José Roberto de Souza 
> > Cc: Ville Syrjälä 
> > Cc: Matt Roper 
> > Cc: Imre Deak 
> > Signed-off-by: Vivek Kasireddy 
> > ---
> >  drivers/gpu/drm/i915/display/intel_display.c  |  7 +++
> >  .../drm/i915/display/intel_display_power.c|  3 ++
> >  .../drm/i915/display/intel_display_power.h|  1 +
> >  drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 47
> > +-- drivers/gpu/drm/i915/display/intel_dpll_mgr.h
> > |  6 +++ 5 files changed, 60 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_display.c
> > b/drivers/gpu/drm/i915/display/intel_display.c index
> > 919f5ac844c8..557462208462 100644 ---
> > a/drivers/gpu/drm/i915/display/intel_display.c +++
> > b/drivers/gpu/drm/i915/display/intel_display.c @@ -16653,6
> > +16653,13 @@ static void intel_modeset_readout_hw_state(struct
> > drm_device *dev) pll->on = pll->info->funcs->get_hw_state(dev_priv,
> > pll, >state.hw_state);
> > +
> > +   if (IS_ELKHARTLAKE(dev_priv) && pll->on &&
> > +   pll->info->id == DPLL_ID_EHL_DPLL4) {
> > +   pll->wakeref =
> > intel_display_power_get(dev_priv,
> > +
> > POWER_DOMAIN_DPLL_DC_OFF);
> > +   }
> > +
> > pll->state.crtc_mask = 0;
> > for_each_intel_crtc(dev, crtc) {
> > struct intel_crtc_state *crtc_state =
> > diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c
> > b/drivers/gpu/drm/i915/display/intel_display_power.c index
> > c19b958461ca..7437fc71d289 100644 ---
> > a/drivers/gpu/drm/i915/display/intel_display_power.c +++
> > b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -118,6
> > +118,8 @@ intel_display_power_domain_str(enum
> > intel_display_power_domain domain) return "MODESET"; case
> > POWER_DOMAIN_GT_IRQ: return "GT_IRQ";
> > +   case POWER_DOMAIN_DPLL_DC_OFF:
> > +   return "DPLL_DC_OFF";
> > default:
> > MISSING_CASE(domain);
> > return "?";
> > @@ -2455,6 +2457,7 @@ void intel_display_power_put(struct
> > drm_i915_private *dev_priv, ICL_PW_2_POWER_DOMAINS
> > |   \ BIT_ULL(POWER_DOMAIN_MODESET)
> > |   \ BIT_ULL(POWER_DOMAIN_AUX_A)
> > |   \
> > +   BIT_ULL(POWER_DOMAIN_DPLL_DC_OFF) |
> > \ BIT_ULL(POWER_DOMAIN_INIT))
>

[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v10)

2019-07-03 Thread Vivek Kasireddy

This patch adds support for DPLL4 on EHL, including the
following restrictions:

- DPLL4 cannot be used with DDIA (combo port A internal eDP usage).
  DPLL4 can be used with other DDIs, including DDID
  (combo port A external usage).

- DPLL4 cannot be enabled when DC5 or DC6 are enabled.

- The DPLL4 enable, lock, power enabled, and power state are connected
  to the MGPLL1_ENABLE register.

v2: (suggestions from Bob Paauwe)
- Rework ehl_get_dpll() function to call intel_find_shared_dpll() and
  iterate twice: once for Combo plls and once for MG plls.

- Use MG pll funcs for DPLL4 instead of creating new ones and modify
  mg_pll_enable to include the restrictions for EHL.

v3: Fix compilation error

v4: (suggestions from Lucas and Ville)
- Treat DPLL4 as a combo phy PLL and not as MG PLL
- Disable DC states when this DPLL is being enabled
- Reuse icl_get_dpll instead of creating a separate one for EHL

v5: (suggestion from Ville)
- Refcount the DC OFF power domains during the enabling and disabling
  of this DPLL.

v6: rebase

v7: (suggestion from Imre)
- Add a new power domain instead of iterating over the domains
  associated with the DC OFF power well.

v8: (Ville and Imre)
- Rename POWER_DOMAIN_DPLL4 TO POWER_DOMAIN_DPLL_DC_OFF
- Grab a reference in intel_modeset_setup_hw_state() if this
  DPLL was already enabled perhaps by BIOS.
- Check for the port type instead of the encoder

v9: (Ville)
- Move the block of code that grabs a reference to the power domain
  POWER_DOMAIN_DPLL_DC_OFF to intel_modeset_readout_hw_state() to ensure
  that there is a reference present before this DPLL might get disabled.

v10: rebase

Cc: José Roberto de Souza 
Cc: Ville Syrjälä 
Cc: Matt Roper 
Cc: Imre Deak 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_display.c  |  7 +++
 .../drm/i915/display/intel_display_power.c|  3 ++
 .../drm/i915/display/intel_display_power.h|  1 +
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 47 +--
 drivers/gpu/drm/i915/display/intel_dpll_mgr.h |  6 +++
 5 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 919f5ac844c8..557462208462 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -16653,6 +16653,13 @@ static void intel_modeset_readout_hw_state(struct 
drm_device *dev)
 
pll->on = pll->info->funcs->get_hw_state(dev_priv, pll,
>state.hw_state);
+
+   if (IS_ELKHARTLAKE(dev_priv) && pll->on &&
+   pll->info->id == DPLL_ID_EHL_DPLL4) {
+   pll->wakeref = intel_display_power_get(dev_priv,
+  
POWER_DOMAIN_DPLL_DC_OFF);
+   }
+
pll->state.crtc_mask = 0;
for_each_intel_crtc(dev, crtc) {
struct intel_crtc_state *crtc_state =
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c 
b/drivers/gpu/drm/i915/display/intel_display_power.c
index c19b958461ca..7437fc71d289 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -118,6 +118,8 @@ intel_display_power_domain_str(enum 
intel_display_power_domain domain)
return "MODESET";
case POWER_DOMAIN_GT_IRQ:
return "GT_IRQ";
+   case POWER_DOMAIN_DPLL_DC_OFF:
+   return "DPLL_DC_OFF";
default:
MISSING_CASE(domain);
return "?";
@@ -2455,6 +2457,7 @@ void intel_display_power_put(struct drm_i915_private 
*dev_priv,
ICL_PW_2_POWER_DOMAINS |\
BIT_ULL(POWER_DOMAIN_MODESET) | \
BIT_ULL(POWER_DOMAIN_AUX_A) |   \
+   BIT_ULL(POWER_DOMAIN_DPLL_DC_OFF) | \
BIT_ULL(POWER_DOMAIN_INIT))
 
 #define ICL_DDI_IO_A_POWER_DOMAINS (   \
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h 
b/drivers/gpu/drm/i915/display/intel_display_power.h
index ff57b0a7fe59..8f43f7051a16 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.h
+++ b/drivers/gpu/drm/i915/display/intel_display_power.h
@@ -59,6 +59,7 @@ enum intel_display_power_domain {
POWER_DOMAIN_GMBUS,
POWER_DOMAIN_MODESET,
POWER_DOMAIN_GT_IRQ,
+   POWER_DOMAIN_DPLL_DC_OFF,
POWER_DOMAIN_INIT,
 
POWER_DOMAIN_NUM,
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index f953971e7c3b..67cfe836286e 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2905,6 +2905,9 @@ static bool icl_get_combo_phy_dpll(st

[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v9)

2019-06-27 Thread Vivek Kasireddy

This patch adds support for DPLL4 on EHL, including the
following restrictions:

- DPLL4 cannot be used with DDIA (combo port A internal eDP usage).
  DPLL4 can be used with other DDIs, including DDID
  (combo port A external usage).

- DPLL4 cannot be enabled when DC5 or DC6 are enabled.

- The DPLL4 enable, lock, power enabled, and power state are connected
  to the MGPLL1_ENABLE register.

v2: (suggestions from Bob Paauwe)
- Rework ehl_get_dpll() function to call intel_find_shared_dpll() and
  iterate twice: once for Combo plls and once for MG plls.

- Use MG pll funcs for DPLL4 instead of creating new ones and modify
  mg_pll_enable to include the restrictions for EHL.

v3: Fix compilation error

v4: (suggestions from Lucas and Ville)
- Treat DPLL4 as a combo phy PLL and not as MG PLL
- Disable DC states when this DPLL is being enabled
- Reuse icl_get_dpll instead of creating a separate one for EHL

v5: (suggestion from Ville)
- Refcount the DC OFF power domains during the enabling and disabling
  of this DPLL.

v6: rebase

v7: (suggestion from Imre)
- Add a new power domain instead of iterating over the domains
  associated with the DC OFF power well.

v8: (Ville and Imre)
- Rename POWER_DOMAIN_DPLL4 TO POWER_DOMAIN_DPLL_DC_OFF
- Grab a reference in intel_modeset_setup_hw_state() if this
  DPLL was already enabled perhaps by BIOS.
- Check for the port type instead of the encoder

v9: (Ville)
- Move the block of code that grabs a reference to the power domain
  POWER_DOMAIN_DPLL_DC_OFF to intel_modeset_readout_hw_state() to ensure
  that there is a reference present before this DPLL might get disabled.

Cc: José Roberto de Souza 
Cc: Ville Syrjälä 
Cc: Matt Roper 
Cc: Imre Deak 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_display.c  |  7 
 .../drm/i915/display/intel_display_power.c|  3 ++
 .../drm/i915/display/intel_display_power.h|  1 +
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 42 +--
 drivers/gpu/drm/i915/display/intel_dpll_mgr.h |  6 +++
 5 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index e55bd75528c1..3f1ff3bb5e36 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -16569,6 +16569,13 @@ static void intel_modeset_readout_hw_state(struct 
drm_device *dev)
 
pll->on = pll->info->funcs->get_hw_state(dev_priv, pll,
>state.hw_state);
+
+   if (IS_ELKHARTLAKE(dev_priv) && pll->on &&
+   pll->info->id == DPLL_ID_EHL_DPLL4) {
+   pll->wakeref = intel_display_power_get(dev_priv,
+  
POWER_DOMAIN_DPLL_DC_OFF);
+   }
+
pll->state.crtc_mask = 0;
for_each_intel_crtc(dev, crtc) {
struct intel_crtc_state *crtc_state =
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c 
b/drivers/gpu/drm/i915/display/intel_display_power.c
index c93ad512014c..1c101a842331 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -117,6 +117,8 @@ intel_display_power_domain_str(enum 
intel_display_power_domain domain)
return "MODESET";
case POWER_DOMAIN_GT_IRQ:
return "GT_IRQ";
+   case POWER_DOMAIN_DPLL_DC_OFF:
+   return "DPLL_DC_OFF";
default:
MISSING_CASE(domain);
return "?";
@@ -2361,6 +2363,7 @@ void intel_display_power_put(struct drm_i915_private 
*dev_priv,
ICL_PW_2_POWER_DOMAINS |\
BIT_ULL(POWER_DOMAIN_MODESET) | \
BIT_ULL(POWER_DOMAIN_AUX_A) |   \
+   BIT_ULL(POWER_DOMAIN_DPLL_DC_OFF) | \
BIT_ULL(POWER_DOMAIN_INIT))
 
 #define ICL_DDI_IO_A_POWER_DOMAINS (   \
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h 
b/drivers/gpu/drm/i915/display/intel_display_power.h
index ff57b0a7fe59..8f43f7051a16 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.h
+++ b/drivers/gpu/drm/i915/display/intel_display_power.h
@@ -59,6 +59,7 @@ enum intel_display_power_domain {
POWER_DOMAIN_GMBUS,
POWER_DOMAIN_MODESET,
POWER_DOMAIN_GT_IRQ,
+   POWER_DOMAIN_DPLL_DC_OFF,
POWER_DOMAIN_INIT,
 
POWER_DOMAIN_NUM,
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 2d4e7b9a7b9d..81e1443cb583 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2806,6 +2806,10 @@ icl_get_dpll(struct intel_crtc_state

[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v8)

2019-06-21 Thread Vivek Kasireddy

This patch adds support for DPLL4 on EHL, subject to the
following restrictions:

- DPLL4 cannot be used with DDIA (combo port A internal eDP usage).
  DPLL4 can be used with other DDIs, including DDID
  (combo port A external usage).

- DPLL4 cannot be enabled when DC5 or DC6 are enabled.

- The DPLL4 enable, lock, power enabled, and power state are connected
  to the MGPLL1_ENABLE register.

v2: (suggestions from Bob Paauwe)
- Rework ehl_get_dpll() function to call intel_find_shared_dpll() and
  iterate twice: once for Combo plls and once for MG plls.

- Use MG pll funcs for DPLL4 instead of creating new ones and modify
  mg_pll_enable to include the restrictions for EHL.

v3: Fix compilation error

v4: (suggestions from Lucas and Ville)
- Treat DPLL4 as a combo phy PLL and not as MG PLL
- Disable DC states when this DPLL is being enabled
- Reuse icl_get_dpll instead of creating a separate one for EHL

v5: (suggestion from Ville)
- Refcount the DC OFF power domains during the enabling and disabling
  of this DPLL.

v6: rebase

v7: (suggestion from Imre)
- Add a new power domain instead of iterating over the domains
  associated with the DC OFF power well.

v8: (Ville and Imre)
- Rename POWER_DOMAIN_DPLL4 to POWER_DOMAIN_DPLL_DC_OFF
- Grab a reference in intel_modeset_setup_hw_state() if this
  DPLL was already enabled perhaps by BIOS.
- Check for the port type instead of the encoder

Cc: José Roberto de Souza 
Cc: Ville Syrjälä 
Cc: Matt Roper 
Cc: Imre Deak 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/display/intel_display.c  |  6 +++
 .../drm/i915/display/intel_display_power.c|  3 ++
 .../drm/i915/display/intel_display_power.h|  1 +
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 42 +--
 drivers/gpu/drm/i915/display/intel_dpll_mgr.h |  6 +++
 5 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 8592a7d422de..a5f387e486ee 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -16778,6 +16778,12 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
for (i = 0; i < dev_priv->num_shared_dpll; i++) {
struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i];
 
+   if (IS_ELKHARTLAKE(dev_priv) && pll->on &&
+   pll->info->id == DPLL_ID_EHL_DPLL4) {
+   pll->wakeref = intel_display_power_get(dev_priv,
+  
POWER_DOMAIN_DPLL_DC_OFF);
+   }
+
if (!pll->on || pll->active_mask)
continue;
 
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c 
b/drivers/gpu/drm/i915/display/intel_display_power.c
index c93ad512014c..1c101a842331 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -117,6 +117,8 @@ intel_display_power_domain_str(enum 
intel_display_power_domain domain)
return "MODESET";
case POWER_DOMAIN_GT_IRQ:
return "GT_IRQ";
+   case POWER_DOMAIN_DPLL_DC_OFF:
+   return "DPLL_DC_OFF";
default:
MISSING_CASE(domain);
return "?";
@@ -2361,6 +2363,7 @@ void intel_display_power_put(struct drm_i915_private 
*dev_priv,
ICL_PW_2_POWER_DOMAINS |\
BIT_ULL(POWER_DOMAIN_MODESET) | \
BIT_ULL(POWER_DOMAIN_AUX_A) |   \
+   BIT_ULL(POWER_DOMAIN_DPLL_DC_OFF) | \
BIT_ULL(POWER_DOMAIN_INIT))
 
 #define ICL_DDI_IO_A_POWER_DOMAINS (   \
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h 
b/drivers/gpu/drm/i915/display/intel_display_power.h
index ff57b0a7fe59..8f43f7051a16 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.h
+++ b/drivers/gpu/drm/i915/display/intel_display_power.h
@@ -59,6 +59,7 @@ enum intel_display_power_domain {
POWER_DOMAIN_GMBUS,
POWER_DOMAIN_MODESET,
POWER_DOMAIN_GT_IRQ,
+   POWER_DOMAIN_DPLL_DC_OFF,
POWER_DOMAIN_INIT,
 
POWER_DOMAIN_NUM,
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 2d4e7b9a7b9d..81e1443cb583 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2806,6 +2806,10 @@ icl_get_dpll(struct intel_crtc_state *crtc_state,
if (intel_port_is_combophy(dev_priv, port)) {
min = DPLL_ID_ICL_DPLL0;
max = DPLL_ID_ICL_DPLL1;
+
+   if (IS_ELKHARTLAKE(dev_priv) && port != PORT_A)
+   max = DPLL_ID_EHL_DPLL4;
+
ret = icl_calc_dpll_state(crtc_state, encoder

[Intel-gfx] [PATCH] drm/i915/ehl: Add power wells support for Elkhart Lake

2019-06-14 Thread Vivek Kasireddy

The number of power wells and the relevant sequences are common between
ICL and EHL since they both are Gen 11. The only significant differences
are that EHL does not have DDI E and DDI D and type C/TBT ports.

Cc: Clint Taylor 
Cc: José Roberto de Souza 
Cc: Matt Roper 
Cc: Imre Deak 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/intel_display_power.c | 210 -
 1 file changed, 209 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_display_power.c 
b/drivers/gpu/drm/i915/intel_display_power.c
index c672c8080a93..e3ed77b843d2 100644
--- a/drivers/gpu/drm/i915/intel_display_power.c
+++ b/drivers/gpu/drm/i915/intel_display_power.c
@@ -2397,6 +2397,66 @@ void intel_display_power_put(struct drm_i915_private 
*dev_priv,
 #define ICL_AUX_TBT4_IO_POWER_DOMAINS (\
BIT_ULL(POWER_DOMAIN_AUX_TBT4))
 
+#define EHL_PW_4_POWER_DOMAINS (   \
+   BIT_ULL(POWER_DOMAIN_PIPE_C) |  \
+   BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \
+   BIT_ULL(POWER_DOMAIN_INIT))
+   /* VDSC/joining */
+#define EHL_PW_3_POWER_DOMAINS (   \
+   EHL_PW_4_POWER_DOMAINS |\
+   BIT_ULL(POWER_DOMAIN_PIPE_B) |  \
+   BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |\
+   BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |\
+   BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |\
+   BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \
+   BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |\
+   BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) |   \
+   BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |\
+   BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) |   \
+   BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |\
+   BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) |   \
+   BIT_ULL(POWER_DOMAIN_AUX_B) |   \
+   BIT_ULL(POWER_DOMAIN_AUX_C) |   \
+   BIT_ULL(POWER_DOMAIN_AUX_D) |   \
+   BIT_ULL(POWER_DOMAIN_VGA) | \
+   BIT_ULL(POWER_DOMAIN_AUDIO) |   \
+   BIT_ULL(POWER_DOMAIN_INIT))
+   /*
+* - transcoder WD
+* - KVMR (HW control)
+*/
+#define EHL_PW_2_POWER_DOMAINS (   \
+   EHL_PW_3_POWER_DOMAINS |\
+   BIT_ULL(POWER_DOMAIN_TRANSCODER_EDP_VDSC) | \
+   BIT_ULL(POWER_DOMAIN_INIT))
+   /*
+* - KVMR (HW control)
+*/
+#define EHL_DISPLAY_DC_OFF_POWER_DOMAINS ( \
+   EHL_PW_2_POWER_DOMAINS |\
+   BIT_ULL(POWER_DOMAIN_MODESET) | \
+   BIT_ULL(POWER_DOMAIN_AUX_A) |   \
+   BIT_ULL(POWER_DOMAIN_INIT))
+
+#define EHL_DDI_IO_A_POWER_DOMAINS (   \
+   BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO))
+#define EHL_DDI_IO_B_POWER_DOMAINS (   \
+   BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO))
+#define EHL_DDI_IO_C_POWER_DOMAINS (   \
+   BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO))
+#define EHL_DDI_IO_D_POWER_DOMAINS (   \
+   BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO))
+
+#define EHL_AUX_A_IO_POWER_DOMAINS (   \
+   BIT_ULL(POWER_DOMAIN_AUX_IO_A) |\
+   BIT_ULL(POWER_DOMAIN_AUX_A))
+#define EHL_AUX_B_IO_POWER_DOMAINS (   \
+   BIT_ULL(POWER_DOMAIN_AUX_B))
+#define EHL_AUX_C_IO_POWER_DOMAINS (   \
+   BIT_ULL(POWER_DOMAIN_AUX_C))
+#define EHL_AUX_D_IO_POWER_DOMAINS (   \
+   BIT_ULL(POWER_DOMAIN_AUX_D))
+
 static const struct i915_power_well_ops i9xx_always_on_power_well_ops = {
.sync_hw = i9xx_power_well_sync_hw_noop,
.enable = i9xx_always_on_power_well_noop,
@@ -3354,6 +3414,152 @@ static const struct i915_power_well_desc 
icl_power_wells[] = {
},
 };
 
+static const struct i915_power_well_desc ehl_power_wells[] = {
+   {
+   .name = "always-on",
+   .always_on = true,
+   .domains = POWER_DOMAIN_MASK,
+   .ops = &i9xx_always_on_power_well_ops,
+   .id = DISP_PW_ID_NONE,
+   },
+   {
+   .name = "power well 1",
+   /* Handled by the DMC firmware */
+   .always_on = true,
+   .domains = 0,
+   .ops = _power_well_ops,
+   .id = SKL_DISP_PW_1,
+   {
+   .hsw.regs = _power_well_regs,
+   .hsw.idx = ICL_PW_CTL_IDX_PW_1,
+   .hsw.has_fuses = true,
+   },
+   },
+   {
+   .name = "DC off",
+   .domains = EHL_DISPLAY_DC_OFF_POWER_DOMAINS,
+   .ops = _dc_off_power_well_ops,
+   .id = DISP_PW_ID_NONE,
+   },
+   {
+   .name = "power well 2",
+

[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v7)

2019-06-07 Thread Vivek Kasireddy

This patch adds support for DPLL4 on EHL, subject to the
following restrictions:

- DPLL4 cannot be used with DDIA (combo port A internal eDP usage).
  DPLL4 can be used with other DDIs, including DDID
  (combo port A external usage).

- DPLL4 cannot be enabled when DC5 or DC6 are enabled.

- The DPLL4 enable, lock, power enabled, and power state are connected
  to the MGPLL1_ENABLE register.

v2: (suggestions from Bob Paauwe)
- Rework ehl_get_dpll() function to call intel_find_shared_dpll() and
  iterate twice: once for Combo plls and once for MG plls.

- Use MG pll funcs for DPLL4 instead of creating new ones and modify
  mg_pll_enable to include the restrictions for EHL.

v3: Fix compilation error

v4: (suggestions from Lucas and Ville)
- Treat DPLL4 as a combo phy PLL and not as MG PLL
- Disable DC states when this DPLL is being enabled
- Reuse icl_get_dpll instead of creating a separate one for EHL

v5: (suggestion from Ville)
- Refcount the DC OFF power domains during the enabling and disabling
  of this DPLL.

v6: rebase

v7: (suggestion from Imre)
- Add a new power domain instead of iterating over the domains
  associated with the DC OFF power well.

Cc: Lucas De Marchi 
Cc: José Roberto de Souza 
Cc: Ville Syrjälä 
Cc: Matt Roper 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/intel_display_power.c |  3 ++
 drivers/gpu/drm/i915/intel_display_power.h |  1 +
 drivers/gpu/drm/i915/intel_dpll_mgr.c  | 44 --
 drivers/gpu/drm/i915/intel_dpll_mgr.h  |  6 +++
 4 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display_power.c 
b/drivers/gpu/drm/i915/intel_display_power.c
index 278a7edc94f5..2134d8b43f58 100644
--- a/drivers/gpu/drm/i915/intel_display_power.c
+++ b/drivers/gpu/drm/i915/intel_display_power.c
@@ -116,6 +116,8 @@ intel_display_power_domain_str(enum 
intel_display_power_domain domain)
return "MODESET";
case POWER_DOMAIN_GT_IRQ:
return "GT_IRQ";
+   case POWER_DOMAIN_DPLL4:
+   return "DPLL4";
default:
MISSING_CASE(domain);
return "?";
@@ -2357,6 +2359,7 @@ void intel_display_power_put(struct drm_i915_private 
*dev_priv,
ICL_PW_2_POWER_DOMAINS |\
BIT_ULL(POWER_DOMAIN_MODESET) | \
BIT_ULL(POWER_DOMAIN_AUX_A) |   \
+   BIT_ULL(POWER_DOMAIN_DPLL4) |   \
BIT_ULL(POWER_DOMAIN_INIT))
 
 #define ICL_DDI_IO_A_POWER_DOMAINS (   \
diff --git a/drivers/gpu/drm/i915/intel_display_power.h 
b/drivers/gpu/drm/i915/intel_display_power.h
index ff57b0a7fe59..47266279 100644
--- a/drivers/gpu/drm/i915/intel_display_power.h
+++ b/drivers/gpu/drm/i915/intel_display_power.h
@@ -59,6 +59,7 @@ enum intel_display_power_domain {
POWER_DOMAIN_GMBUS,
POWER_DOMAIN_MODESET,
POWER_DOMAIN_GT_IRQ,
+   POWER_DOMAIN_DPLL4,
POWER_DOMAIN_INIT,
 
POWER_DOMAIN_NUM,
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index 69787f259677..3d712f54dc56 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -2806,6 +2806,12 @@ icl_get_dpll(struct intel_crtc_state *crtc_state,
if (intel_port_is_combophy(dev_priv, port)) {
min = DPLL_ID_ICL_DPLL0;
max = DPLL_ID_ICL_DPLL1;
+
+   if (IS_ELKHARTLAKE(dev_priv)) {
+   if (encoder->type != INTEL_OUTPUT_EDP)
+   max = DPLL_ID_EHL_DPLL4;
+   }
+
ret = icl_calc_dpll_state(crtc_state, encoder);
} else if (intel_port_is_tc(dev_priv, port)) {
if (encoder->type == INTEL_OUTPUT_DP_MST) {
@@ -2945,8 +2951,14 @@ static bool combo_pll_get_hw_state(struct 
drm_i915_private *dev_priv,
   struct intel_shared_dpll *pll,
   struct intel_dpll_hw_state *hw_state)
 {
-   return icl_pll_get_hw_state(dev_priv, pll, hw_state,
-   CNL_DPLL_ENABLE(pll->info->id));
+   i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id);
+
+   if (IS_ELKHARTLAKE(dev_priv) &&
+   pll->info->id == DPLL_ID_EHL_DPLL4) {
+   enable_reg = MG_PLL_ENABLE(0);
+   }
+
+   return icl_pll_get_hw_state(dev_priv, pll, hw_state, enable_reg);
 }
 
 static bool tbt_pll_get_hw_state(struct drm_i915_private *dev_priv,
@@ -3057,6 +3069,19 @@ static void combo_pll_enable(struct drm_i915_private 
*dev_priv,
 {
i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id);
 
+   if (IS_ELKHARTLAKE(dev_priv) &&
+   pll->info->id == DPLL_ID_EHL_DPLL4) {
+   enable_reg = MG_PLL_ENABLE(0);
+
+   /*
+* We need t

[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v6)

2019-06-05 Thread Vivek Kasireddy

This patch adds support for DPLL4 on EHL, subject to the
following restrictions:

- DPLL4 cannot be used with DDIA (combo port A internal eDP usage).
  DPLL4 can be used with other DDIs, including DDID
  (combo port A external usage).

- DPLL4 cannot be enabled when DC5 or DC6 are enabled.

- The DPLL4 enable, lock, power enabled, and power state are connected
  to the MGPLL1_ENABLE register.

v2: (suggestions from Bob Paauwe)
- Rework ehl_get_dpll() function to call intel_find_shared_dpll() and
  iterate twice: once for Combo plls and once for MG plls.

- Use MG pll funcs for DPLL4 instead of creating new ones and modify
  mg_pll_enable to include the restrictions for EHL.

v3: Fix compilation error

v4: (suggestions from Lucas and Ville)
- Treat DPLL4 as a combo phy PLL and not as MG PLL
- Disable DC states when this DPLL is being enabled
- Reuse icl_get_dpll instead of creating a separate one for EHL

v5: (suggestion from Ville)
- Refcount the DC OFF power domains during the enabling and disabling
  of this DPLL.

v6: rebase

Cc: Lucas De Marchi 
Cc: José Roberto de Souza 
Cc: Ville Syrjälä 
Cc: Matt Roper 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/intel_display_power.c | 20 +++
 drivers/gpu/drm/i915/intel_display_power.h |  6 
 drivers/gpu/drm/i915/intel_dpll_mgr.c  | 40 +++---
 drivers/gpu/drm/i915/intel_dpll_mgr.h  |  5 +++
 4 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display_power.c 
b/drivers/gpu/drm/i915/intel_display_power.c
index 278a7edc94f5..fd6d0d6a285a 100644
--- a/drivers/gpu/drm/i915/intel_display_power.c
+++ b/drivers/gpu/drm/i915/intel_display_power.c
@@ -4524,6 +4524,26 @@ void intel_power_domains_resume(struct drm_i915_private 
*i915)
intel_power_domains_verify_state(i915);
 }
 
+void icl_disable_dc_states(struct drm_i915_private *dev_priv,
+  struct intel_shared_dpll *pll)
+{
+   enum intel_display_power_domain domain;
+
+   for_each_power_domain(domain, ICL_DISPLAY_DC_OFF_POWER_DOMAINS)
+   pll->wakerefs[domain] = intel_display_power_get(dev_priv,
+   domain);
+}
+
+void icl_enable_dc_states(struct drm_i915_private *dev_priv,
+ struct intel_shared_dpll *pll)
+{
+   enum intel_display_power_domain domain;
+
+   for_each_power_domain(domain, ICL_DISPLAY_DC_OFF_POWER_DOMAINS)
+   intel_display_power_put(dev_priv, domain,
+   pll->wakerefs[domain]);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 
 static void intel_power_domains_dump_info(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/intel_display_power.h 
b/drivers/gpu/drm/i915/intel_display_power.h
index ff57b0a7fe59..2abaa3806ec6 100644
--- a/drivers/gpu/drm/i915/intel_display_power.h
+++ b/drivers/gpu/drm/i915/intel_display_power.h
@@ -12,6 +12,7 @@
 
 struct drm_i915_private;
 struct intel_encoder;
+struct intel_shared_dpll;
 
 enum intel_display_power_domain {
POWER_DOMAIN_DISPLAY_CORE,
@@ -285,4 +286,9 @@ void chv_phy_powergate_lanes(struct intel_encoder *encoder,
 bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy,
  enum dpio_channel ch, bool override);
 
+void icl_disable_dc_states(struct drm_i915_private *dev_priv,
+  struct intel_shared_dpll *pll);
+void icl_enable_dc_states(struct drm_i915_private *dev_priv,
+ struct intel_shared_dpll *pll);
+
 #endif /* __INTEL_DISPLAY_POWER_H__ */
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index 69787f259677..2829b37e2909 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -22,8 +22,8 @@
  */
 
 #include "intel_dpio_phy.h"
-#include "intel_dpll_mgr.h"
 #include "intel_drv.h"
+#include "intel_dpll_mgr.h"
 
 /**
  * DOC: Display PLLs
@@ -2806,6 +2806,12 @@ icl_get_dpll(struct intel_crtc_state *crtc_state,
if (intel_port_is_combophy(dev_priv, port)) {
min = DPLL_ID_ICL_DPLL0;
max = DPLL_ID_ICL_DPLL1;
+
+   if (IS_ELKHARTLAKE(dev_priv)) {
+   if (encoder->type != INTEL_OUTPUT_EDP)
+   max = DPLL_ID_EHL_DPLL4;
+   }
+
ret = icl_calc_dpll_state(crtc_state, encoder);
} else if (intel_port_is_tc(dev_priv, port)) {
if (encoder->type == INTEL_OUTPUT_DP_MST) {
@@ -2945,8 +2951,14 @@ static bool combo_pll_get_hw_state(struct 
drm_i915_private *dev_priv,
   struct intel_shared_dpll *pll,
   struct intel_dpll_hw_state *hw_state)
 {
-   return icl_pll_get_hw_state(dev_priv, pll, hw_state,
-

[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v5)

2019-06-05 Thread Vivek Kasireddy

This patch adds support for DPLL4 on EHL, subject to the
following restrictions:

- DPLL4 cannot be used with DDIA (combo port A internal eDP usage).
  DPLL4 can be used with other DDIs, including DDID
  (combo port A external usage).

- DPLL4 cannot be enabled when DC5 or DC6 are enabled.

- The DPLL4 enable, lock, power enabled, and power state are connected
  to the MGPLL1_ENABLE register.

v2: (suggestions from Bob Paauwe)
- Rework ehl_get_dpll() function to call intel_find_shared_dpll() and
  iterate twice: once for Combo plls and once for MG plls.

- Use MG pll funcs for DPLL4 instead of creating new ones and modify
  mg_pll_enable to include the restrictions for EHL.

v3: Fix compilation error

v4: (suggestions from Lucas and Ville)
- Treat DPLL4 as a combo phy PLL and not as MG PLL
- Disable DC states when this DPLL is being enabled
- Reuse icl_get_dpll instead of creating a separate one for EHL

v5: (suggestion from Ville)
- Refcount the DC OFF power domains during the enabling and disabling
  of this DPLL.

Cc: Lucas De Marchi 
Cc: José Roberto de Souza 
Cc: Ville Syrjälä 
Cc: Matt Roper 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/intel_dpll_mgr.c   | 40 ++---
 drivers/gpu/drm/i915/intel_dpll_mgr.h   |  5 
 drivers/gpu/drm/i915/intel_runtime_pm.c | 21 +
 drivers/gpu/drm/i915/intel_runtime_pm.h |  5 
 4 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index 897d93537414..6d89d231b33d 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -22,8 +22,8 @@
  */
 
 #include "intel_dpio_phy.h"
-#include "intel_dpll_mgr.h"
 #include "intel_drv.h"
+#include "intel_dpll_mgr.h"
 
 /**
  * DOC: Display PLLs
@@ -2806,6 +2806,12 @@ icl_get_dpll(struct intel_crtc_state *crtc_state,
if (intel_port_is_combophy(dev_priv, port)) {
min = DPLL_ID_ICL_DPLL0;
max = DPLL_ID_ICL_DPLL1;
+
+   if (IS_ELKHARTLAKE(dev_priv)) {
+   if (encoder->type != INTEL_OUTPUT_EDP)
+   max = DPLL_ID_EHL_DPLL4;
+   }
+
ret = icl_calc_dpll_state(crtc_state, encoder);
} else if (intel_port_is_tc(dev_priv, port)) {
if (encoder->type == INTEL_OUTPUT_DP_MST) {
@@ -2945,8 +2951,14 @@ static bool combo_pll_get_hw_state(struct 
drm_i915_private *dev_priv,
   struct intel_shared_dpll *pll,
   struct intel_dpll_hw_state *hw_state)
 {
-   return icl_pll_get_hw_state(dev_priv, pll, hw_state,
-   CNL_DPLL_ENABLE(pll->info->id));
+   i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id);
+
+   if (IS_ELKHARTLAKE(dev_priv) &&
+   pll->info->id == DPLL_ID_EHL_DPLL4) {
+   enable_reg = MG_PLL_ENABLE(0);
+   }
+
+   return icl_pll_get_hw_state(dev_priv, pll, hw_state, enable_reg);
 }
 
 static bool tbt_pll_get_hw_state(struct drm_i915_private *dev_priv,
@@ -3057,6 +3069,14 @@ static void combo_pll_enable(struct drm_i915_private 
*dev_priv,
 {
i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id);
 
+   if (IS_ELKHARTLAKE(dev_priv) &&
+   pll->info->id == DPLL_ID_EHL_DPLL4) {
+   enable_reg = MG_PLL_ENABLE(0);
+
+   /* Need to disable DC states when this DPLL is enabled. */
+   icl_disable_dc_states(dev_priv, pll);
+   }
+
icl_pll_power_enable(dev_priv, pll, enable_reg);
 
icl_dpll_write(dev_priv, pll);
@@ -3152,7 +3172,18 @@ static void icl_pll_disable(struct drm_i915_private 
*dev_priv,
 static void combo_pll_disable(struct drm_i915_private *dev_priv,
  struct intel_shared_dpll *pll)
 {
-   icl_pll_disable(dev_priv, pll, CNL_DPLL_ENABLE(pll->info->id));
+   i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id);
+
+   if (IS_ELKHARTLAKE(dev_priv) &&
+   pll->info->id == DPLL_ID_EHL_DPLL4) {
+   enable_reg = MG_PLL_ENABLE(0);
+   icl_pll_disable(dev_priv, pll, enable_reg);
+
+   icl_enable_dc_states(dev_priv, pll);
+   return;
+   }
+
+   icl_pll_disable(dev_priv, pll, enable_reg);
 }
 
 static void tbt_pll_disable(struct drm_i915_private *dev_priv,
@@ -3230,6 +3261,7 @@ static const struct intel_dpll_mgr icl_pll_mgr = {
 static const struct dpll_info ehl_plls[] = {
{ "DPLL 0", _pll_funcs, DPLL_ID_ICL_DPLL0, 0 },
{ "DPLL 1", _pll_funcs, DPLL_ID_ICL_DPLL1, 0 },
+   { "DPLL 4", _pll_funcs, DPLL_ID_EHL_DPLL4, 0 },
{ },
 };
 
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h 
b/drivers/gpu/drm/i915/intel_dpll_mgr.h
index 8835dd20f1d2.

Re: [Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v4)

2019-04-23 Thread Vivek Kasireddy

On Wed, 17 Apr 2019 16:06:11 +0300
Ville Syrjälä  wrote:
Hi Ville,

> On Thu, Apr 11, 2019 at 04:36:00PM -0700, Vivek Kasireddy wrote:
> > This patch adds support for DPLL4 on EHL that include the
> > following restrictions:
> > 
> > - DPLL4 cannot be used with DDIA (combo port A internal eDP usage).
> >   DPLL4 can be used with other DDIs, including DDID
> >   (combo port A external usage).
> > 
> > - DPLL4 cannot be enabled when DC5 or DC6 are enabled.
> > 
> > - The DPLL4 enable, lock, power enabled, and power state are
> > connected to the MGPLL1_ENABLE register.
> > 
> > v2: (suggestions from Bob Paauwe)
> > - Rework ehl_get_dpll() function to call intel_find_shared_dpll()
> > and iterate twice: once for Combo plls and once for MG plls.
> > 
> > - Use MG pll funcs for DPLL4 instead of creating new ones and modify
> >   mg_pll_enable to include the restrictions for EHL.
> > 
> > v3: Fix compilation error
> > 
> > v4: (suggestions from Lucas and Ville)
> > - Treat DPLL4 as a combo phy PLL and not as MG PLL
> > - Disable DC states when this DPLL is being enabled
> > - Reuse icl_get_dpll instead of creating a separate one for EHL
> > 
> > Cc: Lucas De Marchi 
> > Cc: José Roberto de Souza 
> > Cc: Bob Paauwe 
> > Signed-off-by: Vivek Kasireddy 
> > ---
> >  drivers/gpu/drm/i915/intel_dpll_mgr.c | 35
> > ---
> > drivers/gpu/drm/i915/intel_dpll_mgr.h |  4  2 files changed, 36
> > insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c
> > b/drivers/gpu/drm/i915/intel_dpll_mgr.c index
> > e01c057ce50b..207af4af4978 100644 ---
> > a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++
> > b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2825,6 +2825,12 @@
> > icl_get_dpll(struct intel_crtc_state *crtc_state, if
> > (intel_port_is_combophy(dev_priv, port)) { min = DPLL_ID_ICL_DPLL0;
> > max = DPLL_ID_ICL_DPLL1;
> > +
> > +   if (IS_ELKHARTLAKE(dev_priv)) {
> > +   if (encoder->type != INTEL_OUTPUT_EDP)
> > +   max = DPLL_ID_EHL_DPLL4;
> > +   }
> > +
> > ret = icl_calc_dpll_state(crtc_state, encoder);
> > } else if (intel_port_is_tc(dev_priv, port)) {
> > if (encoder->type == INTEL_OUTPUT_DP_MST) {
> > @@ -2964,8 +2970,14 @@ static bool combo_pll_get_hw_state(struct
> > drm_i915_private *dev_priv, struct intel_shared_dpll *pll,
> >struct intel_dpll_hw_state
> > *hw_state) {
> > -   return icl_pll_get_hw_state(dev_priv, pll, hw_state,
> > -
> > CNL_DPLL_ENABLE(pll->info->id));
> > +   i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id);
> > +
> > +   if (IS_ELKHARTLAKE(dev_priv) &&
> > +   pll->info->id == DPLL_ID_EHL_DPLL4) {
> > +   enable_reg = MG_PLL_ENABLE(0);
> > +   }
> > +
> > +   return icl_pll_get_hw_state(dev_priv, pll, hw_state,
> > enable_reg); }
> >  
> >  static bool tbt_pll_get_hw_state(struct drm_i915_private *dev_priv,
> > @@ -3076,6 +3088,14 @@ static void combo_pll_enable(struct
> > drm_i915_private *dev_priv, {
> > i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id);
> >  
> > +   if (IS_ELKHARTLAKE(dev_priv) &&
> > +   pll->info->id == DPLL_ID_EHL_DPLL4) {
> > +   enable_reg = MG_PLL_ENABLE(0);
> > +
> > +   /* Need to disable DC states when this DPLL is
> > enabled. */
> > +   bxt_disable_dc9(dev_priv);  
> 
> You can't simply call that from random places. It needs to be handled
> by the power domain stuff.
The only other places in the driver where bxt_disable/enable_dc9 are
called are intel_runtime_suspend/resume and
i915_drm_suspend_late/resume_early. Are you suggesting that I call one
of these functions instead? Or do you simply want me to pair
bxt_*able_dc9 with intel_power_domains_suspend/resume and/or other
functions, similar to what the above-mentioned functions do?

Thanks,
Vivek

> 
> > +   }
> > +
> > icl_pll_power_enable(dev_priv, pll, enable_reg);
> >  
> > icl_dpll_write(dev_priv, pll);
> > @@ -3171,7 +3191,15 @@ static void icl_pll_disable(struct
> > drm_i915_private *dev_priv, static void combo_pll_disable(struct
> > drm_i915_private *dev_priv, struct intel_shared_dpll *pll)
> >  {
> > -   icl_pll_disable(dev_priv, pll,
> > CNL_DPLL_ENABLE(pll->info->id));
> > +   i915_reg_t enable_reg = CNL_DPLL_ENABLE(

[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v4)

2019-04-11 Thread Vivek Kasireddy

This patch adds support for DPLL4 on EHL, subject to the
following restrictions:

- DPLL4 cannot be used with DDIA (combo port A internal eDP usage).
  DPLL4 can be used with other DDIs, including DDID
  (combo port A external usage).

- DPLL4 cannot be enabled when DC5 or DC6 are enabled.

- The DPLL4 enable, lock, power enabled, and power state are connected
  to the MGPLL1_ENABLE register.

v2: (suggestions from Bob Paauwe)
- Rework ehl_get_dpll() function to call intel_find_shared_dpll() and
  iterate twice: once for Combo plls and once for MG plls.

- Use MG pll funcs for DPLL4 instead of creating new ones and modify
  mg_pll_enable to include the restrictions for EHL.

v3: Fix compilation error

v4: (suggestions from Lucas and Ville)
- Treat DPLL4 as a combo phy PLL and not as MG PLL
- Disable DC states when this DPLL is being enabled
- Reuse icl_get_dpll instead of creating a separate one for EHL

Cc: Lucas De Marchi 
Cc: José Roberto de Souza 
Cc: Bob Paauwe 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/intel_dpll_mgr.c | 35 ---
 drivers/gpu/drm/i915/intel_dpll_mgr.h |  4 
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index e01c057ce50b..207af4af4978 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -2825,6 +2825,12 @@ icl_get_dpll(struct intel_crtc_state *crtc_state,
if (intel_port_is_combophy(dev_priv, port)) {
min = DPLL_ID_ICL_DPLL0;
max = DPLL_ID_ICL_DPLL1;
+
+   if (IS_ELKHARTLAKE(dev_priv)) {
+   if (encoder->type != INTEL_OUTPUT_EDP)
+   max = DPLL_ID_EHL_DPLL4;
+   }
+
ret = icl_calc_dpll_state(crtc_state, encoder);
} else if (intel_port_is_tc(dev_priv, port)) {
if (encoder->type == INTEL_OUTPUT_DP_MST) {
@@ -2964,8 +2970,14 @@ static bool combo_pll_get_hw_state(struct 
drm_i915_private *dev_priv,
   struct intel_shared_dpll *pll,
   struct intel_dpll_hw_state *hw_state)
 {
-   return icl_pll_get_hw_state(dev_priv, pll, hw_state,
-   CNL_DPLL_ENABLE(pll->info->id));
+   i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id);
+
+   if (IS_ELKHARTLAKE(dev_priv) &&
+   pll->info->id == DPLL_ID_EHL_DPLL4) {
+   enable_reg = MG_PLL_ENABLE(0);
+   }
+
+   return icl_pll_get_hw_state(dev_priv, pll, hw_state, enable_reg);
 }
 
 static bool tbt_pll_get_hw_state(struct drm_i915_private *dev_priv,
@@ -3076,6 +3088,14 @@ static void combo_pll_enable(struct drm_i915_private 
*dev_priv,
 {
i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id);
 
+   if (IS_ELKHARTLAKE(dev_priv) &&
+   pll->info->id == DPLL_ID_EHL_DPLL4) {
+   enable_reg = MG_PLL_ENABLE(0);
+
+   /* Need to disable DC states when this DPLL is enabled. */
+   bxt_disable_dc9(dev_priv);
+   }
+
icl_pll_power_enable(dev_priv, pll, enable_reg);
 
icl_dpll_write(dev_priv, pll);
@@ -3171,7 +3191,15 @@ static void icl_pll_disable(struct drm_i915_private 
*dev_priv,
 static void combo_pll_disable(struct drm_i915_private *dev_priv,
  struct intel_shared_dpll *pll)
 {
-   icl_pll_disable(dev_priv, pll, CNL_DPLL_ENABLE(pll->info->id));
+   i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id);
+
+   if (IS_ELKHARTLAKE(dev_priv) &&
+   pll->info->id == DPLL_ID_EHL_DPLL4) {
+   enable_reg = MG_PLL_ENABLE(0);
+   bxt_enable_dc9(dev_priv);
+   }
+
+   icl_pll_disable(dev_priv, pll, enable_reg);
 }
 
 static void tbt_pll_disable(struct drm_i915_private *dev_priv,
@@ -3249,6 +3277,7 @@ static const struct intel_dpll_mgr icl_pll_mgr = {
 static const struct dpll_info ehl_plls[] = {
{ "DPLL 0", _pll_funcs, DPLL_ID_ICL_DPLL0, 0 },
{ "DPLL 1", _pll_funcs, DPLL_ID_ICL_DPLL1, 0 },
+   { "DPLL 4", _pll_funcs, DPLL_ID_EHL_DPLL4, 0 },
{ },
 };
 
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h 
b/drivers/gpu/drm/i915/intel_dpll_mgr.h
index bd8124cc81ed..f3f99929cee8 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.h
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h
@@ -113,6 +113,10 @@ enum intel_dpll_id {
 * @DPLL_ID_ICL_DPLL1: ICL combo PHY DPLL1
 */
DPLL_ID_ICL_DPLL1 = 1,
+   /**
+* @DPLL_ID_EHL_DPLL4: EHL combo PHY DPLL4
+*/
+   DPLL_ID_EHL_DPLL4 = 2,
/**
 * @DPLL_ID_ICL_TBTPLL: ICL TBT PLL
 */
-- 
2.14.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v3)

2019-04-09 Thread Vivek Kasireddy

On Mon, 8 Apr 2019 12:11:15 +0300
Ville Syrjälä  wrote:
Hi,

> On Fri, Apr 05, 2019 at 04:33:30PM -0700, Vivek Kasireddy wrote:
> > On Fri, 5 Apr 2019 21:39:11 +0300
> > Ville Syrjälä  wrote:
> > Hi Ville,
> >   
> > > On Fri, Apr 05, 2019 at 09:33:56PM +0300, Ville Syrjälä wrote:  
> > > > On Fri, Apr 05, 2019 at 10:59:53AM -0700, Vivek Kasireddy
> > > > wrote:
> > > > > This patch adds support for DPLL4 on EHL that include the
> > > > > following restrictions:
> > > > > 
> > > > > - DPLL4 cannot be used with DDIA (combo port A internal eDP
> > > > > usage). DPLL4 can be used with other DDIs, including DDID
> > > > >   (combo port A external usage).
> > > > > 
> > > > > - DPLL4 cannot be enabled when DC5 or DC6 are enabled.
> > > > > 
> > > > > - The DPLL4 enable, lock, power enabled, and power state are
> > > > > connected to the MGPLL1_ENABLE register.
> > > > > 
> > > > > v2: (suggestions from Bob Paauwe)
> > > > > - Rework ehl_get_dpll() function to call
> > > > > intel_find_shared_dpll() and iterate twice: once for Combo
> > > > > plls and once for MG plls.
> > > > > 
> > > > > - Use MG pll funcs for DPLL4 instead of creating new ones and
> > > > > modify mg_pll_enable to include the restrictions for EHL.
> > > > > 
> > > > > v3: Fix compilation error
> > > > > 
> > > > > Cc: Lucas De Marchi 
> > > > > Signed-off-by: Vivek Kasireddy 
> > > > > Reviewed-by: Bob Paauwe 
> > > > > ---
> > > > >  drivers/gpu/drm/i915/intel_dpll_mgr.c | 60
> > > > > ++- 1 file changed, 59
> > > > > insertions(+), 1 deletion(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c
> > > > > b/drivers/gpu/drm/i915/intel_dpll_mgr.c index
> > > > > e01c057ce50b..c3f0b9720c54 100644 ---
> > > > > a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++
> > > > > b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2870,6 +2870,56 @@
> > > > > icl_get_dpll(struct intel_crtc_state *crtc_state, return pll;
> > > > >  }
> > > > >  
> > > > > +static struct intel_shared_dpll *
> > > > > +ehl_get_dpll(struct intel_crtc_state *crtc_state,
> > > > > +  struct intel_encoder *encoder)
> > > > > +{
> > > > > + struct drm_i915_private *dev_priv =
> > > > > to_i915(crtc_state->base.crtc->dev);
> > > > > + struct intel_shared_dpll *pll;
> > > > > + enum port port = encoder->port;
> > > > > + enum intel_dpll_id min, max;
> > > > > + bool ret;
> > > > > +
> > > > > + if (!intel_port_is_combophy(dev_priv, port)) {
> > > > > + MISSING_CASE(port);
> > > > > + return NULL;
> > > > > + }
> > > > > +
> > > > > + min = DPLL_ID_ICL_DPLL0;
> > > > > + max = DPLL_ID_ICL_DPLL1;
> > > > > + ret = icl_calc_dpll_state(crtc_state, encoder);
> > > > > + if (ret) {
> > > > > + pll = intel_find_shared_dpll(crtc_state, min,
> > > > > max);
> > > > > + if (pll) {
> > > > > + intel_reference_shared_dpll(pll,
> > > > > crtc_state);
> > > > > + return pll;
> > > > > + }
> > > > > + } else {
> > > > > + DRM_DEBUG_KMS("Could not calculate PLL
> > > > > state.\n");
> > > > > + }
> > > > > +
> > > > > + if (encoder->type == INTEL_OUTPUT_EDP) {
> > > > > + DRM_DEBUG_KMS("Cannot use DPLL4 with
> > > > > EDP.\n");
> > > > > + return NULL;
> > > > > + }
> > > > > +
> > > > > + min = max = DPLL_ID_ICL_MGPLL1;
> > > > > + ret = icl_calc_mg_pll_state(crtc_state);
> > > > > + if (!ret) {
> > > > > + DRM_DEBUG_KMS("Could not calculate PLL
> > > > > state.\n");
> > > > > + return NULL;
> > > >

Re: [Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v3)

2019-04-09 Thread Vivek Kasireddy

On Fri, 5 Apr 2019 17:46:38 -0700
Lucas De Marchi  wrote:
Hi,

> On Fri, Apr 05, 2019 at 10:59:53AM -0700, Vivek Kasireddy wrote:
> >This patch adds support for DPLL4 on EHL that include the
> >following restrictions:
> >
> >- DPLL4 cannot be used with DDIA (combo port A internal eDP usage).
> >  DPLL4 can be used with other DDIs, including DDID
> >  (combo port A external usage).
> >
> >- DPLL4 cannot be enabled when DC5 or DC6 are enabled.
> >
> >- The DPLL4 enable, lock, power enabled, and power state are
> >connected
> >  to the MGPLL1_ENABLE register.  
> 
> ok
> 
> >
> >v2: (suggestions from Bob Paauwe)
> >- Rework ehl_get_dpll() function to call intel_find_shared_dpll() and
> >  iterate twice: once for Combo plls and once for MG plls.
> >
> >- Use MG pll funcs for DPLL4 instead of creating new ones and modify
> >  mg_pll_enable to include the restrictions for EHL.  
> 
> these 2 don't match spec.
> 
> "3rd PLL for use with combo PHY (DPLL4) and 3rd combo PHY DDI clocks
> (DDIC clock)"
> 
> This is a combophy pll, not a mg phy pll. The only thing that is
> hooked to mg registers is the enable. So my understanding is that
> what you need:
> 
>   - use the dpll calculations
>   - make sure intel_find_shared_dpll doesn't select this if it's for eDP
>   - setup the enable/disable to use MG_ENABLE register
Looks like my interpretation of the spec is different from yours but
your comments make sense. Should I create a new ID for this DPLL
or just re-use DPLL_ID_ICL_MGPLL1?

> 
> >
> >v3: Fix compilation error
> >
> >Cc: Lucas De Marchi 
> >Signed-off-by: Vivek Kasireddy 
> >Reviewed-by: Bob Paauwe 
> >---
> > drivers/gpu/drm/i915/intel_dpll_mgr.c | 60
> > ++- 1 file changed, 59
> > insertions(+), 1 deletion(-)
> >
> >diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c
> >b/drivers/gpu/drm/i915/intel_dpll_mgr.c index
> >e01c057ce50b..c3f0b9720c54 100644 ---
> >a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++
> >b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2870,6 +2870,56 @@
> >icl_get_dpll(struct intel_crtc_state *crtc_state,
> > return pll;
> > }
> >
> >+static struct intel_shared_dpll *
> >+ehl_get_dpll(struct intel_crtc_state *crtc_state,
> >+ struct intel_encoder *encoder)
> >+{
> >+struct drm_i915_private *dev_priv =
> >to_i915(crtc_state->base.crtc->dev);
> >+struct intel_shared_dpll *pll;
> >+enum port port = encoder->port;
> >+enum intel_dpll_id min, max;
> >+bool ret;
> >+
> >+if (!intel_port_is_combophy(dev_priv, port)) {
> >+MISSING_CASE(port);
> >+return NULL;
> >+}
> >+
> >+min = DPLL_ID_ICL_DPLL0;
> >+max = DPLL_ID_ICL_DPLL1;
> >+ret = icl_calc_dpll_state(crtc_state, encoder);
> >+if (ret) {
> >+pll = intel_find_shared_dpll(crtc_state, min, max);
> >+if (pll) {
> >+intel_reference_shared_dpll(pll,
> >crtc_state);
> >+return pll;
> >+}
> >+} else {
> >+DRM_DEBUG_KMS("Could not calculate PLL state.\n");  
> 
> the check for ret is swapped and you are missing a return here.
Unless I am reading it utterly wrong, icl_get_dpll has this:
if (!ret) {
DRM_DEBUG_KMS("Could not calculate PLL state.\n");
return NULL;

> 
> But given the comments above, I think it would be better to reuse
> icl_get_dpll() rather than what you are doing here.
I could have used icl_get_dpll() but thought it would be much cleaner
to have a separate function for EHL; otherwise, I guess I need to
sprinkle icl_get_dpll with many if(EHL) statements.

> 
> >+}
> >+
> >+if (encoder->type == INTEL_OUTPUT_EDP) {
> >+DRM_DEBUG_KMS("Cannot use DPLL4 with EDP.\n");
> >+return NULL;
> >+}  
> 
> this is already too late
The idea was that if EDP is being used, we first check whether one of
the combo PHY DPLLs is available. If neither is, we come here and
return, as we cannot use this one either.

> 
> >+
> >+min = max = DPLL_ID_ICL_MGPLL1;
> >+ret = icl_calc_mg_pll_state(crtc_state);
> >+if (!ret) {
> >+DRM_DEBUG_KMS("Could not calculate PLL state.\n");
> >+return NULL;  
> 
> again... ret == 0 is success, not otherwise
I'll send out a v4 with your suggestions soon.

Thanks,
Vivek
>

Re: [Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v3)

2019-04-05 Thread Vivek Kasireddy

On Fri, 5 Apr 2019 21:39:11 +0300
Ville Syrjälä  wrote:
Hi Ville,

> On Fri, Apr 05, 2019 at 09:33:56PM +0300, Ville Syrjälä wrote:
> > On Fri, Apr 05, 2019 at 10:59:53AM -0700, Vivek Kasireddy wrote:  
> > > This patch adds support for DPLL4 on EHL that include the
> > > following restrictions:
> > > 
> > > - DPLL4 cannot be used with DDIA (combo port A internal eDP
> > > usage). DPLL4 can be used with other DDIs, including DDID
> > >   (combo port A external usage).
> > > 
> > > - DPLL4 cannot be enabled when DC5 or DC6 are enabled.
> > > 
> > > - The DPLL4 enable, lock, power enabled, and power state are
> > > connected to the MGPLL1_ENABLE register.
> > > 
> > > v2: (suggestions from Bob Paauwe)
> > > - Rework ehl_get_dpll() function to call intel_find_shared_dpll()
> > > and iterate twice: once for Combo plls and once for MG plls.
> > > 
> > > - Use MG pll funcs for DPLL4 instead of creating new ones and
> > > modify mg_pll_enable to include the restrictions for EHL.
> > > 
> > > v3: Fix compilation error
> > > 
> > > Cc: Lucas De Marchi 
> > > Signed-off-by: Vivek Kasireddy 
> > > Reviewed-by: Bob Paauwe 
> > > ---
> > >  drivers/gpu/drm/i915/intel_dpll_mgr.c | 60
> > > ++- 1 file changed, 59
> > > insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c
> > > b/drivers/gpu/drm/i915/intel_dpll_mgr.c index
> > > e01c057ce50b..c3f0b9720c54 100644 ---
> > > a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++
> > > b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2870,6 +2870,56 @@
> > > icl_get_dpll(struct intel_crtc_state *crtc_state, return pll;
> > >  }
> > >  
> > > +static struct intel_shared_dpll *
> > > +ehl_get_dpll(struct intel_crtc_state *crtc_state,
> > > +  struct intel_encoder *encoder)
> > > +{
> > > + struct drm_i915_private *dev_priv =
> > > to_i915(crtc_state->base.crtc->dev);
> > > + struct intel_shared_dpll *pll;
> > > + enum port port = encoder->port;
> > > + enum intel_dpll_id min, max;
> > > + bool ret;
> > > +
> > > + if (!intel_port_is_combophy(dev_priv, port)) {
> > > + MISSING_CASE(port);
> > > + return NULL;
> > > + }
> > > +
> > > + min = DPLL_ID_ICL_DPLL0;
> > > + max = DPLL_ID_ICL_DPLL1;
> > > + ret = icl_calc_dpll_state(crtc_state, encoder);
> > > + if (ret) {
> > > + pll = intel_find_shared_dpll(crtc_state, min,
> > > max);
> > > + if (pll) {
> > > + intel_reference_shared_dpll(pll,
> > > crtc_state);
> > > + return pll;
> > > + }
> > > + } else {
> > > + DRM_DEBUG_KMS("Could not calculate PLL
> > > state.\n");
> > > + }
> > > +
> > > + if (encoder->type == INTEL_OUTPUT_EDP) {
> > > + DRM_DEBUG_KMS("Cannot use DPLL4 with EDP.\n");
> > > + return NULL;
> > > + }
> > > +
> > > + min = max = DPLL_ID_ICL_MGPLL1;
> > > + ret = icl_calc_mg_pll_state(crtc_state);
> > > + if (!ret) {
> > > + DRM_DEBUG_KMS("Could not calculate PLL
> > > state.\n");
> > > + return NULL;
> > > + }
> > > +
> > > + pll = intel_find_shared_dpll(crtc_state, min, max);
> > > + if (!pll) {
> > > + DRM_DEBUG_KMS("No PLL selected\n");
> > > + return NULL;
> > > + }
> > > +
> > > + intel_reference_shared_dpll(pll, crtc_state);
> > > + return pll;
> > > +}
> > > +
> > >  static bool mg_pll_get_hw_state(struct drm_i915_private
> > > *dev_priv, struct intel_shared_dpll *pll,
> > >   struct intel_dpll_hw_state
> > > *hw_state) @@ -3115,6 +3165,13 @@ static void
> > > mg_pll_enable(struct drm_i915_private *dev_priv, i915_reg_t
> > > enable_reg = MG_PLL_ENABLE(icl_pll_id_to_tc_port(pll->info->id));
> > >  
> > > + if (IS_ELKHARTLAKE(dev_priv) &&
> > > +(I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5 ||
> > > + I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC6)) {
> > > + DRM_ERROR("Cant enable DPLL4 when DC5 or DC6 are
> > > enabled\n");
> > > + return;
> > >

[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v3)

2019-04-05 Thread Vivek Kasireddy

This patch adds support for DPLL4 on EHL, subject to the
following restrictions:

- DPLL4 cannot be used with DDIA (combo port A internal eDP usage).
  DPLL4 can be used with other DDIs, including DDID
  (combo port A external usage).

- DPLL4 cannot be enabled when DC5 or DC6 are enabled.

- The DPLL4 enable, lock, power enabled, and power state are connected
  to the MGPLL1_ENABLE register.

v2: (suggestions from Bob Paauwe)
- Rework ehl_get_dpll() function to call intel_find_shared_dpll() and
  iterate twice: once for Combo plls and once for MG plls.

- Use MG pll funcs for DPLL4 instead of creating new ones and modify
  mg_pll_enable to include the restrictions for EHL.

v3: Fix compilation error

Cc: Lucas De Marchi 
Signed-off-by: Vivek Kasireddy 
Reviewed-by: Bob Paauwe 
---
 drivers/gpu/drm/i915/intel_dpll_mgr.c | 60 ++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index e01c057ce50b..c3f0b9720c54 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -2870,6 +2870,56 @@ icl_get_dpll(struct intel_crtc_state *crtc_state,
return pll;
 }
 
+static struct intel_shared_dpll *
+ehl_get_dpll(struct intel_crtc_state *crtc_state,
+struct intel_encoder *encoder)
+{
+   struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+   struct intel_shared_dpll *pll;
+   enum port port = encoder->port;
+   enum intel_dpll_id min, max;
+   bool ret;
+
+   if (!intel_port_is_combophy(dev_priv, port)) {
+   MISSING_CASE(port);
+   return NULL;
+   }
+
+   min = DPLL_ID_ICL_DPLL0;
+   max = DPLL_ID_ICL_DPLL1;
+   ret = icl_calc_dpll_state(crtc_state, encoder);
+   if (ret) {
+   pll = intel_find_shared_dpll(crtc_state, min, max);
+   if (pll) {
+   intel_reference_shared_dpll(pll, crtc_state);
+   return pll;
+   }
+   } else {
+   DRM_DEBUG_KMS("Could not calculate PLL state.\n");
+   }
+
+   if (encoder->type == INTEL_OUTPUT_EDP) {
+   DRM_DEBUG_KMS("Cannot use DPLL4 with EDP.\n");
+   return NULL;
+   }
+
+   min = max = DPLL_ID_ICL_MGPLL1;
+   ret = icl_calc_mg_pll_state(crtc_state);
+   if (!ret) {
+   DRM_DEBUG_KMS("Could not calculate PLL state.\n");
+   return NULL;
+   }
+
+   pll = intel_find_shared_dpll(crtc_state, min, max);
+   if (!pll) {
+   DRM_DEBUG_KMS("No PLL selected\n");
+   return NULL;
+   }
+
+   intel_reference_shared_dpll(pll, crtc_state);
+   return pll;
+}
+
 static bool mg_pll_get_hw_state(struct drm_i915_private *dev_priv,
struct intel_shared_dpll *pll,
struct intel_dpll_hw_state *hw_state)
@@ -3115,6 +3165,13 @@ static void mg_pll_enable(struct drm_i915_private 
*dev_priv,
i915_reg_t enable_reg =
MG_PLL_ENABLE(icl_pll_id_to_tc_port(pll->info->id));
 
+   if (IS_ELKHARTLAKE(dev_priv) &&
+  (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5 ||
+   I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC6)) {
+   DRM_ERROR("Cant enable DPLL4 when DC5 or DC6 are enabled\n");
+   return;
+   }
+
icl_pll_power_enable(dev_priv, pll, enable_reg);
 
icl_mg_pll_write(dev_priv, pll);
@@ -3249,12 +3306,13 @@ static const struct intel_dpll_mgr icl_pll_mgr = {
 static const struct dpll_info ehl_plls[] = {
{ "DPLL 0", _pll_funcs, DPLL_ID_ICL_DPLL0, 0 },
{ "DPLL 1", _pll_funcs, DPLL_ID_ICL_DPLL1, 0 },
+   { "DPLL 4", _pll_funcs, DPLL_ID_ICL_MGPLL1,0 },
{ },
 };
 
 static const struct intel_dpll_mgr ehl_pll_mgr = {
.dpll_info = ehl_plls,
-   .get_dpll = icl_get_dpll,
+   .get_dpll = ehl_get_dpll,
.dump_hw_state = icl_dump_hw_state,
 };
 
-- 
2.14.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] drm/i915/ehl: Add support for DPLL4 (v2)

2019-04-04 Thread Vivek Kasireddy

This patch adds support for DPLL4 on EHL, subject to the
following restrictions:

- DPLL4 cannot be used with DDIA (combo port A internal eDP usage).
  DPLL4 can be used with other DDIs, including DDID
  (combo port A external usage).

- DPLL4 cannot be enabled when DC5 or DC6 are enabled.

- The DPLL4 enable, lock, power enabled, and power state are connected
  to the MGPLL1_ENABLE register.

v2: (suggestions from Bob Paauwe)
- Rework ehl_get_dpll() function to call intel_find_shared_dpll() and
  iterate twice: once for Combo plls and once for MG plls.

- Use MG pll funcs for DPLL4 instead of creating new ones and modify
  mg_pll_enable to include the restrictions for EHL.

Cc: Lucas De Marchi 
Signed-off-by: Vivek Kasireddy 
Reviewed-by: Bob Paauwe 
---
 drivers/gpu/drm/i915/intel_dpll_mgr.c | 60 ++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index e01c057ce50b..cb756acedc94 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -2870,6 +2870,56 @@ icl_get_dpll(struct intel_crtc_state *crtc_state,
return pll;
 }
 
+static struct intel_shared_dpll *
+ehl_get_dpll(struct intel_crtc_state *crtc_state,
+struct intel_encoder *encoder)
+{
+   struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+   struct intel_shared_dpll *pll;
+   enum port port = encoder->port;
+   enum intel_dpll_id min, max;
+   bool ret;
+
+   if (!intel_port_is_combophy(dev_priv, port)) {
+   MISSING_CASE(port);
+   return NULL;
+   }
+
+   min = DPLL_ID_ICL_DPLL0;
+   max = DPLL_ID_ICL_DPLL1;
+   ret = icl_calc_dpll_state(crtc_state, encoder);
+   if (ret) {
+   pll = intel_find_shared_dpll(crtc_state, min, max);
+   if (pll) {
+   intel_reference_shared_dpll(pll, crtc_state);
+   return pll;
+   }
+   } else {
+   DRM_DEBUG_KMS("Could not calculate PLL state.\n");
+   }
+
+   if (encoder->type == INTEL_OUTPUT_EDP) {
+   DRM_DEBUG_KMS("Cannot use DPLL4 with EDP.\n");
+   return NULL;
+   }
+
+   min = max = DPLL_ID_ICL_MGPLL1;
+   ret = icl_calc_mg_pll_state(crtc_state, false);
+   if (!ret) {
+   DRM_DEBUG_KMS("Could not calculate PLL state.\n");
+   return NULL;
+   }
+
+   pll = intel_find_shared_dpll(crtc_state, min, max);
+   if (!pll) {
+   DRM_DEBUG_KMS("No PLL selected\n");
+   return NULL;
+   }
+
+   intel_reference_shared_dpll(pll, crtc_state);
+   return pll;
+}
+
 static bool mg_pll_get_hw_state(struct drm_i915_private *dev_priv,
struct intel_shared_dpll *pll,
struct intel_dpll_hw_state *hw_state)
@@ -3115,6 +3165,13 @@ static void mg_pll_enable(struct drm_i915_private 
*dev_priv,
i915_reg_t enable_reg =
MG_PLL_ENABLE(icl_pll_id_to_tc_port(pll->info->id));
 
+   if (IS_ELKHARTLAKE(dev_priv) &&
+  (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5 ||
+   I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC6)) {
+   DRM_ERROR("Cant enable DPLL4 when DC5 or DC6 are enabled\n");
+   return;
+   }
+
icl_pll_power_enable(dev_priv, pll, enable_reg);
 
icl_mg_pll_write(dev_priv, pll);
@@ -3249,12 +3306,13 @@ static const struct intel_dpll_mgr icl_pll_mgr = {
 static const struct dpll_info ehl_plls[] = {
{ "DPLL 0", _pll_funcs, DPLL_ID_ICL_DPLL0, 0 },
{ "DPLL 1", _pll_funcs, DPLL_ID_ICL_DPLL1, 0 },
+   { "DPLL 4", _pll_funcs, DPLL_ID_ICL_MGPLL1,0 },
{ },
 };
 
 static const struct intel_dpll_mgr ehl_pll_mgr = {
.dpll_info = ehl_plls,
-   .get_dpll = icl_get_dpll,
+   .get_dpll = ehl_get_dpll,
.dump_hw_state = icl_dump_hw_state,
 };
 
-- 
2.14.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH i-g-t v2] lib/igt_kms: Introduce get_first_connected_output macro

2015-11-20 Thread Vivek Kasireddy

In some cases, we just need one valid (connected) output to perform
a test. This macro can help in these situations by not having to
put the test code inside a for loop that iterates over all the outputs.

v2: Added a brief documentation for this macro.

Suggested-by: Matt Roper 
Cc: Thomas Wood <thomas.w...@intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com>
---
 lib/igt_kms.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/lib/igt_kms.h b/lib/igt_kms.h
index 965c47c..a0bb066 100644
--- a/lib/igt_kms.h
+++ b/lib/igt_kms.h
@@ -279,6 +279,18 @@ void igt_wait_for_vblank(int drm_fd, enum pipe pipe);
for (int i__ = 0; (plane) = &(display)->pipes[(pipe)].planes[i__], \
 i__ < (display)->pipes[(pipe)].n_planes; i__++)
 
+/**
+ * get_first_connected_output:
+ * @display: Initialized igt_display_t type object
+ * @output: igt_output_t type object
+ *
+ * Returns: First valid (connected) output.
+ */
+#define get_first_connected_output(display, output)\
+   for (int i__ = 0;  i__ < (display)->n_outputs; i__++)   \
+   if ((output = &(display)->outputs[i__]), output->valid) \
+   break
+
 #define IGT_FIXED(i,f) ((i) << 16 | (f))
 
 void igt_enable_connectors(void);
-- 
2.4.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH i-g-t] tests/kms_rotation_crc: Use get_first_connected_output macro

2015-11-20 Thread Vivek Kasireddy

In some cases, the only connected connector might not occupy the
first slot and hence output[0] might be empty. Therefore, use
the get_first_connected_output macro to find the output object
associated with the connected connector.

Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com>
---
 tests/kms_rotation_crc.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/kms_rotation_crc.c b/tests/kms_rotation_crc.c
index c3241cf..94b4486 100644
--- a/tests/kms_rotation_crc.c
+++ b/tests/kms_rotation_crc.c
@@ -323,14 +323,15 @@ static void test_plane_rotation_ytiled_obj(data_t *data, 
enum igt_plane plane_ty
int bpp = igt_drm_format_to_bpp(format);
enum igt_commit_style commit = COMMIT_LEGACY;
int fd = data->gfx_fd;
-   igt_output_t *output = >outputs[0];
+   igt_output_t *output = NULL;
igt_plane_t *plane;
drmModeModeInfo *mode;
unsigned int stride, size, w, h;
uint32_t gem_handle;
int ret;
 
-   igt_require(output != NULL && output->valid == true);
+   get_first_connected_output(display, output);
+   igt_require(output != NULL);
 
plane = igt_output_get_plane(output, plane_type);
igt_require(igt_plane_supports_rotation(plane));
@@ -385,7 +386,7 @@ static void test_plane_rotation_exhaust_fences(data_t 
*data, enum igt_plane plan
int bpp = igt_drm_format_to_bpp(format);
enum igt_commit_style commit = COMMIT_LEGACY;
int fd = data->gfx_fd;
-   igt_output_t *output = >outputs[0];
+   igt_output_t *output = NULL;
igt_plane_t *plane;
drmModeModeInfo *mode;
data_t data2[MAX_FENCES+1] = {};
@@ -394,7 +395,8 @@ static void test_plane_rotation_exhaust_fences(data_t 
*data, enum igt_plane plan
uint64_t total_aperture_size, total_fbs_size;
int i, ret;
 
-   igt_require(output != NULL && output->valid == true);
+   get_first_connected_output(display, output);
+   igt_require(output != NULL);
 
plane = igt_output_get_plane(output, plane_type);
igt_require(igt_plane_supports_rotation(plane));
-- 
2.4.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] igt/igt_kms: Introduce get_first_connected_output (v2)

2015-11-13 Thread Vivek Kasireddy

On Fri, 13 Nov 2015 15:59:21 +
Thomas Wood <thomas.w...@intel.com> wrote:

> On 5 November 2015 at 01:34, Vivek Kasireddy
> <vivek.kasire...@intel.com> wrote:
> > In some cases, we just need one valid (connected) output to perform
> > a test. This macro can help in these situations by not having to
> > put the test code inside a for loop that iterates over all the
> > outputs.
> >
> > v2: Added a brief documentation for this macro.
> 
> The new macro is no longer being used anywhere. Is there a new patch
> that uses the macro?
> 

Hi Thomas,
I wanted to have this patch merged before I updated the tests to use
the macro.

> 
> Also, if re-sending the patch, please make sure it is tagged correctly
> as described in:
> 
> http://lists.freedesktop.org/archives/intel-gfx/2015-November/079712.html
> 
> This also explains how to manage the version tag in the subject line.

Thanks for the link; I wasn't aware of it. Do you want me to resend the
patch in this format?

Thanks and Regards,
Vivek

> 
> 
> >
> > Suggested-by: Matt Roper 
> > Cc: Thomas Wood <thomas.w...@intel.com>
> > Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com>
> > ---
> >  lib/igt_kms.h | 12 
> >  1 file changed, 12 insertions(+)
> >
> > diff --git a/lib/igt_kms.h b/lib/igt_kms.h
> > index 09c08aa..91fa206 100644
> > --- a/lib/igt_kms.h
> > +++ b/lib/igt_kms.h
> > @@ -278,6 +278,18 @@ void igt_wait_for_vblank(int drm_fd, enum pipe
> > pipe); for (int i__ = 0; (plane) =
> > &(display)->pipes[(pipe)].planes[i__], \ i__ <
> > (display)->pipes[(pipe)].n_planes; i__++)
> >
> > +/**
> > + * get_first_connected_output:
> > + * @display: Initialized igt_display_t type object
> > + * @output: igt_output_t type object
> > + *
> > + * Returns: First valid (connected) output.
> > + */
> > +#define get_first_connected_output(display, output)\
> > +   for (int i__ = 0;  i__ < (display)->n_outputs; i__++)   \
> > +   if ((output = &(display)->outputs[i__]),
> > output->valid) \
> > +   break
> > +
> >  /*
> >   * Can be used with igt_output_set_pipe() to mean we don't care
> > about the pipe
> >   * that should drive this output
> > --
> > 2.4.3
> >

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] igt/kms_rotation_crc: Add a new subtest to exhaustively test for fence leaks (v3)

2015-11-04 Thread Vivek Kasireddy

In this subtest, as a first step, MAX_FENCES+1 framebuffers are
created, backed by objects that have multiple GGTT views (normal and
rotated). Next, we have the i915 driver instantiate a normal view followed
by a rotated view. We continue doing the above MAX_FENCES + 1 times.

v2:
- Add a igt_require() to check if there is enough GTT space left for
  MAX_FENCES+1 framebuffers. (Tvrtko)
- Make data2 local to test_plane_rotation_exhaust_fences(). (Tvrtko)
- If there is a failure, deallocate all the previously allocated
  framebuffers before asserting.

v3: Close the gem handle if set_tiling or addfb fails. (Tvrtko)

Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com>
---
 tests/kms_rotation_crc.c | 108 +++
 1 file changed, 108 insertions(+)

diff --git a/tests/kms_rotation_crc.c b/tests/kms_rotation_crc.c
index ed6eeef..7e18b1e 100644
--- a/tests/kms_rotation_crc.c
+++ b/tests/kms_rotation_crc.c
@@ -25,6 +25,7 @@
 #include "igt.h"
 #include 
 
+#define MAX_FENCES 32
 
 typedef struct {
int gfx_fd;
@@ -376,6 +377,108 @@ static void test_plane_rotation_ytiled_obj(data_t *data, 
enum igt_plane plane_ty
igt_assert(ret == 0);
 }
 
+static void test_plane_rotation_exhaust_fences(data_t *data, enum igt_plane 
plane_type)
+{
+   igt_display_t *display = >display;
+   uint64_t tiling = LOCAL_I915_FORMAT_MOD_Y_TILED;
+   uint32_t format = DRM_FORMAT_XRGB;
+   int bpp = igt_drm_format_to_bpp(format);
+   enum igt_commit_style commit = COMMIT_LEGACY;
+   int fd = data->gfx_fd;
+   igt_output_t *output = >outputs[0];
+   igt_plane_t *plane;
+   drmModeModeInfo *mode;
+   data_t data2[MAX_FENCES+1] = {};
+   unsigned int stride, size, w, h;
+   uint32_t gem_handle;
+   uint64_t total_aperture_size, total_fbs_size;
+   int i, ret;
+
+   igt_require(output != NULL && output->valid == true);
+
+   plane = igt_output_get_plane(output, plane_type);
+   igt_require(igt_plane_supports_rotation(plane));
+
+   if (plane_type == IGT_PLANE_PRIMARY || plane_type == IGT_PLANE_CURSOR) {
+   igt_require(data->display.has_universal_planes);
+   commit = COMMIT_UNIVERSAL;
+   }
+
+   mode = igt_output_get_mode(output);
+   w = mode->hdisplay;
+   h = mode->vdisplay;
+
+   for (stride = 512; stride < (w * bpp / 8); stride *= 2)
+   ;
+   for (size = 1024*1024; size < stride * h; size *= 2)
+   ;
+
+   /*
+* Make sure there is atleast 90% of the available GTT space left
+* for creating (MAX_FENCES+1) framebuffers.
+*/
+   total_fbs_size = size * (MAX_FENCES + 1);
+   total_aperture_size = gem_available_aperture_size(fd);
+   igt_require(total_fbs_size < total_aperture_size * 0.9);
+
+   igt_plane_set_fb(plane, NULL);
+   igt_display_commit(display);
+
+   for (i = 0; i < MAX_FENCES + 1; i++) {
+   gem_handle = gem_create(fd, size);
+   ret = __gem_set_tiling(fd, gem_handle, I915_TILING_Y, stride);
+   if (ret) {
+   igt_warn("failed to set tiling\n");
+   goto err_alloc;
+   }
+
+   ret = (__kms_addfb(fd, gem_handle, w, h, stride,
+  format, tiling, LOCAL_DRM_MODE_FB_MODIFIERS,
+  [i].fb.fb_id));
+   if (ret) {
+   igt_warn("failed to create framebuffer\n");
+   goto err_alloc;
+   }
+
+   data2[i].fb.width = w;
+   data2[i].fb.height = h;
+   data2[i].fb.gem_handle = gem_handle;
+
+   igt_plane_set_fb(plane, [i].fb);
+   igt_plane_set_rotation(plane, IGT_ROTATION_0);
+
+   ret = igt_display_try_commit2(display, commit);
+   if (ret) {
+   igt_warn("failed to commit unrotated fb\n");
+   goto err_commit;
+   }
+
+   igt_plane_set_rotation(plane, IGT_ROTATION_90);
+
+   drmModeObjectSetProperty(fd, plane->drm_plane->plane_id,
+DRM_MODE_OBJECT_PLANE,
+plane->rotation_property,
+plane->rotation);
+   ret = igt_display_try_commit2(display, commit);
+   if (ret) {
+   igt_warn("failed to commit hardware rotated fb\n");
+   goto err_commit;
+   }
+   }
+
+err_alloc:
+   if (ret)
+   gem_close(fd, gem_handle);
+
+   i--;
+err_commit:
+   for (; i >= 0; i--)
+   igt_remove_fb(fd, [i].fb);
+
+   kmstest_rest

[Intel-gfx] [PATCH] igt/igt_kms: Introduce get_first_connected_output (v2)

2015-11-04 Thread Vivek Kasireddy

In some cases, we just need one valid (connected) output to perform
a test. This macro can help in these situations by not having to
put the test code inside a for loop that iterates over all the outputs.

v2: Added a brief documentation for this macro.

Suggested-by: Matt Roper 
Cc: Thomas Wood <thomas.w...@intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com>
---
 lib/igt_kms.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/lib/igt_kms.h b/lib/igt_kms.h
index 09c08aa..91fa206 100644
--- a/lib/igt_kms.h
+++ b/lib/igt_kms.h
@@ -278,6 +278,18 @@ void igt_wait_for_vblank(int drm_fd, enum pipe pipe);
for (int i__ = 0; (plane) = &(display)->pipes[(pipe)].planes[i__], \
 i__ < (display)->pipes[(pipe)].n_planes; i__++)
 
+/**
+ * get_first_connected_output:
+ * @display: Initialized igt_display_t type object
+ * @output: igt_output_t type object
+ *
+ * Returns: First valid (connected) output.
+ */
+#define get_first_connected_output(display, output)\
+   for (int i__ = 0;  i__ < (display)->n_outputs; i__++)   \
+   if ((output = &(display)->outputs[i__]), output->valid) \
+   break
+
 /*
  * Can be used with igt_output_set_pipe() to mean we don't care about the pipe
  * that should drive this output
-- 
2.4.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] gbm: Add a flag to enable creation of rotated scanout buffers (v2)

2015-11-04 Thread Vivek Kasireddy

For certain platforms that support rotated scanout buffers, currently,
there is no way to create them with the GBM DRI interface. This flag
will instruct the DRI driver to create the buffer by setting
additional requirements such as tiling mode.

v2: Reserve a bit per angle. (Ville and Michel)

Cc: Michel Danzer <mic...@daenzer.net>
Cc: Ville Syrjala <ville.syrj...@linux.intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com>
---
 include/GL/internal/dri_interface.h |  3 +++
 src/gbm/backends/dri/gbm_dri.c  | 19 +--
 src/gbm/main/gbm.h  |  7 +++
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/include/GL/internal/dri_interface.h 
b/include/GL/internal/dri_interface.h
index 6bbd3fa..cd1bf62 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -1100,6 +1100,9 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_USE_SCANOUT 0x0002
 #define __DRI_IMAGE_USE_CURSOR 0x0004 /* Depricated */
 #define __DRI_IMAGE_USE_LINEAR 0x0008
+#define __DRI_IMAGE_USE_ROTATION_90    0x0010
+#define __DRI_IMAGE_USE_ROTATION_180   0x0020
+#define __DRI_IMAGE_USE_ROTATION_270   0x0040
 
 
 /**
diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index 57cdeac..a997946 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -539,7 +539,8 @@ gbm_dri_is_format_supported(struct gbm_device *gbm,
   break;
case GBM_BO_FORMAT_ARGB:
case GBM_FORMAT_ARGB:
-  if (usage & GBM_BO_USE_SCANOUT)
+  if (usage & (GBM_BO_USE_SCANOUT | GBM_BO_USE_ROTATION_90 |
+  GBM_BO_USE_ROTATION_180 | GBM_BO_USE_ROTATION_270))
  return 0;
   break;
default:
@@ -748,6 +749,12 @@ gbm_dri_bo_import(struct gbm_device *gbm,
 
if (usage & GBM_BO_USE_SCANOUT)
   dri_use |= __DRI_IMAGE_USE_SCANOUT;
+   if (usage & GBM_BO_USE_ROTATION_90)
+  dri_use |= __DRI_IMAGE_USE_ROTATION_90;
+   if (usage & GBM_BO_USE_ROTATION_180)
+  dri_use |= __DRI_IMAGE_USE_ROTATION_180;
+   if (usage & GBM_BO_USE_ROTATION_270)
+  dri_use |= __DRI_IMAGE_USE_ROTATION_270;
if (usage & GBM_BO_USE_CURSOR)
   dri_use |= __DRI_IMAGE_USE_CURSOR;
if (dri->image->base.version >= 2 &&
@@ -786,7 +793,9 @@ create_dumb(struct gbm_device *gbm,
 
is_cursor = (usage & GBM_BO_USE_CURSOR) != 0 &&
   format == GBM_FORMAT_ARGB;
-   is_scanout = (usage & GBM_BO_USE_SCANOUT) != 0 &&
+   is_scanout = (usage & (GBM_BO_USE_SCANOUT |
+  GBM_BO_USE_ROTATION_90 | GBM_BO_USE_ROTATION_180 |
+  GBM_BO_USE_ROTATION_270)) != 0 &&
   format == GBM_FORMAT_XRGB;
if (!is_cursor && !is_scanout) {
   errno = EINVAL;
@@ -880,6 +889,12 @@ gbm_dri_bo_create(struct gbm_device *gbm,
 
if (usage & GBM_BO_USE_SCANOUT)
   dri_use |= __DRI_IMAGE_USE_SCANOUT;
+   if (usage & GBM_BO_USE_ROTATION_90)
+  dri_use |= __DRI_IMAGE_USE_ROTATION_90;
+   if (usage & GBM_BO_USE_ROTATION_180)
+  dri_use |= __DRI_IMAGE_USE_ROTATION_180;
+   if (usage & GBM_BO_USE_ROTATION_270)
+  dri_use |= __DRI_IMAGE_USE_ROTATION_270;
if (usage & GBM_BO_USE_CURSOR)
   dri_use |= __DRI_IMAGE_USE_CURSOR;
if (usage & GBM_BO_USE_LINEAR)
diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h
index 8db2153..b200ca6 100644
--- a/src/gbm/main/gbm.h
+++ b/src/gbm/main/gbm.h
@@ -214,6 +214,13 @@ enum gbm_bo_flags {
 * Buffer is linear, i.e. not tiled.
 */
GBM_BO_USE_LINEAR = (1 << 4),
+   /**
+* Buffer would be rotated and some platforms have additional tiling
+* requirements for rotated scanout buffers.
+*/
+   GBM_BO_USE_ROTATION_90 = (1 << 5),
+   GBM_BO_USE_ROTATION_180 = (1 << 6),
+   GBM_BO_USE_ROTATION_270 = (1 << 7),
 };
 
 int
-- 
2.4.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH] igt/kms_rotation_crc: Add a new subtest to exhaustively test for fence leaks (v2)

2015-11-03 Thread Vivek Kasireddy

In this subtest, as a first step, MAX_FENCES+1 framebuffers are
created, each backed by an object that has multiple GGTT views (normal and
rotated). Next, we have the i915 driver instantiate a normal view followed
by a rotated view. We repeat this MAX_FENCES + 1 times.

v2:
- Add an igt_require() to check if there is enough GTT space left for
  MAX_FENCES+1 framebuffers. (Tvrtko)
- Make data2 local to test_plane_rotation_exhaust_fences(). (Tvrtko)
- If there is a failure, deallocate all the previously allocated
  framebuffers before asserting.

Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com>
---
 tests/kms_rotation_crc.c | 106 +++
 1 file changed, 106 insertions(+)

diff --git a/tests/kms_rotation_crc.c b/tests/kms_rotation_crc.c
index ed6eeef..154c6a1 100644
--- a/tests/kms_rotation_crc.c
+++ b/tests/kms_rotation_crc.c
@@ -25,6 +25,7 @@
 #include "igt.h"
 #include <math.h>
 
+#define MAX_FENCES 32
 
 typedef struct {
int gfx_fd;
@@ -376,6 +377,106 @@ static void test_plane_rotation_ytiled_obj(data_t *data, 
enum igt_plane plane_ty
igt_assert(ret == 0);
 }
 
+static void test_plane_rotation_exhaust_fences(data_t *data, enum igt_plane 
plane_type)
+{
+   igt_display_t *display = &data->display;
+   uint64_t tiling = LOCAL_I915_FORMAT_MOD_Y_TILED;
+   uint32_t format = DRM_FORMAT_XRGB;
+   int bpp = igt_drm_format_to_bpp(format);
+   enum igt_commit_style commit = COMMIT_LEGACY;
+   int fd = data->gfx_fd;
+   igt_output_t *output = &display->outputs[0];
+   igt_plane_t *plane;
+   drmModeModeInfo *mode;
+   data_t data2[MAX_FENCES+1] = {};
+   unsigned int stride, size, w, h;
+   uint32_t gem_handle;
+   uint64_t total_aperture_size, total_fbs_size;
+   int i, ret;
+
+   igt_require(output != NULL && output->valid == true);
+
+   plane = igt_output_get_plane(output, plane_type);
+   igt_require(igt_plane_supports_rotation(plane));
+
+   if (plane_type == IGT_PLANE_PRIMARY || plane_type == IGT_PLANE_CURSOR) {
+   igt_require(data->display.has_universal_planes);
+   commit = COMMIT_UNIVERSAL;
+   }
+
+   mode = igt_output_get_mode(output);
+   w = mode->hdisplay;
+   h = mode->vdisplay;
+
+   for (stride = 512; stride < (w * bpp / 8); stride *= 2)
+   ;
+   for (size = 1024*1024; size < stride * h; size *= 2)
+   ;
+
+   /*
+* Make sure the (MAX_FENCES+1) framebuffers fit within 90% of the
+* available GTT space.
+*/
+   total_fbs_size = size * (MAX_FENCES + 1);
+   total_aperture_size = gem_available_aperture_size(fd);
+   igt_require(total_fbs_size < total_aperture_size * 0.9);
+
+   igt_plane_set_fb(plane, NULL);
+   igt_display_commit(display);
+
+   for (i = 0; i < MAX_FENCES + 1; i++) {
+   gem_handle = gem_create(fd, size);
+   ret = __gem_set_tiling(fd, gem_handle, I915_TILING_Y, stride);
+   if (ret) {
+   igt_warn("failed to set tiling\n");
+   goto err_alloc;
+   }
+
+   ret = (__kms_addfb(fd, gem_handle, w, h, stride,
+  format, tiling, LOCAL_DRM_MODE_FB_MODIFIERS,
+  &data2[i].fb.fb_id));
+   if (ret) {
+   igt_warn("failed to create framebuffer\n");
+   goto err_alloc;
+   }
+
+   data2[i].fb.width = w;
+   data2[i].fb.height = h;
+   data2[i].fb.gem_handle = gem_handle;
+
+   igt_plane_set_fb(plane, &data2[i].fb);
+   igt_plane_set_rotation(plane, IGT_ROTATION_0);
+
+   ret = igt_display_try_commit2(display, commit);
+   if (ret) {
+   igt_warn("failed to commit unrotated fb\n");
+   goto err_commit;
+   }
+
+   igt_plane_set_rotation(plane, IGT_ROTATION_90);
+
+   drmModeObjectSetProperty(fd, plane->drm_plane->plane_id,
+DRM_MODE_OBJECT_PLANE,
+plane->rotation_property,
+plane->rotation);
+   ret = igt_display_try_commit2(display, commit);
+   if (ret) {
+   igt_warn("failed to commit hardware rotated fb\n");
+   goto err_commit;
+   }
+   }
+
+err_alloc:
+   i--;
+err_commit:
+   kmstest_restore_vt_mode();
+
+   for (; i >= 0; i--)
+   igt_remove_fb(fd, &data2[i].fb);
+
+   igt_assert(ret == 0);
+}
+
 igt_main
 {
data_t data = {};
@@ -471,6 +572,11 @@ igt_main

[Intel-gfx] [PATCH] igt/kms_rotation_crc: Add a new subtest to exhaustively test for fence leaks

2015-11-02 Thread Vivek Kasireddy

In this subtest, as a first step, MAX_FENCES+1 framebuffers are
created, each backed by an object that has multiple GGTT views (normal and
rotated). Next, we have the i915 driver instantiate a normal view followed
by a rotated view. We repeat this MAX_FENCES + 1 times.

Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com>
---
 tests/kms_rotation_crc.c | 79 
 1 file changed, 79 insertions(+)

diff --git a/tests/kms_rotation_crc.c b/tests/kms_rotation_crc.c
index ed6eeef..44691d1 100644
--- a/tests/kms_rotation_crc.c
+++ b/tests/kms_rotation_crc.c
@@ -25,6 +25,7 @@
 #include "igt.h"
 #include <math.h>
 
+#define MAX_FENCES 32
 
 typedef struct {
int gfx_fd;
@@ -376,6 +377,78 @@ static void test_plane_rotation_ytiled_obj(data_t *data, 
enum igt_plane plane_ty
igt_assert(ret == 0);
 }
 
+static void test_plane_rotation_exhaust_fences(data_t *data, data_t *data2,
+  enum igt_plane plane_type)
+{
+   igt_display_t *display = &data->display;
+   uint64_t tiling = LOCAL_I915_FORMAT_MOD_Y_TILED;
+   uint32_t format = DRM_FORMAT_XRGB;
+   int bpp = igt_drm_format_to_bpp(format);
+   enum igt_commit_style commit = COMMIT_LEGACY;
+   int fd = data->gfx_fd;
+   igt_output_t *output = &display->outputs[0];
+   igt_plane_t *plane;
+   drmModeModeInfo *mode;
+   unsigned int stride, size, w, h;
+   uint32_t gem_handle;
+   int i, ret;
+
+   igt_require(output != NULL && output->valid == true);
+
+   plane = igt_output_get_plane(output, plane_type);
+   igt_require(igt_plane_supports_rotation(plane));
+
+   if (plane_type == IGT_PLANE_PRIMARY || plane_type == IGT_PLANE_CURSOR) {
+   igt_require(data->display.has_universal_planes);
+   commit = COMMIT_UNIVERSAL;
+   }
+
+   mode = igt_output_get_mode(output);
+   w = mode->hdisplay;
+   h = mode->vdisplay;
+
+   for (stride = 512; stride < (w * bpp / 8); stride *= 2)
+   ;
+   for (size = 1024*1024; size < stride * h; size *= 2)
+   ;
+
+   igt_plane_set_fb(plane, NULL);
+   igt_display_commit(display);
+
+   for (i = 0; i < MAX_FENCES + 1; i++) {
+   gem_handle = gem_create(fd, size);
+   ret = __gem_set_tiling(fd, gem_handle, I915_TILING_Y, stride);
+   igt_assert(ret == 0);
+
+   do_or_die(__kms_addfb(fd, gem_handle, w, h, stride,
+ format, tiling, LOCAL_DRM_MODE_FB_MODIFIERS,
+ &data2[i].fb.fb_id));
+   data2[i].fb.width = w;
+   data2[i].fb.height = h;
+   data2[i].fb.gem_handle = gem_handle;
+
+   igt_plane_set_fb(plane, &data2[i].fb);
+   igt_plane_set_rotation(plane, IGT_ROTATION_0);
+
+   ret = igt_display_try_commit2(display, commit);
+   igt_assert(ret == 0);
+
+   igt_plane_set_rotation(plane, IGT_ROTATION_90);
+
+   drmModeObjectSetProperty(fd, plane->drm_plane->plane_id,
+DRM_MODE_OBJECT_PLANE,
+plane->rotation_property,
+plane->rotation);
+   ret = igt_display_try_commit2(display, commit);
+   igt_assert(ret == 0);
+   }
+
+   kmstest_restore_vt_mode();
+
+   for (i = 0; i < MAX_FENCES + 1; i++)
+   igt_remove_fb(fd, &data2[i].fb);
+}
+
 igt_main
 {
data_t data = {};
@@ -471,6 +544,12 @@ igt_main
test_plane_rotation_ytiled_obj(&data, IGT_PLANE_PRIMARY);
}
 
+   igt_subtest_f("exhaust-fences") {
+   data_t data2[MAX_FENCES+1] = {};
+   igt_require(gen >= 9);
+   test_plane_rotation_exhaust_fences(&data, data2, 
IGT_PLANE_PRIMARY);
+   }
+
igt_fixture {
igt_display_fini(&data.display);
}
-- 
2.4.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] igt/kms_rotation_crc: Add a subtest to validate Y-tiled obj + Y fb modifier (v5)

2015-10-30 Thread Vivek Kasireddy

Hi Tvrtko,

On Fri, 30 Oct 2015 10:22:08 +
Tvrtko Ursulin <tvrtko.ursu...@linux.intel.com> wrote:

> 
> On 30/10/15 01:44, Vivek Kasireddy wrote:
> > The main goal of this subtest is to trigger the following warning in
> > the function i915_gem_object_get_fence():
> > if (WARN_ON(!obj->map_and_fenceable))
> >
> > To trigger this warning, the subtest first creates a Y-tiled object
> > and an associated framebuffer with the Y-fb modifier. Furthermore,
> > to prevent the map_and_fenceable from being set, we make sure that
> > the object does not have a normal VMA by refraining from rendering
> > to the object and by setting the rotation property upfront before
> > calling commit.
> >
> > v2: Do not call paint_squares and just use one output.
> >
> > v3: Convert an if condition to igt_require and move the plane
> > rotation requirement further up before the fb allocation.
> >
> > v4: After setting rotation to 90 and committing, change the
> > rotation to 0 and commit once more. This is to test if the i915
> > driver hits any warnings while pinning and unpinning an object that
> > has both normal and rotated views.
> >
> > v5:
> > - Add another subtest to toggle the order of rotation
> > - Exhaustively test the i915 driver's pinning and unpinning code
> > paths for any fence leaks by iterating until MAX available fences.
> >
> > Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
> > Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com>
> > ---
> >   tests/kms_rotation_crc.c | 84
> >  1 file changed, 84
> > insertions(+)
> >
> > diff --git a/tests/kms_rotation_crc.c b/tests/kms_rotation_crc.c
> > index cc9847e..34f8150 100644
> > --- a/tests/kms_rotation_crc.c
> > +++ b/tests/kms_rotation_crc.c
> > @@ -264,6 +264,80 @@ static void test_plane_rotation(data_t *data,
> > enum igt_plane plane_type) igt_require_f(valid_tests, "no valid
> > crtc/connector combinations found\n"); }
> >
> > +static void test_plane_rotation_ytiled_obj(data_t *data, enum
> > igt_plane plane_type,
> > +  int toggle)
> > +{
> > +   igt_display_t *display = &data->display;
> > +   uint64_t tiling = LOCAL_I915_FORMAT_MOD_Y_TILED;
> > +   uint32_t format = DRM_FORMAT_XRGB;
> > +   int bpp = igt_drm_format_to_bpp(format);
> > +   enum igt_commit_style commit = COMMIT_LEGACY;
> > +   int fd = data->gfx_fd;
> > +   igt_output_t *output = &display->outputs[0];
> > +   igt_plane_t *plane;
> > +   drmModeModeInfo *mode;
> > +   unsigned int stride, size, w, h;
> > +   uint32_t gem_handle;
> > +   int num_fences = gem_available_fences(fd);
> > +   int i, ret;
> > +
> > +   igt_require(output != NULL && output->valid == true);
> > +
> > +   plane = igt_output_get_plane(output, plane_type);
> > +   igt_require(igt_plane_supports_rotation(plane));
> > +
> > +   if (plane_type == IGT_PLANE_PRIMARY || plane_type ==
> > IGT_PLANE_CURSOR) {
> > +   igt_require(data->display.has_universal_planes);
> > +   commit = COMMIT_UNIVERSAL;
> > +   }
> > +
> > +   mode = igt_output_get_mode(output);
> > +   w = mode->hdisplay;
> > +   h = mode->vdisplay;
> > +
> > +   for (stride = 512; stride < (w * bpp / 8); stride *= 2)
> > +   ;
> > +   for (size = 1024*1024; size < stride * h; size *= 2)
> > +   ;
> > +
> > +   gem_handle = gem_create(fd, size);
> > +   ret = __gem_set_tiling(fd, gem_handle, I915_TILING_Y,
> > stride);
> > +   igt_assert(ret == 0);
> > +
> > +   do_or_die(__kms_addfb(fd, gem_handle, w, h, stride,
> > + format, tiling, LOCAL_DRM_MODE_FB_MODIFIERS,
> > + &data->fb.fb_id));
> > +   data->fb.width = w;
> > +   data->fb.height = h;
> > +   data->fb.gem_handle = gem_handle;
> > +
> > +   igt_plane_set_fb(plane, NULL);
> > +   igt_display_commit(display);
> > +
> > +   igt_plane_set_fb(plane, &data->fb);
> > +
> > +   for (i = 0; i < num_fences + 1; i++) {
> > +   igt_plane_set_rotation(plane, toggle ?
> > IGT_ROTATION_0 : IGT_ROTATION_90);
> > +   drmModeObjectSetProperty(fd,
> > plane->drm_plane->plane_id,
> > +DRM_MODE_OBJECT_PLANE,
> > +plane->rotation_property,
> > +plane->rotation);
> &

[Intel-gfx] [PATCH] igt/kms_rotation_crc: Add a subtest to validate Y-tiled obj + Y fb modifier (v5)

2015-10-29 Thread Vivek Kasireddy

The main goal of this subtest is to trigger the following warning in
the function i915_gem_object_get_fence():
if (WARN_ON(!obj->map_and_fenceable))

To trigger this warning, the subtest first creates a Y-tiled object and
an associated framebuffer with the Y-fb modifier. Furthermore, to
prevent the map_and_fenceable from being set, we make sure that
the object does not have a normal VMA by refraining from rendering to the
object and by setting the rotation property upfront before calling commit.

v2: Do not call paint_squares and just use one output.

v3: Convert an if condition to igt_require and move the plane rotation
requirement further up before the fb allocation.

v4: After setting rotation to 90 and committing, change the rotation to
0 and commit once more. This is to test if the i915 driver hits any
warnings while pinning and unpinning an object that has both normal
and rotated views.

v5:
- Add another subtest to toggle the order of rotation
- Exhaustively test the i915 driver's pinning and unpinning code paths
  for any fence leaks by iterating until MAX available fences.

Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com>
---
 tests/kms_rotation_crc.c | 84 
 1 file changed, 84 insertions(+)

diff --git a/tests/kms_rotation_crc.c b/tests/kms_rotation_crc.c
index cc9847e..34f8150 100644
--- a/tests/kms_rotation_crc.c
+++ b/tests/kms_rotation_crc.c
@@ -264,6 +264,80 @@ static void test_plane_rotation(data_t *data, enum 
igt_plane plane_type)
igt_require_f(valid_tests, "no valid crtc/connector combinations 
found\n");
 }
 
+static void test_plane_rotation_ytiled_obj(data_t *data, enum igt_plane 
plane_type,
+  int toggle)
+{
+   igt_display_t *display = &data->display;
+   uint64_t tiling = LOCAL_I915_FORMAT_MOD_Y_TILED;
+   uint32_t format = DRM_FORMAT_XRGB;
+   int bpp = igt_drm_format_to_bpp(format);
+   enum igt_commit_style commit = COMMIT_LEGACY;
+   int fd = data->gfx_fd;
+   igt_output_t *output = &display->outputs[0];
+   igt_plane_t *plane;
+   drmModeModeInfo *mode;
+   unsigned int stride, size, w, h;
+   uint32_t gem_handle;
+   int num_fences = gem_available_fences(fd);
+   int i, ret;
+
+   igt_require(output != NULL && output->valid == true);
+
+   plane = igt_output_get_plane(output, plane_type);
+   igt_require(igt_plane_supports_rotation(plane));
+
+   if (plane_type == IGT_PLANE_PRIMARY || plane_type == IGT_PLANE_CURSOR) {
+   igt_require(data->display.has_universal_planes);
+   commit = COMMIT_UNIVERSAL;
+   }
+
+   mode = igt_output_get_mode(output);
+   w = mode->hdisplay;
+   h = mode->vdisplay;
+
+   for (stride = 512; stride < (w * bpp / 8); stride *= 2)
+   ;
+   for (size = 1024*1024; size < stride * h; size *= 2)
+   ;
+
+   gem_handle = gem_create(fd, size);
+   ret = __gem_set_tiling(fd, gem_handle, I915_TILING_Y, stride);
+   igt_assert(ret == 0);
+
+   do_or_die(__kms_addfb(fd, gem_handle, w, h, stride,
+ format, tiling, LOCAL_DRM_MODE_FB_MODIFIERS,
+ &data->fb.fb_id));
+   data->fb.width = w;
+   data->fb.height = h;
+   data->fb.gem_handle = gem_handle;
+
+   igt_plane_set_fb(plane, NULL);
+   igt_display_commit(display);
+
+   igt_plane_set_fb(plane, &data->fb);
+
+   for (i = 0; i < num_fences + 1; i++) {
+   igt_plane_set_rotation(plane, toggle ? IGT_ROTATION_0 : 
IGT_ROTATION_90);
+   drmModeObjectSetProperty(fd, plane->drm_plane->plane_id,
+DRM_MODE_OBJECT_PLANE,
+plane->rotation_property,
+plane->rotation);
+   ret = igt_display_try_commit2(display, commit);
+   igt_assert(ret == 0);
+
+   igt_plane_set_rotation(plane, toggle ? IGT_ROTATION_90 : 
IGT_ROTATION_0);
+   drmModeObjectSetProperty(fd, plane->drm_plane->plane_id,
+DRM_MODE_OBJECT_PLANE,
+plane->rotation_property,
+plane->rotation);
+   ret = igt_display_try_commit2(display, commit);
+   igt_assert(ret == 0);
+   }
+
+   kmstest_restore_vt_mode();
+   igt_remove_fb(fd, &data->fb);
+}
+
 igt_main
 {
data_t data = {};
@@ -345,6 +419,16 @@ igt_main
test_plane_rotation(&data, IGT_PLANE_PRIMARY);
}
 
+   igt_subtest_f("primary-rotation-90-to-0-Y-tiled") {
+   igt_require(gen >= 9);
+   test_plane_rotation_ytiled_obj(, IGT_PLANE_PRI

[Intel-gfx] [PATCH] drm/i915: Skip fence installation for objects with rotated views (v4)

2015-10-29 Thread Vivek Kasireddy

While pinning a fb object to the display plane, only install a fence
if the object is using a normal view. This corresponds with the
behavior found in i915_gem_object_do_pin() where the fenceability
criteria are determined only for objects with normal views.

v2:
Look at the object's map_and_fenceable flag to determine whether to
install a fence or not (Chris).

v3:
Pin and unpin a fence only if the current view type is normal.

v4:
Extend the "view type is normal" check for pin_fence as well.

Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Cc: Ville Syrjala <ville.syrj...@linux.intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasire...@intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 36 
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 2fdfca1..9c80968 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2419,22 +2419,24 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane,
 * framebuffer compression.  For simplicity, we always install
 * a fence as the cost is not that onerous.
 */
-   ret = i915_gem_object_get_fence(obj);
-   if (ret == -EDEADLK) {
-   /*
-* -EDEADLK means there are no free fences
-* no pending flips.
-*
-* This is propagated to atomic, but it uses
-* -EDEADLK to force a locking recovery, so
-* change the returned error to -EBUSY.
-*/
-   ret = -EBUSY;
-   goto err_unpin;
-   } else if (ret)
-   goto err_unpin;
+   if (view.type == I915_GGTT_VIEW_NORMAL) {
+   ret = i915_gem_object_get_fence(obj);
+   if (ret == -EDEADLK) {
+   /*
+* -EDEADLK means there are no free fences
+* no pending flips.
+*
+* This is propagated to atomic, but it uses
+* -EDEADLK to force a locking recovery, so
+* change the returned error to -EBUSY.
+*/
+   ret = -EBUSY;
+   goto err_unpin;
+   } else if (ret)
+   goto err_unpin;
 
-   i915_gem_object_pin_fence(obj);
+   i915_gem_object_pin_fence(obj);
+   }
 
dev_priv->mm.interruptible = true;
intel_runtime_pm_put(dev_priv);
@@ -2460,7 +2462,9 @@ static void intel_unpin_fb_obj(struct drm_framebuffer *fb,
ret = intel_fill_fb_ggtt_view(&view, fb, plane_state);
WARN_ONCE(ret, "Couldn't get view from plane state!");
 
-   i915_gem_object_unpin_fence(obj);
+   if (view.type == I915_GGTT_VIEW_NORMAL)
+   i915_gem_object_unpin_fence(obj);
+
i915_gem_object_unpin_from_display_plane(obj, &view);
 }
 
-- 
2.4.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

1 2 >

1 - 100 of 123 matches

Mail list logo