Re: [PATCH 1/7] drm/msm/dp: use dp_ctrl_off_link_stream during PHY compliance test run

2021-07-08 Thread Stephen Boyd
Quoting Kuogee Hsieh (2021-07-06 10:20:14)
> DP cable should always connect to DPU during the entire PHY compliance
> testing run. Since DP PHY compliance test is executed at irq_hpd event
> context, dp_ctrl_off_link_stream() should be used instead of dp_ctrl_off().
> dp_ctrl_off() is used for unplug event which is triggered when DP cable is
> disconnected.
>
> Signed-off-by: Kuogee Hsieh 
> ---

Is this

Fixes: f21c8a276c2d ("drm/msm/dp: handle irq_hpd with sink_count = 0 correctly")

or

Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support")

? It's not clear how dp_ctrl_off() was working for compliance tests
before commit f21c8a276c2d.

>  drivers/gpu/drm/msm/dp/dp_ctrl.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c 
> b/drivers/gpu/drm/msm/dp/dp_ctrl.c
> index caf71fa..27fb0f0 100644
> --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
> +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
> @@ -1530,7 +1530,7 @@ static int dp_ctrl_process_phy_test_request(struct 
> dp_ctrl_private *ctrl)
>  * running. Add the global reset just before disabling the
>  * link clocks and core clocks.
>  */
> -   ret = dp_ctrl_off(&ctrl->dp_ctrl);
> +   ret = dp_ctrl_off_link_stream(&ctrl->dp_ctrl);
> if (ret) {
> DRM_ERROR("failed to disable DP controller\n");
> return ret;


[PATCH v2 0/2] Add support of HDMI for rk3568

2021-07-08 Thread Benjamin Gaignard
Add a compatible and platform datas to support HDMI for rk3568 SoC.

version 2:
- Add the clocks needed for the phy.
 
Benjamin Gaignard (2):
  dt-bindings: display: rockchip: Add compatible for rk3568 HDMI
  drm/rockchip: dw_hdmi: add rk3568 support

 .../display/rockchip/rockchip,dw-hdmi.yaml|  6 +-
 drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c   | 68 +++
 2 files changed, 73 insertions(+), 1 deletion(-)

-- 
2.25.1



[PATCH v2 2/2] drm/rockchip: dw_hdmi: add rk3568 support

2021-07-08 Thread Benjamin Gaignard
Add a new dw_hdmi_plat_data struct and new compatible for rk3568.
This version of the HDMI hardware block need two clocks to provide
phy reference clock: hclk_vio and hclk.

Signed-off-by: Benjamin Gaignard 
---
version 2:
- Add the clocks needed for the phy.

 drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 68 +
 1 file changed, 68 insertions(+)

diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c 
b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index 830bdd5e9b7ce..dc0e255e45745 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -50,6 +50,10 @@
 #define RK3399_GRF_SOC_CON20   0x6250
 #define RK3399_HDMI_LCDC_SEL   BIT(6)
 
+#define RK3568_GRF_VO_CON1 0x0364
+#define RK3568_HDMI_SDAIN_MSK  BIT(15)
+#define RK3568_HDMI_SCLIN_MSK  BIT(14)
+
 #define HIWORD_UPDATE(val, mask)   (val | (mask) << 16)
 
 /**
@@ -71,6 +75,8 @@ struct rockchip_hdmi {
const struct rockchip_hdmi_chip_data *chip_data;
struct clk *vpll_clk;
struct clk *grf_clk;
+   struct clk *hclk_vio;
+   struct clk *hclk_vop;
struct dw_hdmi *hdmi;
struct phy *phy;
 };
@@ -216,6 +222,26 @@ static int rockchip_hdmi_parse_dt(struct rockchip_hdmi 
*hdmi)
return PTR_ERR(hdmi->grf_clk);
}
 
+   hdmi->hclk_vio = devm_clk_get(hdmi->dev, "hclk_vio");
+   if (PTR_ERR(hdmi->hclk_vio) == -ENOENT) {
+   hdmi->hclk_vio = NULL;
+   } else if (PTR_ERR(hdmi->hclk_vio) == -EPROBE_DEFER) {
+   return -EPROBE_DEFER;
+   } else if (IS_ERR(hdmi->hclk_vio)) {
+   dev_err(hdmi->dev, "failed to get hclk_vio clock\n");
+   return PTR_ERR(hdmi->hclk_vio);
+   }
+
+   hdmi->hclk_vop = devm_clk_get(hdmi->dev, "hclk");
+   if (PTR_ERR(hdmi->hclk_vop) == -ENOENT) {
+   hdmi->hclk_vop = NULL;
+   } else if (PTR_ERR(hdmi->hclk_vop) == -EPROBE_DEFER) {
+   return -EPROBE_DEFER;
+   } else if (IS_ERR(hdmi->hclk_vop)) {
+   dev_err(hdmi->dev, "failed to get hclk_vop clock\n");
+   return PTR_ERR(hdmi->hclk_vop);
+   }
+
return 0;
 }
 
@@ -467,6 +493,19 @@ static const struct dw_hdmi_plat_data rk3399_hdmi_drv_data 
= {
.use_drm_infoframe = true,
 };
 
+static struct rockchip_hdmi_chip_data rk3568_chip_data = {
+   .lcdsel_grf_reg = -1,
+};
+
+static const struct dw_hdmi_plat_data rk3568_hdmi_drv_data = {
+   .mode_valid = dw_hdmi_rockchip_mode_valid,
+   .mpll_cfg   = rockchip_mpll_cfg,
+   .cur_ctr= rockchip_cur_ctr,
+   .phy_config = rockchip_phy_config,
+   .phy_data = &rk3568_chip_data,
+   .use_drm_infoframe = true,
+};
+
 static const struct of_device_id dw_hdmi_rockchip_dt_ids[] = {
{ .compatible = "rockchip,rk3228-dw-hdmi",
  .data = &rk3228_hdmi_drv_data
@@ -480,6 +519,9 @@ static const struct of_device_id dw_hdmi_rockchip_dt_ids[] 
= {
{ .compatible = "rockchip,rk3399-dw-hdmi",
  .data = &rk3399_hdmi_drv_data
},
+   { .compatible = "rockchip,rk3568-dw-hdmi",
+ .data = &rk3568_hdmi_drv_data
+   },
{},
 };
 MODULE_DEVICE_TABLE(of, dw_hdmi_rockchip_dt_ids);
@@ -536,6 +578,28 @@ static int dw_hdmi_rockchip_bind(struct device *dev, 
struct device *master,
return ret;
}
 
+   ret = clk_prepare_enable(hdmi->hclk_vio);
+   if (ret) {
+   dev_err(hdmi->dev, "Failed to enable HDMI hclk_vio: %d\n",
+   ret);
+   return ret;
+   }
+
+   ret = clk_prepare_enable(hdmi->hclk_vop);
+   if (ret) {
+   dev_err(hdmi->dev, "Failed to enable HDMI hclk_vop: %d\n",
+   ret);
+   return ret;
+   }
+
+   if (hdmi->chip_data == &rk3568_chip_data) {
+   regmap_write(hdmi->regmap, RK3568_GRF_VO_CON1,
+HIWORD_UPDATE(RK3568_HDMI_SDAIN_MSK |
+  RK3568_HDMI_SCLIN_MSK,
+  RK3568_HDMI_SDAIN_MSK |
+  RK3568_HDMI_SCLIN_MSK));
+   }
+
hdmi->phy = devm_phy_optional_get(dev, "hdmi");
if (IS_ERR(hdmi->phy)) {
ret = PTR_ERR(hdmi->phy);
@@ -559,6 +623,8 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct 
device *master,
ret = PTR_ERR(hdmi->hdmi);
drm_encoder_cleanup(encoder);
clk_disable_unprepare(hdmi->vpll_clk);
+   clk_disable_unprepare(hdmi->hclk_vio);
+   clk_disable_unprepare(hdmi->hclk_vop);
}
 
return ret;
@@ -571,6 +637,8 @@ static void dw_hdmi_rockchip_unbind(struct device *dev, 
struct device *master,
 
dw_hdmi_unbind(hdmi->hdmi);
clk_disable_unprepare(hdmi->vpll_clk);
+   clk_disable_unprepare(hdmi->hclk_vio);
+  

Oops in qxl_bo_move_notify()

2021-07-08 Thread Roberto Sassu
Hi

I'm getting this oops (on commit a180bd1d7e16):

[   17.711520] BUG: kernel NULL pointer dereference, address: 
0010
[   17.739451] RIP: 0010:qxl_bo_move_notify+0x35/0x80 [qxl]
[   17.827345] RSP: 0018:c9457c08 EFLAGS: 00010286
[   17.827350] RAX: 0001 RBX:  RCX: 
dc00
[   17.827353] RDX: 0007 RSI: 0004 RDI: 
85596feb
[   17.827356] RBP: 88800e311c00 R08:  R09: 

[   17.827358] R10: 8697b243 R11: fbfff0d2f648 R12: 

[   17.827361] R13: 88800e311e48 R14: 88800e311e98 R15: 
88800e311e90
[   17.827364] FS:  () GS:88805d80() 
knlGS:
[   17.861699] CS:  0010 DS:  ES:  CR0: 80050033
[   17.861703] CR2: 0010 CR3: 2642c000 CR4: 
00350ee0
[   17.861707] Call Trace:
[   17.861712]  ttm_bo_cleanup_memtype_use+0x4d/0xb0 [ttm]
[   17.861730]  ttm_bo_release+0x42d/0x7c0 [ttm]
[   17.861746]  ? ttm_bo_cleanup_refs+0x127/0x420 [ttm]
[   17.888300]  ttm_bo_delayed_delete+0x289/0x390 [ttm]
[   17.888317]  ? ttm_bo_cleanup_refs+0x420/0x420 [ttm]
[   17.888332]  ? lock_release+0x9c/0x5c0
[   17.901033]  ? rcu_read_lock_held_common+0x1a/0x50
[   17.905183]  ttm_device_delayed_workqueue+0x18/0x50 [ttm]
[   17.909371]  process_one_work+0x537/0x9f0
[   17.913345]  ? pwq_dec_nr_in_flight+0x160/0x160
[   17.917297]  ? lock_acquired+0xa4/0x580
[   17.921168]  ? worker_thread+0x169/0x600
[   17.925034]  worker_thread+0x7a/0x600
[   17.928657]  ? process_one_work+0x9f0/0x9f0
[   17.932360]  kthread+0x200/0x230
[   17.935930]  ? set_kthread_struct+0x80/0x80
[   17.939593]  ret_from_fork+0x22/0x30
[   17.951737] CR2: 0010
[   17.955496] ---[ end trace e30cc21c24e81ee5 ]---

I had a look at the code, and it seems that this is caused by
trying to use bo->resource which is NULL.

bo->resource is freed by ttm_bo_cleanup_refs() ->
ttm_bo_cleanup_memtype_use() -> ttm_resource_free().

And then a notification is issued by ttm_bo_cleanup_refs() ->
ttm_bo_put() -> ttm_bo_release() ->
ttm_bo_cleanup_memtype_use(), this time with bo->resource
equal to NULL.

I was thinking of a proper way to fix this. Checking that
bo->resource is not NULL in qxl_bo_move_notify() would
solve the issue. But maybe there is a better way, like
avoiding that ttm_bo_cleanup_memtype_use() is called
twice. Which way would be preferable?

Thanks

Roberto

HUAWEI TECHNOLOGIES Duesseldorf GmbH, HRB 56063
Managing Director: Li Peng, Li Jian, Shi Yanli


Re: [PATCH v5 0/5] iommu/arm-smmu: adreno-smmu page fault handling

2021-07-08 Thread Rob Clark
On Tue, Jul 6, 2021 at 10:12 PM John Stultz  wrote:
>
> On Sun, Jul 4, 2021 at 11:16 AM Rob Clark  wrote:
> >
> > I suspect you are getting a dpu fault, and need:
> >
> > https://lore.kernel.org/linux-arm-msm/CAF6AEGvTjTUQXqom-xhdh456tdLscbVFPQ+iud1H1gHc8A2=h...@mail.gmail.com/
> >
> > I suppose Bjorn was expecting me to send that patch
>
> If it's helpful, I applied that and it got the db845c booting mainline
> again for me (along with some reverts for a separate ext4 shrinker
> crash).
> Tested-by: John Stultz 
>

Thanks, I'll send a patch shortly

BR,
-R


[PATCH 1/2] drivers/gpu/drm/i915/gt/intel_engine_cs.c: Repair typo in function name

2021-07-08 Thread zhaoxiao
Fixes the following W=1 kernel build warning(s):

drivers/gpu/drm/i915/gt/intel_engine_cs.c:882: warning: expecting prototype for 
intel_engines_init_common(). Prototype was for engine_init_common() instead
drivers/gpu/drm/i915/gt/intel_engine_cs.c:959: warning: expecting prototype for 
intel_engines_cleanup_common(). Prototype was for intel_engine_cleanup_common() 
instead

Signed-off-by: zhaoxiao 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 7f03df236613..01b4dc041a72 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -868,7 +868,7 @@ create_kernel_context(struct intel_engine_cs *engine)
 }
 
 /**
- * intel_engines_init_common - initialize cengine state which might require hw 
access
+ * engine_init_common - initialize cengine state which might require hw access
  * @engine: Engine to initialize.
  *
  * Initializes @engine@ structure members shared between legacy and execlists
@@ -949,7 +949,7 @@ int intel_engines_init(struct intel_gt *gt)
 }
 
 /**
- * intel_engines_cleanup_common - cleans up the engine state created by
+ * intel_engine_cleanup_common - cleans up the engine state created by
  *the common initiailizers.
  * @engine: Engine to cleanup.
  *
-- 
2.20.1





[PATCH 2/2] drivers/gpu/drm/i915/display/intel_display_power.c: Repair typo in function name

2021-07-08 Thread zhaoxiao
Fixes the following W=1 kernel build warning(s):

drivers/gpu/drm/i915/display/intel_display_power.c:2300: warning: expecting 
prototype for intel_display_power_put_async(). Prototype was for 
__intel_display_power_put_async() instead

Signed-off-by: zhaoxiao 
---
 drivers/gpu/drm/i915/display/intel_display_power.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c 
b/drivers/gpu/drm/i915/display/intel_display_power.c
index 4298ae684d7d..c37e14f2df90 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -2285,7 +2285,7 @@ intel_display_power_put_async_work(struct work_struct 
*work)
 }
 
 /**
- * intel_display_power_put_async - release a power domain reference 
asynchronously
+ * __intel_display_power_put_async - release a power domain reference 
asynchronously
  * @i915: i915 device instance
  * @domain: power domain to reference
  * @wakeref: wakeref acquired for the reference that is being released
-- 
2.20.1





[PATCH v2 1/2] dt-bindings: display: rockchip: Add compatible for rk3568 HDMI

2021-07-08 Thread Benjamin Gaignard
Define a new compatible for rk3568 HDMI.
This version of HDMI hardware block needs two new clocks hclk_vio and hclk
to provide phy reference clocks.

Signed-off-by: Benjamin Gaignard 
---
version 2:
- Add the clocks needed for the phy.

 .../bindings/display/rockchip/rockchip,dw-hdmi.yaml | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git 
a/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml 
b/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml
index 75cd9c686e985..cb8643b3a8b84 100644
--- a/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml
@@ -23,6 +23,7 @@ properties:
   - rockchip,rk3288-dw-hdmi
   - rockchip,rk3328-dw-hdmi
   - rockchip,rk3399-dw-hdmi
+  - rockchip,rk3568-dw-hdmi
 
   reg-io-width:
 const: 4
@@ -51,8 +52,11 @@ properties:
   - vpll
   - enum:
   - grf
+  - hclk_vio
+  - vpll
+  - enum:
+  - hclk
   - vpll
-  - const: vpll
 
   ddc-i2c-bus:
 $ref: /schemas/types.yaml#/definitions/phandle
-- 
2.25.1



[PATCH] gpu: ttm: fix GPF in ttm_bo_release

2021-07-08 Thread Pavel Skripkin
My local syzbot instance hit GPF in ttm_bo_release().
Unfortunately, syzbot didn't produce a reproducer for this, but I
found out possible scenario:

drm_gem_vram_create()<-- drm_gem_vram_object kzalloced
 (bo embedded in this object)
  ttm_bo_init()
ttm_bo_init_reserved()
  ttm_resource_alloc()
man->func->alloc()   <-- allocation failure
  ttm_bo_put()
ttm_bo_release()
  ttm_mem_io_free()  <-- bo->resource == NULL passed
 as second argument
 *GPF*

So, I've added check in ttm_bo_release() to avoid passing
NULL as second argument to ttm_mem_io_free().

Fail log:

KASAN: null-ptr-deref in range [0x0020-0x0027]
CPU: 1 PID: 10419 Comm: syz-executor.3 Not tainted 5.13.0-rc7-next-20210625 #7
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014
RIP: 0010:ttm_mem_io_free+0x28/0x170 drivers/gpu/drm/ttm/ttm_bo_util.c:66
Code: b1 90 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 e8 cd 19 24 fd 4c 8d 6b 
20 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 2a 01 
00 00 4c 8b 63 20 31 ff 4c 89 e6 e8 00 1f
RSP: 0018:c900141df968 EFLAGS: 00010202
RAX: dc00 RBX:  RCX: c90010da
RDX: 0004 RSI: 84513ea3 RDI: 888041fbc010
RBP: 888041fbc010 R08:  R09: 
R10: 0001 R11:  R12: 
R13: 0020 R14: 88806b258800 R15: 88806b258a38
FS:  7fa6e9845640() GS:88807ec0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7fad61265e18 CR3: 5ad79000 CR4: 00350ee0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 ttm_bo_release+0xd94/0x10a0 drivers/gpu/drm/ttm/ttm_bo.c:422
 kref_put include/linux/kref.h:65 [inline]
 ttm_bo_put drivers/gpu/drm/ttm/ttm_bo.c:470 [inline]
 ttm_bo_init_reserved+0x7cb/0x960 drivers/gpu/drm/ttm/ttm_bo.c:1050
 ttm_bo_init+0x105/0x270 drivers/gpu/drm/ttm/ttm_bo.c:1074
 drm_gem_vram_create+0x332/0x4c0 drivers/gpu/drm/drm_gem_vram_helper.c:228

Fixes: d3116756a710 ("drm/ttm: rename bo->mem and make it a pointer")
Signed-off-by: Pavel Skripkin 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1b950b45cf4b..15eb97459eab 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -419,7 +419,8 @@ static void ttm_bo_release(struct kref *kref)
bo->bdev->funcs->release_notify(bo);
 
drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
-   ttm_mem_io_free(bdev, bo->resource);
+   if (bo->resource)
+   ttm_mem_io_free(bdev, bo->resource);
}
 
if (!dma_resv_test_signaled(bo->base.resv, true) ||
-- 
2.32.0



Re: [PATCH v2 1/2] drm/i915/opregion: add support for mailbox #5 EDID

2021-07-08 Thread Daniel Dadap

On 6/1/21 5:43 PM, Anisse Astier wrote:


Le Tue, Jun 01, 2021 at 06:50:24PM +0300, Ville Syrj?l? a ?crit :

On Mon, May 31, 2021 at 10:46:41PM +0200, Anisse Astier wrote:

The ACPI OpRegion Mailbox #5 ASLE extension may contain an EDID to be
used for the embedded display. Add support for using it via by adding
the EDID to the list of available modes on the connector, and use it for
eDP when available.

If a panel's EDID is broken, there may be an override EDID set in the
ACPI OpRegion mailbox #5. Use it if available.

Looks like Windows uses the ACPI _DDC method instead. We should probably
do the same, just in case some crazy machine stores the EDID somewhere
else.

Thanks, I wouldn't have thought of this. It seems Daniel Dadap did a
patch series to do just that, in a generic way:
https://lore.kernel.org/amd-gfx/20200727205357.27839-1-dda...@nvidia.com/

I've tried patch 1 & 2, and after a fix[1] was able to call the _DDC method
on most devices, but without any EDID being returned.

I looked at the disassembled ACPI tables[2], and could not find any
device with the _DDC method. Are you sure it's the only method the
Windows driver uses to get the EDID ?



_DDC only works on devices that actually implement it, and the vast 
majority of devices don't, because the display just provides an EDID 
normally. AIUI, usually a device will implement _DDC either because an 
embedded panel has no ROM of its own to deliver an EDID, or to allow the 
EDID to be read by either GPU on a system with a muxed display, 
regardless of which GPU happens to have the DDC lines (in TMDS) or DP 
AUX routed to it at the moment. (To my knowledge, nobody actually muxes 
DP AUX independently from the main link, but there were some older 
pre-DP designs where DDC could be muxed independently.)


I'm not sure whether the comment about Windows using _DDC was meant for 
this device in particular, or just more generally, since DDC is part of 
the ACPI spec and some Windows GPU drivers *do* use it, where available. 
If it was meant for a particular device, then it's possible that the 
ACPI tables advertise different methods depending on e.g. _OSI. If you 
haven't already tried doing so, it might be worth overriding _OSI to 
spoof Windows, to see if _DDC gets advertised.


I'm not sure how you were able to call _DDC without an EDID being 
returned as described above, if there was no _DDC method in the ACPI 
tables; I would expect that attempting to call _DDC would fail to locate 
a suitable method and do_acpi_ddc would return NULL.




Regards,

Anisse

[1] _DOD ids should only use 16 lower bits, see table here:
https://uefi.org/specs/ACPI/6.4/Apx_B_Video_Extensions/display-specific-methods.html#dod-enumerate-all-devices-attached-to-the-display-adapter



Thanks; I don't see a version of your modified patch here, was the fix 
just to mask the _DOD IDs against 0x?




[2] acpidump: https://gitlab.freedesktop.org/drm/intel/-/issues/3454#note_913970



Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init

2021-07-08 Thread Daniel Vetter
On Thu, Jul 8, 2021 at 8:56 AM Christian König  wrote:
>
> Am 07.07.21 um 18:32 schrieb Daniel Vetter:
> > On Wed, Jul 7, 2021 at 2:58 PM Christian König  
> > wrote:
> >> Am 07.07.21 um 14:13 schrieb Daniel Vetter:
> >>> On Wed, Jul 7, 2021 at 1:57 PM Christian König  
> >>> wrote:
>  Am 07.07.21 um 13:14 schrieb Daniel Vetter:
> > On Wed, Jul 7, 2021 at 11:30 AM Christian König
> >  wrote:
> >> Am 02.07.21 um 23:38 schrieb Daniel Vetter:
> >>> This is a very confusingly named function, because not just does it
> >>> init an object, it arms it and provides a point of no return for
> >>> pushing a job into the scheduler. It would be nice if that's a bit
> >>> clearer in the interface.
> >>>
> >>> But the real reason is that I want to push the dependency tracking
> >>> helpers into the scheduler code, and that means drm_sched_job_init
> >>> must be called a lot earlier, without arming the job.
> >>>
> >>> v2:
> >>> - don't change .gitignore (Steven)
> >>> - don't forget v3d (Emma)
> >>>
> >>> v3: Emma noticed that I leak the memory allocated in
> >>> drm_sched_job_init if we bail out before the point of no return in
> >>> subsequent driver patches. To be able to fix this change
> >>> drm_sched_job_cleanup() so it can handle being called both before and
> >>> after drm_sched_job_arm().
> >> Thinking more about this, I'm not sure if this really works.
> >>
> >> See drm_sched_job_init() was also calling drm_sched_entity_select_rq()
> >> to update the entity->rq association.
> >>
> >> And that can only be done later on when we arm the fence as well.
> > Hm yeah, but that's a bug in the existing code I think: We already
> > fail to clean up if we fail to allocate the fences. So I think the
> > right thing to do here is to split the checks into job_init, and do
> > the actual arming/rq selection in job_arm? I'm not entirely sure
> > what's all going on there, the first check looks a bit like trying to
> > schedule before the entity is set up, which is a driver bug and should
> > have a WARN_ON?
>  No you misunderstood me, the problem is something else.
> 
>  You asked previously why the call to drm_sched_job_init() was so late in
>  the CS.
> 
>  The reason for this was not alone the scheduler fence init, but also the
>  call to drm_sched_entity_select_rq().
> >>> Ah ok, I think I can fix that. Needs a prep patch to first make
> >>> drm_sched_entity_select infallible, then should be easy to do.
> >>>
> > The 2nd check around last_scheduled I have honeslty no idea what it's
> > even trying to do.
>  You mean that here?
> 
> fence = READ_ONCE(entity->last_scheduled);
> if (fence && !dma_fence_is_signaled(fence))
> return;
> 
>  This makes sure that load balancing is not moving the entity to a
>  different scheduler while there are still jobs running from this entity
>  on the hardware,
> >>> Yeah after a nap that idea crossed my mind too. But now I have locking
> >>> questions, afaiui the scheduler thread updates this, without taking
> >>> any locks - entity dequeuing is lockless. And here we read the fence
> >>> and then seem to yolo check whether it's signalled? What's preventing
> >>> a use-after-free here? There's no rcu or anything going on here at
> >>> all, and it's outside of the spinlock section, which starts a bit
> >>> further down.
> >> The last_scheduled fence of an entity can only change when there are
> >> jobs on the entities queued, and we have just ruled that out in the
> >> check before.
> > There aren't any barriers, so the cpu could easily run the two checks
> > the other way round. I'll ponder this and figure out where exactly we
> > need docs for the constraint and/or barriers to make this work as
> > intended. As-is I'm not seeing how it does ...
>
> spsc_queue_count() provides the necessary barrier with the atomic_read().

atomic_t is fully unordered, except when it's a read-modify-write
atomic op, then it's a full barrier. So yeah you need more here. But
also since you only need a read barrier on one side, and a write
barrier on the other, you don't actually need a cpu barriers on x86.
And READ_ONCE gives you the compiler barrier on one side at least, I
haven't found it on the writer side yet.

> But yes a comment would be really nice here. I had to think for a while
> why we don't need this as well.

I'm typing a patch, which after a night's sleep I realized has the
wrong barriers. And now I'm also typing some doc improvements for
drm_sched_entity and related functions.

>
> Christian.
>
> > -Daniel
> >
> >> Christian.
> >>
> >>
> >>> -Daniel
> >>>
>  Regards
>  Christian.
> 
> > -Daniel
> >
> >> Christian.
> >>
> >>> Also improve the kerneldoc for this.
> >>>
> >>> Acked-by: Steven Price  (v2)
> >

Re: [PATCH 5/7] drm/msm/dp: return correct edid checksum after corrupted edid checksum read

2021-07-08 Thread Stephen Boyd
Quoting Kuogee Hsieh (2021-07-06 10:20:18)
> Response with correct edid checksum saved at connector after corrupted edid
> checksum read. This fixes Link Layer CTS cases 4.2.2.3, 4.2.2.6.
>
> Signed-off-by: Kuogee Hsieh 
> ---
>  drivers/gpu/drm/msm/dp/dp_panel.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c 
> b/drivers/gpu/drm/msm/dp/dp_panel.c
> index 88196f7..0fdb551 100644
> --- a/drivers/gpu/drm/msm/dp/dp_panel.c
> +++ b/drivers/gpu/drm/msm/dp/dp_panel.c
> @@ -271,7 +271,7 @@ static u8 dp_panel_get_edid_checksum(struct edid *edid)
>  {
> struct edid *last_block;
> u8 *raw_edid;
> -   bool is_edid_corrupt;
> +   bool is_edid_corrupt = false;
>
> if (!edid) {
> DRM_ERROR("invalid edid input\n");
> @@ -303,7 +303,12 @@ void dp_panel_handle_sink_request(struct dp_panel 
> *dp_panel)
> panel = container_of(dp_panel, struct dp_panel_private, dp_panel);
>
> if (panel->link->sink_request & DP_TEST_LINK_EDID_READ) {
> -   u8 checksum = dp_panel_get_edid_checksum(dp_panel->edid);
> +   u8 checksum;
> +
> +   if (dp_panel->edid)
> +   checksum = dp_panel_get_edid_checksum(dp_panel->edid);
> +   else
> +   checksum = dp_panel->connector->real_edid_checksum;
>
> dp_link_send_edid_checksum(panel->link, checksum);

It looks like this can be drm_dp_send_real_edid_checksum()? Then we
don't have to look at the connector internals sometimes and can drop
dp_panel_get_edid_checksum() entirely?

> dp_link_send_test_response(panel->link);


Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init

2021-07-08 Thread Daniel Vetter
On Thu, Jul 8, 2021 at 9:09 AM Daniel Vetter  wrote:
> On Thu, Jul 8, 2021 at 8:56 AM Christian König  
> wrote:
> > Am 07.07.21 um 18:32 schrieb Daniel Vetter:
> > > On Wed, Jul 7, 2021 at 2:58 PM Christian König  
> > > wrote:
> > >> Am 07.07.21 um 14:13 schrieb Daniel Vetter:
> > >>> On Wed, Jul 7, 2021 at 1:57 PM Christian König 
> > >>>  wrote:
> >  Am 07.07.21 um 13:14 schrieb Daniel Vetter:
> > > On Wed, Jul 7, 2021 at 11:30 AM Christian König
> > >  wrote:
> > >> Am 02.07.21 um 23:38 schrieb Daniel Vetter:
> > >>> This is a very confusingly named function, because not just does it
> > >>> init an object, it arms it and provides a point of no return for
> > >>> pushing a job into the scheduler. It would be nice if that's a bit
> > >>> clearer in the interface.
> > >>>
> > >>> But the real reason is that I want to push the dependency tracking
> > >>> helpers into the scheduler code, and that means drm_sched_job_init
> > >>> must be called a lot earlier, without arming the job.
> > >>>
> > >>> v2:
> > >>> - don't change .gitignore (Steven)
> > >>> - don't forget v3d (Emma)
> > >>>
> > >>> v3: Emma noticed that I leak the memory allocated in
> > >>> drm_sched_job_init if we bail out before the point of no return in
> > >>> subsequent driver patches. To be able to fix this change
> > >>> drm_sched_job_cleanup() so it can handle being called both before 
> > >>> and
> > >>> after drm_sched_job_arm().
> > >> Thinking more about this, I'm not sure if this really works.
> > >>
> > >> See drm_sched_job_init() was also calling 
> > >> drm_sched_entity_select_rq()
> > >> to update the entity->rq association.
> > >>
> > >> And that can only be done later on when we arm the fence as well.
> > > Hm yeah, but that's a bug in the existing code I think: We already
> > > fail to clean up if we fail to allocate the fences. So I think the
> > > right thing to do here is to split the checks into job_init, and do
> > > the actual arming/rq selection in job_arm? I'm not entirely sure
> > > what's all going on there, the first check looks a bit like trying to
> > > schedule before the entity is set up, which is a driver bug and should
> > > have a WARN_ON?
> >  No you misunderstood me, the problem is something else.
> > 
> >  You asked previously why the call to drm_sched_job_init() was so late 
> >  in
> >  the CS.
> > 
> >  The reason for this was not alone the scheduler fence init, but also 
> >  the
> >  call to drm_sched_entity_select_rq().
> > >>> Ah ok, I think I can fix that. Needs a prep patch to first make
> > >>> drm_sched_entity_select infallible, then should be easy to do.
> > >>>
> > > The 2nd check around last_scheduled I have honeslty no idea what it's
> > > even trying to do.
> >  You mean that here?
> > 
> > fence = READ_ONCE(entity->last_scheduled);
> > if (fence && !dma_fence_is_signaled(fence))
> > return;
> > 
> >  This makes sure that load balancing is not moving the entity to a
> >  different scheduler while there are still jobs running from this entity
> >  on the hardware,
> > >>> Yeah after a nap that idea crossed my mind too. But now I have locking
> > >>> questions, afaiui the scheduler thread updates this, without taking
> > >>> any locks - entity dequeuing is lockless. And here we read the fence
> > >>> and then seem to yolo check whether it's signalled? What's preventing
> > >>> a use-after-free here? There's no rcu or anything going on here at
> > >>> all, and it's outside of the spinlock section, which starts a bit
> > >>> further down.
> > >> The last_scheduled fence of an entity can only change when there are
> > >> jobs on the entities queued, and we have just ruled that out in the
> > >> check before.
> > > There aren't any barriers, so the cpu could easily run the two checks
> > > the other way round. I'll ponder this and figure out where exactly we
> > > need docs for the constraint and/or barriers to make this work as
> > > intended. As-is I'm not seeing how it does ...
> >
> > spsc_queue_count() provides the necessary barrier with the atomic_read().
>
> atomic_t is fully unordered, except when it's a read-modify-write

Wasn't awake yet, I think the rule is read-modify-write and return
previous value gives you full barrier. So stuff like cmpxchg, but also
a few others. See atomic_t.txt under the ORDERING heading (yes that
maintainer refuses to accept .rst so I can't just link you to the
right section, it's silly). get/set and even RMW atomic ops that don't
return anything are all fully unordered.
-Daniel


> atomic op, then it's a full barrier. So yeah you need more here. But
> also since you only need a read barrier on one side, and a write
> barrier on the other, you don't actually need a cpu barriers on x86.
> And REA

Re: [PATCH 7/7] drm/msm/dp: retrain link when loss of symbol lock detected

2021-07-08 Thread Stephen Boyd
Quoting Kuogee Hsieh (2021-07-06 10:20:20)
> Main link symbol locked is achieved at end of link training 2. Some
> dongle main link symbol may become unlocked again if host did not end
> link training soon enough after completion of link training 2. Host
> have to re train main link if loss of symbol lock detected before
> end link training so that the coming video stream can be transmitted
> to sink properly.
>
> Signed-off-by: Kuogee Hsieh 

I guess this is a fix for the original driver, so it should be tagged
with Fixes appropriately.

> ---
>  drivers/gpu/drm/msm/dp/dp_ctrl.c | 34 ++
>  1 file changed, 34 insertions(+)
>
> diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c 
> b/drivers/gpu/drm/msm/dp/dp_ctrl.c
> index 0cb01a9..e616ab2 100644
> --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
> +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
> @@ -1661,6 +1661,25 @@ static bool dp_ctrl_any_lane_cr_lose(struct 
> dp_ctrl_private *ctrl,
> return false;
>  }
>
> +static bool dp_ctrl_loss_symbol_lock(struct dp_ctrl_private *ctrl)
> +{
> +   u8 link_status[6];

Can we use link_status[DP_LINK_STATUS_SIZE] instead?

> +   u8 status;
> +   int i;
> +   int lane = ctrl->link->link_params.num_lanes;

s/lane/num_lanes/

would make the code easier to read

> +
> +   dp_ctrl_read_link_status(ctrl, link_status);
> +
> +   for (i = 0; i < lane; i++) {
> +   status = link_status[i / 2];
> +   status >>= ((i % 2) * 4);
> +   if (!(status & DP_LANE_SYMBOL_LOCKED))
> +   return true;
> +   }
> +
> +   return false;
> +}
> +
>  int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl)
>  {
> int rc = 0;
> @@ -1777,6 +1796,17 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl)
> return rc;
>  }
>
> +static int dp_ctrl_link_retrain(struct dp_ctrl_private *ctrl)
> +{
> +   int ret = 0;

Please drop init of ret.

> +   u8 cr_status[2];
> +   int training_step = DP_TRAINING_NONE;
> +
> +   ret = dp_ctrl_setup_main_link(ctrl, cr_status, &training_step);

as it is assigned here.

> +
> +   return ret;

And indeed, it could be 'return dp_ctrl_setup_main_link()' instead.

> +}
> +
>  int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl)
>  {
> int ret = 0;
> @@ -1802,6 +1832,10 @@ int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl)
> }
> }
>
> +   /* if loss symbol lock happen, then retaining the link */

retain or retrain? The comment seems to be saying what the code says "if
loss retrain", so the comment is not very useful.

> +   if (dp_ctrl_loss_symbol_lock(ctrl))
> +   dp_ctrl_link_retrain(ctrl);
> +
> /* stop txing train pattern to end link training */
> dp_ctrl_clear_training_pattern(ctrl);
>


Re: BUG: MTK DRM/HDMI broken on 5.13 (mt7623/bpi-r2)

2021-07-08 Thread Dafna Hirschfeld

Hi Frank,


On 06.07.21 11:54, Frank Wunderlich wrote:

Hi,

i've noticed that HDMI is broken at least on my board (Bananapi-r2,mt7623) on 
5.13.

after some research i noticed that it is working till

commit 2e477391522354e763aa62ee3e281c1ad9e8eb1b
Author: Dafna Hirschfeld 
Date:   Tue Mar 30 13:09:02 2021 +0200

 drm/mediatek: Don't support hdmi connector creation


which is the last of mtk-drm-next-5.13 [1] so i guess a problem with 
core-patches

dmesg shows the following:

[7.071342] mediatek-drm mediatek-drm.1.auto: bound 14007000.ovl (ops mtk_dis
p_ovl_component_ops)
[7.080330] mediatek-drm mediatek-drm.1.auto: bound 14008000.rdma (ops mtk_di
sp_rdma_component_ops)
[7.089429] mediatek-drm mediatek-drm.1.auto: bound 1400b000.color (ops mtk_d
isp_color_component_ops)
[7.098689] mediatek-drm mediatek-drm.1.auto: bound 14012000.rdma (ops mtk_di
sp_rdma_component_ops)
[7.107814] mediatek-drm mediatek-drm.1.auto: bound 14014000.dpi (ops mtk_dpi
_component_ops)
[7.116338] mediatek-drm mediatek-drm.1.auto: Not creating crtc 1 because com
ponent 9 is disabled or missing

[   38.403957] Console: switching to colour frame buffer device 160x64
[   48.516398] [drm:drm_crtc_commit_wait] *ERROR* flip_done timed out
[   48.516422] [drm:drm_atomic_helper_wait_for_dependencies] *ERROR* [CRTC:41:cr
tc-0] commit wait timed out
[   58.756384] [drm:drm_crtc_commit_wait] *ERROR* flip_done timed out
[   58.756399] [drm:drm_atomic_helper_wait_for_dependencies] *ERROR* [CONNECTOR:
32:HDMI-A-1] commit wait timed out
[   68.996384] [drm:drm_crtc_commit_wait] *ERROR* flip_done timed out
[   68.996399] [drm:drm_atomic_helper_wait_for_dependencies] *ERROR* [PLANE:33:p
lane-0] commit wait timed out
[   68.996423] [drm:mtk_drm_crtc_atomic_begin] *ERROR* new event while there is
still a pending event
[   69.106385] [ cut here ]
[   69.106392] WARNING: CPU: 2 PID: 7 at drivers/gpu/drm/drm_atomic_helper.c:151
1 drm_atomic_helper_wait_for_vblanks.part.0+0x2a0/0x2a8
[   69.106414] [CRTC:41:crtc-0] vblank wait timed out


We also encountered that warning on an mt8173 device - Acer Chromebook R13. It 
happens after resuming from suspend to ram.
We could not find a version that works and we were not able to find the fix of 
the bug.
It seems like the irq isr is not called after resuming from suspend.
Please share if you have new findings regarding that bug.

Thanks,
Dafna




so i guess the breaking commit may be this:

$ git logone -S"drm_crtc_commit_wait" -- drivers/gpu/drm/
b99c2c95412c 2021-01-11 drm: Introduce a drm_crtc_commit_wait helper

in drivers/gpu/drm/drm_atomic{,_helper}.c

but i cannot confirm it because my git bisect does strange things (after 
defining 5.13 as bad and the 2e4773915223 as good, second step is before the 
good commit till the end, last steps are 5.11...). sorry, i'm still new to 
bisect.

the fix is targeting to 5.12-rc2, is guess because CK Hu's tree is based on 
this...but the fix was not included in 5.12-rc2 (only after 5.12.0...got it by 
merging 5.12.14)

maybe you can help me?

regards Frank

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/chunkuang.hu/linux.git/log/?h=mediatek-drm-next-5.13

___
Linux-mediatek mailing list
linux-media...@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-mediatek



Re: [PATCH 2/7] drm/msm/dp: reduce link rate if failed at link training 1

2021-07-08 Thread Stephen Boyd
Quoting Kuogee Hsieh (2021-07-06 10:20:15)
> Reduce link rate and re start link training if link training 1
> failed due to loss of clock recovery done to fix Link Layer
> CTS case 4.3.1.7.  Also only update voltage and pre-emphasis
> swing level after link training started to fix Link Layer CTS
> case 4.3.1.6.
>
> Signed-off-by: Kuogee Hsieh 
> ---
>  drivers/gpu/drm/msm/dp/dp_ctrl.c | 86 
> ++--
>  1 file changed, 56 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c 
> b/drivers/gpu/drm/msm/dp/dp_ctrl.c
> index 27fb0f0..6f8443d 100644
> --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
> +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
> @@ -83,13 +83,6 @@ struct dp_ctrl_private {
> struct completion video_comp;
>  };
>
> -struct dp_cr_status {
> -   u8 lane_0_1;
> -   u8 lane_2_3;
> -};
> -
> -#define DP_LANE0_1_CR_DONE 0x11
> -
>  static int dp_aux_link_configure(struct drm_dp_aux *aux,
> struct dp_link_info *link)
>  {
> @@ -1080,7 +1073,7 @@ static int dp_ctrl_read_link_status(struct 
> dp_ctrl_private *ctrl,
>  }
>
>  static int dp_ctrl_link_train_1(struct dp_ctrl_private *ctrl,
> -   struct dp_cr_status *cr, int *training_step)
> +   u8 *cr, int *training_step)
>  {
> int tries, old_v_level, ret = 0;
> u8 link_status[DP_LINK_STATUS_SIZE];
> @@ -1109,8 +1102,8 @@ static int dp_ctrl_link_train_1(struct dp_ctrl_private 
> *ctrl,
> if (ret)
> return ret;
>
> -   cr->lane_0_1 = link_status[0];
> -   cr->lane_2_3 = link_status[1];
> +   cr[0] = link_status[0];
> +   cr[1] = link_status[1];
>
> if (drm_dp_clock_recovery_ok(link_status,
> ctrl->link->link_params.num_lanes)) {
> @@ -1188,7 +1181,7 @@ static void dp_ctrl_clear_training_pattern(struct 
> dp_ctrl_private *ctrl)
>  }
>
>  static int dp_ctrl_link_train_2(struct dp_ctrl_private *ctrl,
> -   struct dp_cr_status *cr, int *training_step)
> +   u8 *cr, int *training_step)
>  {
> int tries = 0, ret = 0;
> char pattern;
> @@ -1204,10 +1197,6 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private 
> *ctrl,
> else
> pattern = DP_TRAINING_PATTERN_2;
>
> -   ret = dp_ctrl_update_vx_px(ctrl);
> -   if (ret)
> -   return ret;
> -
> ret = dp_catalog_ctrl_set_pattern(ctrl->catalog, pattern);
> if (ret)
> return ret;
> @@ -1220,8 +1209,8 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private 
> *ctrl,
> ret = dp_ctrl_read_link_status(ctrl, link_status);
> if (ret)
> return ret;
> -   cr->lane_0_1 = link_status[0];
> -   cr->lane_2_3 = link_status[1];
> +   cr[0] = link_status[0];
> +   cr[1] = link_status[1];
>
> if (drm_dp_channel_eq_ok(link_status,
> ctrl->link->link_params.num_lanes)) {
> @@ -1241,7 +1230,7 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private 
> *ctrl,
>  static int dp_ctrl_reinitialize_mainlink(struct dp_ctrl_private *ctrl);
>
>  static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl,
> -   struct dp_cr_status *cr, int *training_step)
> +   u8 *cr, int *training_step)
>  {
> int ret = 0;
> u8 encoding = DP_SET_ANSI_8B10B;
> @@ -1282,7 +1271,7 @@ static int dp_ctrl_link_train(struct dp_ctrl_private 
> *ctrl,
>  }
>
>  static int dp_ctrl_setup_main_link(struct dp_ctrl_private *ctrl,
> -   struct dp_cr_status *cr, int *training_step)
> +   u8 *cr, int *training_step)
>  {
> int ret = 0;
>
> @@ -1496,14 +1485,14 @@ static int dp_ctrl_deinitialize_mainlink(struct 
> dp_ctrl_private *ctrl)
>  static int dp_ctrl_link_maintenance(struct dp_ctrl_private *ctrl)
>  {
> int ret = 0;
> -   struct dp_cr_status cr;
> +   u8 cr_status[2];
> int training_step = DP_TRAINING_NONE;
>
> dp_ctrl_push_idle(&ctrl->dp_ctrl);
>
> ctrl->dp_ctrl.pixel_rate = ctrl->panel->dp_mode.drm_mode.clock;
>
> -   ret = dp_ctrl_setup_main_link(ctrl, &cr, &training_step);
> +   ret = dp_ctrl_setup_main_link(ctrl, cr_status, &training_step);
> if (ret)
> goto end;

Do we need to extract the link status information from deep in these
functions? Why not read it again when we need to?

>
> @@ -1634,6 +1623,41 @@ void dp_ctrl_handle_sink_request(struct dp_ctrl 
> *dp_ctrl)
> }
>  }
>
> +static bool dp_ctrl_any_lane_cr_done(struct dp_ctrl_private *ctrl,
> +   u8 *cr_status)
> +
> +{
> +   int i;
> +   u8 status;
> +   int lane = ctrl->link->link_params.num_lanes;
> +
> +   for (i = 0; i < lane; i++) {
> +   status = cr_status[i / 2];
> 

Re: [PATCH 3/7] drm/msm/dp: reset aux controller after dp_aux_cmd_fifo_tx() failed.

2021-07-08 Thread Stephen Boyd
Quoting Kuogee Hsieh (2021-07-06 10:20:16)
> Aux hardware calibration sequence requires resetting the aux controller
> in order for the new setting to take effect. However resetting the AUX
> controller will also clear HPD interrupt status which may accidentally
> cause pending unplug interrupt to get lost. Therefore reset aux
> controller only when link is in connection state when dp_aux_cmd_fifo_tx()
> fail. This fixes Link Layer CTS cases 4.2.1.1 and 4.2.1.2.
>
> Signed-off-by: Kuogee Hsieh 
> ---
>  drivers/gpu/drm/msm/dp/dp_aux.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c
> index 4a3293b..eb40d84 100644
> --- a/drivers/gpu/drm/msm/dp/dp_aux.c
> +++ b/drivers/gpu/drm/msm/dp/dp_aux.c
> @@ -353,6 +353,9 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux,
> if (!(aux->retry_cnt % MAX_AUX_RETRIES))
> dp_catalog_aux_update_cfg(aux->catalog);
> }
> +   /* reset aux if link is in connected state */
> +   if (dp_catalog_link_is_connected(aux->catalog))

How do we avoid resetting aux when hpd is unplugged and then plugged
back in during an aux transfer?

> +   dp_catalog_aux_reset(aux->catalog);
> } else {
> aux->retry_cnt = 0;
> switch (aux->aux_error_num) {
> --
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project
>


[PATCH v3] drm/panfrost:fix the exception name always "UNKNOWN"

2021-07-08 Thread ChunyouTang
From: ChunyouTang 

The exception_code field in the register is only 8 bits, so if
fault_status in panfrost_gpu_irq_handler() is not masked with
(& 0xFF), it can't get the correct exception reason.

It's also better to show the full register value to the
user, so fault_status itself should not be masked with (& 0xFF).

Signed-off-by: ChunyouTang 
---
 drivers/gpu/drm/panfrost/panfrost_gpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c 
b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index 1fffb6a0b24f..d2d287bbf4e7 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -33,7 +33,7 @@ static irqreturn_t panfrost_gpu_irq_handler(int irq, void 
*data)
address |= gpu_read(pfdev, GPU_FAULT_ADDRESS_LO);
 
dev_warn(pfdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx\n",
-fault_status & 0xFF, panfrost_exception_name(pfdev, 
fault_status & 0xFF),
+fault_status, panfrost_exception_name(pfdev, 
fault_status & 0xFF),
 address);
 
if (state & GPU_IRQ_MULTIPLE_FAULT)
-- 
2.25.1



Re: page pools, was Re: [PATCH v9 1/5] drm: Add a sharable drm page-pool implementation

2021-07-08 Thread Christian König

Am 08.07.21 um 06:20 schrieb Christoph Hellwig:

On Wed, Jul 07, 2021 at 12:35:23PM -0700, John Stultz wrote:

So, as Christian mentioned, on the TTM side it's useful, as they are
trying to avoid TLB flushes when changing caching attributes.

For the dmabuf system heap purposes, the main benefit is moving the
page zeroing to the free path, rather than the allocation path. This
on its own doesn't save much, but allows us to defer frees (and thus
the zeroing) to the background, which can get that work out of the hot
path.

I really do no think that is worth it to fragment the free pages.


And I think functionality like that should be part of the common page 
allocator.


I mean we already have __GFP_ZERO, why not have a background kernel 
thread which zeros free pages when a CPU core is idle? (I'm pretty sure 
we already have that somehow).


Christian.


Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init

2021-07-08 Thread Christian König

Am 08.07.21 um 09:19 schrieb Daniel Vetter:

On Thu, Jul 8, 2021 at 9:09 AM Daniel Vetter  wrote:

On Thu, Jul 8, 2021 at 8:56 AM Christian König  wrote:

Am 07.07.21 um 18:32 schrieb Daniel Vetter:

On Wed, Jul 7, 2021 at 2:58 PM Christian König  wrote:

Am 07.07.21 um 14:13 schrieb Daniel Vetter:

On Wed, Jul 7, 2021 at 1:57 PM Christian König  wrote:

Am 07.07.21 um 13:14 schrieb Daniel Vetter:

On Wed, Jul 7, 2021 at 11:30 AM Christian König
 wrote:

Am 02.07.21 um 23:38 schrieb Daniel Vetter:

This is a very confusingly named function, because not just does it
init an object, it arms it and provides a point of no return for
pushing a job into the scheduler. It would be nice if that's a bit
clearer in the interface.

But the real reason is that I want to push the dependency tracking
helpers into the scheduler code, and that means drm_sched_job_init
must be called a lot earlier, without arming the job.

v2:
- don't change .gitignore (Steven)
- don't forget v3d (Emma)

v3: Emma noticed that I leak the memory allocated in
drm_sched_job_init if we bail out before the point of no return in
subsequent driver patches. To be able to fix this change
drm_sched_job_cleanup() so it can handle being called both before and
after drm_sched_job_arm().

Thinking more about this, I'm not sure if this really works.

See drm_sched_job_init() was also calling drm_sched_entity_select_rq()
to update the entity->rq association.

And that can only be done later on when we arm the fence as well.

Hm yeah, but that's a bug in the existing code I think: We already
fail to clean up if we fail to allocate the fences. So I think the
right thing to do here is to split the checks into job_init, and do
the actual arming/rq selection in job_arm? I'm not entirely sure
what's all going on there, the first check looks a bit like trying to
schedule before the entity is set up, which is a driver bug and should
have a WARN_ON?

No you misunderstood me, the problem is something else.

You asked previously why the call to drm_sched_job_init() was so late in
the CS.

The reason for this was not alone the scheduler fence init, but also the
call to drm_sched_entity_select_rq().

Ah ok, I think I can fix that. Needs a prep patch to first make
drm_sched_entity_select infallible, then should be easy to do.


The 2nd check around last_scheduled I have honeslty no idea what it's
even trying to do.

You mean that here?

fence = READ_ONCE(entity->last_scheduled);
if (fence && !dma_fence_is_signaled(fence))
return;

This makes sure that load balancing is not moving the entity to a
different scheduler while there are still jobs running from this entity
on the hardware,

Yeah after a nap that idea crossed my mind too. But now I have locking
questions, afaiui the scheduler thread updates this, without taking
any locks - entity dequeuing is lockless. And here we read the fence
and then seem to yolo check whether it's signalled? What's preventing
a use-after-free here? There's no rcu or anything going on here at
all, and it's outside of the spinlock section, which starts a bit
further down.

The last_scheduled fence of an entity can only change when there are
jobs on the entities queued, and we have just ruled that out in the
check before.

There aren't any barriers, so the cpu could easily run the two checks
the other way round. I'll ponder this and figure out where exactly we
need docs for the constraint and/or barriers to make this work as
intended. As-is I'm not seeing how it does ...

spsc_queue_count() provides the necessary barrier with the atomic_read().

atomic_t is fully unordered, except when it's a read-modify-write

Wasn't awake yet, I think the rule is read-modify-write and return
previous value gives you full barrier. So stuff like cmpxchg, but also
a few others. See atomic_t.txt under ODERING heading (yes that
maintainer refuses to accept .rst so I can't just link you to the
right section, it's silly). get/set and even RMW atomic ops that don't
return anything are all fully unordered.


As far as I know that not completely correct. The rules around atomics i 
once learned are:


1. Everything which modifies something is a write barrier.
2. Everything which returns something is a read barrier.

And I know a whole bunch of use cases where this is relied upon in the 
core kernel, so I'm pretty sure that's correct.


In this case the write barrier is the atomic_dec() in spsc_queue_pop() 
and the read barrier is the aromic_read() in spsc_queue_count().


The READ_ONCE() is actually not even necessary as far as I can see.

Christian.


-Daniel



atomic op, then it's a full barrier. So yeah you need more here. But
also since you only need a read barrier on one side, and a write
barrier on the other, you don't actually need a cpu barriers on x86.
And READ_ONCE gives you the compiler barrier on one side at least, I
haven't found it on the writer side yet.


But yes a comment would be really nice here. I had 

Aw: Re: BUG: MTK DRM/HDMI broken on 5.13 (mt7623/bpi-r2)

2021-07-08 Thread Frank Wunderlich
> Gesendet: Donnerstag, 08. Juli 2021 um 09:22 Uhr
> Von: "Dafna Hirschfeld" 
> We also encountered that warning on mt8173 device - Acer Chromebook R13. It 
> happen after resuming from suspend to ram.
> We could not find a version that works and we were not able to find the fix 
> of the bug.
> It seems like the irq isr is not called after resuming from suspend.
> Please share if you have new findings regarding that bug.

Hi,

i have not yet found a way to make the commit-history flat for running bisect 
without the issue of disappearing child commits when the merge commit is out of 
bisect scope. so i tried to start at working 5.12.0 with mtk-drm-patches and 
commits from drm core (i hope i have caught them all) by cherry-picking the 
single commits.

c24e104c26aa 2021-06-09 drm: Lock pointer access in drm_master_release()  (HEAD 
-> 5.12-drm)
2aa9212803a4 2021-06-08 drm: Fix use-after-free read in drm_getunique()
23b8d6c3be47 2021-04-08 treewide: Change list_sort to use const pointers
c1e987f51f06 2021-03-26 drm/dp_mst: Drop DRM_ERROR() on kzalloc() fail in 
drm_dp_mst_handle_up_req()
2176a9e962be 2021-04-01 drm/drm_internal.h: Remove repeated struct declaration
fc5d92c1485d 2021-04-08 drm/syncobj: use newly allocated stub fences
23a03d271e87 2021-03-29 drm/displayid: rename displayid_hdr to displayid_header
44ef605cb08f 2021-03-29 drm/displayid: allow data blocks with 0 payload length
bbdc0aefd1b5 2021-03-29 drm/edid: use the new displayid iterator for tile info
1ee4a22d671e 2021-03-29 drm/edid: use the new displayid iterator for finding 
CEA extension
d9b8c26b8ddf 2021-03-29 drm/edid: use the new displayid iterator for detailed 
modes
d9e95df8adc8 2021-03-29 drm/displayid: add new displayid section/block iterators
2dd279949358 2021-03-29 drm/displayid: add separate drm_displayid.c
bb1a3611abc1 2021-03-29 drm/edid: make a number of functions, parameters and 
variables const
0b18f5b98c71 2021-03-23 drm/dp_helper: Define options for FRL training for 
HDMI2.1 PCON
16fbc25ab84b 2021-03-25 drm/mst: Enhance MST topology logging
bb93ad6ab4e4 2021-03-26 drm: Fix 3 typos in the inline doc
27d30189b178 2021-03-22 drm/sysfs: Convert sysfs sprintf/snprintf family to 
sysfs_emit
04ad4ed36cf2 2021-03-18 drm: Few typo fixes
b8821cac052f 2021-03-13 drm: Add GUD USB Display driver
d3df1b84b9ff 2021-03-13 drm/probe-helper: Check epoch counter in 
output_poll_execute()
298372a0cda4 2021-03-13 drm/uapi: Add USB connector type
040c9022809d 2021-03-30 drm/mediatek: Don't support hdmi connector creation
7c6582b23551 2021-03-30 drm/mediatek: Switch the hdmi bridge ops to the atomic 
versions
b1b43d5948b2 2021-02-03 drm/mediatek: Add missing MODULE_DEVICE_TABLE()
fe5a0ff82cfb 2021-03-13 drm/mediatek: crtc: Make config-updating atomic

result: it is still working. so at least they do not break ;)

have you found any irq-related message in dmesg (i have not found any 
irq-error/warning-message)?
how have you traced that?

can somebody point us to the interrupts used for pageflip/vblank "requests"? in 
the wait-chain i do not see them,
it seems it is called asynchronous and wait only looks at a state in the 
completion-struct

i have the issue on bootup: i see only a purple screen instead of fbcon/xserver,
and the tracebacks on serial are very annoying as they repeat every few seconds
(maybe change to WARN_ONCE?). But after a while it seems to stop.

imho we need a way to make the history (temporarily) flat (remove 
parent information from the commits to be merged) so that bisect has only a 
list and not a "tree"

regards Frank


[PATCH] video: backlight: Only set maximum brightness for gpio-backlight

2021-07-08 Thread Marek Vasut
The note in c2adda27d202f ("video: backlight: Add of_find_backlight helper
in backlight.c") says that gpio-backlight uses brightness as power state.
Other backlight drivers do not, so limit this workaround to gpio-backlight.

This fixes the case where e.g. pwm-backlight can perfectly well be set to
brightness 0 on boot in DT, which without this patch leads to the display
brightness to be max instead of off.

Fixes: c2adda27d202f ("video: backlight: Add of_find_backlight helper in 
backlight.c")
Signed-off-by: Marek Vasut 
Cc: Daniel Thompson 
Cc: Meghana Madhyastha 
Cc: Noralf Trønnes 
Cc: Sean Paul 
Cc: Thierry Reding 
---
 drivers/video/backlight/backlight.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/video/backlight/backlight.c 
b/drivers/video/backlight/backlight.c
index 537fe1b376ad7..dfb66171dec41 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -676,6 +676,7 @@ EXPORT_SYMBOL(of_find_backlight_by_node);
 static struct backlight_device *of_find_backlight(struct device *dev)
 {
struct backlight_device *bd = NULL;
+   bool is_gpio_backlight = false;
struct device_node *np;
 
if (!dev)
@@ -685,6 +686,8 @@ static struct backlight_device *of_find_backlight(struct 
device *dev)
np = of_parse_phandle(dev->of_node, "backlight", 0);
if (np) {
bd = of_find_backlight_by_node(np);
+   is_gpio_backlight =
+   of_device_is_compatible(np, "gpio-backlight");
of_node_put(np);
if (!bd)
return ERR_PTR(-EPROBE_DEFER);
@@ -692,7 +695,7 @@ static struct backlight_device *of_find_backlight(struct 
device *dev)
 * Note: gpio_backlight uses brightness as
 * power state during probe
 */
-   if (!bd->props.brightness)
+   if (is_gpio_backlight && !bd->props.brightness)
bd->props.brightness = bd->props.max_brightness;
}
}
-- 
2.30.2



[PATCH v2] drm/vkms: Creating a debug file to get/track vkms config in vkms_drv.c

2021-07-08 Thread Beatriz Martins de Carvalho
Create a vkms_config debugfs file in vkms_drv.c to get/track vkms config
data, as part of the long-term plan of making vkms configurable and having
multiple different instances.

Reviewed-by: Melissa Wen 
Signed-off-by: Beatriz Martins de Carvalho 
---
Changes in v2:
- corrected subject to make clear in terms of its purpose
- corrected commit message
---
 drivers/gpu/drm/vkms/vkms_drv.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c
index 027ffe759440..c81fba6c72f0 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.c
+++ b/drivers/gpu/drm/vkms/vkms_drv.c
@@ -28,6 +28,9 @@
 
 #include "vkms_drv.h"
 
+#include 
+#include 
+
 #define DRIVER_NAME"vkms"
 #define DRIVER_DESC"Virtual Kernel Mode Setting"
 #define DRIVER_DATE"20180514"
@@ -86,12 +89,37 @@ static void vkms_atomic_commit_tail(struct drm_atomic_state 
*old_state)
drm_atomic_helper_cleanup_planes(dev, old_state);
 }
 
+static int vkms_config_show(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = (struct drm_info_node *)m->private;
+   struct drm_device *dev = node->minor->dev;
+   struct vkms_device *vkmsdev = drm_device_to_vkms_device(dev);
+
+   seq_printf(m, "writeback=%d\n", vkmsdev->config->writeback);
+   seq_printf(m, "cursor=%d\n", vkmsdev->config->cursor);
+   seq_printf(m, "overlay=%d\n", vkmsdev->config->overlay);
+
+   return 0;
+}
+
+static const struct drm_info_list vkms_config_debugfs_list[] = {
+   { "vkms_config", vkms_config_show, 0 },
+};
+
+static void vkms_config_debugfs_init(struct drm_minor *minor)
+{
+   drm_debugfs_create_files(vkms_config_debugfs_list, 
ARRAY_SIZE(vkms_config_debugfs_list),
+minor->debugfs_root, minor);
+}
+
 static const struct drm_driver vkms_driver = {
.driver_features= DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_GEM,
.release= vkms_release,
.fops   = &vkms_driver_fops,
DRM_GEM_SHMEM_DRIVER_OPS,
 
+   .debugfs_init   = vkms_config_debugfs_init,
+
.name   = DRIVER_NAME,
.desc   = DRIVER_DESC,
.date   = DRIVER_DATE,
-- 
2.25.1



Aw: Re: BUG: MTK DRM/HDMI broken on 5.13 (mt7623/bpi-r2)

2021-07-08 Thread Frank Wunderlich
Hi

just a small update, added debug in the vendor-specific functions for page_flip 
and vblank and it seems they never get called

--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -87,21 +87,25 @@ static void mtk_drm_crtc_finish_page_flip(struct 
mtk_drm_crtc *mtk_crtc)
 {
struct drm_crtc *crtc = &mtk_crtc->base;
unsigned long flags;
-
+printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
spin_lock_irqsave(&crtc->dev->event_lock, flags);
drm_crtc_send_vblank_event(crtc, mtk_crtc->event);
drm_crtc_vblank_put(crtc);
mtk_crtc->event = NULL;
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
 }

 static void mtk_drm_finish_page_flip(struct mtk_drm_crtc *mtk_crtc)
 {
+printk(KERN_ALERT "DEBUG: Passed %s %d 
update:%d,needsvblank:%d\n",__FUNCTION__,__LINE__,mtk_crtc->config_updating,mtk_crtc->pending_needs_vblank);
drm_crtc_handle_vblank(&mtk_crtc->base);
if (!mtk_crtc->config_updating && mtk_crtc->pending_needs_vblank) {
+printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
mtk_drm_crtc_finish_page_flip(mtk_crtc);
mtk_crtc->pending_needs_vblank = false;
}
+printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
 }

 static void mtk_drm_crtc_destroy(struct drm_crtc *crtc)

finish_page_flip is called by mtk_crtc_ddp_irq. this seems to be set in 
mtk_drm_crtc_enable_vblank with mtk_ddp_comp_enable_vblank. this is called 
correctly

113 static inline void mtk_ddp_comp_enable_vblank(struct mtk_ddp_comp *comp,
114   void (*vblank_cb)(void *),
115   void *vblank_cb_data)
116 {
117 printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
118 if (comp->funcs && comp->funcs->enable_vblank)
119 {
120 comp->funcs->enable_vblank(comp->dev, vblank_cb, vblank_cb_data);
121 printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
122 }
123 }

i see both messages, but mtk_crtc_ddp_irq is never called and so the other 2 
not.

root@bpi-r2:~# dmesg | grep -i DEBUG
[6.433509] DEBUG: Passed mtk_drm_crtc_enable_vblank 510
[6.433530] DEBUG: Passed mtk_ddp_comp_enable_vblank 117
[6.433537] DEBUG: Passed mtk_ddp_comp_enable_vblank 121 <<<


comp->funcs->enable_vblank should be mtk_drm_crtc_enable_vblank, right?

641 static const struct drm_crtc_funcs mtk_crtc_funcs = {
642 .set_config = drm_atomic_helper_set_config,
643 .page_flip  = drm_atomic_helper_page_flip,
644 .destroy= mtk_drm_crtc_destroy,
645 .reset  = mtk_drm_crtc_reset,
646 .atomic_duplicate_state = mtk_drm_crtc_duplicate_state,
647 .atomic_destroy_state   = mtk_drm_crtc_destroy_state,
648 .enable_vblank  = mtk_drm_crtc_enable_vblank, <<<
649 .disable_vblank = mtk_drm_crtc_disable_vblank,
650 };

but it looks like a recursion:
mtk_drm_crtc_enable_vblank calls mtk_ddp_comp_enable_vblank => enable_vblank 
(=mtk_drm_crtc_enable_vblank), but i see the messages not repeating

mtk_drm_crtc_enable_vblank(struct drm_crtc *crtc)
511 mtk_ddp_comp_enable_vblank(comp, mtk_crtc_ddp_irq, &mtk_crtc->base);

113 static inline void mtk_ddp_comp_enable_vblank(struct mtk_ddp_comp *comp,
114   void (*vblank_cb)(void *),
115   void *vblank_cb_data)
116 {
118 if (comp->funcs && comp->funcs->enable_vblank)
120 comp->funcs->enable_vblank(comp->dev, vblank_cb, vblank_cb_data);

but params do not match...comp->funcs->enable_vblank takes 3 arguments but 
comp->funcs->enable_vblank has only one.something i miss here...

i guess not, but is watchdog somehow involved? i ask because i see this on 
reboot/poweroff:

"watchdog: watchdog0: watchdog did not stop!"

i see this with my 5.13, 5.12-drm (5.12.0+mtk/core drm-patches) and 5.12.14 too 
(hdmi is working there), but not 5.12.0!
that means something in drm-patches (mtk/core) breaks watchdog. maybe the 
recursion mentioned above?

regards Frank


> Gesendet: Donnerstag, 08. Juli 2021 um 09:22 Uhr
> Von: "Dafna Hirschfeld" 

>
> Hi Frank,
>
>
> On 06.07.21 11:54, Frank Wunderlich wrote:
> > Hi,
> >
> > i've noticed that HDMI is broken at least on my board (Bananapi-r2,mt7623) 
> > on 5.13.
> >
> > after some research i noticed that it is working till
> >
> > commit 2e477391522354e763aa62ee3e281c1ad9e8eb1b
> > Author: Dafna Hirschfeld 

>
> We also encountered that warning on mt8173 device - Acer Chromebook R13. It 
> happen after resuming from suspend to ram.
> We could not find a version that works and we were not able to find the fix 
> of the bug.
> It seems like the irq isr is not called after resuming from suspend.
> Please share if you have new findings regarding that bug.
>
> Thanks,
> Dafna



[PULL] drm-intel-next for v5.15

2021-07-08 Thread Jani Nikula


Hi Dave & Daniel -

I'll be out for a bit, so I'm sending the first batch of changes for
v5.15 early. Nothing unusual here, I just don't want to have a huge pile
waiting. :)

Rodrigo will cover me.


BR,
Jani.


drm-intel-next-2021-07-08:
drm/i915 changes for v5.15:

Features:
- Enable pipe DMC loading on XE-LPD and ADL-P (Anusha)
- Finally remove JSH and EHL force probe requirement (Tejas)

Refactoring and cleanups:
- Refactor and fix DDI buffer translations (Ville)
- Clean up FBC CFB allocation code (Ville, with a fix from Matthew)
- Finish INTEL_GEN() and friends macro conversions (Lucas)
- Misc display cleanups (Ville)

Fixes:
- PSR fixes and ADL-P workarounds (José)
- Fix drm infoframe state mismatch (Bhanuprakash)
- Force Type-C PHY disconnect during suspend/shutdown (Imre)
- Fix power sequence violation on some Chromebook models (Shawn)
- Fix VGA workaround to avoid screen flicker at boot (Emil)
- Fix display 12+ watermark workaround adjustment (Lucas)

Misc:
- Backmerge drm-next (Jani)

BR,
Jani.

The following changes since commit 8a02ea42bc1d4c448caf1bab0e05899dad503f74:

  Merge tag 'drm-intel-next-fixes-2021-06-29' of 
git://anongit.freedesktop.org/drm/drm-intel into drm-next (2021-06-30 15:42:05 
+1000)

are available in the Git repository at:

  git://anongit.freedesktop.org/drm/drm-intel tags/drm-intel-next-2021-07-08

for you to fetch changes up to cd5606aa39925ad4483e96abffc9cc62bb36c640:

  gpu/drm/i915: nuke old GEN macros (2021-07-07 16:36:32 -0700)


drm/i915 changes for v5.15:

Features:
- Enable pipe DMC loading on XE-LPD and ADL-P (Anusha)
- Finally remove JSH and EHL force probe requirement (Tejas)

Refactoring and cleanups:
- Refactor and fix DDI buffer translations (Ville)
- Clean up FBC CFB allocation code (Ville, with a fix from Matthew)
- Finish INTEL_GEN() and friends macro conversions (Lucas)
- Misc display cleanups (Ville)

Fixes:
- PSR fixes and ADL-P workarounds (José)
- Fix drm infoframe state mismatch (Bhanuprakash)
- Force Type-C PHY disconnect during suspend/shutdown (Imre)
- Fix power sequence violation on some Chromebook models (Shawn)
- Fix VGA workaround to avoid screen flicker at boot (Emil)
- Fix display 12+ watermark workaround adjustment (Lucas)

Misc:
- Backmerge drm-next (Jani)


Anshuman Gupta (1):
  drm/i915/hdcp: Nuke Platform check for mst hdcp init

Anusha Srivatsa (4):
  drm/i915/dmc: Introduce DMC_FW_MAIN
  drm/i915/xelpd: Pipe A DMC plugging
  drm/i915/adl_p: Pipe B DMC Support
  drm/i915/adl_p: Load DMC

Bhanuprakash Modem (1):
  drm/i915/display: Fix state mismatch in drm infoframe

Emil Velikov (1):
  drm/i915: apply WaEnableVGAAccessThroughIOPort as needed

Imre Deak (1):
  drm/i915: Force a TypeC PHY disconnect during suspend/shutdown

Jani Nikula (2):
  drm/i915/dsc: abstract helpers to get bigjoiner primary/secondary crtc
  Merge drm/drm-next into drm-intel-next

José Roberto de Souza (7):
  Revert "drm/i915/display: Drop FIXME about turn off infoframes"
  drm/i915/display/psr: Handle SU Y granularity
  drm/i915/display/adl_p: Implement Wa_22012278275
  drm/i915/display/adl_p: Implement Wa_16011168373
  drm/i915/xelpd: Handle PSR2 SDP indication in the prior scanline
  drm/i915/display/adl_p: Implement Wa_16011303918
  drm/i915/display/dg1: Correctly map DPLLs during state readout

Kees Cook (1):
  drm/i915/display: Do not zero past infoframes.vsc

Lee Shawn C (1):
  drm/i915: keep backlight_enable on until turn eDP display off

Lucas De Marchi (5):
  drm/i915/xelpd: break feature inheritance
  drm/i915/display: fix level 0 adjustement on display ver >= 12
  drm/i915/display: use max_level to control loop
  drm/i915: finish INTEL_GEN and friends conversion
  gpu/drm/i915: nuke old GEN macros

Matthew Auld (1):
  drm/i915/display: check if compressed_llb was allocated

Tejas Upadhyay (3):
  drm/i915/jsl: Add W/A 1409054076 for JSL
  drm/i915/jsl: Remove require_force_probe protection
  drm/i915/ehl: Remove require_force_probe protection

Ville Syrjälä (32):
  drm/i915: s/intel/hsw/ for hsw/bdw/skl buf trans
  drm/i915: Introduce hsw_get_buf_trans()
  drm/i915: Wrap the platform specific buf trans structs into a union
  drm/i915: Rename dkl phy buf trans tables
  drm/i915: Wrap the buf trans tables into a struct
  drm/i915: Introduce intel_get_buf_trans()
  drm/i915; Return the whole buf_trans struct from get_buf_trans()
  drm/i915: Store the HDMI default entry in the bug trans struct
  drm/i915: Introduce encoder->get_buf_trans()
  drm/i915: Clean up hsw/bdw/skl/kbl buf trans funcs
  drm/i915: Introduce rkl_get_combo_buf_trans()
  drm/i915: Fix dg1 buf trans tables
  drm/i915: Deduplicate icl DP HBR2 vs. eDP HBR3 table
  drm/i915: Fix ehl edp hbr2 vswi

Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init

2021-07-08 Thread Daniel Vetter
On Thu, Jul 08, 2021 at 09:53:00AM +0200, Christian König wrote:
> Am 08.07.21 um 09:19 schrieb Daniel Vetter:
> > On Thu, Jul 8, 2021 at 9:09 AM Daniel Vetter  wrote:
> > > On Thu, Jul 8, 2021 at 8:56 AM Christian König  
> > > wrote:
> > > > Am 07.07.21 um 18:32 schrieb Daniel Vetter:
> > > > > On Wed, Jul 7, 2021 at 2:58 PM Christian König 
> > > > >  wrote:
> > > > > > Am 07.07.21 um 14:13 schrieb Daniel Vetter:
> > > > > > > On Wed, Jul 7, 2021 at 1:57 PM Christian König 
> > > > > > >  wrote:
> > > > > > > > Am 07.07.21 um 13:14 schrieb Daniel Vetter:
> > > > > > > > > On Wed, Jul 7, 2021 at 11:30 AM Christian König
> > > > > > > > >  wrote:
> > > > > > > > > > Am 02.07.21 um 23:38 schrieb Daniel Vetter:
> > > > > > > > > > > This is a very confusingly named function, because not 
> > > > > > > > > > > just does it
> > > > > > > > > > > init an object, it arms it and provides a point of no 
> > > > > > > > > > > return for
> > > > > > > > > > > pushing a job into the scheduler. It would be nice if 
> > > > > > > > > > > that's a bit
> > > > > > > > > > > clearer in the interface.
> > > > > > > > > > > 
> > > > > > > > > > > But the real reason is that I want to push the dependency 
> > > > > > > > > > > tracking
> > > > > > > > > > > helpers into the scheduler code, and that means 
> > > > > > > > > > > drm_sched_job_init
> > > > > > > > > > > must be called a lot earlier, without arming the job.
> > > > > > > > > > > 
> > > > > > > > > > > v2:
> > > > > > > > > > > - don't change .gitignore (Steven)
> > > > > > > > > > > - don't forget v3d (Emma)
> > > > > > > > > > > 
> > > > > > > > > > > v3: Emma noticed that I leak the memory allocated in
> > > > > > > > > > > drm_sched_job_init if we bail out before the point of no 
> > > > > > > > > > > return in
> > > > > > > > > > > subsequent driver patches. To be able to fix this change
> > > > > > > > > > > drm_sched_job_cleanup() so it can handle being called 
> > > > > > > > > > > both before and
> > > > > > > > > > > after drm_sched_job_arm().
> > > > > > > > > > Thinking more about this, I'm not sure if this really works.
> > > > > > > > > > 
> > > > > > > > > > See drm_sched_job_init() was also calling 
> > > > > > > > > > drm_sched_entity_select_rq()
> > > > > > > > > > to update the entity->rq association.
> > > > > > > > > > 
> > > > > > > > > > And that can only be done later on when we arm the fence as 
> > > > > > > > > > well.
> > > > > > > > > Hm yeah, but that's a bug in the existing code I think: We 
> > > > > > > > > already
> > > > > > > > > fail to clean up if we fail to allocate the fences. So I 
> > > > > > > > > think the
> > > > > > > > > right thing to do here is to split the checks into job_init, 
> > > > > > > > > and do
> > > > > > > > > the actual arming/rq selection in job_arm? I'm not entirely 
> > > > > > > > > sure
> > > > > > > > > what's all going on there, the first check looks a bit like 
> > > > > > > > > trying to
> > > > > > > > > schedule before the entity is set up, which is a driver bug 
> > > > > > > > > and should
> > > > > > > > > have a WARN_ON?
> > > > > > > > No you misunderstood me, the problem is something else.
> > > > > > > > 
> > > > > > > > You asked previously why the call to drm_sched_job_init() was 
> > > > > > > > so late in
> > > > > > > > the CS.
> > > > > > > > 
> > > > > > > > The reason for this was not alone the scheduler fence init, but 
> > > > > > > > also the
> > > > > > > > call to drm_sched_entity_select_rq().
> > > > > > > Ah ok, I think I can fix that. Needs a prep patch to first make
> > > > > > > drm_sched_entity_select infallible, then should be easy to do.
> > > > > > > 
> > > > > > > > > The 2nd check around last_scheduled I have honeslty no idea 
> > > > > > > > > what it's
> > > > > > > > > even trying to do.
> > > > > > > > You mean that here?
> > > > > > > > 
> > > > > > > > fence = READ_ONCE(entity->last_scheduled);
> > > > > > > > if (fence && !dma_fence_is_signaled(fence))
> > > > > > > > return;
> > > > > > > > 
> > > > > > > > This makes sure that load balancing is not moving the entity to 
> > > > > > > > a
> > > > > > > > different scheduler while there are still jobs running from 
> > > > > > > > this entity
> > > > > > > > on the hardware,
> > > > > > > Yeah after a nap that idea crossed my mind too. But now I have 
> > > > > > > locking
> > > > > > > questions, afaiui the scheduler thread updates this, without 
> > > > > > > taking
> > > > > > > any locks - entity dequeuing is lockless. And here we read the 
> > > > > > > fence
> > > > > > > and then seem to yolo check whether it's signalled? What's 
> > > > > > > preventing
> > > > > > > a use-after-free here? There's no rcu or anything going on here at
> > > > > > > all, and it's outside of the spinlock section, which starts a bit
> > > > > > > further down.
> > > > > > The last_scheduled fence of an entity can only change when there are
> > > > > > jobs on the entities q

Re: [PATCH] gpu: ttm: fix GPF in ttm_bo_release

2021-07-08 Thread Pavel Skripkin
On Thu, 8 Jul 2021 11:37:01 +0300
Pavel Skripkin  wrote:

> On Thu, 8 Jul 2021 08:49:48 +0200
> Christian König  wrote:
> 
> > Am 07.07.21 um 20:51 schrieb Pavel Skripkin:
> > > My local syzbot instance hit GPF in ttm_bo_release().
> > > Unfortunately, syzbot didn't produce a reproducer for this, but I
> > > found out possible scenario:
> > >
> > > drm_gem_vram_create()<-- drm_gem_vram_object kzalloced
> > >(bo embedded in this object)
> > >ttm_bo_init()
> > >  ttm_bo_init_reserved()
> > >ttm_resource_alloc()
> > >  man->func->alloc()   <-- allocation failure
> > >ttm_bo_put()
> > >   ttm_bo_release()
> > > ttm_mem_io_free()  <-- bo->resource == NULL passed
> > >as second argument
> > >*GPF*
> > >
> > > So, I've added check in ttm_bo_release() to avoid passing
> > > NULL as second argument to ttm_mem_io_free().
> 
> Hi, Christian!
> 
> Thank you for quick feedback :)
> 
> > 
> > There is another occurrence of this a bit down before we call 
> > ttm_bo_move_to_lru_tail() apart from that good catch.
> > 
> 
> Did you mean, that ttm_bo_move_to_lru_tail() should have NULL check
> too? I checked it's realization, and, I think, NULL check is necessary
> there, since mem pointer is dereferenced w/o any checking
> 
> > But I'm wondering if we should make the functions NULL-safe instead
> > of the external check.
> > 
> 
> I tried to find more possible scenarios of GPF in ttm_bo_release(),
> but I didn't find one. But, yes, moving NULL check inside
> ttm_mem_io_free() is more general approach and it will defend this
> function from GPFs in the future.
> 
> 
> 
> With regards,
> Pavel Skripkin
> 

I misclicked and sent this email to Christian privately :(

Added all thread participants back, sorry.



With regards,
Pavel Skripkin


Re: Oops in qxl_bo_move_notify()

2021-07-08 Thread Daniel Vetter
On Wed, Jul 07, 2021 at 04:36:49PM +, Roberto Sassu wrote:
> Hi
> 
> I'm getting this oops (on commit a180bd1d7e16):
> 
> [   17.711520] BUG: kernel NULL pointer dereference, address: 
> 0010
> [   17.739451] RIP: 0010:qxl_bo_move_notify+0x35/0x80 [qxl]
> [   17.827345] RSP: 0018:c9457c08 EFLAGS: 00010286
> [   17.827350] RAX: 0001 RBX:  RCX: 
> dc00
> [   17.827353] RDX: 0007 RSI: 0004 RDI: 
> 85596feb
> [   17.827356] RBP: 88800e311c00 R08:  R09: 
> 
> [   17.827358] R10: 8697b243 R11: fbfff0d2f648 R12: 
> 
> [   17.827361] R13: 88800e311e48 R14: 88800e311e98 R15: 
> 88800e311e90
> [   17.827364] FS:  () GS:88805d80() 
> knlGS:
> [   17.861699] CS:  0010 DS:  ES:  CR0: 80050033
> [   17.861703] CR2: 0010 CR3: 2642c000 CR4: 
> 00350ee0
> [   17.861707] Call Trace:
> [   17.861712]  ttm_bo_cleanup_memtype_use+0x4d/0xb0 [ttm]
> [   17.861730]  ttm_bo_release+0x42d/0x7c0 [ttm]
> [   17.861746]  ? ttm_bo_cleanup_refs+0x127/0x420 [ttm]
> [   17.888300]  ttm_bo_delayed_delete+0x289/0x390 [ttm]
> [   17.888317]  ? ttm_bo_cleanup_refs+0x420/0x420 [ttm]
> [   17.888332]  ? lock_release+0x9c/0x5c0
> [   17.901033]  ? rcu_read_lock_held_common+0x1a/0x50
> [   17.905183]  ttm_device_delayed_workqueue+0x18/0x50 [ttm]
> [   17.909371]  process_one_work+0x537/0x9f0
> [   17.913345]  ? pwq_dec_nr_in_flight+0x160/0x160
> [   17.917297]  ? lock_acquired+0xa4/0x580
> [   17.921168]  ? worker_thread+0x169/0x600
> [   17.925034]  worker_thread+0x7a/0x600
> [   17.928657]  ? process_one_work+0x9f0/0x9f0
> [   17.932360]  kthread+0x200/0x230
> [   17.935930]  ? set_kthread_struct+0x80/0x80
> [   17.939593]  ret_from_fork+0x22/0x30
> [   17.951737] CR2: 0010
> [   17.955496] ---[ end trace e30cc21c24e81ee5 ]---
> 
> I had a look at the code, and it seems that this is caused by
> trying to use bo->resource which is NULL.
> 
> bo->resource is freed by ttm_bo_cleanup_refs() ->
> ttm_bo_cleanup_memtype_use() -> ttm_resource_free().
> 
> And then a notification is issued by ttm_bo_cleanup_refs() ->
> ttm_bo_put() -> ttm_bo_release() ->
> ttm_bo_cleanup_memtype_use(), this time with bo->release
> equal to NULL.
> 
> I was thinking a proper way to fix this. Checking that
> bo->release is not NULL in qxl_bo_move_notify() would
> solve the issue. But maybe there is a better way, like
> avoiding that ttm_bo_cleanup_memtype_use() is called
> twice. Which way would be preferable?

Adding Christian and Dave, who've touched all this recently iirc.
-Daniel

> 
> Thanks
> 
> Roberto
> 
> HUAWEI TECHNOLOGIES Duesseldorf GmbH, HRB 56063
> Managing Director: Li Peng, Li Jian, Shi Yanli

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init

2021-07-08 Thread Christian König

Am 08.07.21 um 12:02 schrieb Daniel Vetter:

On Thu, Jul 08, 2021 at 09:53:00AM +0200, Christian König wrote:

Am 08.07.21 um 09:19 schrieb Daniel Vetter:

On Thu, Jul 8, 2021 at 9:09 AM Daniel Vetter  wrote:

On Thu, Jul 8, 2021 at 8:56 AM Christian König  wrote:

Am 07.07.21 um 18:32 schrieb Daniel Vetter:

On Wed, Jul 7, 2021 at 2:58 PM Christian König  wrote:

Am 07.07.21 um 14:13 schrieb Daniel Vetter:

On Wed, Jul 7, 2021 at 1:57 PM Christian König  wrote:

Am 07.07.21 um 13:14 schrieb Daniel Vetter:

On Wed, Jul 7, 2021 at 11:30 AM Christian König
 wrote:

Am 02.07.21 um 23:38 schrieb Daniel Vetter:

This is a very confusingly named function, because not just does it
init an object, it arms it and provides a point of no return for
pushing a job into the scheduler. It would be nice if that's a bit
clearer in the interface.

But the real reason is that I want to push the dependency tracking
helpers into the scheduler code, and that means drm_sched_job_init
must be called a lot earlier, without arming the job.

v2:
- don't change .gitignore (Steven)
- don't forget v3d (Emma)

v3: Emma noticed that I leak the memory allocated in
drm_sched_job_init if we bail out before the point of no return in
subsequent driver patches. To be able to fix this change
drm_sched_job_cleanup() so it can handle being called both before and
after drm_sched_job_arm().

Thinking more about this, I'm not sure if this really works.

See drm_sched_job_init() was also calling drm_sched_entity_select_rq()
to update the entity->rq association.

And that can only be done later on when we arm the fence as well.

Hm yeah, but that's a bug in the existing code I think: We already
fail to clean up if we fail to allocate the fences. So I think the
right thing to do here is to split the checks into job_init, and do
the actual arming/rq selection in job_arm? I'm not entirely sure
what's all going on there, the first check looks a bit like trying to
schedule before the entity is set up, which is a driver bug and should
have a WARN_ON?

No you misunderstood me, the problem is something else.

You asked previously why the call to drm_sched_job_init() was so late in
the CS.

The reason for this was not alone the scheduler fence init, but also the
call to drm_sched_entity_select_rq().

Ah ok, I think I can fix that. Needs a prep patch to first make
drm_sched_entity_select infallible, then should be easy to do.


The 2nd check around last_scheduled I have honeslty no idea what it's
even trying to do.

You mean that here?

 fence = READ_ONCE(entity->last_scheduled);
 if (fence && !dma_fence_is_signaled(fence))
 return;

This makes sure that load balancing is not moving the entity to a
different scheduler while there are still jobs running from this entity
on the hardware,

Yeah after a nap that idea crossed my mind too. But now I have locking
questions, afaiui the scheduler thread updates this, without taking
any locks - entity dequeuing is lockless. And here we read the fence
and then seem to yolo check whether it's signalled? What's preventing
a use-after-free here? There's no rcu or anything going on here at
all, and it's outside of the spinlock section, which starts a bit
further down.

The last_scheduled fence of an entity can only change when there are
jobs on the entities queued, and we have just ruled that out in the
check before.

There aren't any barriers, so the cpu could easily run the two checks
the other way round. I'll ponder this and figure out where exactly we
need docs for the constraint and/or barriers to make this work as
intended. As-is I'm not seeing how it does ...

spsc_queue_count() provides the necessary barrier with the atomic_read().

atomic_t is fully unordered, except when it's a read-modify-write

Wasn't awake yet, I think the rule is read-modify-write and return
previous value gives you full barrier. So stuff like cmpxchg, but also
a few others. See atomic_t.txt under the ORDERING heading (yes that
maintainer refuses to accept .rst so I can't just link you to the
right section, it's silly). get/set and even RMW atomic ops that don't
return anything are all fully unordered.

As far as I know that's not completely correct. The rules around atomics I
once learned are:

1. Everything which modifies something is a write barrier.
2. Everything which returns something is a read barrier.

And I know a whole bunch of use cases where this is relied upon in the core
kernel, so I'm pretty sure that's correct.

That's against what the doc says, and also it would mean stuff like
atomic_read_acquire or smp_mb__after/before_atomic is completely pointless.

On x86 you're right, anywhere else where there's no total store ordering I
you're wrong.


Good to know. I always thought that atomic_read_acquire() was just for 
documentation purpose.





If there's code that relies on this it needs to be fixed and properly
documented. I did go through the squeue code a bit, and might be better to
just rep

Re: [PATCH] gpu: ttm: fix GPF in ttm_bo_release

2021-07-08 Thread Christian König

Am 08.07.21 um 12:09 schrieb Pavel Skripkin:

On Thu, 8 Jul 2021 11:37:01 +0300
Pavel Skripkin  wrote:


On Thu, 8 Jul 2021 08:49:48 +0200
Christian König  wrote:


Am 07.07.21 um 20:51 schrieb Pavel Skripkin:

My local syzbot instance hit GPF in ttm_bo_release().
Unfortunately, syzbot didn't produce a reproducer for this, but I
found out possible scenario:

drm_gem_vram_create()<-- drm_gem_vram_object kzalloced
 (bo embedded in this object)
ttm_bo_init()
  ttm_bo_init_reserved()
ttm_resource_alloc()
  man->func->alloc()   <-- allocation failure
ttm_bo_put()
ttm_bo_release()
  ttm_mem_io_free()  <-- bo->resource == NULL passed
 as second argument
 *GPF*

So, I've added check in ttm_bo_release() to avoid passing
NULL as second argument to ttm_mem_io_free().

Hi, Christian!

Thank you for quick feedback :)


There is another occurrence of this a bit down before we call
ttm_bo_move_to_lru_tail() apart from that good catch.


Did you mean, that ttm_bo_move_to_lru_tail() should have NULL check
too?


Yes, exactly that.


  I checked its implementation, and I think a NULL check is necessary
there, since mem pointer is dereferenced w/o any checking


But I'm wondering if we should make the functions NULL-safe instead
of the external check.


I tried to find more possible scenarios of GPF in ttm_bo_release(),
but I didn't find one. But, yes, moving NULL check inside
ttm_mem_io_free() is more general approach and it will defend this
function from GPFs in the future.



With regards,
Pavel Skripkin


I misclicked and sent this email to Christian privately :(

Added all thread participants back, sorry.


No problem.

Do you want to update your patch or should I take care of this?

Thanks,
Christian.





With regards,
Pavel Skripkin




Re: [PATCH] gpu: ttm: fix GPF in ttm_bo_release

2021-07-08 Thread Pavel Skripkin
On Thu, 8 Jul 2021 12:56:19 +0200
Christian König  wrote:

> Am 08.07.21 um 12:09 schrieb Pavel Skripkin:
> > On Thu, 8 Jul 2021 11:37:01 +0300
> > Pavel Skripkin  wrote:
> >
> >> On Thu, 8 Jul 2021 08:49:48 +0200
> >> Christian König  wrote:
> >>
> >>> Am 07.07.21 um 20:51 schrieb Pavel Skripkin:
>  My local syzbot instance hit GPF in ttm_bo_release().
>  Unfortunately, syzbot didn't produce a reproducer for this, but I
>  found out possible scenario:
> 
>  drm_gem_vram_create()<-- drm_gem_vram_object
>  kzalloced (bo embedded in this object)
>  ttm_bo_init()
>    ttm_bo_init_reserved()
>  ttm_resource_alloc()
>    man->func->alloc()   <-- allocation failure
>  ttm_bo_put()
>   ttm_bo_release()
> ttm_mem_io_free()  <-- bo->resource == NULL passed
>    as second argument
>    *GPF*
> 
>  So, I've added check in ttm_bo_release() to avoid passing
>  NULL as second argument to ttm_mem_io_free().
> >> Hi, Christian!
> >>
> >> Thank you for quick feedback :)
> >>
> >>> There is another occurrence of this a bit down before we call
> >>> ttm_bo_move_to_lru_tail() apart from that good catch.
> >>>
> >> Did you mean, that ttm_bo_move_to_lru_tail() should have NULL check
> >> too?
> 
> Yes, exactly that.
> 
> >>   I checked its implementation, and I think a NULL check is necessary
> >> there, since mem pointer is dereferenced w/o any checking
> >>
> >>> But I'm wondering if we should make the functions NULL-safe
> >>> instead of the external check.
> >>>
> >> I tried to find more possible scenarios of GPF in ttm_bo_release(),
> >> but I didn't find one. But, yes, moving NULL check inside
> >> ttm_mem_io_free() is more general approach and it will defend this
> >> function from GPFs in the future.
> >>
> >>
> >>
> >> With regards,
> >> Pavel Skripkin
> >>
> > I misclicked and sent this email to Christian privately :(
> >
> > Added all thread participants back, sorry.
> 
> No problem.
> 
> Do you want to update your patch or should I take care of this?
> 

Yes, I will send v2 soon. Thank you!




With regards,
Pavel Skripkin


[PATCH] dma-buf: fix and rework dma_buf_poll v5

2021-07-08 Thread Christian König
Daniel pointed me towards this function and there are multiple obvious problems
in the implementation.

First of all the retry loop is not working as intended. In general the retry
makes only sense if you grab the reference first and then check the sequence
values.

Then we should always also wait for the exclusive fence.

It's also good practice to keep the reference around when installing callbacks
to fences you don't own.

And last the whole implementation was unnecessary complex and rather hard to
understand which could lead to probably unexpected behavior of the IOCTL.

Fix all this by reworking the implementation from scratch. Dropping the
whole RCU approach and taking the lock instead.

Only mildly tested and needs a thoughtful review of the code.

v2: fix the reference counting as well
v3: keep the excl fence handling as is for stable
v4: back to testing all fences, drop RCU
v5: handle in and out separately

Signed-off-by: Christian König 
CC: sta...@vger.kernel.org
---
 drivers/dma-buf/dma-buf.c | 152 +-
 include/linux/dma-buf.h   |   2 +-
 2 files changed, 68 insertions(+), 86 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index eadd1eaa2fb5..439e2379e1cb 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -72,7 +72,7 @@ static void dma_buf_release(struct dentry *dentry)
 * If you hit this BUG() it means someone dropped their ref to the
 * dma-buf while still having pending operation to the buffer.
 */
-   BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active);
+   BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
 
dmabuf->ops->release(dmabuf);
 
@@ -202,16 +202,57 @@ static void dma_buf_poll_cb(struct dma_fence *fence, 
struct dma_fence_cb *cb)
wake_up_locked_poll(dcb->poll, dcb->active);
dcb->active = 0;
spin_unlock_irqrestore(&dcb->poll->lock, flags);
+   dma_fence_put(fence);
+}
+
+static bool dma_buf_poll_shared(struct dma_resv *resv,
+   struct dma_buf_poll_cb_t *dcb)
+{
+   struct dma_resv_list *fobj = dma_resv_get_list(resv);
+   struct dma_fence *fence;
+   int i, r;
+
+   if (!fobj)
+   return false;
+
+   for (i = 0; i < fobj->shared_count; ++i) {
+   fence = rcu_dereference_protected(fobj->shared[i],
+ dma_resv_held(resv));
+   dma_fence_get(fence);
+   r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
+   if (!r)
+   return true;
+   dma_fence_put(fence);
+   }
+
+   return false;
+}
+
+static bool dma_buf_poll_excl(struct dma_resv *resv,
+ struct dma_buf_poll_cb_t *dcb)
+{
+   struct dma_fence *fence = dma_resv_get_excl(resv);
+   int r;
+
+   if (!fence)
+   return false;
+
+   dma_fence_get(fence);
+   r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
+   if (!r)
+   return true;
+   dma_fence_put(fence);
+
+   return false;
 }
 
 static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
 {
struct dma_buf *dmabuf;
struct dma_resv *resv;
-   struct dma_resv_list *fobj;
-   struct dma_fence *fence_excl;
+   unsigned shared_count;
__poll_t events;
-   unsigned shared_count, seq;
+   int r, i;
 
dmabuf = file->private_data;
if (!dmabuf || !dmabuf->resv)
@@ -225,101 +266,42 @@ static __poll_t dma_buf_poll(struct file *file, 
poll_table *poll)
if (!events)
return 0;
 
-retry:
-   seq = read_seqcount_begin(&resv->seq);
-   rcu_read_lock();
-
-   fobj = rcu_dereference(resv->fence);
-   if (fobj)
-   shared_count = fobj->shared_count;
-   else
-   shared_count = 0;
-   fence_excl = rcu_dereference(resv->fence_excl);
-   if (read_seqcount_retry(&resv->seq, seq)) {
-   rcu_read_unlock();
-   goto retry;
-   }
-
-   if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) {
-   struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl;
-   __poll_t pevents = EPOLLIN;
+   dma_resv_lock(resv, NULL);
 
-   if (shared_count == 0)
-   pevents |= EPOLLOUT;
+   if (events & EPOLLOUT) {
+   struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_out;
 
+   /* Check that callback isn't busy */
spin_lock_irq(&dmabuf->poll.lock);
-   if (dcb->active) {
-   dcb->active |= pevents;
-   events &= ~pevents;
-   } else
-   dcb->active = pevents;
+   if (dcb->active)
+   events &= ~EPOLLOUT;
+   else
+   dcb->active = EPOLLOUT;
  

Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init

2021-07-08 Thread Daniel Vetter
On Thu, Jul 8, 2021 at 12:54 PM Christian König
 wrote:
>
> Am 08.07.21 um 12:02 schrieb Daniel Vetter:
> > On Thu, Jul 08, 2021 at 09:53:00AM +0200, Christian König wrote:
> >> Am 08.07.21 um 09:19 schrieb Daniel Vetter:
> >>> On Thu, Jul 8, 2021 at 9:09 AM Daniel Vetter  
> >>> wrote:
>  On Thu, Jul 8, 2021 at 8:56 AM Christian König 
>   wrote:
> > Am 07.07.21 um 18:32 schrieb Daniel Vetter:
> >> On Wed, Jul 7, 2021 at 2:58 PM Christian König 
> >>  wrote:
> >>> Am 07.07.21 um 14:13 schrieb Daniel Vetter:
>  On Wed, Jul 7, 2021 at 1:57 PM Christian König 
>   wrote:
> > Am 07.07.21 um 13:14 schrieb Daniel Vetter:
> >> On Wed, Jul 7, 2021 at 11:30 AM Christian König
> >>  wrote:
> >>> Am 02.07.21 um 23:38 schrieb Daniel Vetter:
>  This is a very confusingly named function, because not just does 
>  it
>  init an object, it arms it and provides a point of no return for
>  pushing a job into the scheduler. It would be nice if that's a 
>  bit
>  clearer in the interface.
> 
>  But the real reason is that I want to push the dependency 
>  tracking
>  helpers into the scheduler code, and that means 
>  drm_sched_job_init
>  must be called a lot earlier, without arming the job.
> 
>  v2:
>  - don't change .gitignore (Steven)
>  - don't forget v3d (Emma)
> 
>  v3: Emma noticed that I leak the memory allocated in
>  drm_sched_job_init if we bail out before the point of no return 
>  in
>  subsequent driver patches. To be able to fix this change
>  drm_sched_job_cleanup() so it can handle being called both 
>  before and
>  after drm_sched_job_arm().
> >>> Thinking more about this, I'm not sure if this really works.
> >>>
> >>> See drm_sched_job_init() was also calling 
> >>> drm_sched_entity_select_rq()
> >>> to update the entity->rq association.
> >>>
> >>> And that can only be done later on when we arm the fence as well.
> >> Hm yeah, but that's a bug in the existing code I think: We already
> >> fail to clean up if we fail to allocate the fences. So I think the
> >> right thing to do here is to split the checks into job_init, and do
> >> the actual arming/rq selection in job_arm? I'm not entirely sure
> >> what's all going on there, the first check looks a bit like trying 
> >> to
> >> schedule before the entity is set up, which is a driver bug and 
> >> should
> >> have a WARN_ON?
> > No you misunderstood me, the problem is something else.
> >
> > You asked previously why the call to drm_sched_job_init() was so 
> > late in
> > the CS.
> >
> > The reason for this was not alone the scheduler fence init, but 
> > also the
> > call to drm_sched_entity_select_rq().
>  Ah ok, I think I can fix that. Needs a prep patch to first make
>  drm_sched_entity_select infallible, then should be easy to do.
> 
> >> The 2nd check around last_scheduled I have honeslty no idea what 
> >> it's
> >> even trying to do.
> > You mean that here?
> >
> >  fence = READ_ONCE(entity->last_scheduled);
> >  if (fence && !dma_fence_is_signaled(fence))
> >  return;
> >
> > This makes sure that load balancing is not moving the entity to a
> > different scheduler while there are still jobs running from this 
> > entity
> > on the hardware,
>  Yeah after a nap that idea crossed my mind too. But now I have 
>  locking
>  questions, afaiui the scheduler thread updates this, without taking
>  any locks - entity dequeuing is lockless. And here we read the fence
>  and then seem to yolo check whether it's signalled? What's preventing
>  a use-after-free here? There's no rcu or anything going on here at
>  all, and it's outside of the spinlock section, which starts a bit
>  further down.
> >>> The last_scheduled fence of an entity can only change when there are
> >>> jobs on the entities queued, and we have just ruled that out in the
> >>> check before.
> >> There aren't any barriers, so the cpu could easily run the two checks
> >> the other way round. I'll ponder this and figure out where exactly we
> >> need docs for the constraint and/or barriers to make this work as
> >> intended. As-is I'm not seeing how it does ...
> > spsc_queue_count() provides the necessary barrier with the 
> > atomic_read().
>  atomic_t is fully unord

[PATCH v2] gpu: ttm: add missing NULL checks

2021-07-08 Thread Pavel Skripkin
My local syzbot instance hit GPF in ttm_bo_release().
Unfortunately, syzbot didn't produce a reproducer for this, but I
found a possible scenario:

drm_gem_vram_create()<-- drm_gem_vram_object kzalloced
 (bo embedded in this object)
  ttm_bo_init()
ttm_bo_init_reserved()
  ttm_resource_alloc()
man->func->alloc()   <-- allocation failure
  ttm_bo_put()
ttm_bo_release()
  ttm_mem_io_free()  <-- bo->resource == NULL passed
 as second argument
 *GPF*

Added a NULL check inside ttm_mem_io_free() to prevent the reported GPF and
make this function NULL-safe in the future.

Same problem was in ttm_bo_move_to_lru_tail() as Christian reported.
ttm_bo_move_to_lru_tail() is called in ttm_bo_release() and mem pointer
can be NULL as well as in ttm_mem_io_free().

Fail log:

KASAN: null-ptr-deref in range [0x0020-0x0027]
CPU: 1 PID: 10419 Comm: syz-executor.3 Not tainted 5.13.0-rc7-next-20210625 #7
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014
RIP: 0010:ttm_mem_io_free+0x28/0x170 drivers/gpu/drm/ttm/ttm_bo_util.c:66
Code: b1 90 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 e8 cd 19 24 fd 4c 8d 6b 
20 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 2a 01 
00 00 4c 8b 63 20 31 ff 4c 89 e6 e8 00 1f
RSP: 0018:c900141df968 EFLAGS: 00010202
RAX: dc00 RBX:  RCX: c90010da
RDX: 0004 RSI: 84513ea3 RDI: 888041fbc010
RBP: 888041fbc010 R08:  R09: 
R10: 0001 R11:  R12: 
R13: 0020 R14: 88806b258800 R15: 88806b258a38
FS:  7fa6e9845640() GS:88807ec0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7fad61265e18 CR3: 5ad79000 CR4: 00350ee0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 ttm_bo_release+0xd94/0x10a0 drivers/gpu/drm/ttm/ttm_bo.c:422
 kref_put include/linux/kref.h:65 [inline]
 ttm_bo_put drivers/gpu/drm/ttm/ttm_bo.c:470 [inline]
 ttm_bo_init_reserved+0x7cb/0x960 drivers/gpu/drm/ttm/ttm_bo.c:1050
 ttm_bo_init+0x105/0x270 drivers/gpu/drm/ttm/ttm_bo.c:1074
 drm_gem_vram_create+0x332/0x4c0 drivers/gpu/drm/drm_gem_vram_helper.c:228

Fixes: d3116756a710 ("drm/ttm: rename bo->mem and make it a pointer")
Signed-off-by: Pavel Skripkin 
---

Changes in v2:
1. Added NULL check in ttm_bo_move_to_lru_tail()

2. Changed subject line, since NULL checks were added in 2 functions

---
 drivers/gpu/drm/ttm/ttm_bo.c  | 3 +++
 drivers/gpu/drm/ttm/ttm_bo_util.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1b950b45cf4b..8d7fd65ccced 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -102,6 +102,9 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
return;
}
 
+   if (!mem)
+   return;
+
man = ttm_manager_type(bdev, mem->mem_type);
list_move_tail(&bo->lru, &man->lru[bo->priority]);
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 2f57f824e6db..763fa6f4e07d 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -63,6 +63,9 @@ int ttm_mem_io_reserve(struct ttm_device *bdev,
 void ttm_mem_io_free(struct ttm_device *bdev,
 struct ttm_resource *mem)
 {
+   if (!mem)
+   return;
+
if (!mem->bus.offset && !mem->bus.addr)
return;
 
-- 
2.32.0



Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init

2021-07-08 Thread Christian König




Am 08.07.21 um 13:20 schrieb Daniel Vetter:

On Thu, Jul 8, 2021 at 12:54 PM Christian König
 wrote:

[SNIP]

As far as I know that not completely correct. The rules around atomics i
once learned are:

1. Everything which modifies something is a write barrier.
2. Everything which returns something is a read barrier.

And I know a whole bunch of use cases where this is relied upon in the core
kernel, so I'm pretty sure that's correct.

That's against what the doc says, and also it would mean stuff like
atomic_read_acquire or smp_mb__after/before_atomic is completely pointless.

On x86 you're right; anywhere else, where there's no total store ordering,
you're wrong.

Good to know. I always thought that atomic_read_acquire() was just for
documentation purpose.

Maybe you mixed it up with C++ atomics (which I think are now also in
C)? Those are strongly ordered by default (you can get the weakly
ordered kernel-style one too). It's a bit unfortunate that the default
semantics are exactly opposite between kernel and userspace :-/


Yeah, that's most likely it.


If there's code that relies on this it needs to be fixed and properly
documented. I did go through the squeue code a bit, and might be better to
just replace this with a core data structure.

Well the spsc was especially crafted for this use case and performed
quite a bit better than a double-linked list.

Yeah  double-linked list is awful.


Or what core data structure do you have in mind?

Hm I thought there's a ready-made queue primitive, but there's just
llist.h. Which I think is roughly what the scheduler queue also does.
Minus the atomic_t for counting how many there are, and aside from the
tracepoints I don't think we're using those anywhere, we just check
for is_empty in the code (from a quick look only).


I think we just need to replace the atomic_read() with 
atomic_read_acquire() and the atomic_dec() with atomic_dec_return_release().


Apart from that everything should be working as far as I can see. And 
yes llist.h doesn't really do much different, it just doesn't keep a 
tail pointer.


Christian.


-Daniel


Christian.


-Daniel


In this case the write barrier is the atomic_dec() in spsc_queue_pop() and
the read barrier is the aromic_read() in spsc_queue_count().

The READ_ONCE() is actually not even necessary as far as I can see.

Christian.


-Daniel



atomic op, then it's a full barrier. So yeah you need more here. But
also since you only need a read barrier on one side, and a write
barrier on the other, you don't actually need a cpu barriers on x86.
And READ_ONCE gives you the compiler barrier on one side at least, I
haven't found it on the writer side yet.


But yes a comment would be really nice here. I had to think for a while
why we don't need this as well.

I'm typing a patch, which after a night's sleep I realized has the
wrong barriers. And now I'm also typing some doc improvements for
drm_sched_entity and related functions.


Christian.


-Daniel


Christian.



-Daniel


Regards
Christian.


-Daniel


Christian.


Also improve the kerneldoc for this.

Acked-by: Steven Price  (v2)
Signed-off-by: Daniel Vetter 
Cc: Lucas Stach 
Cc: Russell King 
Cc: Christian Gmeiner 
Cc: Qiang Yu 
Cc: Rob Herring 
Cc: Tomeu Vizoso 
Cc: Steven Price 
Cc: Alyssa Rosenzweig 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Sumit Semwal 
Cc: "Christian König" 
Cc: Masahiro Yamada 
Cc: Kees Cook 
Cc: Adam Borowski 
Cc: Nick Terrell 
Cc: Mauro Carvalho Chehab 
Cc: Paul Menzel 
Cc: Sami Tolvanen 
Cc: Viresh Kumar 
Cc: Alex Deucher 
Cc: Dave Airlie 
Cc: Nirmoy Das 
Cc: Deepak R Varma 
Cc: Lee Jones 
Cc: Kevin Wang 
Cc: Chen Li 
Cc: Luben Tuikov 
Cc: "Marek Olšák" 
Cc: Dennis Li 
Cc: Maarten Lankhorst 
Cc: Andrey Grodzovsky 
Cc: Sonny Jiang 
Cc: Boris Brezillon 
Cc: Tian Tao 
Cc: Jack Zhang 
Cc: etna...@lists.freedesktop.org
Cc: l...@lists.freedesktop.org
Cc: linux-me...@vger.kernel.org
Cc: linaro-mm-...@lists.linaro.org
Cc: Emma Anholt 
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  2 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  2 ++
drivers/gpu/drm/etnaviv/etnaviv_sched.c  |  2 ++
drivers/gpu/drm/lima/lima_sched.c|  2 ++
drivers/gpu/drm/panfrost/panfrost_job.c  |  2 ++
drivers/gpu/drm/scheduler/sched_entity.c |  6 ++--
drivers/gpu/drm/scheduler/sched_fence.c  | 17 +
drivers/gpu/drm/scheduler/sched_main.c   | 46 +---
drivers/gpu/drm/v3d/v3d_gem.c|  2 ++
include/drm/gpu_scheduler.h  |  7 +++-
10 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c5386d13eb4a..a4ec092af9a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
if (r)
goto error_unlock;

+ 

Re: [PATCH v2] gpu: ttm: add missing NULL checks

2021-07-08 Thread Christian König

Am 08.07.21 um 13:25 schrieb Pavel Skripkin:

My local syzbot instance hit GPF in ttm_bo_release().
Unfortunately, syzbot didn't produce a reproducer for this, but I
found out possible scenario:

drm_gem_vram_create()<-- drm_gem_vram_object kzalloced
 (bo embedded in this object)
   ttm_bo_init()
 ttm_bo_init_reserved()
   ttm_resource_alloc()
 man->func->alloc()   <-- allocation failure
   ttm_bo_put()
ttm_bo_release()
  ttm_mem_io_free()  <-- bo->resource == NULL passed
 as second argument
 *GPF*

Added NULL check inside ttm_mem_io_free() to prevent reported GPF and
make this function NULL safe in the future.

Same problem was in ttm_bo_move_to_lru_tail() as Christian reported.
ttm_bo_move_to_lru_tail() is called in ttm_bo_release() and mem pointer
can be NULL as well as in ttm_mem_io_free().

Fail log:

KASAN: null-ptr-deref in range [0x0020-0x0027]
CPU: 1 PID: 10419 Comm: syz-executor.3 Not tainted 5.13.0-rc7-next-20210625 #7
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014
RIP: 0010:ttm_mem_io_free+0x28/0x170 drivers/gpu/drm/ttm/ttm_bo_util.c:66
Code: b1 90 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 e8 cd 19 24 fd 4c 8d 6b 20 48 
b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 2a 01 00 00 
4c 8b 63 20 31 ff 4c 89 e6 e8 00 1f
RSP: 0018:c900141df968 EFLAGS: 00010202
RAX: dc00 RBX:  RCX: c90010da
RDX: 0004 RSI: 84513ea3 RDI: 888041fbc010
RBP: 888041fbc010 R08:  R09: 
R10: 0001 R11:  R12: 
R13: 0020 R14: 88806b258800 R15: 88806b258a38
FS:  7fa6e9845640() GS:88807ec0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7fad61265e18 CR3: 5ad79000 CR4: 00350ee0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
  ttm_bo_release+0xd94/0x10a0 drivers/gpu/drm/ttm/ttm_bo.c:422
  kref_put include/linux/kref.h:65 [inline]
  ttm_bo_put drivers/gpu/drm/ttm/ttm_bo.c:470 [inline]
  ttm_bo_init_reserved+0x7cb/0x960 drivers/gpu/drm/ttm/ttm_bo.c:1050
  ttm_bo_init+0x105/0x270 drivers/gpu/drm/ttm/ttm_bo.c:1074
  drm_gem_vram_create+0x332/0x4c0 drivers/gpu/drm/drm_gem_vram_helper.c:228

Fixes: d3116756a710 ("drm/ttm: rename bo->mem and make it a pointer")
Signed-off-by: Pavel Skripkin 


Reviewed-by: Christian König 

Going to push this to drm-misc-next-fixes.

Thanks,
Christian.


---

Changes in v2:
1. Added NULL check in ttm_bo_move_to_lru_tail()

2. Changed subject line, since NULL check added in 2 functions

---
  drivers/gpu/drm/ttm/ttm_bo.c  | 3 +++
  drivers/gpu/drm/ttm/ttm_bo_util.c | 3 +++
  2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1b950b45cf4b..8d7fd65ccced 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -102,6 +102,9 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
return;
}
  
+	if (!mem)

+   return;
+
man = ttm_manager_type(bdev, mem->mem_type);
list_move_tail(&bo->lru, &man->lru[bo->priority]);
  
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c

index 2f57f824e6db..763fa6f4e07d 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -63,6 +63,9 @@ int ttm_mem_io_reserve(struct ttm_device *bdev,
  void ttm_mem_io_free(struct ttm_device *bdev,
 struct ttm_resource *mem)
  {
+   if (!mem)
+   return;
+
if (!mem->bus.offset && !mem->bus.addr)
return;
  




Re: Oops in qxl_bo_move_notify()

2021-07-08 Thread Christian König

Yeah, that's an already known issue.

When the allocation fails bo->resource might be NULL now and we need to 
add checks for that corner case as well.


Christian.

Am 08.07.21 um 12:14 schrieb Daniel Vetter:

On Wed, Jul 07, 2021 at 04:36:49PM +, Roberto Sassu wrote:

Hi

I'm getting this oops (on commit a180bd1d7e16):

 [   17.711520] BUG: kernel NULL pointer dereference, address: 
0010
 [   17.739451] RIP: 0010:qxl_bo_move_notify+0x35/0x80 [qxl]
 [   17.827345] RSP: 0018:c9457c08 EFLAGS: 00010286
 [   17.827350] RAX: 0001 RBX:  RCX: 
dc00
 [   17.827353] RDX: 0007 RSI: 0004 RDI: 
85596feb
 [   17.827356] RBP: 88800e311c00 R08:  R09: 

 [   17.827358] R10: 8697b243 R11: fbfff0d2f648 R12: 

 [   17.827361] R13: 88800e311e48 R14: 88800e311e98 R15: 
88800e311e90
 [   17.827364] FS:  () GS:88805d80() 
knlGS:
 [   17.861699] CS:  0010 DS:  ES:  CR0: 80050033
 [   17.861703] CR2: 0010 CR3: 2642c000 CR4: 
00350ee0
 [   17.861707] Call Trace:
 [   17.861712]  ttm_bo_cleanup_memtype_use+0x4d/0xb0 [ttm]
 [   17.861730]  ttm_bo_release+0x42d/0x7c0 [ttm]
 [   17.861746]  ? ttm_bo_cleanup_refs+0x127/0x420 [ttm]
 [   17.888300]  ttm_bo_delayed_delete+0x289/0x390 [ttm]
 [   17.888317]  ? ttm_bo_cleanup_refs+0x420/0x420 [ttm]
 [   17.888332]  ? lock_release+0x9c/0x5c0
 [   17.901033]  ? rcu_read_lock_held_common+0x1a/0x50
 [   17.905183]  ttm_device_delayed_workqueue+0x18/0x50 [ttm]
 [   17.909371]  process_one_work+0x537/0x9f0
 [   17.913345]  ? pwq_dec_nr_in_flight+0x160/0x160
 [   17.917297]  ? lock_acquired+0xa4/0x580
 [   17.921168]  ? worker_thread+0x169/0x600
 [   17.925034]  worker_thread+0x7a/0x600
 [   17.928657]  ? process_one_work+0x9f0/0x9f0
 [   17.932360]  kthread+0x200/0x230
 [   17.935930]  ? set_kthread_struct+0x80/0x80
 [   17.939593]  ret_from_fork+0x22/0x30
 [   17.951737] CR2: 0010
 [   17.955496] ---[ end trace e30cc21c24e81ee5 ]---

I had a look at the code, and it seems that this is caused by
trying to use bo->resource which is NULL.

bo->resource is freed by ttm_bo_cleanup_refs() ->
ttm_bo_cleanup_memtype_use() -> ttm_resource_free().

And then a notification is issued by ttm_bo_cleanup_refs() ->
ttm_bo_put() -> ttm_bo_release() ->
ttm_bo_cleanup_memtype_use(), this time with bo->release
equal to NULL.

I was thinking of a proper way to fix this. Checking that
bo->resource is not NULL in qxl_bo_move_notify() would
solve the issue. But maybe there is a better way, like
avoiding that ttm_bo_cleanup_memtype_use() is called
twice. Which way would be preferable?

Adding Christian and Dave, who've touched all this recently iirc.
-Daniel


Thanks

Roberto

HUAWEI TECHNOLOGIES Duesseldorf GmbH, HRB 56063
Managing Director: Li Peng, Li Jian, Shi Yanli




Re: [PATCH] drm/msm/dpu: Add newlines to printks

2021-07-08 Thread Dmitry Baryshkov
On Thu, 8 Jul 2021 at 09:56, Stephen Boyd  wrote:
>
> Add some missing newlines to the various DRM printks in this file.
> Noticed while looking at logs. While we're here unbreak quoted
> strings so grepping them is easier.
>
> Signed-off-by: Stephen Boyd 

Reviewed-by: Dmitry Baryshkov 

> ---
>  drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 12 +---
>  1 file changed, 5 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c 
> b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
> index 1c04b7cce43e..0e9d3fa1544b 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
> @@ -274,20 +274,20 @@ int dpu_encoder_helper_wait_for_irq(struct 
> dpu_encoder_phys *phys_enc,
>
> /* return EWOULDBLOCK since we know the wait isn't necessary */
> if (phys_enc->enable_state == DPU_ENC_DISABLED) {
> -   DRM_ERROR("encoder is disabled id=%u, intr=%d, irq=%d",
> +   DRM_ERROR("encoder is disabled id=%u, intr=%d, irq=%d\n",
>   DRMID(phys_enc->parent), intr_idx,
>   irq->irq_idx);
> return -EWOULDBLOCK;
> }
>
> if (irq->irq_idx < 0) {
> -   DRM_DEBUG_KMS("skip irq wait id=%u, intr=%d, irq=%s",
> +   DRM_DEBUG_KMS("skip irq wait id=%u, intr=%d, irq=%s\n",
>   DRMID(phys_enc->parent), intr_idx,
>   irq->name);
> return 0;
> }
>
> -   DRM_DEBUG_KMS("id=%u, intr=%d, irq=%d, pp=%d, pending_cnt=%d",
> +   DRM_DEBUG_KMS("id=%u, intr=%d, irq=%d, pp=%d, pending_cnt=%d\n",
>   DRMID(phys_enc->parent), intr_idx,
>   irq->irq_idx, phys_enc->hw_pp->idx - PINGPONG_0,
>   atomic_read(wait_info->atomic_cnt));
> @@ -303,8 +303,7 @@ int dpu_encoder_helper_wait_for_irq(struct 
> dpu_encoder_phys *phys_enc,
> if (irq_status) {
> unsigned long flags;
>
> -   DRM_DEBUG_KMS("irq not triggered id=%u, intr=%d, "
> - "irq=%d, pp=%d, atomic_cnt=%d",
> +   DRM_DEBUG_KMS("irq not triggered id=%u, intr=%d, 
> irq=%d, pp=%d, atomic_cnt=%d\n",
>   DRMID(phys_enc->parent), intr_idx,
>   irq->irq_idx,
>   phys_enc->hw_pp->idx - PINGPONG_0,
> @@ -315,8 +314,7 @@ int dpu_encoder_helper_wait_for_irq(struct 
> dpu_encoder_phys *phys_enc,
> ret = 0;
> } else {
> ret = -ETIMEDOUT;
> -   DRM_DEBUG_KMS("irq timeout id=%u, intr=%d, "
> - "irq=%d, pp=%d, atomic_cnt=%d",
> +   DRM_DEBUG_KMS("irq timeout id=%u, intr=%d, irq=%d, 
> pp=%d, atomic_cnt=%d\n",
>   DRMID(phys_enc->parent), intr_idx,
>   irq->irq_idx,
>   phys_enc->hw_pp->idx - PINGPONG_0,
>
> base-commit: e9f1cbc0c4114880090c7a578117d3b9cf184ad4
> --
> https://chromeos.dev
>


-- 
With best wishes
Dmitry


Re: [PATCH] dma-buf: fix and rework dma_buf_poll v5

2021-07-08 Thread Christian König

Sorry that was the wrong patch.

Still not feeling that well :(

Christian.

Am 08.07.21 um 13:19 schrieb Christian König:

Daniel pointed me towards this function and there are multiple obvious problems
in the implementation.

First of all the retry loop is not working as intended. In general the retry
makes only sense if you grab the reference first and then check the sequence
values.

Then we should always also wait for the exclusive fence.

It's also good practice to keep the reference around when installing callbacks
to fences you don't own.

And last, the whole implementation was unnecessarily complex and rather hard to
understand which could lead to probably unexpected behavior of the IOCTL.

Fix all this by reworking the implementation from scratch. Dropping the
whole RCU approach and taking the lock instead.

Only mildly tested and needs a thoughtful review of the code.

v2: fix the reference counting as well
v3: keep the excl fence handling as is for stable
v4: back to testing all fences, drop RCU
v5: handle in and out separately

Signed-off-by: Christian König 
CC: sta...@vger.kernel.org
---
  drivers/dma-buf/dma-buf.c | 152 +-
  include/linux/dma-buf.h   |   2 +-
  2 files changed, 68 insertions(+), 86 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index eadd1eaa2fb5..439e2379e1cb 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -72,7 +72,7 @@ static void dma_buf_release(struct dentry *dentry)
 * If you hit this BUG() it means someone dropped their ref to the
 * dma-buf while still having pending operation to the buffer.
 */
-   BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active);
+   BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
  
  	dmabuf->ops->release(dmabuf);
  
@@ -202,16 +202,57 @@ static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb)

wake_up_locked_poll(dcb->poll, dcb->active);
dcb->active = 0;
spin_unlock_irqrestore(&dcb->poll->lock, flags);
+   dma_fence_put(fence);
+}
+
+static bool dma_buf_poll_shared(struct dma_resv *resv,
+   struct dma_buf_poll_cb_t *dcb)
+{
+   struct dma_resv_list *fobj = dma_resv_get_list(resv);
+   struct dma_fence *fence;
+   int i, r;
+
+   if (!fobj)
+   return false;
+
+   for (i = 0; i < fobj->shared_count; ++i) {
+   fence = rcu_dereference_protected(fobj->shared[i],
+ dma_resv_held(resv));
+   dma_fence_get(fence);
+   r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
+   if (!r)
+   return true;
+   dma_fence_put(fence);
+   }
+
+   return false;
+}
+
+static bool dma_buf_poll_excl(struct dma_resv *resv,
+ struct dma_buf_poll_cb_t *dcb)
+{
+   struct dma_fence *fence = dma_resv_get_excl(resv);
+   int r;
+
+   if (!fence)
+   return false;
+
+   dma_fence_get(fence);
+   r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
+   if (!r)
+   return true;
+   dma_fence_put(fence);
+
+   return false;
  }
  
  static __poll_t dma_buf_poll(struct file *file, poll_table *poll)

  {
struct dma_buf *dmabuf;
struct dma_resv *resv;
-   struct dma_resv_list *fobj;
-   struct dma_fence *fence_excl;
+   unsigned shared_count;
__poll_t events;
-   unsigned shared_count, seq;
+   int r, i;
  
  	dmabuf = file->private_data;

if (!dmabuf || !dmabuf->resv)
@@ -225,101 +266,42 @@ static __poll_t dma_buf_poll(struct file *file, 
poll_table *poll)
if (!events)
return 0;
  
-retry:

-   seq = read_seqcount_begin(&resv->seq);
-   rcu_read_lock();
-
-   fobj = rcu_dereference(resv->fence);
-   if (fobj)
-   shared_count = fobj->shared_count;
-   else
-   shared_count = 0;
-   fence_excl = rcu_dereference(resv->fence_excl);
-   if (read_seqcount_retry(&resv->seq, seq)) {
-   rcu_read_unlock();
-   goto retry;
-   }
-
-   if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) {
-   struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl;
-   __poll_t pevents = EPOLLIN;
+   dma_resv_lock(resv, NULL);
  
-		if (shared_count == 0)

-   pevents |= EPOLLOUT;
+   if (events & EPOLLOUT) {
+   struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_out;
  
+		/* Check that callback isn't busy */

spin_lock_irq(&dmabuf->poll.lock);
-   if (dcb->active) {
-   dcb->active |= pevents;
-   events &= ~pevents;
-   } else
-   dcb->active = pevents;
+   if (dcb->active)
+

[PATCH] drm/radeon: Fix NULL dereference when updating memory stats

2021-07-08 Thread Christian König
From: Mikel Rychliski 

radeon_ttm_bo_destroy() is attempting to access the resource object to
update memory counters. However, the resource object is already freed when
ttm calls this function via the destroy callback. This causes an oops when
a bo is freed:

BUG: kernel NULL pointer dereference, address: 0010
RIP: 0010:radeon_ttm_bo_destroy+0x2c/0x100 [radeon]
Call Trace:
 radeon_bo_unref+0x1a/0x30 [radeon]
 radeon_gem_object_free+0x33/0x50 [radeon]
 drm_gem_object_release_handle+0x69/0x70 [drm]
 drm_gem_handle_delete+0x62/0xa0 [drm]
 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
 drm_ioctl_kernel+0xb2/0xf0 [drm]
 drm_ioctl+0x30a/0x3c0 [drm]
 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
 radeon_drm_ioctl+0x49/0x80 [radeon]
 __x64_sys_ioctl+0x8e/0xd0

Avoid the issue by updating the counters in the delete_mem_notify callback
instead. Also, fix memory statistic updating in radeon_bo_move() to
identify the source type correctly. The source type needs to be saved
before the move, because the moved from object may be altered by the move.

Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it 
v2")
Signed-off-by: Mikel Rychliski 
Reviewed-by: Christian König 
Signed-off-by: Christian König 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210624045121.15643-1-mi...@mikelr.com
---
 drivers/gpu/drm/radeon/radeon_object.c | 29 +++---
 drivers/gpu/drm/radeon/radeon_object.h |  2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c| 13 +---
 3 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index bfaaa3c969a3..56ede9d63b12 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo 
*bo);
  * function are calling it.
  */
 
-static void radeon_update_memory_usage(struct radeon_bo *bo,
-  unsigned mem_type, int sign)
+static void radeon_update_memory_usage(struct ttm_buffer_object *bo,
+  unsigned int mem_type, int sign)
 {
-   struct radeon_device *rdev = bo->rdev;
+   struct radeon_device *rdev = radeon_get_rdev(bo->bdev);
 
switch (mem_type) {
case TTM_PL_TT:
if (sign > 0)
-   atomic64_add(bo->tbo.base.size, &rdev->gtt_usage);
+   atomic64_add(bo->base.size, &rdev->gtt_usage);
else
-   atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage);
+   atomic64_sub(bo->base.size, &rdev->gtt_usage);
break;
case TTM_PL_VRAM:
if (sign > 0)
-   atomic64_add(bo->tbo.base.size, &rdev->vram_usage);
+   atomic64_add(bo->base.size, &rdev->vram_usage);
else
-   atomic64_sub(bo->tbo.base.size, &rdev->vram_usage);
+   atomic64_sub(bo->base.size, &rdev->vram_usage);
break;
}
 }
@@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object 
*tbo)
 
bo = container_of(tbo, struct radeon_bo, tbo);
 
-   radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1);
-
mutex_lock(&bo->rdev->gem.mutex);
list_del_init(&bo->list);
mutex_unlock(&bo->rdev->gem.mutex);
@@ -727,24 +725,21 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool 
has_moved,
 }
 
 void radeon_bo_move_notify(struct ttm_buffer_object *bo,
-  bool evict,
+  unsigned int old_type,
   struct ttm_resource *new_mem)
 {
struct radeon_bo *rbo;
 
+   radeon_update_memory_usage(bo, old_type, -1);
+   if (new_mem)
+   radeon_update_memory_usage(bo, new_mem->mem_type, 1);
+
if (!radeon_ttm_bo_is_radeon_bo(bo))
return;
 
rbo = container_of(bo, struct radeon_bo, tbo);
radeon_bo_check_tiling(rbo, 0, 1);
radeon_vm_bo_invalidate(rbo->rdev, rbo);
-
-   /* update statistics */
-   if (!new_mem)
-   return;
-
-   radeon_update_memory_usage(rbo, bo->resource->mem_type, -1);
-   radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
 }
 
 vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/radeon/radeon_object.h 
b/drivers/gpu/drm/radeon/radeon_object.h
index 1739c6a142cd..1afc7992ef91 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -161,7 +161,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
 extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
bool force_drop);
 extern void radeon_

[PATCH] drm/qxl: add NULL check for bo->resource

2021-07-08 Thread Christian König
When allocation fails, that can be NULL now.

Signed-off-by: Christian König 
Reported-by: Daniel Bristot de Oliveira 
Tested-by: Daniel Bristot de Oliveira 
---
 drivers/gpu/drm/qxl/qxl_ttm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 19fd39d9a00c..37a1b6a6ad6d 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -127,7 +127,7 @@ static void qxl_bo_move_notify(struct ttm_buffer_object *bo,
struct qxl_bo *qbo;
struct qxl_device *qdev;
 
-   if (!qxl_ttm_bo_is_qxl_bo(bo))
+   if (!qxl_ttm_bo_is_qxl_bo(bo) || !bo->resource)
return;
qbo = to_qxl_bo(bo);
qdev = to_qxl(qbo->tbo.base.dev);
-- 
2.25.1



Re: [PATCH] drm/msm/mdp5: fix 64-bit division in bandwidth calculation

2021-07-08 Thread Dmitry Baryshkov

On 22/06/2021 11:03, Dmitry Baryshkov wrote:

Fix undefined symbols errors arising from 64-bit division on 32-bit
arm targets. Add 64-bit version of mult_frac and use it for calculating
bandwidth.

ERROR: modpost: "__aeabi_ldivmod" [drivers/gpu/drm/msm/msm.ko] undefined!
ERROR: modpost: "__aeabi_uldivmod" [drivers/gpu/drm/msm/msm.ko] undefined!

Fixes: 7e0230fd096c ("drm/msm/mdp5: provide dynamic bandwidth management")
Signed-off-by: Dmitry Baryshkov 


We are reworking now bandwidth management for mdp5, so both the original 
patch and the fix can be ignored for now.



---
  drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c  |  2 +-
  drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c   |  5 -
  drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c |  2 +-
  include/linux/math.h   | 13 +
  4 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c 
b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
index a9332078aa13..52724d0a6fea 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
@@ -755,7 +755,7 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
  
  	if (hw_cfg->perf.ab_inefficiency)

-   crtc_bw = mult_frac(crtc_bw, hw_cfg->perf.ab_inefficiency, 100);
+   crtc_bw = mult_frac_ull(crtc_bw, hw_cfg->perf.ab_inefficiency, 
100);
mdp5_cstate->new_crtc_bw = crtc_bw;
  
  	/*

diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c 
b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index 3e1b28d3e41b..85b7093a1218 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -301,6 +301,7 @@ static const struct mdp_kms_funcs kms_funcs = {
  void mdp5_kms_set_bandwidth(struct mdp5_kms *mdp5_kms)
  {
int i;
+   u64 bw;
u32 full_bw = 0;
struct drm_crtc *tmp_crtc;
  
@@ -311,7 +312,9 @@ void mdp5_kms_set_bandwidth(struct mdp5_kms *mdp5_kms)

if (!tmp_crtc->enabled)
continue;
  
-		full_bw += Bps_to_icc(to_mdp5_crtc_state(tmp_crtc->state)->new_crtc_bw / mdp5_kms->num_paths);

+   bw = to_mdp5_crtc_state(tmp_crtc->state)->new_crtc_bw;
+   do_div(bw, mdp5_kms->num_paths * 1000); /* Bps_to_icc */
+   full_bw += bw;
}
  
  	DBG("SET BW to %d\n", full_bw);

diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c 
b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
index 85275665558b..2ede34177a90 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
@@ -191,7 +191,7 @@ static void mdp5_plane_calc_bw(struct drm_plane_state 
*state, struct drm_crtc_st
prefill_div = vbp + vpw + vfp;
  #endif
  
-	pstate->plane_bw = max(plane_bw, mult_frac(plane_bw, hw_latency_lines, prefill_div));

+   pstate->plane_bw = max(plane_bw, mult_frac_ull(plane_bw, 
hw_latency_lines, prefill_div));
  }
  
  static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state,

diff --git a/include/linux/math.h b/include/linux/math.h
index 53674a327e39..1327385905df 100644
--- a/include/linux/math.h
+++ b/include/linux/math.h
@@ -118,6 +118,19 @@
  } \
  )
  
+#define mult_frac_ull(x, numer, denom)(			\

+{  \
+   typeof(x) quot = (x);   \
+   typeof(x) rem;  \
+   do_div(quot, (denom));  \
+   rem = (x) - quot * (denom); \
+   rem = (rem * (numer));  \
+   do_div(rem, (denom));   \
+   (quot * (numer)) + rem; \
+}  \
+)
+
+
  #define sector_div(a, b) do_div(a, b)
  
  /**





--
With best wishes
Dmitry


[PATCH v3] drm/panel: Add support for E Ink VB3300-KCA

2021-07-08 Thread Alistair Francis
Add support for the 10.3" E Ink panel described at:
https://www.eink.com/product.html?type=productdetail&id=7

Signed-off-by: Alistair Francis 
Acked-by: Rob Herring 
---
 .../bindings/display/panel/panel-simple.yaml  |  2 ++
 .../devicetree/bindings/vendor-prefixes.yaml  |  2 ++
 drivers/gpu/drm/panel/panel-simple.c  | 29 +++
 3 files changed, 33 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml 
b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
index b3797ba2698b..799e20222551 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
+++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
@@ -128,6 +128,8 @@ properties:
 # Emerging Display Technology Corp. WVGA TFT Display with capacitive 
touch
   - edt,etm0700g0dh6
   - edt,etm0700g0edh6
+# E Ink VB3300-KCA
+  - eink,vb3300-kca
 # Evervision Electronics Co. Ltd. VGG804821 5.0" WVGA TFT LCD Panel
   - evervision,vgg804821
 # Foxlink Group 5" WVGA TFT LCD panel
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml 
b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 0199728d2eaf..3612c6020fe4 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -335,6 +335,8 @@ patternProperties:
 description: eGalax_eMPIA Technology Inc
   "^einfochips,.*":
 description: Einfochips
+  "^eink,.*":
+description: E Ink Corporation
   "^elan,.*":
 description: Elan Microelectronic Corp.
   "^element14,.*":
diff --git a/drivers/gpu/drm/panel/panel-simple.c 
b/drivers/gpu/drm/panel/panel-simple.c
index 21939d4352cf..210377b03f6f 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -2046,6 +2046,32 @@ static const struct panel_desc edt_etm0700g0bdh6 = {
.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 };
 
+static const struct display_timing eink_vb3300_kca_timing = {
+   .pixelclock = { 4000, 4000, 4000 },
+   .hactive = { 334, 334, 334 },
+   .hfront_porch = { 1, 1, 1 },
+   .hback_porch = { 1, 1, 1 },
+   .hsync_len = { 1, 1, 1 },
+   .vactive = { 1405, 1405, 1405 },
+   .vfront_porch = { 1, 1, 1 },
+   .vback_porch = { 1, 1, 1 },
+   .vsync_len = { 1, 1, 1 },
+   .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW |
+DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_POSEDGE,
+};
+
+static const struct panel_desc eink_vb3300_kca = {
+   .timings = &eink_vb3300_kca_timing,
+   .num_timings = 1,
+   .bpc = 6,
+   .size = {
+   .width = 157,
+   .height = 209,
+   },
+   .bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+   .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
+};
+
 static const struct display_timing evervision_vgg804821_timing = {
.pixelclock = { 2760, 3330, 5000 },
.hactive = { 800, 800, 800 },
@@ -4344,6 +4370,9 @@ static const struct of_device_id platform_of_match[] = {
}, {
.compatible = "edt,etm0700g0dh6",
.data = &edt_etm0700g0dh6,
+   }, {
+   .compatible = "eink,vb3300-kca",
+   .data = &eink_vb3300_kca,
}, {
.compatible = "edt,etm0700g0bdh6",
.data = &edt_etm0700g0bdh6,
-- 
2.31.1



Re: [PATCH] drm/rockchip: Implement mmap as GEM object function

2021-07-08 Thread Thomas Zimmermann

ping for review

Am 24.06.21 um 11:55 schrieb Thomas Zimmermann:

Moving the driver-specific mmap code into a GEM object function allows
for using DRM helpers for various mmap callbacks.

The respective rockchip functions are being removed. The file_operations
structure fops is now being created by the helper macro
DEFINE_DRM_GEM_FOPS().

Signed-off-by: Thomas Zimmermann 
---
  drivers/gpu/drm/rockchip/rockchip_drm_drv.c   | 13 +-
  drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c |  3 +-
  drivers/gpu/drm/rockchip/rockchip_drm_gem.c   | 44 +--
  drivers/gpu/drm/rockchip/rockchip_drm_gem.h   |  7 ---
  4 files changed, 15 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c 
b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index b730b8d5d949..2e3ab573a817 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -208,16 +208,7 @@ static void rockchip_drm_unbind(struct device *dev)
drm_dev_put(drm_dev);
  }
  
-static const struct file_operations rockchip_drm_driver_fops = {

-   .owner = THIS_MODULE,
-   .open = drm_open,
-   .mmap = rockchip_gem_mmap,
-   .poll = drm_poll,
-   .read = drm_read,
-   .unlocked_ioctl = drm_ioctl,
-   .compat_ioctl = drm_compat_ioctl,
-   .release = drm_release,
-};
+DEFINE_DRM_GEM_FOPS(rockchip_drm_driver_fops);
  
  static const struct drm_driver rockchip_drm_driver = {

.driver_features= DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC,
@@ -226,7 +217,7 @@ static const struct drm_driver rockchip_drm_driver = {
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import_sg_table  = rockchip_gem_prime_import_sg_table,
-   .gem_prime_mmap = rockchip_gem_mmap_buf,
+   .gem_prime_mmap = drm_gem_prime_mmap,
.fops   = &rockchip_drm_driver_fops,
.name   = DRIVER_NAME,
.desc   = DRIVER_DESC,
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c 
b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c
index 2fdc455c4ad7..d8418dd39d0e 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c
@@ -7,6 +7,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  
  #include "rockchip_drm_drv.h"

@@ -24,7 +25,7 @@ static int rockchip_fbdev_mmap(struct fb_info *info,
struct drm_fb_helper *helper = info->par;
struct rockchip_drm_private *private = to_drm_private(helper);
  
-	return rockchip_gem_mmap_buf(private->fbdev_bo, vma);

+   return drm_gem_prime_mmap(private->fbdev_bo, vma);
  }
  
  static const struct fb_ops rockchip_drm_fbdev_ops = {

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c 
b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index 7971f57436dd..63eb73b624aa 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -240,12 +240,22 @@ static int rockchip_drm_gem_object_mmap(struct 
drm_gem_object *obj,
int ret;
struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj);
  
+	/*

+* Set vm_pgoff (used as a fake buffer offset by DRM) to 0 and map the
+* whole buffer from the start.
+*/
+   vma->vm_pgoff = 0;
+
/*
 * We allocated a struct page table for rk_obj, so clear
 * VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap().
 */
+   vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_flags &= ~VM_PFNMAP;
  
+	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));

+   vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
if (rk_obj->pages)
ret = rockchip_drm_gem_object_mmap_iommu(obj, vma);
else
@@ -257,39 +267,6 @@ static int rockchip_drm_gem_object_mmap(struct 
drm_gem_object *obj,
return ret;
  }
  
-int rockchip_gem_mmap_buf(struct drm_gem_object *obj,

- struct vm_area_struct *vma)
-{
-   int ret;
-
-   ret = drm_gem_mmap_obj(obj, obj->size, vma);
-   if (ret)
-   return ret;
-
-   return rockchip_drm_gem_object_mmap(obj, vma);
-}
-
-/* drm driver mmap file operations */
-int rockchip_gem_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-   struct drm_gem_object *obj;
-   int ret;
-
-   ret = drm_gem_mmap(filp, vma);
-   if (ret)
-   return ret;
-
-   /*
-* Set vm_pgoff (used as a fake buffer offset by DRM) to 0 and map the
-* whole buffer from the start.
-*/
-   vma->vm_pgoff = 0;
-
-   obj = vma->vm_private_data;
-
-   return rockchip_drm_gem_object_mmap(obj, vma);
-}
-
  static void rockchip_gem_release_object(struct rockchip_gem_object *rk_obj)
  {
drm_gem_object_release(&rk_obj->base);
@@ -301,6 +278,7 @@ static const str

Re: [PATCH] drm/xen: Implement mmap as GEM object function

2021-07-08 Thread Thomas Zimmermann

ping for review

Am 24.06.21 um 11:53 schrieb Thomas Zimmermann:

Moving the driver-specific mmap code into a GEM object function allows
for using DRM helpers for various mmap callbacks.

The respective xen functions are being removed. The file_operations
structure fops is now being created by the helper macro
DEFINE_DRM_GEM_FOPS().

Signed-off-by: Thomas Zimmermann 
---
  drivers/gpu/drm/xen/xen_drm_front.c |  16 +---
  drivers/gpu/drm/xen/xen_drm_front_gem.c | 108 +---
  drivers/gpu/drm/xen/xen_drm_front_gem.h |   7 --
  3 files changed, 44 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/xen/xen_drm_front.c 
b/drivers/gpu/drm/xen/xen_drm_front.c
index 9f14d99c763c..434064c820e8 100644
--- a/drivers/gpu/drm/xen/xen_drm_front.c
+++ b/drivers/gpu/drm/xen/xen_drm_front.c
@@ -469,19 +469,7 @@ static void xen_drm_drv_release(struct drm_device *dev)
kfree(drm_info);
  }
  
-static const struct file_operations xen_drm_dev_fops = {

-   .owner  = THIS_MODULE,
-   .open   = drm_open,
-   .release= drm_release,
-   .unlocked_ioctl = drm_ioctl,
-#ifdef CONFIG_COMPAT
-   .compat_ioctl   = drm_compat_ioctl,
-#endif
-   .poll   = drm_poll,
-   .read   = drm_read,
-   .llseek = no_llseek,
-   .mmap   = xen_drm_front_gem_mmap,
-};
+DEFINE_DRM_GEM_FOPS(xen_drm_dev_fops);
  
  static const struct drm_driver xen_drm_driver = {

.driver_features   = DRIVER_GEM | DRIVER_MODESET | 
DRIVER_ATOMIC,
@@ -489,7 +477,7 @@ static const struct drm_driver xen_drm_driver = {
.prime_handle_to_fd= drm_gem_prime_handle_to_fd,
.prime_fd_to_handle= drm_gem_prime_fd_to_handle,
.gem_prime_import_sg_table = xen_drm_front_gem_import_sg_table,
-   .gem_prime_mmap= xen_drm_front_gem_prime_mmap,
+   .gem_prime_mmap= drm_gem_prime_mmap,
.dumb_create   = xen_drm_drv_dumb_create,
.fops  = &xen_drm_dev_fops,
.name  = "xendrm-du",
diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c 
b/drivers/gpu/drm/xen/xen_drm_front_gem.c
index b293c67230ef..dd358ba2bf8e 100644
--- a/drivers/gpu/drm/xen/xen_drm_front_gem.c
+++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c
@@ -57,6 +57,47 @@ static void gem_free_pages_array(struct xen_gem_object 
*xen_obj)
xen_obj->pages = NULL;
  }
  
+static int xen_drm_front_gem_object_mmap(struct drm_gem_object *gem_obj,

+struct vm_area_struct *vma)
+{
+   struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
+   int ret;
+
+   vma->vm_ops = gem_obj->funcs->vm_ops;
+
+   /*
+* Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
+* vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
+* the whole buffer.
+*/
+   vma->vm_flags &= ~VM_PFNMAP;
+   vma->vm_flags |= VM_MIXEDMAP;
+   vma->vm_pgoff = 0;
+
+   /*
+* According to Xen on ARM ABI (xen/include/public/arch-arm.h):
+* all memory which is shared with other entities in the system
+* (including the hypervisor and other guests) must reside in memory
+* which is mapped as Normal Inner Write-Back Outer Write-Back
+* Inner-Shareable.
+*/
+   vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+
+   /*
+* vm_operations_struct.fault handler will be called if CPU access
+* to VM is here. For GPUs this isn't the case, because CPU  doesn't
+* touch the memory. Insert pages now, so both CPU and GPU are happy.
+*
+* FIXME: as we insert all the pages now then no .fault handler must
+* be called, so don't provide one
+*/
+   ret = vm_map_pages(vma, xen_obj->pages, xen_obj->num_pages);
+   if (ret < 0)
+   DRM_ERROR("Failed to map pages into vma: %d\n", ret);
+
+   return ret;
+}
+
  static const struct vm_operations_struct xen_drm_drv_vm_ops = {
.open   = drm_gem_vm_open,
.close  = drm_gem_vm_close,
@@ -67,6 +108,7 @@ static const struct drm_gem_object_funcs 
xen_drm_front_gem_object_funcs = {
.get_sg_table = xen_drm_front_gem_get_sg_table,
.vmap = xen_drm_front_gem_prime_vmap,
.vunmap = xen_drm_front_gem_prime_vunmap,
+   .mmap = xen_drm_front_gem_object_mmap,
.vm_ops = &xen_drm_drv_vm_ops,
  };
  
@@ -238,58 +280,6 @@ xen_drm_front_gem_import_sg_table(struct drm_device *dev,

return &xen_obj->base;
  }
  
-static int gem_mmap_obj(struct xen_gem_object *xen_obj,

-   struct vm_area_struct *vma)
-{
-   int ret;
-
-   /*
-* clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
-* vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
-* the whole buffer.
-*/

Re: [PATCH] drm/msm: Implement mmap as GEM object function

2021-07-08 Thread Thomas Zimmermann

ping for review

Am 24.06.21 um 11:03 schrieb Thomas Zimmermann:

Moving the driver-specific mmap code into a GEM object function allows
for using DRM helpers for various mmap callbacks.

The respective msm functions are being removed. The file_operations
structure fops is now being created by the helper macro
DEFINE_DRM_GEM_FOPS().

Signed-off-by: Thomas Zimmermann 
---
  drivers/gpu/drm/msm/msm_drv.c   | 14 +-
  drivers/gpu/drm/msm/msm_drv.h   |  1 -
  drivers/gpu/drm/msm/msm_fbdev.c | 10 +
  drivers/gpu/drm/msm/msm_gem.c   | 67 -
  drivers/gpu/drm/msm/msm_gem.h   |  3 --
  drivers/gpu/drm/msm/msm_gem_prime.c | 11 -
  6 files changed, 31 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index fe7d17cd35ec..f62eaedfc0d7 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -985,17 +985,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, 
DRM_RENDER_ALLOW),
  };
  
-static const struct file_operations fops = {

-   .owner  = THIS_MODULE,
-   .open   = drm_open,
-   .release= drm_release,
-   .unlocked_ioctl = drm_ioctl,
-   .compat_ioctl   = drm_compat_ioctl,
-   .poll   = drm_poll,
-   .read   = drm_read,
-   .llseek = no_llseek,
-   .mmap   = msm_gem_mmap,
-};
+DEFINE_DRM_GEM_FOPS(fops);
  
  static const struct drm_driver msm_driver = {

.driver_features= DRIVER_GEM |
@@ -1015,7 +1005,7 @@ static const struct drm_driver msm_driver = {
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import_sg_table = msm_gem_prime_import_sg_table,
-   .gem_prime_mmap = msm_gem_prime_mmap,
+   .gem_prime_mmap = drm_gem_prime_mmap,
  #ifdef CONFIG_DEBUG_FS
.debugfs_init   = msm_debugfs_init,
  #endif
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 2668941df529..8f1e0d7c8bbb 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -300,7 +300,6 @@ void msm_gem_shrinker_cleanup(struct drm_device *dev);
  struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj);
  int msm_gem_prime_vmap(struct drm_gem_object *obj, struct dma_buf_map *map);
  void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct dma_buf_map 
*map);
-int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
  struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach, struct sg_table *sg);
  int msm_gem_prime_pin(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c
index 227404077e39..07225907fd2d 100644
--- a/drivers/gpu/drm/msm/msm_fbdev.c
+++ b/drivers/gpu/drm/msm/msm_fbdev.c
@@ -8,6 +8,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include "msm_drv.h"

  #include "msm_gem.h"
@@ -48,15 +49,8 @@ static int msm_fbdev_mmap(struct fb_info *info, struct 
vm_area_struct *vma)
struct drm_fb_helper *helper = (struct drm_fb_helper *)info->par;
struct msm_fbdev *fbdev = to_msm_fbdev(helper);
struct drm_gem_object *bo = msm_framebuffer_bo(fbdev->fb, 0);
-   int ret = 0;
  
-	ret = drm_gem_mmap_obj(bo, bo->size, vma);

-   if (ret) {
-   pr_err("%s:drm_gem_mmap_obj fail\n", __func__);
-   return ret;
-   }
-
-   return msm_gem_mmap_obj(bo, vma);
+   return drm_gem_prime_mmap(bo, vma);
  }
  
  static int msm_fbdev_create(struct drm_fb_helper *helper,

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index b61f5466e522..71d835bc575d 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -211,46 +211,6 @@ void msm_gem_put_pages(struct drm_gem_object *obj)
msm_gem_unlock(obj);
  }
  
-int msm_gem_mmap_obj(struct drm_gem_object *obj,

-   struct vm_area_struct *vma)
-{
-   struct msm_gem_object *msm_obj = to_msm_bo(obj);
-
-   vma->vm_flags &= ~VM_PFNMAP;
-   vma->vm_flags |= VM_MIXEDMAP;
-
-   if (msm_obj->flags & MSM_BO_WC) {
-   vma->vm_page_prot = 
pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
-   } else if (msm_obj->flags & MSM_BO_UNCACHED) {
-   vma->vm_page_prot = 
pgprot_noncached(vm_get_page_prot(vma->vm_flags));
-   } else {
-   /*
-* Shunt off cached objs to shmem file so they have their own
-* address_space (so unmap_mapping_range does what we want,
-* in particular in the case of mmap'd dmabufs)
-*/
-   vma->vm_pgoff = 0;
-   vma_set_file(vma, obj->filp);
-
- 

Re: [PATCH] drm/mediatek: Implement mmap as GEM object function

2021-07-08 Thread Thomas Zimmermann

ping for review

Am 24.06.21 um 11:01 schrieb Thomas Zimmermann:

Moving the driver-specific mmap code into a GEM object function allows
for using DRM helpers for various mmap callbacks.

The respective mediatek functions are being removed. The file_operations
structure fops is now being created by the helper macro
DEFINE_DRM_GEM_FOPS().

Signed-off-by: Thomas Zimmermann 
---
  drivers/gpu/drm/mediatek/mtk_drm_drv.c | 13 ++--
  drivers/gpu/drm/mediatek/mtk_drm_gem.c | 44 +++---
  drivers/gpu/drm/mediatek/mtk_drm_gem.h |  3 --
  3 files changed, 14 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c 
b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index b46bdb8985da..bbfefb29c211 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -300,16 +300,7 @@ static void mtk_drm_kms_deinit(struct drm_device *drm)
component_unbind_all(drm->dev, drm);
  }
  
-static const struct file_operations mtk_drm_fops = {

-   .owner = THIS_MODULE,
-   .open = drm_open,
-   .release = drm_release,
-   .unlocked_ioctl = drm_ioctl,
-   .mmap = mtk_drm_gem_mmap,
-   .poll = drm_poll,
-   .read = drm_read,
-   .compat_ioctl = drm_compat_ioctl,
-};
+DEFINE_DRM_GEM_FOPS(mtk_drm_fops);
  
  /*

   * We need to override this because the device used to import the memory is
@@ -332,7 +323,7 @@ static const struct drm_driver mtk_drm_driver = {
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import = mtk_drm_gem_prime_import,
.gem_prime_import_sg_table = mtk_gem_prime_import_sg_table,
-   .gem_prime_mmap = mtk_drm_gem_mmap_buf,
+   .gem_prime_mmap = drm_gem_prime_mmap,
.fops = &mtk_drm_fops,
  
  	.name = DRIVER_NAME,

diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c 
b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
index 280ea0d5e840..d0544962cfc1 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
@@ -14,11 +14,14 @@
  #include "mtk_drm_drv.h"
  #include "mtk_drm_gem.h"
  
+static int mtk_drm_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);

+
  static const struct drm_gem_object_funcs mtk_drm_gem_object_funcs = {
.free = mtk_drm_gem_free_object,
.get_sg_table = mtk_gem_prime_get_sg_table,
.vmap = mtk_drm_gem_prime_vmap,
.vunmap = mtk_drm_gem_prime_vunmap,
+   .mmap = mtk_drm_gem_object_mmap,
.vm_ops = &drm_gem_cma_vm_ops,
  };
  
@@ -145,11 +148,19 @@ static int mtk_drm_gem_object_mmap(struct drm_gem_object *obj,

struct mtk_drm_gem_obj *mtk_gem = to_mtk_gem_obj(obj);
struct mtk_drm_private *priv = obj->dev->dev_private;
  
+	/*

+* Set vm_pgoff (used as a fake buffer offset by DRM) to 0 and map the
+* whole buffer from the start.
+*/
+   vma->vm_pgoff = 0;
+
/*
 * dma_alloc_attrs() allocated a struct page table for mtk_gem, so clear
 * VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap().
 */
-   vma->vm_flags &= ~VM_PFNMAP;
+   vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+   vma->vm_page_prot = 
pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+   vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
  
  	ret = dma_mmap_attrs(priv->dma_dev, vma, mtk_gem->cookie,

 mtk_gem->dma_addr, obj->size, mtk_gem->dma_attrs);
@@ -159,37 +170,6 @@ static int mtk_drm_gem_object_mmap(struct drm_gem_object 
*obj,
return ret;
  }
  
-int mtk_drm_gem_mmap_buf(struct drm_gem_object *obj, struct vm_area_struct *vma)

-{
-   int ret;
-
-   ret = drm_gem_mmap_obj(obj, obj->size, vma);
-   if (ret)
-   return ret;
-
-   return mtk_drm_gem_object_mmap(obj, vma);
-}
-
-int mtk_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-   struct drm_gem_object *obj;
-   int ret;
-
-   ret = drm_gem_mmap(filp, vma);
-   if (ret)
-   return ret;
-
-   obj = vma->vm_private_data;
-
-   /*
-* Set vm_pgoff (used as a fake buffer offset by DRM) to 0 and map the
-* whole buffer from the start.
-*/
-   vma->vm_pgoff = 0;
-
-   return mtk_drm_gem_object_mmap(obj, vma);
-}
-
  /*
   * Allocate a sg_table for this GEM object.
   * Note: Both the table's contents, and the sg_table itself must be freed by
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.h 
b/drivers/gpu/drm/mediatek/mtk_drm_gem.h
index 6da5ccb4b933..9a359a06cb73 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_gem.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.h
@@ -39,9 +39,6 @@ struct mtk_drm_gem_obj *mtk_drm_gem_create(struct drm_device 
*dev, size_t size,
   bool alloc_kmap);
  int mtk_drm_gem_dumb_create(struct drm_file *file_priv, struct drm_device 
*dev,
struct drm_mode_create_dumb *args);
-

Re: [PATCH] drm/exynos: Implement mmap as GEM object function

2021-07-08 Thread Thomas Zimmermann

ping for review

Am 24.06.21 um 11:00 schrieb Thomas Zimmermann:

Moving the driver-specific mmap code into a GEM object function allows
for using DRM helpers for various mmap callbacks.

The respective exynos functions are being removed. The file_operations
structure exynos_drm_driver_fops is now being created by the helper macro
DEFINE_DRM_GEM_FOPS().

Signed-off-by: Thomas Zimmermann 
---
  drivers/gpu/drm/exynos/exynos_drm_drv.c   | 13 ++-
  drivers/gpu/drm/exynos/exynos_drm_fbdev.c | 20 ++-
  drivers/gpu/drm/exynos/exynos_drm_gem.c   | 43 +--
  drivers/gpu/drm/exynos/exynos_drm_gem.h   |  5 ---
  4 files changed, 13 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c 
b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index e60257f1f24b..1d46751cad02 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -102,16 +102,7 @@ static const struct drm_ioctl_desc exynos_ioctls[] = {
DRM_RENDER_ALLOW),
  };
  
-static const struct file_operations exynos_drm_driver_fops = {

-   .owner  = THIS_MODULE,
-   .open   = drm_open,
-   .mmap   = exynos_drm_gem_mmap,
-   .poll   = drm_poll,
-   .read   = drm_read,
-   .unlocked_ioctl = drm_ioctl,
-   .compat_ioctl = drm_compat_ioctl,
-   .release= drm_release,
-};
+DEFINE_DRM_GEM_FOPS(exynos_drm_driver_fops);
  
  static const struct drm_driver exynos_drm_driver = {

.driver_features= DRIVER_MODESET | DRIVER_GEM
@@ -124,7 +115,7 @@ static const struct drm_driver exynos_drm_driver = {
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import   = exynos_drm_gem_prime_import,
.gem_prime_import_sg_table  = exynos_drm_gem_prime_import_sg_table,
-   .gem_prime_mmap = exynos_drm_gem_prime_mmap,
+   .gem_prime_mmap = drm_gem_prime_mmap,
.ioctls = exynos_ioctls,
.num_ioctls = ARRAY_SIZE(exynos_ioctls),
.fops   = &exynos_drm_driver_fops,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c 
b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index 5147f5929be7..02c97b9ca926 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -15,6 +15,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  
@@ -39,25 +40,8 @@ static int exynos_drm_fb_mmap(struct fb_info *info,

struct drm_fb_helper *helper = info->par;
struct exynos_drm_fbdev *exynos_fbd = to_exynos_fbdev(helper);
struct exynos_drm_gem *exynos_gem = exynos_fbd->exynos_gem;
-   unsigned long vm_size;
-   int ret;
-
-   vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
-
-   vm_size = vma->vm_end - vma->vm_start;
-
-   if (vm_size > exynos_gem->size)
-   return -EINVAL;
  
-	ret = dma_mmap_attrs(to_dma_dev(helper->dev), vma, exynos_gem->cookie,

-exynos_gem->dma_addr, exynos_gem->size,
-exynos_gem->dma_attrs);
-   if (ret < 0) {
-   DRM_DEV_ERROR(to_dma_dev(helper->dev), "failed to mmap.\n");
-   return ret;
-   }
-
-   return 0;
+   return drm_gem_prime_mmap(&exynos_gem->base, vma);
  }
  
  static const struct fb_ops exynos_drm_fb_ops = {

diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c 
b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 4396224227d1..c4b63902ee7a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -17,6 +17,8 @@
  #include "exynos_drm_drv.h"
  #include "exynos_drm_gem.h"
  
+static int exynos_drm_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);

+
  static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem, bool kvmap)
  {
struct drm_device *dev = exynos_gem->base.dev;
@@ -135,6 +137,7 @@ static const struct vm_operations_struct 
exynos_drm_gem_vm_ops = {
  static const struct drm_gem_object_funcs exynos_drm_gem_object_funcs = {
.free = exynos_drm_gem_free_object,
.get_sg_table = exynos_drm_gem_prime_get_sg_table,
+   .mmap = exynos_drm_gem_mmap,
.vm_ops = &exynos_drm_gem_vm_ops,
  };
  
@@ -354,12 +357,16 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv,

return 0;
  }
  
-static int exynos_drm_gem_mmap_obj(struct drm_gem_object *obj,

-  struct vm_area_struct *vma)
+static int exynos_drm_gem_mmap(struct drm_gem_object *obj, struct 
vm_area_struct *vma)
  {
struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj);
int ret;
  
+	if (obj->import_attach)

+   return dma_buf_mmap(obj->dma_buf, vma, 0);
+
+   vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+
DRM_DEV_DEBUG_KMS(to_dma_dev(obj->dev), "flags = 0x%x\n",
 

RE: [PATCH] drm/qxl: add NULL check for bo->resource

2021-07-08 Thread Roberto Sassu
> From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
> Sent: Thursday, July 8, 2021 1:47 PM
> When allocations fails that can be NULL now.
> 
> Signed-off-by: Christian König 
> Reported-by: Daniel Bristot de Oliveira 
> Tested-by: Daniel Bristot de Oliveira 

Hi Christian

thanks, it worked.

Tested-by: Roberto Sassu 

Roberto

HUAWEI TECHNOLOGIES Duesseldorf GmbH, HRB 56063
Managing Director: Li Peng, Li Jian, Shi Yanli

> ---
>  drivers/gpu/drm/qxl/qxl_ttm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
> index 19fd39d9a00c..37a1b6a6ad6d 100644
> --- a/drivers/gpu/drm/qxl/qxl_ttm.c
> +++ b/drivers/gpu/drm/qxl/qxl_ttm.c
> @@ -127,7 +127,7 @@ static void qxl_bo_move_notify(struct
> ttm_buffer_object *bo,
>   struct qxl_bo *qbo;
>   struct qxl_device *qdev;
> 
> - if (!qxl_ttm_bo_is_qxl_bo(bo))
> + if (!qxl_ttm_bo_is_qxl_bo(bo) || !bo->resource)
>   return;
>   qbo = to_qxl_bo(bo);
>   qdev = to_qxl(qbo->tbo.base.dev);
> --
> 2.25.1



Re: [PATCH v4 5/7] drm/panfrost: Add a new ioctl to submit batches

2021-07-08 Thread Christian König

Am 05.07.21 um 11:32 schrieb Daniel Vetter:

On Mon, Jul 05, 2021 at 10:29:48AM +0200, Boris Brezillon wrote:

This should help limit the number of ioctls when submitting multiple
jobs. The new ioctl also supports syncobj timelines and BO access flags.

v4:
* Implement panfrost_ioctl_submit() as a wrapper around
   panfrost_submit_job()
* Replace stride fields by a version field which is mapped to
   a  tuple internally

v3:
* Re-use panfrost_get_job_bos() and panfrost_get_job_in_syncs() in the
   old submit path

Signed-off-by: Boris Brezillon 
---
  drivers/gpu/drm/panfrost/panfrost_drv.c | 562 
  drivers/gpu/drm/panfrost/panfrost_job.c |   3 +
  include/uapi/drm/panfrost_drm.h |  92 
  3 files changed, 479 insertions(+), 178 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c 
b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 8e28ef30310b..a624e4f86aff 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -138,184 +138,6 @@ panfrost_get_job_mappings(struct drm_file *file_priv, 
struct panfrost_job *job)
return 0;
  }
  
-/**

- * panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects
- * referenced by the job.
- * @dev: DRM device
- * @file_priv: DRM file for this fd
- * @args: IOCTL args
- * @job: job being set up
- *
- * Resolve handles from userspace to BOs and attach them to job.
- *
- * Note that this function doesn't need to unreference the BOs on
- * failure, because that will happen at panfrost_job_cleanup() time.
- */
-static int
-panfrost_lookup_bos(struct drm_device *dev,
- struct drm_file *file_priv,
- struct drm_panfrost_submit *args,
- struct panfrost_job *job)
-{
-   unsigned int i;
-   int ret;
-
-   job->bo_count = args->bo_handle_count;
-
-   if (!job->bo_count)
-   return 0;
-
-   job->bo_flags = kvmalloc_array(job->bo_count,
-  sizeof(*job->bo_flags),
-  GFP_KERNEL | __GFP_ZERO);
-   if (!job->bo_flags)
-   return -ENOMEM;
-
-   for (i = 0; i < job->bo_count; i++)
-   job->bo_flags[i] = PANFROST_BO_REF_EXCLUSIVE;
-
-   ret = drm_gem_objects_lookup(file_priv,
-(void __user *)(uintptr_t)args->bo_handles,
-job->bo_count, &job->bos);
-   if (ret)
-   return ret;
-
-   return panfrost_get_job_mappings(file_priv, job);
-}
-
-/**
- * panfrost_copy_in_sync() - Sets up job->deps with the sync objects
- * referenced by the job.
- * @dev: DRM device
- * @file_priv: DRM file for this fd
- * @args: IOCTL args
- * @job: job being set up
- *
- * Resolve syncobjs from userspace to fences and attach them to job.
- *
- * Note that this function doesn't need to unreference the fences on
- * failure, because that will happen at panfrost_job_cleanup() time.
- */
-static int
-panfrost_copy_in_sync(struct drm_device *dev,
- struct drm_file *file_priv,
- struct drm_panfrost_submit *args,
- struct panfrost_job *job)
-{
-   u32 *handles;
-   int ret = 0;
-   int i, in_fence_count;
-
-   in_fence_count = args->in_sync_count;
-
-   if (!in_fence_count)
-   return 0;
-
-   handles = kvmalloc_array(in_fence_count, sizeof(u32), GFP_KERNEL);
-   if (!handles) {
-   ret = -ENOMEM;
-   DRM_DEBUG("Failed to allocate incoming syncobj handles\n");
-   goto fail;
-   }
-
-   if (copy_from_user(handles,
-  (void __user *)(uintptr_t)args->in_syncs,
-  in_fence_count * sizeof(u32))) {
-   ret = -EFAULT;
-   DRM_DEBUG("Failed to copy in syncobj handles\n");
-   goto fail;
-   }
-
-   for (i = 0; i < in_fence_count; i++) {
-   struct dma_fence *fence;
-
-   ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0,
-&fence);
-   if (ret)
-   goto fail;
-
-   ret = drm_gem_fence_array_add(&job->deps, fence);
-
-   if (ret)
-   goto fail;
-   }
-
-fail:
-   kvfree(handles);
-   return ret;
-}
-
-static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
-   struct drm_file *file)
-{
-   struct panfrost_device *pfdev = dev->dev_private;
-   struct drm_panfrost_submit *args = data;
-   struct drm_syncobj *sync_out = NULL;
-   struct panfrost_submitqueue *queue;
-   struct panfrost_job *job;
-   int ret = 0;
-
-   if (!args->jc)
-   return -EINVAL;
-
-   if (args->requirements && args->requirements != PANFROST_JD_REQ_FS)
-   return -EINVAL;
-
-   queue = panfrost_submitqueue_get(file->driver_

Re: [PATCH] drm/msm: Implement mmap as GEM object function

2021-07-08 Thread Thomas Zimmermann



Am 08.07.21 um 14:04 schrieb Thomas Zimmermann:

ping for review


Nevermind, there's a newer version of this patch at

https://lore.kernel.org/dri-devel/20210706084753.8194-1-tzimmerm...@suse.de/

Best regards
Thomas



Am 24.06.21 um 11:03 schrieb Thomas Zimmermann:

Moving the driver-specific mmap code into a GEM object function allows
for using DRM helpers for various mmap callbacks.

The respective msm functions are being removed. The file_operations
structure fops is now being created by the helper macro
DEFINE_DRM_GEM_FOPS().

Signed-off-by: Thomas Zimmermann 
---
  drivers/gpu/drm/msm/msm_drv.c   | 14 +-
  drivers/gpu/drm/msm/msm_drv.h   |  1 -
  drivers/gpu/drm/msm/msm_fbdev.c | 10 +
  drivers/gpu/drm/msm/msm_gem.c   | 67 -
  drivers/gpu/drm/msm/msm_gem.h   |  3 --
  drivers/gpu/drm/msm/msm_gem_prime.c | 11 -
  6 files changed, 31 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c 
b/drivers/gpu/drm/msm/msm_drv.c

index fe7d17cd35ec..f62eaedfc0d7 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -985,17 +985,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
  DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, 
msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW),

  };
-static const struct file_operations fops = {
-    .owner  = THIS_MODULE,
-    .open   = drm_open,
-    .release    = drm_release,
-    .unlocked_ioctl = drm_ioctl,
-    .compat_ioctl   = drm_compat_ioctl,
-    .poll   = drm_poll,
-    .read   = drm_read,
-    .llseek = no_llseek,
-    .mmap   = msm_gem_mmap,
-};
+DEFINE_DRM_GEM_FOPS(fops);
  static const struct drm_driver msm_driver = {
  .driver_features    = DRIVER_GEM |
@@ -1015,7 +1005,7 @@ static const struct drm_driver msm_driver = {
  .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
  .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
  .gem_prime_import_sg_table = msm_gem_prime_import_sg_table,
-    .gem_prime_mmap = msm_gem_prime_mmap,
+    .gem_prime_mmap = drm_gem_prime_mmap,
  #ifdef CONFIG_DEBUG_FS
  .debugfs_init   = msm_debugfs_init,
  #endif
diff --git a/drivers/gpu/drm/msm/msm_drv.h 
b/drivers/gpu/drm/msm/msm_drv.h

index 2668941df529..8f1e0d7c8bbb 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -300,7 +300,6 @@ void msm_gem_shrinker_cleanup(struct drm_device 
*dev);
  struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object 
*obj);
  int msm_gem_prime_vmap(struct drm_gem_object *obj, struct 
dma_buf_map *map);
  void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct 
dma_buf_map *map);
-int msm_gem_prime_mmap(struct drm_gem_object *obj, struct 
vm_area_struct *vma);
  struct drm_gem_object *msm_gem_prime_import_sg_table(struct 
drm_device *dev,

  struct dma_buf_attachment *attach, struct sg_table *sg);
  int msm_gem_prime_pin(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/msm/msm_fbdev.c 
b/drivers/gpu/drm/msm/msm_fbdev.c

index 227404077e39..07225907fd2d 100644
--- a/drivers/gpu/drm/msm/msm_fbdev.c
+++ b/drivers/gpu/drm/msm/msm_fbdev.c
@@ -8,6 +8,7 @@
  #include 
  #include 
  #include 
+#include 
  #include "msm_drv.h"
  #include "msm_gem.h"
@@ -48,15 +49,8 @@ static int msm_fbdev_mmap(struct fb_info *info, 
struct vm_area_struct *vma)

  struct drm_fb_helper *helper = (struct drm_fb_helper *)info->par;
  struct msm_fbdev *fbdev = to_msm_fbdev(helper);
  struct drm_gem_object *bo = msm_framebuffer_bo(fbdev->fb, 0);
-    int ret = 0;
-    ret = drm_gem_mmap_obj(bo, bo->size, vma);
-    if (ret) {
-    pr_err("%s:drm_gem_mmap_obj fail\n", __func__);
-    return ret;
-    }
-
-    return msm_gem_mmap_obj(bo, vma);
+    return drm_gem_prime_mmap(bo, vma);
  }
  static int msm_fbdev_create(struct drm_fb_helper *helper,
diff --git a/drivers/gpu/drm/msm/msm_gem.c 
b/drivers/gpu/drm/msm/msm_gem.c

index b61f5466e522..71d835bc575d 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -211,46 +211,6 @@ void msm_gem_put_pages(struct drm_gem_object *obj)
  msm_gem_unlock(obj);
  }
-int msm_gem_mmap_obj(struct drm_gem_object *obj,
-    struct vm_area_struct *vma)
-{
-    struct msm_gem_object *msm_obj = to_msm_bo(obj);
-
-    vma->vm_flags &= ~VM_PFNMAP;
-    vma->vm_flags |= VM_MIXEDMAP;
-
-    if (msm_obj->flags & MSM_BO_WC) {
-    vma->vm_page_prot = 
pgprot_writecombine(vm_get_page_prot(vma->vm_flags));

-    } else if (msm_obj->flags & MSM_BO_UNCACHED) {
-    vma->vm_page_prot = 
pgprot_noncached(vm_get_page_prot(vma->vm_flags));

-    } else {
-    /*
- * Shunt off cached objs to shmem file so they have their own
- * address_space (so unmap_mapping_range does what we want,
- * in particular in the case of mmap'd dmabufs)
- */
-    vma->vm_pgoff = 0;
-    vma_set_fil

[PATCH v1 0/7] drm/msm/dpu: add support for idependent DSI config

2021-07-08 Thread Dmitry Baryshkov
This patch series adds support for independent DSI config to DPU1 display
subdriver. Also drop one of msm_kms_funcs callbacks, made unnecessary
now.

Tested on RB5 (dpu, dsi). Previous iteration was tested by Alexey
Minnekhanov.

Changes since v1:
 - renamed dual DSI to bonded DSI as suggested by Abhinav
 - added comments to _dpu_kms_initialize_dsi() regarding encoders usage

The following changes since commit e88bbc91849b2bf57683119c339e52916d34433f:

  Revert "drm/msm/mdp5: provide dynamic bandwidth management" (2021-06-23 
14:06:20 -0700)

are available in the Git repository at:

  https://git.linaro.org/people/dmitry.baryshkov/kernel.git 
msm-drm-drop-set-encoder-mode-1

for you to fetch changes up to 142f79dfc41271576731a49516d63ad47a56e1ca:

  drm/msm/kms: drop set_encoder_mode callback (2021-07-08 15:20:52 +0300)


Dmitry Baryshkov (7):
  drm/msm/dsi: rename dual DSI to bonded DSI
  drm/msm/dsi: add two helper functions
  drm/msm/dpu: support setting up two independent DSI connectors
  drm/msm/mdp5: move mdp5_encoder_set_intf_mode after msm_dsi_modeset_init
  drm/msm/dp: stop calling set_encoder_mode callback
  drm/msm/dsi: stop calling set_encoder_mode callback
  drm/msm/kms: drop set_encoder_mode callback

 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c  | 102 +--
 drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c |  11 +---
 drivers/gpu/drm/msm/dp/dp_display.c  |  18 --
 drivers/gpu/drm/msm/dsi/dsi.c|   9 ++-
 drivers/gpu/drm/msm/dsi/dsi.h|   9 ++-
 drivers/gpu/drm/msm/dsi/dsi_cfg.h|   2 +-
 drivers/gpu/drm/msm/dsi/dsi_host.c   |  30 -
 drivers/gpu/drm/msm/dsi/dsi_manager.c|  93 
 drivers/gpu/drm/msm/msm_drv.h|  12 +++-
 drivers/gpu/drm/msm/msm_kms.h|   3 -
 10 files changed, 136 insertions(+), 153 deletions(-)




[PATCH v1 1/7] drm/msm/dsi: rename dual DSI to bonded DSI

2021-07-08 Thread Dmitry Baryshkov
We are preparing to support two independent DSI hosts in the DSI/DPU
code. To remove possible confusion (as both configurations can be
referenced as dual DSI) let's rename old "dual DSI" (two DSI hosts
driving single device, with clocks being locked) to "bonded DSI".

Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/msm/dsi/dsi.h |  8 ++--
 drivers/gpu/drm/msm/dsi/dsi_cfg.h |  2 +-
 drivers/gpu/drm/msm/dsi/dsi_host.c| 30 ++--
 drivers/gpu/drm/msm/dsi/dsi_manager.c | 69 +--
 4 files changed, 54 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h
index 9b8e9b07eced..856a532850c0 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.h
+++ b/drivers/gpu/drm/msm/dsi/dsi.h
@@ -109,7 +109,7 @@ int msm_dsi_host_enable(struct mipi_dsi_host *host);
 int msm_dsi_host_disable(struct mipi_dsi_host *host);
 int msm_dsi_host_power_on(struct mipi_dsi_host *host,
struct msm_dsi_phy_shared_timings *phy_shared_timings,
-   bool is_dual_dsi);
+   bool is_bonded_dsi);
 int msm_dsi_host_power_off(struct mipi_dsi_host *host);
 int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host,
  const struct drm_display_mode *mode);
@@ -123,7 +123,7 @@ int msm_dsi_host_set_src_pll(struct mipi_dsi_host *host,
 void msm_dsi_host_reset_phy(struct mipi_dsi_host *host);
 void msm_dsi_host_get_phy_clk_req(struct mipi_dsi_host *host,
struct msm_dsi_phy_clk_request *clk_req,
-   bool is_dual_dsi);
+   bool is_bonded_dsi);
 void msm_dsi_host_destroy(struct mipi_dsi_host *host);
 int msm_dsi_host_modeset_init(struct mipi_dsi_host *host,
struct drm_device *dev);
@@ -145,8 +145,8 @@ int dsi_dma_base_get_6g(struct msm_dsi_host *msm_host, 
uint64_t *iova);
 int dsi_dma_base_get_v2(struct msm_dsi_host *msm_host, uint64_t *iova);
 int dsi_clk_init_v2(struct msm_dsi_host *msm_host);
 int dsi_clk_init_6g_v2(struct msm_dsi_host *msm_host);
-int dsi_calc_clk_rate_v2(struct msm_dsi_host *msm_host, bool is_dual_dsi);
-int dsi_calc_clk_rate_6g(struct msm_dsi_host *msm_host, bool is_dual_dsi);
+int dsi_calc_clk_rate_v2(struct msm_dsi_host *msm_host, bool is_bonded_dsi);
+int dsi_calc_clk_rate_6g(struct msm_dsi_host *msm_host, bool is_bonded_dsi);
 void msm_dsi_host_snapshot(struct msm_disp_state *disp_state, struct 
mipi_dsi_host *host);
 /* dsi phy */
 struct msm_dsi_phy;
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h 
b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
index ade9b609c7d9..2bce00d5a9fc 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
@@ -47,7 +47,7 @@ struct msm_dsi_host_cfg_ops {
void* (*tx_buf_get)(struct msm_dsi_host *msm_host);
void (*tx_buf_put)(struct msm_dsi_host *msm_host);
int (*dma_base_get)(struct msm_dsi_host *msm_host, uint64_t *iova);
-   int (*calc_clk_rate)(struct msm_dsi_host *msm_host, bool is_dual_dsi);
+   int (*calc_clk_rate)(struct msm_dsi_host *msm_host, bool is_bonded_dsi);
 };
 
 struct msm_dsi_cfg_handler {
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c 
b/drivers/gpu/drm/msm/dsi/dsi_host.c
index ed504fe5074f..eb988faddbbf 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -679,7 +679,7 @@ void dsi_link_clk_disable_v2(struct msm_dsi_host *msm_host)
clk_disable_unprepare(msm_host->byte_clk);
 }
 
-static u32 dsi_get_pclk_rate(struct msm_dsi_host *msm_host, bool is_dual_dsi)
+static u32 dsi_get_pclk_rate(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
 {
struct drm_display_mode *mode = msm_host->mode;
u32 pclk_rate;
@@ -692,17 +692,17 @@ static u32 dsi_get_pclk_rate(struct msm_dsi_host 
*msm_host, bool is_dual_dsi)
 * the clock rates have to be split between the two dsi controllers.
 * Adjust the byte and pixel clock rates for each dsi host accordingly.
 */
-   if (is_dual_dsi)
+   if (is_bonded_dsi)
pclk_rate /= 2;
 
return pclk_rate;
 }
 
-static void dsi_calc_pclk(struct msm_dsi_host *msm_host, bool is_dual_dsi)
+static void dsi_calc_pclk(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
 {
u8 lanes = msm_host->lanes;
u32 bpp = dsi_get_bpp(msm_host->format);
-   u32 pclk_rate = dsi_get_pclk_rate(msm_host, is_dual_dsi);
+   u32 pclk_rate = dsi_get_pclk_rate(msm_host, is_bonded_dsi);
u64 pclk_bpp = (u64)pclk_rate * bpp;
 
if (lanes == 0) {
@@ -720,28 +720,28 @@ static void dsi_calc_pclk(struct msm_dsi_host *msm_host, 
bool is_dual_dsi)
 
 }
 
-int dsi_calc_clk_rate_6g(struct msm_dsi_host *msm_host, bool is_dual_dsi)
+int dsi_calc_clk_rate_6g(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
 {
if (!msm_host->mode) {
pr_err("%s: mode not set\n", __func__);
return -EINVAL;
}
 
-   dsi_calc_pclk(msm_host, is_

[PATCH v1 3/7] drm/msm/dpu: support setting up two independent DSI connectors

2021-07-08 Thread Dmitry Baryshkov
Move setting up encoders from set_encoder_mode to
_dpu_kms_initialize_dsi() / _dpu_kms_initialize_displayport(). This
allows us to support not only "single DSI" and "bonded DSI" but also "two
independent DSI" configurations. In future this would also help adding
support for multiple DP connectors.

Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 102 +---
 1 file changed, 57 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index 1d3a4f395e74..8459da36174e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -471,30 +471,68 @@ static int _dpu_kms_initialize_dsi(struct drm_device *dev,
struct dpu_kms *dpu_kms)
 {
struct drm_encoder *encoder = NULL;
+   struct msm_display_info info;
int i, rc = 0;
 
if (!(priv->dsi[0] || priv->dsi[1]))
return rc;
 
-   /*TODO: Support two independent DSI connectors */
-   encoder = dpu_encoder_init(dev, DRM_MODE_ENCODER_DSI);
-   if (IS_ERR(encoder)) {
-   DPU_ERROR("encoder init failed for dsi display\n");
-   return PTR_ERR(encoder);
-   }
-
-   priv->encoders[priv->num_encoders++] = encoder;
-
+   /*
+* We support following confiurations:
+* - Single DSI host (dsi0 or dsi1)
+* - Two independent DSI hosts
+* - Bonded DSI0 and DSI1 hosts
+*
+*   TODO: Support swapping DSI0 and DSI1 in the bonded setup.
+*/
for (i = 0; i < ARRAY_SIZE(priv->dsi); i++) {
if (!priv->dsi[i])
continue;
 
+   if (!encoder) {
+   encoder = dpu_encoder_init(dev, DRM_MODE_ENCODER_DSI);
+   if (IS_ERR(encoder)) {
+   DPU_ERROR("encoder init failed for dsi 
display\n");
+   return PTR_ERR(encoder);
+   }
+
+   priv->encoders[priv->num_encoders++] = encoder;
+
+   memset(&info, 0, sizeof(info));
+   info.intf_type = encoder->encoder_type;
+   info.capabilities = msm_dsi_is_cmd_mode(priv->dsi[i]) ?
+   MSM_DISPLAY_CAP_CMD_MODE :
+   MSM_DISPLAY_CAP_VID_MODE;
+   }
+
rc = msm_dsi_modeset_init(priv->dsi[i], dev, encoder);
if (rc) {
DPU_ERROR("modeset_init failed for dsi[%d], rc = %d\n",
i, rc);
break;
}
+
+   info.h_tile_instance[info.num_of_h_tiles++] = i;
+
+   /* Register non-bonded encoder here. If the encoder is bonded,
+* it will be registered later, when both DSI hosts are
+* initialized.
+*/
+   if (!msm_dsi_is_bonded_dsi(priv->dsi[i])) {
+   rc = dpu_encoder_setup(dev, encoder, &info);
+   if (rc)
+   DPU_ERROR("failed to setup DPU encoder %d: 
rc:%d\n",
+ encoder->base.id, rc);
+   encoder = NULL;
+   }
+   }
+
+   /* Register bonded encoder here, when both DSI hosts are initialized */
+   if (encoder) {
+   rc = dpu_encoder_setup(dev, encoder, &info);
+   if (rc)
+   DPU_ERROR("failed to setup DPU encoder %d: rc:%d\n",
+ encoder->base.id, rc);
}
 
return rc;
@@ -505,6 +543,7 @@ static int _dpu_kms_initialize_displayport(struct 
drm_device *dev,
struct dpu_kms *dpu_kms)
 {
struct drm_encoder *encoder = NULL;
+   struct msm_display_info info;
int rc = 0;
 
if (!priv->dp)
@@ -516,6 +555,7 @@ static int _dpu_kms_initialize_displayport(struct 
drm_device *dev,
return PTR_ERR(encoder);
}
 
+   memset(&info, 0, sizeof(info));
rc = msm_dp_modeset_init(priv->dp, dev, encoder);
if (rc) {
DPU_ERROR("modeset_init failed for DP, rc = %d\n", rc);
@@ -524,6 +564,14 @@ static int _dpu_kms_initialize_displayport(struct 
drm_device *dev,
}
 
priv->encoders[priv->num_encoders++] = encoder;
+
+   info.num_of_h_tiles = 1;
+   info.capabilities = MSM_DISPLAY_CAP_VID_MODE;
+   info.intf_type = encoder->encoder_type;
+   rc = dpu_encoder_setup(dev, encoder, &info);
+   if (rc)
+   DPU_ERROR("failed to setup DPU encoder %d: rc:%d\n",
+ encoder->base.id, rc);
return rc;
 }
 
@@ -726,41 +774,6 @@ static void dpu_kms_destroy(struct msm_kms *kms)
msm_kms_destroy(&dpu_kms->base);
 }
 
-sta

[PATCH v1 2/7] drm/msm/dsi: add two helper functions

2021-07-08 Thread Dmitry Baryshkov
Add two helper functions to be used by display drivers for setting up
encoders.

Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/msm/dsi/dsi.c |  7 +++
 drivers/gpu/drm/msm/dsi/dsi_manager.c | 14 ++
 drivers/gpu/drm/msm/msm_drv.h | 12 ++--
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c
index 75afc12a7b25..5201d7eb0490 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.c
+++ b/drivers/gpu/drm/msm/dsi/dsi.c
@@ -13,6 +13,13 @@ struct drm_encoder *msm_dsi_get_encoder(struct msm_dsi 
*msm_dsi)
return msm_dsi->encoder;
 }
 
+bool msm_dsi_is_cmd_mode(struct msm_dsi *msm_dsi)
+{
+   unsigned long host_flags = msm_dsi_host_get_mode_flags(msm_dsi->host);
+
+   return !(host_flags & MIPI_DSI_MODE_VIDEO);
+}
+
 static int dsi_get_phy(struct msm_dsi *msm_dsi)
 {
struct platform_device *pdev = msm_dsi->pdev;
diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c 
b/drivers/gpu/drm/msm/dsi/dsi_manager.c
index 1173663c6d5d..a81105633d3c 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_manager.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c
@@ -216,12 +216,6 @@ static int dsi_mgr_bridge_get_id(struct drm_bridge *bridge)
return dsi_bridge->id;
 }
 
-static bool dsi_mgr_is_cmd_mode(struct msm_dsi *msm_dsi)
-{
-   unsigned long host_flags = msm_dsi_host_get_mode_flags(msm_dsi->host);
-   return !(host_flags & MIPI_DSI_MODE_VIDEO);
-}
-
 void msm_dsi_manager_setup_encoder(int id)
 {
struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
@@ -231,7 +225,7 @@ void msm_dsi_manager_setup_encoder(int id)
 
if (encoder && kms->funcs->set_encoder_mode)
kms->funcs->set_encoder_mode(kms, encoder,
-dsi_mgr_is_cmd_mode(msm_dsi));
+msm_dsi_is_cmd_mode(msm_dsi));
 }
 
 static int msm_dsi_manager_panel_init(struct drm_connector *conn, u8 id)
@@ -276,7 +270,7 @@ static int msm_dsi_manager_panel_init(struct drm_connector 
*conn, u8 id)
if (other_dsi && other_dsi->panel && kms->funcs->set_split_display) {
kms->funcs->set_split_display(kms, master_dsi->encoder,
  slave_dsi->encoder,
- dsi_mgr_is_cmd_mode(msm_dsi));
+ msm_dsi_is_cmd_mode(msm_dsi));
}
 
 out:
@@ -839,3 +833,7 @@ void msm_dsi_manager_unregister(struct msm_dsi *msm_dsi)
msm_dsim->dsi[msm_dsi->id] = NULL;
 }
 
+bool msm_dsi_is_bonded_dsi(struct msm_dsi *msm_dsi)
+{
+   return IS_BONDED_DSI();
+}
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 1a48a709ffb3..e0528dfd965e 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -350,7 +350,8 @@ void __exit msm_dsi_unregister(void);
 int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev,
 struct drm_encoder *encoder);
 void msm_dsi_snapshot(struct msm_disp_state *disp_state, struct msm_dsi 
*msm_dsi);
-
+bool msm_dsi_is_cmd_mode(struct msm_dsi *msm_dsi);
+bool msm_dsi_is_bonded_dsi(struct msm_dsi *msm_dsi);
 #else
 static inline void __init msm_dsi_register(void)
 {
@@ -367,7 +368,14 @@ static inline int msm_dsi_modeset_init(struct msm_dsi 
*msm_dsi,
 static inline void msm_dsi_snapshot(struct msm_disp_state *disp_state, struct 
msm_dsi *msm_dsi)
 {
 }
-
+static inline bool msm_dsi_is_cmd_mode(struct msm_dsi *msm_dsi)
+{
+   return false;
+}
+static bool msm_dsi_is_bonded_dsi(struct msm_dsi *msm_dsi)
+{
+   return false;
+}
 #endif
 
 #ifdef CONFIG_DRM_MSM_DP
-- 
2.30.2



[PATCH v1 4/7] drm/msm/mdp5: move mdp5_encoder_set_intf_mode after msm_dsi_modeset_init

2021-07-08 Thread Dmitry Baryshkov
Move a call to mdp5_encoder_set_intf_mode() after
msm_dsi_modeset_init(), removing set_encoder_mode callback.

Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 11 +++
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c 
b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index 15aed45022bc..b3b42672b2d4 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -209,13 +209,6 @@ static int mdp5_set_split_display(struct msm_kms *kms,
  slave_encoder);
 }
 
-static void mdp5_set_encoder_mode(struct msm_kms *kms,
- struct drm_encoder *encoder,
- bool cmd_mode)
-{
-   mdp5_encoder_set_intf_mode(encoder, cmd_mode);
-}
-
 static void mdp5_kms_destroy(struct msm_kms *kms)
 {
struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
@@ -287,7 +280,6 @@ static const struct mdp_kms_funcs kms_funcs = {
.get_format  = mdp_get_format,
.round_pixclk= mdp5_round_pixclk,
.set_split_display = mdp5_set_split_display,
-   .set_encoder_mode = mdp5_set_encoder_mode,
.destroy = mdp5_kms_destroy,
 #ifdef CONFIG_DEBUG_FS
.debugfs_init= mdp5_kms_debugfs_init,
@@ -448,6 +440,9 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms,
}
 
ret = msm_dsi_modeset_init(priv->dsi[dsi_id], dev, encoder);
+   if (!ret)
+   mdp5_encoder_set_intf_mode(encoder, 
msm_dsi_is_cmd_mode(priv->dsi[dsi_id]));
+
break;
}
default:
-- 
2.30.2



[PATCH v1 7/7] drm/msm/kms: drop set_encoder_mode callback

2021-07-08 Thread Dmitry Baryshkov
set_encoder_mode callback is completely unused now. Drop it from
msm_kms_func().

Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/msm/msm_kms.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h
index 086a2d59b8c8..9484e8b62630 100644
--- a/drivers/gpu/drm/msm/msm_kms.h
+++ b/drivers/gpu/drm/msm/msm_kms.h
@@ -117,9 +117,6 @@ struct msm_kms_funcs {
struct drm_encoder *encoder,
struct drm_encoder *slave_encoder,
bool is_cmd_mode);
-   void (*set_encoder_mode)(struct msm_kms *kms,
-struct drm_encoder *encoder,
-bool cmd_mode);
/* cleanup: */
void (*destroy)(struct msm_kms *kms);
 
-- 
2.30.2



[PATCH v1 6/7] drm/msm/dsi: stop calling set_encoder_mode callback

2021-07-08 Thread Dmitry Baryshkov
None of the display drivers now implement set_encoder_mode callback.
Stop calling it from the modeset init code.

Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/msm/dsi/dsi.c |  2 --
 drivers/gpu/drm/msm/dsi/dsi.h |  1 -
 drivers/gpu/drm/msm/dsi/dsi_manager.c | 12 
 3 files changed, 15 deletions(-)

diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c
index 5201d7eb0490..77c8dba297d8 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.c
+++ b/drivers/gpu/drm/msm/dsi/dsi.c
@@ -251,8 +251,6 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct 
drm_device *dev,
goto fail;
}
 
-   msm_dsi_manager_setup_encoder(msm_dsi->id);
-
priv->bridges[priv->num_bridges++]   = msm_dsi->bridge;
priv->connectors[priv->num_connectors++] = msm_dsi->connector;
 
diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h
index 856a532850c0..e0c3c4409377 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.h
+++ b/drivers/gpu/drm/msm/dsi/dsi.h
@@ -80,7 +80,6 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id);
 struct drm_connector *msm_dsi_manager_ext_bridge_init(u8 id);
 int msm_dsi_manager_cmd_xfer(int id, const struct mipi_dsi_msg *msg);
 bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 dma_base, u32 len);
-void msm_dsi_manager_setup_encoder(int id);
 int msm_dsi_manager_register(struct msm_dsi *msm_dsi);
 void msm_dsi_manager_unregister(struct msm_dsi *msm_dsi);
 bool msm_dsi_manager_validate_current_config(u8 id);
diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c 
b/drivers/gpu/drm/msm/dsi/dsi_manager.c
index a81105633d3c..e7f4e1d8978a 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_manager.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c
@@ -216,18 +216,6 @@ static int dsi_mgr_bridge_get_id(struct drm_bridge *bridge)
return dsi_bridge->id;
 }
 
-void msm_dsi_manager_setup_encoder(int id)
-{
-   struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
-   struct msm_drm_private *priv = msm_dsi->dev->dev_private;
-   struct msm_kms *kms = priv->kms;
-   struct drm_encoder *encoder = msm_dsi_get_encoder(msm_dsi);
-
-   if (encoder && kms->funcs->set_encoder_mode)
-   kms->funcs->set_encoder_mode(kms, encoder,
-msm_dsi_is_cmd_mode(msm_dsi));
-}
-
 static int msm_dsi_manager_panel_init(struct drm_connector *conn, u8 id)
 {
struct msm_drm_private *priv = conn->dev->dev_private;
-- 
2.30.2



[PATCH v1 5/7] drm/msm/dp: stop calling set_encoder_mode callback

2021-07-08 Thread Dmitry Baryshkov
None of the display drivers now implement set_encoder_mode callback.
Stop calling it from the modeset init code.

Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/msm/dp/dp_display.c | 18 --
 1 file changed, 18 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c 
b/drivers/gpu/drm/msm/dp/dp_display.c
index 051c1be1de7e..70b319a8fe83 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -102,8 +102,6 @@ struct dp_display_private {
struct dp_display_mode dp_mode;
struct msm_dp dp_display;
 
-   bool encoder_mode_set;
-
/* wait for audio signaling */
struct completion audio_comp;
 
@@ -283,20 +281,6 @@ static void dp_display_send_hpd_event(struct msm_dp 
*dp_display)
 }
 
 
-static void dp_display_set_encoder_mode(struct dp_display_private *dp)
-{
-   struct msm_drm_private *priv = dp->dp_display.drm_dev->dev_private;
-   struct msm_kms *kms = priv->kms;
-
-   if (!dp->encoder_mode_set && dp->dp_display.encoder &&
-   kms->funcs->set_encoder_mode) {
-   kms->funcs->set_encoder_mode(kms,
-   dp->dp_display.encoder, false);
-
-   dp->encoder_mode_set = true;
-   }
-}
-
 static int dp_display_send_hpd_notification(struct dp_display_private *dp,
bool hpd)
 {
@@ -369,8 +353,6 @@ static void dp_display_host_init(struct dp_display_private 
*dp, int reset)
if (dp->usbpd->orientation == ORIENTATION_CC2)
flip = true;
 
-   dp_display_set_encoder_mode(dp);
-
dp_power_init(dp->power, flip);
dp_ctrl_host_init(dp->ctrl, flip, reset);
dp_aux_init(dp->aux);
-- 
2.30.2



Re: Aw: Re: BUG: MTK DRM/HDMI broken on 5.13 (mt7623/bpi-r2)

2021-07-08 Thread Dafna Hirschfeld

Hi

On 08.07.21 11:35, Frank Wunderlich wrote:

Hi

just a small update, added debug in the vendor-specific functions for page_flip 
and vblank and it seems they never get called

--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -87,21 +87,25 @@ static void mtk_drm_crtc_finish_page_flip(struct 
mtk_drm_crtc *mtk_crtc)
  {
 struct drm_crtc *crtc = &mtk_crtc->base;
 unsigned long flags;
-
+printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
 spin_lock_irqsave(&crtc->dev->event_lock, flags);
 drm_crtc_send_vblank_event(crtc, mtk_crtc->event);
 drm_crtc_vblank_put(crtc);
 mtk_crtc->event = NULL;
 spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
  }

  static void mtk_drm_finish_page_flip(struct mtk_drm_crtc *mtk_crtc)
  {
+printk(KERN_ALERT "DEBUG: Passed %s %d 
update:%d,needsvblank:%d\n",__FUNCTION__,__LINE__,mtk_crtc->config_updating,mtk_crtc->pending_needs_vblank);
 drm_crtc_handle_vblank(&mtk_crtc->base);
 if (!mtk_crtc->config_updating && mtk_crtc->pending_needs_vblank) {
+printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
 mtk_drm_crtc_finish_page_flip(mtk_crtc);
 mtk_crtc->pending_needs_vblank = false;
 }
+printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
  }

  static void mtk_drm_crtc_destroy(struct drm_crtc *crtc)

finish_page_flip is called by mtk_crtc_ddp_irq. this seems to be set in 
mtk_drm_crtc_enable_vblank with mtk_ddp_comp_enable_vblank. this is called 
correctly

113 static inline void mtk_ddp_comp_enable_vblank(struct mtk_ddp_comp *comp,
114   void (*vblank_cb)(void *),
115   void *vblank_cb_data)
116 {
117 printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
118 if (comp->funcs && comp->funcs->enable_vblank)
119 {
120 comp->funcs->enable_vblank(comp->dev, vblank_cb, vblank_cb_data);
121 printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
122 }
123 }

i see both messages, but mtk_crtc_ddp_irq is never called and so the other 2 
not.


Yes, In my case the irq isr is also not called after resume which cause the 
warning
even though "enable_vblank" do get called. Don't know why is that.



root@bpi-r2:~# dmesg | grep -i DEBUG
[6.433509] DEBUG: Passed mtk_drm_crtc_enable_vblank 510
[6.433530] DEBUG: Passed mtk_ddp_comp_enable_vblank 117
[6.433537] DEBUG: Passed mtk_ddp_comp_enable_vblank 121 <<<


comp->funcs->enable_vblank should be mtk_drm_crtc_enable_vblank, right?


No, this is a bit confusing , there are also the funcs of the components, see 
in file mtk_drm_ddp_comp.c
so for mt7623  it is mtk_ovl_enable_vblank.

Thanks,
Dafna



641 static const struct drm_crtc_funcs mtk_crtc_funcs = {
642 .set_config = drm_atomic_helper_set_config,
643 .page_flip  = drm_atomic_helper_page_flip,
644 .destroy= mtk_drm_crtc_destroy,
645 .reset  = mtk_drm_crtc_reset,
646 .atomic_duplicate_state = mtk_drm_crtc_duplicate_state,
647 .atomic_destroy_state   = mtk_drm_crtc_destroy_state,
648 .enable_vblank  = mtk_drm_crtc_enable_vblank, <<<
649 .disable_vblank = mtk_drm_crtc_disable_vblank,
650 };

but it looks like a recursion:
mtk_drm_crtc_enable_vblank calls mtk_ddp_comp_enable_vblank => enable_vblank 
(=mtk_drm_crtc_enable_vblank), but i see the messages not repeating

mtk_drm_crtc_enable_vblank(struct drm_crtc *crtc)
511 mtk_ddp_comp_enable_vblank(comp, mtk_crtc_ddp_irq, &mtk_crtc->base);

113 static inline void mtk_ddp_comp_enable_vblank(struct mtk_ddp_comp *comp,
114   void (*vblank_cb)(void *),
115   void *vblank_cb_data)
116 {
118 if (comp->funcs && comp->funcs->enable_vblank)
120 comp->funcs->enable_vblank(comp->dev, vblank_cb, vblank_cb_data);

but params do not match...comp->funcs->enable_vblank takes 3 arguments but 
mtk_drm_crtc_enable_vblank has only one. Something I miss here...

i guess not, but is watchdog somehow involved? i ask because i see this on 
reboot/poweroff:

"watchdog: watchdog0: watchdog did not stop!"

i see this with my 5.13, 5.12-drm (5.12.0+mtk/core drm-patches) and 5.12.14 too 
(hdmi is working there), but not 5.12.0!
that means something in drm-patches (mtk/core) breaks watchdog. maybe the 
recursion mentioned above?

regards Frank



Gesendet: Donnerstag, 08. Juli 2021 um 09:22 Uhr
Von: "Dafna Hirschfeld" 




Hi Frank,


On 06.07.21 11:54, Frank Wunderlich wrote:

Hi,

i've noticed that HDMI is broken at least on my board (Bananapi-r2,mt7623) on 
5.13.

after some research i noticed that it is working till

commit 2e477391522354e763aa62ee3e281c1ad9e8eb1b
Author: Dafna Hirschfeld 




We also encountered that warning on mt8173 device - Acer Chromebook R13. It 
hap

Re: [PATCH 4/4] drm/msm: always wait for the exclusive fence

2021-07-08 Thread Christian König

Am 03.07.21 um 01:01 schrieb Daniel Vetter:

On Fri, Jul 02, 2021 at 01:16:42PM +0200, Christian König wrote:

Drivers also need to to sync to the exclusive fence when
a shared one is present.

Completely untested since the driver won't even compile on !ARM.

It's really not that hard to set up a cross-compiler, reasonable distros
have that now all packages. Does explain though why you tend to break the
arm build with drm-misc patches.


Well having proper COMPILE_TEST handling in kconfig would be even better.

Otherwise everybody needs to cross-compile for ARM, ARM64 (with all the 
variants, e.g. BCM, S3C64XX, S5PV210, KEEMBAY, ZYNQMP etc etc), MIPS and 
so on.


We have tons of non-x86 drivers, but MSM is the only one which is 
painful to get to compile test.


Christian.



Please fix this.


Signed-off-by: Christian König 

Reviewed-by: Daniel Vetter 

---
  drivers/gpu/drm/msm/msm_gem.c | 16 +++-
  1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index a94a43de95ef..72a07e311de3 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -817,17 +817,15 @@ int msm_gem_sync_object(struct drm_gem_object *obj,
struct dma_fence *fence;
int i, ret;
  
-	fobj = dma_resv_shared_list(obj->resv);

-   if (!fobj || (fobj->shared_count == 0)) {
-   fence = dma_resv_excl_fence(obj->resv);
-   /* don't need to wait on our own fences, since ring is fifo */
-   if (fence && (fence->context != fctx->context)) {
-   ret = dma_fence_wait(fence, true);
-   if (ret)
-   return ret;
-   }
+   fence = dma_resv_excl_fence(obj->resv);
+   /* don't need to wait on our own fences, since ring is fifo */
+   if (fence && (fence->context != fctx->context)) {
+   ret = dma_fence_wait(fence, true);
+   if (ret)
+   return ret;
}
  
+	fobj = dma_resv_shared_list(obj->resv);

if (!exclusive || !fobj)
return 0;
  
--

2.25.1





[PATCH] MAINTAINERS: Add Raphael Gallais-Pou as STM32 DRM maintainer

2021-07-08 Thread Raphael GALLAIS-POU - foss
Add Raphael Gallais-Pou as STM32 DRM maintainer.

Signed-off-by: Raphael Gallais-Pou 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0f1171ceaf8b..4fa3bfc00f57 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6165,6 +6165,7 @@ DRM DRIVERS FOR STM
 M: Yannick Fertre 
 M: Philippe Cornu 
 M: Benjamin Gaignard 
+M: Raphael Gallais-Pou 
 L: dri-devel@lists.freedesktop.org
 S: Maintained
 T: git git://anongit.freedesktop.org/drm/drm-misc
-- 
2.17.1


[PATCH] dma-heap: Let dma heap use dma_map_attrs to map & unmap iova

2021-07-08 Thread guangming.cao
From: Guangming Cao 

dma-heap users currently can't bypass cache sync when mapping/unmapping an
iova through a dma heap, although they can already do so for allocation by
passing DMA_ATTR_SKIP_CPU_SYNC in dma_alloc_attrs.

For consistency, on the dma-heap side also honor
dma_buf_attachment.dma_map_attrs when mapping and unmapping the iova.

Signed-off-by: Guangming Cao 
---
 drivers/dma-buf/heaps/cma_heap.c| 6 --
 drivers/dma-buf/heaps/system_heap.c | 6 --
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c
index 0c05b79870f9..2c9feb3bfc3e 100644
--- a/drivers/dma-buf/heaps/cma_heap.c
+++ b/drivers/dma-buf/heaps/cma_heap.c
@@ -99,9 +99,10 @@ static struct sg_table *cma_heap_map_dma_buf(struct 
dma_buf_attachment *attachme
 {
struct dma_heap_attachment *a = attachment->priv;
struct sg_table *table = &a->table;
+   int attrs = attachment->dma_map_attrs;
int ret;
 
-   ret = dma_map_sgtable(attachment->dev, table, direction, 0);
+   ret = dma_map_sgtable(attachment->dev, table, direction, attrs);
if (ret)
return ERR_PTR(-ENOMEM);
a->mapped = true;
@@ -113,9 +114,10 @@ static void cma_heap_unmap_dma_buf(struct 
dma_buf_attachment *attachment,
   enum dma_data_direction direction)
 {
struct dma_heap_attachment *a = attachment->priv;
+   int attrs = attachment->dma_map_attrs;
 
a->mapped = false;
-   dma_unmap_sgtable(attachment->dev, table, direction, 0);
+   dma_unmap_sgtable(attachment->dev, table, direction, attrs);
 }
 
 static int cma_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
diff --git a/drivers/dma-buf/heaps/system_heap.c 
b/drivers/dma-buf/heaps/system_heap.c
index 23a7e74ef966..fc7b1e02988e 100644
--- a/drivers/dma-buf/heaps/system_heap.c
+++ b/drivers/dma-buf/heaps/system_heap.c
@@ -130,9 +130,10 @@ static struct sg_table *system_heap_map_dma_buf(struct 
dma_buf_attachment *attac
 {
struct dma_heap_attachment *a = attachment->priv;
struct sg_table *table = a->table;
+   int attrs = attachment->dma_map_attrs;
int ret;
 
-   ret = dma_map_sgtable(attachment->dev, table, direction, 0);
+   ret = dma_map_sgtable(attachment->dev, table, direction, attrs);
if (ret)
return ERR_PTR(ret);
 
@@ -145,9 +146,10 @@ static void system_heap_unmap_dma_buf(struct 
dma_buf_attachment *attachment,
  enum dma_data_direction direction)
 {
struct dma_heap_attachment *a = attachment->priv;
+   int attrs = attachment->dma_map_attrs;
 
a->mapped = false;
-   dma_unmap_sgtable(attachment->dev, table, direction, 0);
+   dma_unmap_sgtable(attachment->dev, table, direction, attrs);
 }
 
 static int system_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
-- 
2.17.1


Re: [PATCH 06/7] drm/i915/guc: Optimize CTB writes and reads

2021-07-08 Thread Michal Wajdeczko



On 08.07.2021 01:25, Matthew Brost wrote:
> CTB writes are now in the path of command submission and should be
> optimized for performance. Rather than reading CTB descriptor values
> (e.g. head, tail) which could result in accesses across the PCIe bus,
> store shadow local copies and only read/write the descriptor values when
> absolutely necessary. Also store the current space in the each channel
> locally.
> 
> v2:
>  (Michal)
>   - Add additional sanity checks for head / tail pointers
>   - Use GUC_CTB_HDR_LEN rather than magic 1
> v3:
>  (Michal / John H)
>   - Drop redundant check of head value
> v4:
>  (John H)
>   - Drop redundant checks of tail / head values
> v5:
>  (Michal)
>   - Address more nits
> 
> Signed-off-by: John Harrison 
> Signed-off-by: Matthew Brost 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 92 +++
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  6 ++
>  2 files changed, 66 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> index db3e85b89573..d552d3016779 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> @@ -130,6 +130,10 @@ static void guc_ct_buffer_desc_init(struct 
> guc_ct_buffer_desc *desc)
>  static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb)
>  {
>   ctb->broken = false;
> + ctb->tail = 0;
> + ctb->head = 0;
> + ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
> +
>   guc_ct_buffer_desc_init(ctb->desc);
>  }
>  
> @@ -383,10 +387,8 @@ static int ct_write(struct intel_guc_ct *ct,
>  {
>   struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>   struct guc_ct_buffer_desc *desc = ctb->desc;
> - u32 head = desc->head;
> - u32 tail = desc->tail;
> + u32 tail = ctb->tail;
>   u32 size = ctb->size;
> - u32 used;
>   u32 header;
>   u32 hxg;
>   u32 type;
> @@ -396,25 +398,22 @@ static int ct_write(struct intel_guc_ct *ct,
>   if (unlikely(desc->status))
>   goto corrupted;
>  
> - if (unlikely((tail | head) >= size)) {
> - CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
> -  head, tail, size);
> + GEM_BUG_ON(tail > size);
> +
> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
> + if (unlikely(tail != READ_ONCE(desc->tail))) {
> + CT_ERROR(ct, "Tail was modified %u != %u\n",
> +  desc->tail, tail);
> + desc->status |= GUC_CTB_STATUS_MISMATCH;
> + goto corrupted;
> + }
> + if (unlikely(desc->head >= size)) {

READ_ONCE wouldn't hurt

> + CT_ERROR(ct, "Invalid head offset %u >= %u)\n",
> +  desc->head, size);
>   desc->status |= GUC_CTB_STATUS_OVERFLOW;
>   goto corrupted;
>   }
> -
> - /*
> -  * tail == head condition indicates empty. GuC FW does not support
> -  * using up the entire buffer to get tail == head meaning full.
> -  */
> - if (tail < head)
> - used = (size - head) + tail;
> - else
> - used = tail - head;
> -
> - /* make sure there is a space including extra dw for the header */
> - if (unlikely(used + len + GUC_CTB_HDR_LEN >= size))
> - return -ENOSPC;
> +#endif
>  
>   /*
>* dw0: CT header (including fence)
> @@ -452,6 +451,10 @@ static int ct_write(struct intel_guc_ct *ct,
>*/
>   write_barrier(ct);
>  
> + /* update local copies */
> + ctb->tail = tail;
> + ctb->space -= len + GUC_CTB_HDR_LEN;

it looks that we rely on previous call to h2g_has_room(), but maybe for
completeness we should have sanity check in this function as well:

GEM_BUG_ON(ctb->space < len + HDR_LEN);

not a blocker, other LGTM,

Reviewed-by: Michal Wajdeczko 

Michal

> +
>   /* now update descriptor */
>   WRITE_ONCE(desc->tail, tail);
>  
> @@ -469,7 +472,7 @@ static int ct_write(struct intel_guc_ct *ct,
>   * @req: pointer to pending request
>   * @status:  placeholder for status
>   *
> - * For each sent request, Guc shall send bac CT response message.
> + * For each sent request, GuC shall send back CT response message.
>   * Our message handler will update status of tracked request once
>   * response message with given fence is received. Wait here and
>   * check for valid response status value.
> @@ -525,24 +528,36 @@ static inline bool ct_deadlocked(struct intel_guc_ct 
> *ct)
>   return ret;
>  }
>  
> -static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 len_dw)
> +static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
>  {
> + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
>   struct guc_ct_buffer_desc *desc = ctb->desc;
> - u32 head = READ_ONCE(desc->head);
> + u32 head;
>   u32 space;
>  
> - space = CIRC_SPACE(desc->tail, head, ctb->size);
> + if (ctb->space >= l

Re: [PATCH] drm/meson: Convert to Linux IRQ interfaces

2021-07-08 Thread Martin Blumenstingl
Hi Thomas,

On Tue, Jul 6, 2021 at 9:45 AM Thomas Zimmermann  wrote:
>
> Drop the DRM IRQ midlayer in favor of Linux IRQ interfaces. DRM's
> IRQ helpers are mostly useful for UMS drivers. Modern KMS drivers
> don't benefit from using it.
>
> Signed-off-by: Thomas Zimmermann 
Tested-by: Martin Blumenstingl 
and also (although I am no drm subsystem expert):
Reviewed-by: Martin Blumenstingl 

[...]
> -   ret = drm_irq_install(drm, priv->vsync_irq);
> +   ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, 
> drm);
I'd like to use dev_name(dev) instead of drm->driver->name in the
future as that'll make it much easier to identify the corresponding
IRQ in /proc/interrupts for example
your patch makes this possible - thanks for this!


Best regards,
Martin


Re: [PATCH 1/2] drm/gud: Add Raspberry Pi Pico ID

2021-07-08 Thread Noralf Trønnes



Den 03.07.2021 21.24, skrev Peter Stuge:
> Hi Noralf,
> 
> Noralf Trønnes wrote:
>> Add VID/PID for the Raspberry Pi Pico implementation.
>> Source: https://github.com/notro/gud-pico
>>
>> +++ b/drivers/gpu/drm/gud/gud_drv.c
>> @@ -660,6 +660,7 @@ static int gud_resume(struct usb_interface *intf)
>>  
>>  static const struct usb_device_id gud_id_table[] = {
>>  { USB_DEVICE_INTERFACE_CLASS(0x1d50, 0x614d, USB_CLASS_VENDOR_SPEC) },
>> +{ USB_DEVICE_INTERFACE_CLASS(0x16d0, 0x10a9, USB_CLASS_VENDOR_SPEC) },
>>  { }
>>  };
> 
> A VID/PID isn't neccessarily tied to one implementation; as long as an
> implementation is in fact compatible with the driver I consider it okay
> to reuse a VID/PID, and the 0x1d50 conditions are met by gud-pico too.
> That said, there's no harm in adding another id. :)
> 
> Reviewed-by: Peter Stuge 
> 

Both patches applied, thanks for reviewing.

Noralf.


Re: [PATCH] drm/meson: Convert to Linux IRQ interfaces

2021-07-08 Thread Thomas Zimmermann

Hi

Am 08.07.21 um 15:31 schrieb Martin Blumenstingl:

Hi Thomas,

On Tue, Jul 6, 2021 at 9:45 AM Thomas Zimmermann  wrote:


Drop the DRM IRQ midlayer in favor of Linux IRQ interfaces. DRM's
IRQ helpers are mostly useful for UMS drivers. Modern KMS drivers
don't benefit from using it.

Signed-off-by: Thomas Zimmermann 

Tested-by: Martin Blumenstingl 
and also (although I am no drm subsystem expert):
Reviewed-by: Martin Blumenstingl 



Oh, just when I committed the patch. But thanks for your reply.



[...]

-   ret = drm_irq_install(drm, priv->vsync_irq);
+   ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, 
drm);

I'd like to use dev_name(dev) instead of drm->driver->name in the
future as that'll make it much easier to identify the corresponding
IRQ in /proc/interrupts for example
your patch makes this possible - thanks for this!


I also thought about this, but every driver in DRM and apparently most 
drivers in general pass the driver's name here. I think the change would 
make a lot of sense, but it's probably worth a kernel-wide effort.


Best regards
Thomas




Best regards,
Martin



--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Felix Imendörffer



OpenPGP_signature
Description: OpenPGP digital signature


Aw: Re: Re: BUG: MTK DRM/HDMI broken on 5.13 (mt7623/bpi-r2)

2021-07-08 Thread Frank Wunderlich
> Gesendet: Donnerstag, 08. Juli 2021 um 14:30 Uhr
> Von: "Dafna Hirschfeld" 
> > i see both messages, but mtk_crtc_ddp_irq is never called and so the other 
> > 2 not.
>
> Yes, In my case the irq isr is also not called after resume which cause the 
> warning
> even though "enable_vblank" do get called. Don't know why is that.


> > comp->funcs->enable_vblank should be mtk_drm_crtc_enable_vblank, right?
>
> No, this is a bit confusing , there are also the funcs of the components, see 
> in file mtk_drm_ddp_comp.c
> so for mt7623  it is mtk_ovl_enable_vblank.

thanks for pointing to this. in this function another struct is filled with the 
callback+data, and this callback seems to be called mtk_disp_ovl_irq_handler 
whose name also suggests an irq as trigger

412 ret = devm_request_irq(dev, irq, mtk_disp_ovl_irq_handler,
413IRQF_TRIGGER_NONE, dev_name(dev), priv);
414 if (ret < 0) {
415 dev_err(dev, "Failed to request irq %d: %d\n", irq, ret);
416 return ret;
417 }

as i don't see this error in dmesg, i guess the registration was successful. 
added again some debug and it looks like the interrupt callback 
(mtk_disp_ovl_irq_handler) is not called

[5.125002] DEBUG: Passed mtk_disp_ovl_probe 416 int reg:0
[6.344029] DEBUG: Passed mtk_drm_crtc_enable_vblank 510
[6.344051] DEBUG: Passed mtk_ddp_comp_enable_vblank 117
[6.344057] DEBUG: Passed mtk_ovl_enable_vblank 107
[6.344062] DEBUG: Passed mtk_ovl_enable_vblank 112
[6.344066] DEBUG: Passed mtk_ddp_comp_enable_vblank 121

--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
@@ -86,6 +86,7 @@ static irqreturn_t mtk_disp_ovl_irq_handler(int irq, void 
*dev_id)
 {
struct mtk_disp_ovl *priv = dev_id;

+printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
/* Clear frame completion interrupt */
writel(0x0, priv->regs + DISP_REG_OVL_INTSTA);

@@ -93,6 +94,7 @@ static irqreturn_t mtk_disp_ovl_irq_handler(int irq, void 
*dev_id)
return IRQ_NONE;

priv->vblank_cb(priv->vblank_cb_data);
+printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);

return IRQ_HANDLED;
 }
@@ -102,11 +104,12 @@ void mtk_ovl_enable_vblank(struct device *dev,
   void *vblank_cb_data)
 {
struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
-
+printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
ovl->vblank_cb = vblank_cb;
ovl->vblank_cb_data = vblank_cb_data;
writel(0x0, ovl->regs + DISP_REG_OVL_INTSTA);
writel_relaxed(OVL_FME_CPL_INT, ovl->regs + DISP_REG_OVL_INTEN);
+printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__);
 }

 void mtk_ovl_disable_vblank(struct device *dev)
@@ -410,6 +413,7 @@ static int mtk_disp_ovl_probe(struct platform_device *pdev)

ret = devm_request_irq(dev, irq, mtk_disp_ovl_irq_handler,
   IRQF_TRIGGER_NONE, dev_name(dev), priv);
+printk(KERN_ALERT "DEBUG: Passed %s %d int 
reg:%d\n",__FUNCTION__,__LINE__,ret);
if (ret < 0) {
dev_err(dev, "Failed to request irq %d: %d\n", irq, ret);
return ret;


how can we trace this further? maybe watchdog related?

> >
> > "watchdog: watchdog0: watchdog did not stop!"
> >
> > i see this with my 5.13, 5.12-drm (5.12.0+mtk/core drm-patches) and 5.12.14 
> > too (hdmi is working there), but not 5.12.0!
> > that means something in drm-patches (mtk/core) breaks watchdog. maybe the 
> > recursion mentioned above?



[PATCH] drm/tegra: gr2d: Explicitly control module reset

2021-07-08 Thread Thierry Reding
From: Thierry Reding 

As of commit 4782c0a5dd88 ("clk: tegra: Don't deassert reset on enabling
clocks"), module resets are no longer automatically deasserted when the
module clock is enabled. To make sure that the gr2d module continues to
work, we need to explicitly control the module reset.

Fixes: 4782c0a5dd88 ("clk: tegra: Don't deassert reset on enabling clocks")
Signed-off-by: Thierry Reding 
---
 drivers/gpu/drm/tegra/gr2d.c | 33 +++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index de288cba3905..ba3722f1b865 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -4,9 +4,11 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 
 #include "drm.h"
 #include "gem.h"
@@ -19,6 +21,7 @@ struct gr2d_soc {
 struct gr2d {
struct tegra_drm_client client;
struct host1x_channel *channel;
+   struct reset_control *rst;
struct clk *clk;
 
const struct gr2d_soc *soc;
@@ -208,6 +211,12 @@ static int gr2d_probe(struct platform_device *pdev)
if (!syncpts)
return -ENOMEM;
 
+   gr2d->rst = devm_reset_control_get(dev, NULL);
+   if (IS_ERR(gr2d->rst)) {
+   dev_err(dev, "cannot get reset\n");
+   return PTR_ERR(gr2d->rst);
+   }
+
gr2d->clk = devm_clk_get(dev, NULL);
if (IS_ERR(gr2d->clk)) {
dev_err(dev, "cannot get clock\n");
@@ -220,6 +229,14 @@ static int gr2d_probe(struct platform_device *pdev)
return err;
}
 
+   usleep_range(2000, 4000);
+
+   err = reset_control_deassert(gr2d->rst);
+   if (err < 0) {
+   dev_err(dev, "failed to deassert reset: %d\n", err);
+   goto disable_clk;
+   }
+
INIT_LIST_HEAD(&gr2d->client.base.list);
gr2d->client.base.ops = &gr2d_client_ops;
gr2d->client.base.dev = dev;
@@ -234,8 +251,7 @@ static int gr2d_probe(struct platform_device *pdev)
err = host1x_client_register(&gr2d->client.base);
if (err < 0) {
dev_err(dev, "failed to register host1x client: %d\n", err);
-   clk_disable_unprepare(gr2d->clk);
-   return err;
+   goto assert_rst;
}
 
/* initialize address register map */
@@ -245,6 +261,13 @@ static int gr2d_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, gr2d);
 
return 0;
+
+assert_rst:
+   (void)reset_control_assert(gr2d->rst);
+disable_clk:
+   clk_disable_unprepare(gr2d->clk);
+
+   return err;
 }
 
 static int gr2d_remove(struct platform_device *pdev)
@@ -259,6 +282,12 @@ static int gr2d_remove(struct platform_device *pdev)
return err;
}
 
+   err = reset_control_assert(gr2d->rst);
+   if (err < 0)
+   dev_err(&pdev->dev, "failed to assert reset: %d\n", err);
+
+   usleep_range(2000, 4000);
+
clk_disable_unprepare(gr2d->clk);
 
return 0;
-- 
2.32.0



Re: [PATCH 2/2 v3] drm/panel: ws2401: Add driver for WideChips WS2401

2021-07-08 Thread Noralf Trønnes



Den 08.07.2021 01.43, skrev Linus Walleij:
> This adds a driver for panels based on the WideChips WS2401 display
> controller. This display controller is used in the Samsung LMS380KF01
> display found in the Samsung GT-I8160 (Codina) mobile phone and
> possibly others.
> 
> As is common with Samsung displays manufacturer commands are necessary
> to configure the display to a working state.
> 
> The display optionally supports internal backlight control, but can
> also use an external backlight.
> 
> This driver re-uses the DBI infrastructure to communicate with the
> display.
> 
> Cc: phone-de...@vger.kernel.org
> Cc: Douglas Anderson 
> Cc: Noralf Trønnes 
> Signed-off-by: Linus Walleij 
> ---

Reviewed-by: Noralf Trønnes 


Re: [PATCH] drm/tegra: gr2d: Explicitly control module reset

2021-07-08 Thread Dmitry Osipenko
08.07.2021 17:37, Thierry Reding пишет:
> From: Thierry Reding 
> 
> As of commit 4782c0a5dd88 ("clk: tegra: Don't deassert reset on enabling
> clocks"), module resets are no longer automatically deasserted when the
> module clock is enabled. To make sure that the gr2d module continues to
> work, we need to explicitly control the module reset.
> 
> Fixes: 4782c0a5dd88 ("clk: tegra: Don't deassert reset on enabling clocks")
> Signed-off-by: Thierry Reding 

On which board do see this problem?

TRM says that 2d should be in reset by default, but somehow it's not a
problem on devices that use fastboot.. why would it touch the 2d reset?

> ---
>  drivers/gpu/drm/tegra/gr2d.c | 33 +++--
>  1 file changed, 31 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
> index de288cba3905..ba3722f1b865 100644
> --- a/drivers/gpu/drm/tegra/gr2d.c
> +++ b/drivers/gpu/drm/tegra/gr2d.c
> @@ -4,9 +4,11 @@
>   */
>  
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "drm.h"
>  #include "gem.h"
> @@ -19,6 +21,7 @@ struct gr2d_soc {
>  struct gr2d {
>   struct tegra_drm_client client;
>   struct host1x_channel *channel;
> + struct reset_control *rst;

Unused variable?

>   struct clk *clk;
>  
>   const struct gr2d_soc *soc;
> @@ -208,6 +211,12 @@ static int gr2d_probe(struct platform_device *pdev)
>   if (!syncpts)
>   return -ENOMEM;
>  
> + gr2d->rst = devm_reset_control_get(dev, NULL);
> + if (IS_ERR(gr2d->rst)) {
> + dev_err(dev, "cannot get reset\n");
> + return PTR_ERR(gr2d->rst);
> + }
> +
>   gr2d->clk = devm_clk_get(dev, NULL);
>   if (IS_ERR(gr2d->clk)) {
>   dev_err(dev, "cannot get clock\n");
> @@ -220,6 +229,14 @@ static int gr2d_probe(struct platform_device *pdev)
>   return err;
>   }
>  
> + usleep_range(2000, 4000);
> +
> + err = reset_control_deassert(gr2d->rst);
> + if (err < 0) {
> + dev_err(dev, "failed to deassert reset: %d\n", err);
> + goto disable_clk;
> + }
> +
>   INIT_LIST_HEAD(&gr2d->client.base.list);
>   gr2d->client.base.ops = &gr2d_client_ops;
>   gr2d->client.base.dev = dev;
> @@ -234,8 +251,7 @@ static int gr2d_probe(struct platform_device *pdev)
>   err = host1x_client_register(&gr2d->client.base);
>   if (err < 0) {
>   dev_err(dev, "failed to register host1x client: %d\n", err);
> - clk_disable_unprepare(gr2d->clk);
> - return err;
> + goto assert_rst;
>   }
>  
>   /* initialize address register map */
> @@ -245,6 +261,13 @@ static int gr2d_probe(struct platform_device *pdev)
>   platform_set_drvdata(pdev, gr2d);
>  
>   return 0;
> +
> +assert_rst:
> + (void)reset_control_assert(gr2d->rst);

(void)?


Re: [PATCH] drm/tegra: gr2d: Explicitly control module reset

2021-07-08 Thread Dmitry Osipenko
08.07.2021 18:13, Dmitry Osipenko пишет:
>>  #include "drm.h"
>>  #include "gem.h"
>> @@ -19,6 +21,7 @@ struct gr2d_soc {
>>  struct gr2d {
>>  struct tegra_drm_client client;
>>  struct host1x_channel *channel;
>> +struct reset_control *rst;
> Unused variable?

Ah, I haven't noticed that it's struct. Looks okay.


Aw: Re: BUG: MTK DRM/HDMI broken on 5.13 (mt7623/bpi-r2)

2021-07-08 Thread Frank Wunderlich
> Gesendet: Donnerstag, 08. Juli 2021 um 11:35 Uhr
> Von: "Frank Wunderlich" 
> i guess not, but is watchdog somehow involved? i ask because i see this on 
> reboot/poweroff:
>
> "watchdog: watchdog0: watchdog did not stop!"
>
> i see this with my 5.13, 5.12-drm (5.12.0+mtk/core drm-patches) and 5.12.14 
> too (hdmi is working there), but not 5.12.0!
> that means something in drm-patches (mtk/core) breaks watchdog. maybe the 
> recursion mentioned above?

have to correct me: 5.12.0 shows this error too, so error not caused by 
drm-patches, but i guess unrelated to the possible irq issue causing hdmi not 
working on 5.13 (wait-for-vblank/page_flip tracebacks)

i'm not aware who else is involved in the problem, so i want to avoid sending 
people the wrong way :)

regards Frank


[PATCH 00/30] drm/i915/gem: ioctl clean-ups (v9)

2021-07-08 Thread Jason Ekstrand
Overview:
-

This patch series attempts to clean up some of the IOCTL mess we've created
over the last few years.  The most egregious bit being context mutability.
In summary, this series:

 1. Drops two never-used context params: RINGSIZE and NO_ZEROMAP
 2. Drops the entire CONTEXT_CLONE API
 3. Implements SINGLE_TIMELINE with a syncobj instead of actually sharing
intel_timeline between engines.
 4. Adds a few sanity restrictions to the balancing/bonding API.
 5. Implements a proto-ctx mechanism so that the engine set and VM can only
be set early on in the lifetime of a context, before anything ever
executes on it.  This effectively makes the VM and engine set
immutable.

This series has been tested with IGT as well as the Iris, ANV, and the
Intel media driver doing an 8K decode (this uses bonding/balancing).  I've
also done quite a bit of git archeology to ensure that nothing in here will
break anything that's already shipped at some point in history.  It's
possible I've missed something, but I've dug quite a bit.


Details and motivation:
---

In very broad strokes, there's an effort going on right now within Intel to
try and clean up and simplify i915 anywhere we can.  We obviously don't
want to break any shipping userspace but, as can be seen by this series,
there's a lot i915 theoretically supports which userspace doesn't actually
need.  Some of this, like the two context params used here, were simply
oversights where we went through the usual API review process and merged
the i915 bits but the userspace bits never landed for some reason.

Not all are so innocent, however.  For instance, there's an entire context
cloning API which allows one to create a context with certain parameters
"cloned" from some other context.  This entire API has never been used by
any userspace except IGT and there were never patches to any other
userspace to use it.  It never should have landed.  Also, when we added
support for setting explicit engine sets and sharing VMs across contexts,
people decided to do so via SET_CONTEXT_PARAM.  While this allowed them to
re-use existing API, it did so at the cost of making those states mutable
which leads to a plethora of potential race conditions.  There were even
IGT tests merged to cover some of theses:

 - gem_vm_create@async-destroy and gem_vm_create@destroy-race which test
   swapping out the VM on a running context.

 - gem_ctx_persistence@replace* which test whether a client can escape a
   non-persistent context by submitting a hanging batch and then swapping
   out the engine set before the hang is detected.

 - api_intel_bb@bb-with-vm which tests that intel_bb_assign_vm works
   properly.  This API is never used by any other IGT test.

There is also an entire deferred flush and set state framework in
i915_gem_context.c which exists for safely swapping out the VM while there
is work in-flight on a context.

So, clearly people knew that this API was inherently racy and difficult to
implement but they landed it anyway.  Why?  The best explanation I've been
given is because it makes the API more "unified" or "symmetric" for this
stuff to go through SET_CONTEXT_PARAM.  It's not because any userspace
actually wants to be able to swap out the VM or the set of engines on a
running context.  That would be utterly insane.

This patch series cleans up this particular mess by introducing the concept
of a i915_gem_proto_context data structure which contains context creation
information.  When you initially call GEM_CONTEXT_CREATE, a proto-context
is created instead of an actual context.  Then, the first time something is
done on the context besides SET_CONTEXT_PARAM, an actual context is
created.  This allows us to keep the old drivers which use
SET_CONTEXT_PARAM to set up the engine set (see also media) while ensuring
that, once you have an i915_gem_context, the VM and the engine set are
immutable state.

Eventually, there are more clean-ups I'd like to do on top of this which
should make working with contexts inside i915 simpler and safer:

 1. Move the GEM handle -> vma LUT from i915_gem_context into either
i915_ppgtt or drm_i915_file_private depending on whether or not the
hardware has a full PPGTT.

 2. Move the delayed context destruction code into intel_context or a
per-engine wrapper struct rather than i915_gem_context.

 3. Get rid of the separation between context close and context destroy

 4. Get rid of the RCU on i915_gem_context

However, these should probably be done as a separate patch series as this
one is already starting to get longish, especially if you consider the 89
IGT patches that go along with it.

Test-with: 20210707210215.351483-1-ja...@jlekstrand.net

Jason Ekstrand (30):
  drm/i915: Drop I915_CONTEXT_PARAM_RINGSIZE
  drm/i915: Stop storing the ring size in the ring pointer (v3)
  drm/i915: Drop I915_CONTEXT_PARAM_NO_ZEROMAP
  drm/i915/gem: Set the watchdog timeout directly in
intel_context_s

[PATCH 01/30] drm/i915: Drop I915_CONTEXT_PARAM_RINGSIZE

2021-07-08 Thread Jason Ekstrand
This reverts commit 88be76cdafc7 ("drm/i915: Allow userspace to specify
ringsize on construction").  This API was originally added for OpenCL
but the compute-runtime PR has sat open for a year without action so we
can still pull it out if we want.  I argue we should drop it for three
reasons:

 1. If the compute-runtime PR has sat open for a year, this clearly
isn't that important.

 2. It's a very leaky API.  Ring size is an implementation detail of the
current execlist scheduler and really only makes sense there.  It
can't apply to the older ring-buffer scheduler on pre-execlist
hardware because that's shared across all contexts and it won't
apply to the GuC scheduler that's in the pipeline.

 3. Having userspace set a ring size in bytes is a bad solution to the
problem of having too small a ring.  There is no way that userspace
has the information to know how to properly set the ring size so
it's just going to detect the feature and always set it to the
maximum of 512K.  This is what the compute-runtime PR does.  The
scheduler in i915, on the other hand, does have the information to
make an informed choice.  It could detect if the ring size is a
problem and grow it itself.  Or, if that's too hard, we could just
increase the default size from 16K to 32K or even 64K instead of
relying on userspace to do it.

Let's drop this API for now and, if someone decides they really care
about solving this problem, they can do it properly.

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/Makefile |  1 -
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 85 +--
 drivers/gpu/drm/i915/gt/intel_context_param.c | 63 --
 drivers/gpu/drm/i915/gt/intel_context_param.h |  3 -
 include/uapi/drm/i915_drm.h   | 20 +
 5 files changed, 4 insertions(+), 168 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/gt/intel_context_param.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 01f28ad5ea578..10b3bb6207bab 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -89,7 +89,6 @@ gt-y += \
gt/gen8_ppgtt.o \
gt/intel_breadcrumbs.o \
gt/intel_context.o \
-   gt/intel_context_param.o \
gt/intel_context_sseu.o \
gt/intel_engine_cs.o \
gt/intel_engine_heartbeat.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 7720b8c22c816..ddc3cc3f8f092 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1334,63 +1334,6 @@ static int set_ppgtt(struct drm_i915_file_private 
*file_priv,
return err;
 }
 
-static int __apply_ringsize(struct intel_context *ce, void *sz)
-{
-   return intel_context_set_ring_size(ce, (unsigned long)sz);
-}
-
-static int set_ringsize(struct i915_gem_context *ctx,
-   struct drm_i915_gem_context_param *args)
-{
-   if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
-   return -ENODEV;
-
-   if (args->size)
-   return -EINVAL;
-
-   if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE))
-   return -EINVAL;
-
-   if (args->value < I915_GTT_PAGE_SIZE)
-   return -EINVAL;
-
-   if (args->value > 128 * I915_GTT_PAGE_SIZE)
-   return -EINVAL;
-
-   return context_apply_all(ctx,
-__apply_ringsize,
-__intel_context_ring_size(args->value));
-}
-
-static int __get_ringsize(struct intel_context *ce, void *arg)
-{
-   long sz;
-
-   sz = intel_context_get_ring_size(ce);
-   GEM_BUG_ON(sz > INT_MAX);
-
-   return sz; /* stop on first engine */
-}
-
-static int get_ringsize(struct i915_gem_context *ctx,
-   struct drm_i915_gem_context_param *args)
-{
-   int sz;
-
-   if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
-   return -ENODEV;
-
-   if (args->size)
-   return -EINVAL;
-
-   sz = context_apply_all(ctx, __get_ringsize, NULL);
-   if (sz < 0)
-   return sz;
-
-   args->value = sz;
-   return 0;
-}
-
 int
 i915_gem_user_to_context_sseu(struct intel_gt *gt,
  const struct drm_i915_gem_context_param_sseu 
*user,
@@ -2036,11 +1979,8 @@ static int ctx_setparam(struct drm_i915_file_private 
*fpriv,
ret = set_persistence(ctx, args);
break;
 
-   case I915_CONTEXT_PARAM_RINGSIZE:
-   ret = set_ringsize(ctx, args);
-   break;
-
case I915_CONTEXT_PARAM_BAN_PERIOD:
+   case I915_CONTEXT_PARAM_RINGSIZE:
default:
ret = -EINVAL;
break;
@@ -2068,18 +2008,6 @@ static int create_setparam(struct i915_user_extension 
__user *ext, void *data)
return ctx_setparam(arg->fpr

[PATCH 02/30] drm/i915: Stop storing the ring size in the ring pointer (v3)

2021-07-08 Thread Jason Ekstrand
Previously, we were storing the ring size in the ring pointer before it
was actually allocated.  We would then guard setting the ring size on
checking for CONTEXT_ALLOC_BIT.  This is error-prone at best and really
only saves us a few bytes on something that already burns at least 4K.
Instead, this patch adds a new ring_size field and makes everything use
that.

v2 (Daniel Vetter):
 - Replace 512 * SZ_4K with SZ_2M

v2 (Jason Ekstrand):
 - Rebase on top of page migration code

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 3 +--
 drivers/gpu/drm/i915/gt/intel_context.c   | 3 ++-
 drivers/gpu/drm/i915/gt/intel_context.h   | 5 -
 drivers/gpu/drm/i915/gt/intel_context_types.h | 1 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 ++-
 drivers/gpu/drm/i915/gt/intel_lrc.c   | 2 +-
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 3 ++-
 drivers/gpu/drm/i915/gt/selftest_execlists.c  | 2 +-
 drivers/gpu/drm/i915/gt/selftest_mocs.c   | 2 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   | 2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c  | 7 ++-
 11 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index ddc3cc3f8f092..a4faf06022d5a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -211,8 +211,7 @@ static void intel_context_set_gem(struct intel_context *ce,
GEM_BUG_ON(rcu_access_pointer(ce->gem_context));
RCU_INIT_POINTER(ce->gem_context, ctx);
 
-   if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
-   ce->ring = __intel_context_ring_size(SZ_16K);
+   ce->ring_size = SZ_16K;
 
if (rcu_access_pointer(ctx->vm)) {
struct i915_address_space *vm;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 4033184f13b9f..bd63813c8a802 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -371,7 +371,8 @@ intel_context_init(struct intel_context *ce, struct 
intel_engine_cs *engine)
ce->engine = engine;
ce->ops = engine->cops;
ce->sseu = engine->sseu;
-   ce->ring = __intel_context_ring_size(SZ_4K);
+   ce->ring = NULL;
+   ce->ring_size = SZ_4K;
 
ewma_runtime_init(&ce->runtime.avg);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index f83a73a2b39fc..b10cbe8fee992 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -175,11 +175,6 @@ int intel_context_prepare_remote_request(struct 
intel_context *ce,
 
 struct i915_request *intel_context_create_request(struct intel_context *ce);
 
-static inline struct intel_ring *__intel_context_ring_size(u64 sz)
-{
-   return u64_to_ptr(struct intel_ring, sz);
-}
-
 static inline bool intel_context_is_barrier(const struct intel_context *ce)
 {
return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ed8c447a7346b..90026c1771055 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -82,6 +82,7 @@ struct intel_context {
spinlock_t signal_lock; /* protects signals, the list of requests */
 
struct i915_vma *state;
+   u32 ring_size;
struct intel_ring *ring;
struct intel_timeline *timeline;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 5ca3d16643353..d561573ed98c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -807,7 +807,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs 
*engine,
 
__set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
ce->timeline = page_pack_bits(NULL, hwsp);
-   ce->ring = __intel_context_ring_size(ring_size);
+   ce->ring = NULL;
+   ce->ring_size = ring_size;
 
i915_vm_put(ce->vm);
ce->vm = i915_vm_get(vm);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a27bac0a4bfb8..8ada1afe3d229 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -845,7 +845,7 @@ int lrc_alloc(struct intel_context *ce, struct 
intel_engine_cs *engine)
if (IS_ERR(vma))
return PTR_ERR(vma);
 
-   ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
+   ring = intel_engine_create_ring(engine, ce->ring_size);
if (IS_ERR(ring)) {
err = PTR_ERR(ring);
goto err_vma;
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 23c59ce66cee5..f10d2335fc8c6 100644
--- a/drivers/gpu/

[PATCH 03/30] drm/i915: Drop I915_CONTEXT_PARAM_NO_ZEROMAP

2021-07-08 Thread Jason Ekstrand
The idea behind this param is to support OpenCL drivers with relocations
because OpenCL reserves 0x0 for NULL and, if we placed memory there, it
would confuse CL kernels.  It was originally sent out as part of a patch
series including libdrm [1] and Beignet [2] support.  However, the
libdrm and Beignet patches never landed in their respective upstream
projects so this API has never been used.  It's never been used in Mesa
or any other driver, either.

Dropping this API allows us to delete a small bit of code.

[1]: https://lists.freedesktop.org/archives/intel-gfx/2015-May/067030.html
[2]: https://lists.freedesktop.org/archives/intel-gfx/2015-May/067031.html

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c  | 16 ++--
 .../gpu/drm/i915/gem/i915_gem_context_types.h|  1 -
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c   |  8 
 include/uapi/drm/i915_drm.h  |  4 
 4 files changed, 6 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index a4faf06022d5a..5fc0eb4beeeae 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1920,15 +1920,6 @@ static int ctx_setparam(struct drm_i915_file_private 
*fpriv,
int ret = 0;
 
switch (args->param) {
-   case I915_CONTEXT_PARAM_NO_ZEROMAP:
-   if (args->size)
-   ret = -EINVAL;
-   else if (args->value)
-   set_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
-   else
-   clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
-   break;
-
case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
if (args->size)
ret = -EINVAL;
@@ -1978,6 +1969,7 @@ static int ctx_setparam(struct drm_i915_file_private 
*fpriv,
ret = set_persistence(ctx, args);
break;
 
+   case I915_CONTEXT_PARAM_NO_ZEROMAP:
case I915_CONTEXT_PARAM_BAN_PERIOD:
case I915_CONTEXT_PARAM_RINGSIZE:
default:
@@ -2358,11 +2350,6 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
return -ENOENT;
 
switch (args->param) {
-   case I915_CONTEXT_PARAM_NO_ZEROMAP:
-   args->size = 0;
-   args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
-   break;
-
case I915_CONTEXT_PARAM_GTT_SIZE:
args->size = 0;
rcu_read_lock();
@@ -2410,6 +2397,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
args->value = i915_gem_context_is_persistent(ctx);
break;
 
+   case I915_CONTEXT_PARAM_NO_ZEROMAP:
case I915_CONTEXT_PARAM_BAN_PERIOD:
case I915_CONTEXT_PARAM_RINGSIZE:
default:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 340473aa70de0..5ae71ec936f7c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -129,7 +129,6 @@ struct i915_gem_context {
 * @user_flags: small set of booleans controlled by the user
 */
unsigned long user_flags;
-#define UCONTEXT_NO_ZEROMAP0
 #define UCONTEXT_NO_ERROR_CAPTURE  1
 #define UCONTEXT_BANNABLE  2
 #define UCONTEXT_RECOVERABLE   3
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 7ff2fc3c0b2c9..73acc65d25bad 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -290,7 +290,6 @@ struct i915_execbuffer {
struct intel_context *reloc_context;
 
u64 invalid_flags; /** Set of execobj.flags that are invalid */
-   u32 context_flags; /** Set of execobj.flags to insert from the ctx */
 
u64 batch_len; /** Length of batch within object */
u32 batch_start_offset; /** Location within object of batch */
@@ -552,9 +551,6 @@ eb_validate_vma(struct i915_execbuffer *eb,
entry->flags |= EXEC_OBJECT_NEEDS_GTT | 
__EXEC_OBJECT_NEEDS_MAP;
}
 
-   if (!(entry->flags & EXEC_OBJECT_PINNED))
-   entry->flags |= eb->context_flags;
-
return 0;
 }
 
@@ -761,10 +757,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
if (rcu_access_pointer(ctx->vm))
eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
 
-   eb->context_flags = 0;
-   if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
-   eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
-
return 0;
 }
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f229c0abcbb54..79dcafaf476eb 100644
--- a/include/uapi/drm/i915

[PATCH 04/30] drm/i915/gem: Set the watchdog timeout directly in intel_context_set_gem (v2)

2021-07-08 Thread Jason Ekstrand
Instead of handling it like a context param, unconditionally set it when
intel_contexts are created.  For years we've had the idea of a watchdog
uAPI floating about. The aim was for media, so that they could set very
tight deadlines for their transcodes jobs, so that if you have a corrupt
bitstream (especially for decoding) you don't hang your desktop too
hard.  But it's been stuck in limbo since forever, and this simplifies
things a bit in preparation for the proto-context work.  If we decide to
actually make said uAPI a reality, we can do it through the proto-
context easily enough.

This does mean that we move from reading the request_timeout_ms param
once at context creation to once per engine when engines are created.
If someone changes request_timeout_ms between creating a context and
setting engines, they will get the new timeout.
If someone races setting request_timeout_ms and context creation, they
can theoretically end up with different timeouts.  However, since both
of these are fairly harmless and require changing kernel params, we
don't care.

v2 (Tvrtko Ursulin):
 - Add a comment about races with request_timeout_ms

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 44 +++
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  4 --
 drivers/gpu/drm/i915/gt/intel_context_param.h |  3 +-
 3 files changed, 7 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5fc0eb4beeeae..9750a1ac7023e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -232,7 +232,12 @@ static void intel_context_set_gem(struct intel_context *ce,
intel_engine_has_timeslices(ce->engine))
__set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
 
-   intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us);
+   if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
+   ctx->i915->params.request_timeout_ms) {
+   unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;
+
+   intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
+   }
 }
 
 static void __free_engines(struct i915_gem_engines *e, unsigned int count)
@@ -791,41 +796,6 @@ static void __assign_timeline(struct i915_gem_context *ctx,
context_apply_all(ctx, __apply_timeline, timeline);
 }
 
-static int __apply_watchdog(struct intel_context *ce, void *timeout_us)
-{
-   return intel_context_set_watchdog_us(ce, (uintptr_t)timeout_us);
-}
-
-static int
-__set_watchdog(struct i915_gem_context *ctx, unsigned long timeout_us)
-{
-   int ret;
-
-   ret = context_apply_all(ctx, __apply_watchdog,
-   (void *)(uintptr_t)timeout_us);
-   if (!ret)
-   ctx->watchdog.timeout_us = timeout_us;
-
-   return ret;
-}
-
-static void __set_default_fence_expiry(struct i915_gem_context *ctx)
-{
-   struct drm_i915_private *i915 = ctx->i915;
-   int ret;
-
-   if (!IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) ||
-   !i915->params.request_timeout_ms)
-   return;
-
-   /* Default expiry for user fences. */
-   ret = __set_watchdog(ctx, i915->params.request_timeout_ms * 1000);
-   if (ret)
-   drm_notice(&i915->drm,
-  "Failed to configure default fence expiry! (%d)",
-  ret);
-}
-
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
 {
@@ -870,8 +840,6 @@ i915_gem_create_context(struct drm_i915_private *i915, 
unsigned int flags)
intel_timeline_put(timeline);
}
 
-   __set_default_fence_expiry(ctx);
-
trace_i915_context_create(ctx);
 
return ctx;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 5ae71ec936f7c..676592e27e7d2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -153,10 +153,6 @@ struct i915_gem_context {
 */
atomic_t active_count;
 
-   struct {
-   u64 timeout_us;
-   } watchdog;
-
/**
 * @hang_timestamp: The last time(s) this context caused a GPU hang
 */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h 
b/drivers/gpu/drm/i915/gt/intel_context_param.h
index dffedd983693d..0c69cb42d075c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_param.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_param.h
@@ -10,11 +10,10 @@
 
 #include "intel_context.h"
 
-static inline int
+static inline void
 intel_context_set_watchdog_us(struct intel_context *ce, u64 timeout_us)
 {
ce->watchdog.timeout_us = timeout_us;
-   return 0;
 }
 
 #endif /* INTEL_CONTEXT_PARAM_H */
-- 
2.31.1



[PATCH 05/30] drm/i915/gem: Return void from context_apply_all

2021-07-08 Thread Jason Ekstrand
None of the callbacks we use with it return an error code anymore; they
all return 0 unconditionally.

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 26 +++--
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 9750a1ac7023e..3503d46c88cbf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -718,32 +718,25 @@ __context_engines_await(const struct i915_gem_context 
*ctx,
return engines;
 }
 
-static int
+static void
 context_apply_all(struct i915_gem_context *ctx,
- int (*fn)(struct intel_context *ce, void *data),
+ void (*fn)(struct intel_context *ce, void *data),
  void *data)
 {
struct i915_gem_engines_iter it;
struct i915_gem_engines *e;
struct intel_context *ce;
-   int err = 0;
 
e = __context_engines_await(ctx, NULL);
-   for_each_gem_engine(ce, e, it) {
-   err = fn(ce, data);
-   if (err)
-   break;
-   }
+   for_each_gem_engine(ce, e, it)
+   fn(ce, data);
i915_sw_fence_complete(&e->fence);
-
-   return err;
 }
 
-static int __apply_ppgtt(struct intel_context *ce, void *vm)
+static void __apply_ppgtt(struct intel_context *ce, void *vm)
 {
i915_vm_put(ce->vm);
ce->vm = i915_vm_get(vm);
-   return 0;
 }
 
 static struct i915_address_space *
@@ -783,10 +776,9 @@ static void __set_timeline(struct intel_timeline **dst,
intel_timeline_put(old);
 }
 
-static int __apply_timeline(struct intel_context *ce, void *timeline)
+static void __apply_timeline(struct intel_context *ce, void *timeline)
 {
__set_timeline(&ce->timeline, timeline);
-   return 0;
 }
 
 static void __assign_timeline(struct i915_gem_context *ctx,
@@ -1841,19 +1833,17 @@ set_persistence(struct i915_gem_context *ctx,
return __context_set_persistence(ctx, args->value);
 }
 
-static int __apply_priority(struct intel_context *ce, void *arg)
+static void __apply_priority(struct intel_context *ce, void *arg)
 {
struct i915_gem_context *ctx = arg;
 
if (!intel_engine_has_timeslices(ce->engine))
-   return 0;
+   return;
 
if (ctx->sched.priority >= I915_PRIORITY_NORMAL)
intel_context_set_use_semaphores(ce);
else
intel_context_clear_use_semaphores(ce);
-
-   return 0;
 }
 
 static int set_priority(struct i915_gem_context *ctx,
-- 
2.31.1



[PATCH 06/30] drm/i915: Drop the CONTEXT_CLONE API (v2)

2021-07-08 Thread Jason Ekstrand
This API allows one context to grab bits out of another context upon
creation.  It can be used as a short-cut for setparam(getparam()) for
things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
real userspace.  It's used by a few IGT tests and that's it.  Since it
doesn't add any real value (most of the stuff you can CLONE you can copy
in other ways), drop it.

There is one thing that this API allows you to clone which you cannot
clone via getparam/setparam: timelines.  However, timelines are an
implementation detail of i915 and not really something that needs to be
exposed to userspace.  Also, sharing timelines between contexts isn't
obviously useful and supporting it has the potential to complicate i915
internally.  It also doesn't add any functionality that the client can't
get in other ways.  If a client really wants a shared timeline, they can
use a syncobj and set it as an in and out fence on every submit.

v2 (Jason Ekstrand):
 - More detailed commit message

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 199 +-
 .../drm/i915/gt/intel_execlists_submission.c  |  28 ---
 .../drm/i915/gt/intel_execlists_submission.h  |   3 -
 include/uapi/drm/i915_drm.h   |  16 +-
 4 files changed, 6 insertions(+), 240 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 3503d46c88cbf..9f9369d3c0004 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1957,207 +1957,14 @@ static int create_setparam(struct i915_user_extension 
__user *ext, void *data)
return ctx_setparam(arg->fpriv, arg->ctx, &local.param);
 }
 
-static int clone_engines(struct i915_gem_context *dst,
-struct i915_gem_context *src)
+static int invalid_ext(struct i915_user_extension __user *ext, void *data)
 {
-   struct i915_gem_engines *clone, *e;
-   bool user_engines;
-   unsigned long n;
-
-   e = __context_engines_await(src, &user_engines);
-   if (!e)
-   return -ENOENT;
-
-   clone = alloc_engines(e->num_engines);
-   if (!clone)
-   goto err_unlock;
-
-   for (n = 0; n < e->num_engines; n++) {
-   struct intel_engine_cs *engine;
-
-   if (!e->engines[n]) {
-   clone->engines[n] = NULL;
-   continue;
-   }
-   engine = e->engines[n]->engine;
-
-   /*
-* Virtual engines are singletons; they can only exist
-* inside a single context, because they embed their
-* HW context... As each virtual context implies a single
-* timeline (each engine can only dequeue a single request
-* at any time), it would be surprising for two contexts
-* to use the same engine. So let's create a copy of
-* the virtual engine instead.
-*/
-   if (intel_engine_is_virtual(engine))
-   clone->engines[n] =
-   intel_execlists_clone_virtual(engine);
-   else
-   clone->engines[n] = intel_context_create(engine);
-   if (IS_ERR_OR_NULL(clone->engines[n])) {
-   __free_engines(clone, n);
-   goto err_unlock;
-   }
-
-   intel_context_set_gem(clone->engines[n], dst);
-   }
-   clone->num_engines = n;
-   i915_sw_fence_complete(&e->fence);
-
-   /* Serialised by constructor */
-   engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1));
-   if (user_engines)
-   i915_gem_context_set_user_engines(dst);
-   else
-   i915_gem_context_clear_user_engines(dst);
-   return 0;
-
-err_unlock:
-   i915_sw_fence_complete(&e->fence);
-   return -ENOMEM;
-}
-
-static int clone_flags(struct i915_gem_context *dst,
-  struct i915_gem_context *src)
-{
-   dst->user_flags = src->user_flags;
-   return 0;
-}
-
-static int clone_schedattr(struct i915_gem_context *dst,
-  struct i915_gem_context *src)
-{
-   dst->sched = src->sched;
-   return 0;
-}
-
-static int clone_sseu(struct i915_gem_context *dst,
- struct i915_gem_context *src)
-{
-   struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
-   struct i915_gem_engines *clone;
-   unsigned long n;
-   int err;
-
-   /* no locking required; sole access under constructor*/
-   clone = __context_engines_static(dst);
-   if (e->num_engines != clone->num_engines) {
-   err = -EINVAL;
-   goto unlock;
-   }
-
-   for (n = 0; n < e->num_engines; n++) {
-   struct intel_context *ce = e->engines[n];
-
- 

[PATCH 07/30] drm/i915: Implement SINGLE_TIMELINE with a syncobj (v4)

2021-07-08 Thread Jason Ekstrand
This API is entirely unnecessary and I'd love to get rid of it.  If
userspace wants a single timeline across multiple contexts, they can
either use implicit synchronization or a syncobj, both of which existed
at the time this feature landed.  The justification given at the time
was that it would help GL drivers which are inherently single-timeline.
However, neither of our GL drivers actually wanted the feature.  i965
was already in maintenance mode at the time and iris uses syncobj for
everything.

Unfortunately, as much as I'd love to get rid of it, it is used by the
media driver so we can't do that.  We can, however, do the next-best
thing which is to embed a syncobj in the context and do exactly what
we'd expect from userspace internally.  This isn't an entirely identical
implementation because it's no longer atomic if userspace races with
itself by calling execbuffer2 twice simultaneously from different
threads.  It won't crash in that case; it just doesn't guarantee any
ordering between those two submits.  It also means that sync files
exported from different engines on a SINGLE_TIMELINE context will have
different fence contexts.  This is visible to userspace if it looks at
the obj_name field of sync_fence_info.

Moving SINGLE_TIMELINE to a syncobj emulation has a couple of technical
advantages beyond mere annoyance.  One is that intel_timeline is no
longer an api-visible object and can remain entirely an implementation
detail.  This may be advantageous as we make scheduler changes going
forward.  Second is that, together with deleting the CLONE_CONTEXT API,
we should now have a 1:1 mapping between intel_context and
intel_timeline which may help us reduce locking.

v2 (Tvrtko Ursulin):
 - Update the comment on i915_gem_context::syncobj to mention that it's
   an emulation and the possible race if userspace calls execbuffer2
   twice on the same context concurrently.
v2 (Jason Ekstrand):
 - Wrap the checks for eb.gem_context->syncobj in unlikely()
 - Drop the dma_fence reference
 - Improved commit message

v3 (Jason Ekstrand):
 - Move the dma_fence_put() to before the error exit

v4 (Tvrtko Ursulin):
 - Add a comment about fence contexts to the commit message

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 49 +--
 .../gpu/drm/i915/gem/i915_gem_context_types.h | 14 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 16 ++
 3 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 9f9369d3c0004..249bd36f14019 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -67,6 +67,8 @@
 #include 
 #include 
 
+#include 
+
 #include "gt/gen6_ppgtt.h"
 #include "gt/intel_context.h"
 #include "gt/intel_context_param.h"
@@ -224,10 +226,6 @@ static void intel_context_set_gem(struct intel_context *ce,
ce->vm = vm;
}
 
-   GEM_BUG_ON(ce->timeline);
-   if (ctx->timeline)
-   ce->timeline = intel_timeline_get(ctx->timeline);
-
if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
intel_engine_has_timeslices(ce->engine))
__set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
@@ -351,9 +349,6 @@ void i915_gem_context_release(struct kref *ref)
mutex_destroy(&ctx->engines_mutex);
mutex_destroy(&ctx->lut_mutex);
 
-   if (ctx->timeline)
-   intel_timeline_put(ctx->timeline);
-
put_pid(ctx->pid);
mutex_destroy(&ctx->mutex);
 
@@ -570,6 +565,9 @@ static void context_close(struct i915_gem_context *ctx)
if (vm)
i915_vm_close(vm);
 
+   if (ctx->syncobj)
+   drm_syncobj_put(ctx->syncobj);
+
ctx->file_priv = ERR_PTR(-EBADF);
 
/*
@@ -765,33 +763,11 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
i915_vm_close(vm);
 }
 
-static void __set_timeline(struct intel_timeline **dst,
-  struct intel_timeline *src)
-{
-   struct intel_timeline *old = *dst;
-
-   *dst = src ? intel_timeline_get(src) : NULL;
-
-   if (old)
-   intel_timeline_put(old);
-}
-
-static void __apply_timeline(struct intel_context *ce, void *timeline)
-{
-   __set_timeline(&ce->timeline, timeline);
-}
-
-static void __assign_timeline(struct i915_gem_context *ctx,
- struct intel_timeline *timeline)
-{
-   __set_timeline(&ctx->timeline, timeline);
-   context_apply_all(ctx, __apply_timeline, timeline);
-}
-
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
 {
struct i915_gem_context *ctx;
+   int ret;
 
if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
!HAS_EXECLISTS(i915))
@@ -820,16 +796,13 @@ i915_gem_create_context(struct drm_i915_private *

[PATCH 11/30] drm/i915/request: Remove the hook from await_execution

2021-07-08 Thread Jason Ekstrand
This was only ever used for FENCE_SUBMIT automatic engine selection
which was removed in the previous commit.

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  3 +-
 drivers/gpu/drm/i915/i915_request.c   | 42 ---
 drivers/gpu/drm/i915/i915_request.h   |  4 +-
 3 files changed, 9 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 30498948c83d0..9aa7e10d16308 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -3492,8 +3492,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (in_fence) {
if (args->flags & I915_EXEC_FENCE_SUBMIT)
err = i915_request_await_execution(eb.request,
-  in_fence,
-  NULL);
+  in_fence);
else
err = i915_request_await_dma_fence(eb.request,
   in_fence);
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index c5989c0b83d3e..86b4c9f2613d5 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -49,7 +49,6 @@
 struct execute_cb {
struct irq_work work;
struct i915_sw_fence *fence;
-   void (*hook)(struct i915_request *rq, struct dma_fence *signal);
struct i915_request *signal;
 };
 
@@ -180,17 +179,6 @@ static void irq_execute_cb(struct irq_work *wrk)
kmem_cache_free(global.slab_execute_cbs, cb);
 }
 
-static void irq_execute_cb_hook(struct irq_work *wrk)
-{
-   struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
-
-   cb->hook(container_of(cb->fence, struct i915_request, submit),
-&cb->signal->fence);
-   i915_request_put(cb->signal);
-
-   irq_execute_cb(wrk);
-}
-
 static __always_inline void
 __notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk))
 {
@@ -517,17 +505,12 @@ static bool __request_in_flight(const struct i915_request 
*signal)
 static int
 __await_execution(struct i915_request *rq,
  struct i915_request *signal,
- void (*hook)(struct i915_request *rq,
-  struct dma_fence *signal),
  gfp_t gfp)
 {
struct execute_cb *cb;
 
-   if (i915_request_is_active(signal)) {
-   if (hook)
-   hook(rq, &signal->fence);
+   if (i915_request_is_active(signal))
return 0;
-   }
 
cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
if (!cb)
@@ -537,12 +520,6 @@ __await_execution(struct i915_request *rq,
i915_sw_fence_await(cb->fence);
init_irq_work(&cb->work, irq_execute_cb);
 
-   if (hook) {
-   cb->hook = hook;
-   cb->signal = i915_request_get(signal);
-   cb->work.func = irq_execute_cb_hook;
-   }
-
/*
 * Register the callback first, then see if the signaler is already
 * active. This ensures that if we race with the
@@ -1253,7 +1230,7 @@ emit_semaphore_wait(struct i915_request *to,
goto await_fence;
 
/* Only submit our spinner after the signaler is running! */
-   if (__await_execution(to, from, NULL, gfp))
+   if (__await_execution(to, from, gfp))
goto await_fence;
 
if (__emit_semaphore_wait(to, from, from->fence.seqno))
@@ -1284,16 +1261,14 @@ static int intel_timeline_sync_set_start(struct 
intel_timeline *tl,
 
 static int
 __i915_request_await_execution(struct i915_request *to,
-  struct i915_request *from,
-  void (*hook)(struct i915_request *rq,
-   struct dma_fence *signal))
+  struct i915_request *from)
 {
int err;
 
GEM_BUG_ON(intel_context_is_barrier(from->context));
 
/* Submit both requests at the same time */
-   err = __await_execution(to, from, hook, I915_FENCE_GFP);
+   err = __await_execution(to, from, I915_FENCE_GFP);
if (err)
return err;
 
@@ -1406,9 +1381,7 @@ i915_request_await_external(struct i915_request *rq, 
struct dma_fence *fence)
 
 int
 i915_request_await_execution(struct i915_request *rq,
-struct dma_fence *fence,
-void (*hook)(struct i915_request *rq,
- struct dma_fence *signal))
+struct dma_fence *fence)
 {
struct dma_fence **child = &fence;
unsigned int nchild = 1;
@@ -1441,8 +1414,7 @@ i915_request_await_ex

[PATCH 10/30] drm/i915/gem: Remove engine auto-magic with FENCE_SUBMIT (v2)

2021-07-08 Thread Jason Ekstrand
Even though FENCE_SUBMIT is only documented to wait until the request in
the in-fence starts instead of waiting until it completes, it has a bit
more magic than that.  If FENCE_SUBMIT is used to submit something to a
balanced engine, we would wait to assign engines until the primary
request was ready to start and then attempt to assign it to a different
engine than the primary.  There is an IGT test (the bonded-slice subtest
of gem_exec_balancer) which exercises this by submitting a primary batch
to a specific VCS and then using FENCE_SUBMIT to submit a secondary
which can run on any VCS and have i915 figure out which VCS to run it on
such that they can run in parallel.

However, this functionality has never been used in the real world.  The
media driver (the only user of FENCE_SUBMIT) always picks exactly two
physical engines to bond and never asks us to pick which to use.

v2 (Daniel Vetter):
 - Mention the exact IGT test this breaks

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c  |  2 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h|  7 ---
 .../drm/i915/gt/intel_execlists_submission.c| 17 -
 3 files changed, 1 insertion(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 7b7897242a837..30498948c83d0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -3493,7 +3493,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (args->flags & I915_EXEC_FENCE_SUBMIT)
err = i915_request_await_execution(eb.request,
   in_fence,
-  
eb.engine->bond_execute);
+  NULL);
else
err = i915_request_await_dma_fence(eb.request,
   in_fence);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 5b91068ab2779..1cb9c3b70b29a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -416,13 +416,6 @@ struct intel_engine_cs {
 */
void(*submit_request)(struct i915_request *rq);
 
-   /*
-* Called on signaling of a SUBMIT_FENCE, passing along the signaling
-* request down to the bonded pairs.
-*/
-   void(*bond_execute)(struct i915_request *rq,
-   struct dma_fence *signal);
-
void(*release)(struct intel_engine_cs *engine);
 
struct intel_engine_execlists execlists;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 98b256352c23d..56e25090da672 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3655,22 +3655,6 @@ static void virtual_submit_request(struct i915_request 
*rq)
spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags);
 }
 
-static void
-virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
-{
-   intel_engine_mask_t allowed, exec;
-
-   allowed = ~to_request(signal)->engine->mask;
-
-   /* Restrict the bonded request to run on only the available engines */
-   exec = READ_ONCE(rq->execution_mask);
-   while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
-   ;
-
-   /* Prevent the master from being re-run on the bonded engines */
-   to_request(signal)->execution_mask &= ~allowed;
-}
-
 struct intel_context *
 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
   unsigned int count)
@@ -3731,7 +3715,6 @@ intel_execlists_create_virtual(struct intel_engine_cs 
**siblings,
ve->base.sched_engine->schedule = i915_schedule;
ve->base.sched_engine->kick_backend = kick_execlists;
ve->base.submit_request = virtual_submit_request;
-   ve->base.bond_execute = virtual_bond_execute;
 
INIT_LIST_HEAD(virtual_queue(ve));
tasklet_setup(&ve->base.sched_engine->tasklet, 
virtual_submission_tasklet);
-- 
2.31.1



[PATCH 12/30] drm/i915/gem: Disallow creating contexts with too many engines

2021-07-08 Thread Jason Ekstrand
There's no sense in allowing userspace to create more engines than it
can possibly access via execbuf.

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5eca91ded3423..0ba8506fb966f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1639,11 +1639,11 @@ set_engines(struct i915_gem_context *ctx,
return -EINVAL;
}
 
-   /*
-* Note that I915_EXEC_RING_MASK limits execbuf to only using the
-* first 64 engines defined here.
-*/
num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines);
+   /* RING_MASK has no shift so we can use it directly here */
+   if (num_engines > I915_EXEC_RING_MASK + 1)
+   return -EINVAL;
+
set.engines = alloc_engines(num_engines);
if (!set.engines)
return -ENOMEM;
-- 
2.31.1



[PATCH 13/30] drm/i915: Stop manually RCU banging in reset_stats_ioctl (v2)

2021-07-08 Thread Jason Ekstrand
As far as I can tell, the only real reason for this is to avoid taking a
reference to the i915_gem_context.  The cost of those two atomics
probably pales in comparison to the cost of the ioctl itself so we're
really not buying ourselves anything here.  We're about to make context
lookup a tiny bit more complicated, so let's get rid of the one hand-
rolled case.

Some usermode drivers such as our Vulkan driver call GET_RESET_STATS on
every execbuf so the perf here could theoretically be an issue.  If this
ever does become a performance issue for any such userspace drivers,
they can use set CONTEXT_PARAM_RECOVERABLE to false and look for -EIO
coming from execbuf to check for hangs instead.

v2 (Daniel Vetter):
 - Add a comment in the commit message about recoverable contexts

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 -
 drivers/gpu/drm/i915/i915_drv.h |  8 +---
 2 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 0ba8506fb966f..61fe6d18d4068 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -2090,16 +2090,13 @@ int i915_gem_context_reset_stats_ioctl(struct 
drm_device *dev,
struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_reset_stats *args = data;
struct i915_gem_context *ctx;
-   int ret;
 
if (args->flags || args->pad)
return -EINVAL;
 
-   ret = -ENOENT;
-   rcu_read_lock();
-   ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id);
+   ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id);
if (!ctx)
-   goto out;
+   return -ENOENT;
 
/*
 * We opt for unserialised reads here. This may result in tearing
@@ -2116,10 +2113,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device 
*dev,
args->batch_active = atomic_read(&ctx->guilty_count);
args->batch_pending = atomic_read(&ctx->active_count);
 
-   ret = 0;
-out:
-   rcu_read_unlock();
-   return ret;
+   i915_gem_context_put(ctx);
+   return 0;
 }
 
 /* GEM context-engines iterator: for_each_gem_engine() */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6dff4ca012419..ae45ea7b26997 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1853,19 +1853,13 @@ struct drm_gem_object *i915_gem_prime_import(struct 
drm_device *dev,
 
 struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int 
flags);
 
-static inline struct i915_gem_context *
-__i915_gem_context_lookup_rcu(struct drm_i915_file_private *file_priv, u32 id)
-{
-   return xa_load(&file_priv->context_xa, id);
-}
-
 static inline struct i915_gem_context *
 i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id)
 {
struct i915_gem_context *ctx;
 
rcu_read_lock();
-   ctx = __i915_gem_context_lookup_rcu(file_priv, id);
+   ctx = xa_load(&file_priv->context_xa, id);
if (ctx && !kref_get_unless_zero(&ctx->ref))
ctx = NULL;
rcu_read_unlock();
-- 
2.31.1



[PATCH 09/30] drm/i915/gem: Disallow bonding of virtual engines (v3)

2021-07-08 Thread Jason Ekstrand
This adds a bunch of complexity which the media driver has never
actually used.  The media driver does technically bond a balanced engine
to another engine but the balanced engine only has one engine in the
sibling set.  This doesn't actually result in a virtual engine.

This functionality was originally added to handle cases where we may
have more than two video engines and media might want to load-balance
their bonded submits by, for instance, submitting to a balanced vcs0-1
as the primary and then vcs2-3 as the secondary.  However, no such
hardware has shipped thus far and, if we ever want to enable such
use-cases in the future, we'll use the up-and-coming parallel submit API
which targets GuC submission.

This makes I915_CONTEXT_ENGINES_EXT_BOND a total no-op.  We leave the
validation code in place in case we ever decide we want to do something
interesting with the bonding information.

v2 (Jason Ekstrand):
 - Don't delete quite as much code.

v3 (Tvrtko Ursulin):
 - Add some history to the commit message

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  18 +-
 .../drm/i915/gt/intel_execlists_submission.c  |  69 --
 .../drm/i915/gt/intel_execlists_submission.h  |   5 +-
 drivers/gpu/drm/i915/gt/selftest_execlists.c  | 229 --
 4 files changed, 8 insertions(+), 313 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e36e3b1ae14e4..5eca91ded3423 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1552,6 +1552,12 @@ set_engines__bond(struct i915_user_extension __user 
*base, void *data)
}
virtual = set->engines->engines[idx]->engine;
 
+   if (intel_engine_is_virtual(virtual)) {
+   drm_dbg(&i915->drm,
+   "Bonding with virtual engines not allowed\n");
+   return -EINVAL;
+   }
+
err = check_user_mbz(&ext->flags);
if (err)
return err;
@@ -1592,18 +1598,6 @@ set_engines__bond(struct i915_user_extension __user 
*base, void *data)
n, ci.engine_class, ci.engine_instance);
return -EINVAL;
}
-
-   /*
-* A non-virtual engine has no siblings to choose between; and
-* a submit fence will always be directed to the one engine.
-*/
-   if (intel_engine_is_virtual(virtual)) {
-   err = intel_virtual_engine_attach_bond(virtual,
-  master,
-  bond);
-   if (err)
-   return err;
-   }
}
 
return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 7dd7afccb3adc..98b256352c23d 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -182,18 +182,6 @@ struct virtual_engine {
int prio;
} nodes[I915_NUM_ENGINES];
 
-   /*
-* Keep track of bonded pairs -- restrictions upon on our selection
-* of physical engines any particular request may be submitted to.
-* If we receive a submit-fence from a master engine, we will only
-* use one of sibling_mask physical engines.
-*/
-   struct ve_bond {
-   const struct intel_engine_cs *master;
-   intel_engine_mask_t sibling_mask;
-   } *bonds;
-   unsigned int num_bonds;
-
/* And finally, which physical engines this virtual engine maps onto. */
unsigned int num_siblings;
struct intel_engine_cs *siblings[];
@@ -3413,7 +3401,6 @@ static void rcu_virtual_context_destroy(struct 
work_struct *wrk)
i915_sched_engine_put(ve->base.sched_engine);
intel_engine_free_request_pool(&ve->base);
 
-   kfree(ve->bonds);
kfree(ve);
 }
 
@@ -3668,33 +3655,13 @@ static void virtual_submit_request(struct i915_request 
*rq)
spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags);
 }
 
-static struct ve_bond *
-virtual_find_bond(struct virtual_engine *ve,
- const struct intel_engine_cs *master)
-{
-   int i;
-
-   for (i = 0; i < ve->num_bonds; i++) {
-   if (ve->bonds[i].master == master)
-   return &ve->bonds[i];
-   }
-
-   return NULL;
-}
-
 static void
 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
 {
-   struct virtual_engine *ve = to_virtual_engine(rq->engine);
intel_engine_mask_t allowed, exec;
-   struct ve_bond *bond;
 
allowed = ~to_request(signal)->engine->mask;
 
-   bond = virtual_find_bond(ve, to_req

[PATCH 14/30] drm/i915/gem: Add a separate validate_priority helper

2021-07-08 Thread Jason Ekstrand
With the proto-context stuff added later in this series, we end up
having to duplicate set_priority.  This lets us avoid duplicating the
validation logic.

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 42 +
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 61fe6d18d4068..f9a6eac78c0ae 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -169,6 +169,28 @@ lookup_user_engine(struct i915_gem_context *ctx,
return i915_gem_context_get_engine(ctx, idx);
 }
 
+static int validate_priority(struct drm_i915_private *i915,
+const struct drm_i915_gem_context_param *args)
+{
+   s64 priority = args->value;
+
+   if (args->size)
+   return -EINVAL;
+
+   if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+   return -ENODEV;
+
+   if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
+   priority < I915_CONTEXT_MIN_USER_PRIORITY)
+   return -EINVAL;
+
+   if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
+   !capable(CAP_SYS_NICE))
+   return -EPERM;
+
+   return 0;
+}
+
 static struct i915_address_space *
 context_get_vm_rcu(struct i915_gem_context *ctx)
 {
@@ -1744,23 +1766,13 @@ static void __apply_priority(struct intel_context *ce, 
void *arg)
 static int set_priority(struct i915_gem_context *ctx,
const struct drm_i915_gem_context_param *args)
 {
-   s64 priority = args->value;
-
-   if (args->size)
-   return -EINVAL;
-
-   if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
-   return -ENODEV;
-
-   if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
-   priority < I915_CONTEXT_MIN_USER_PRIORITY)
-   return -EINVAL;
+   int err;
 
-   if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
-   !capable(CAP_SYS_NICE))
-   return -EPERM;
+   err = validate_priority(ctx->i915, args);
+   if (err)
+   return err;
 
-   ctx->sched.priority = priority;
+   ctx->sched.priority = args->value;
context_apply_all(ctx, __apply_priority, ctx);
 
return 0;
-- 
2.31.1



[PATCH 08/30] drm/i915: Drop getparam support for I915_CONTEXT_PARAM_ENGINES

2021-07-08 Thread Jason Ekstrand
This has never been used by any userspace except IGT and provides no
real functionality beyond parroting back parameters userspace passed in
as part of context creation or via setparam.  If the context is in
legacy mode (where you use I915_EXEC_RENDER and friends), it returns
success with zero data so it's not useful for discovering what engines
are in the context.  It's also not a replacement for the recently
removed I915_CONTEXT_CLONE_ENGINES because it doesn't return any of the
balancing or bonding information.

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 77 +
 1 file changed, 1 insertion(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 249bd36f14019..e36e3b1ae14e4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1724,78 +1724,6 @@ set_engines(struct i915_gem_context *ctx,
return 0;
 }
 
-static int
-get_engines(struct i915_gem_context *ctx,
-   struct drm_i915_gem_context_param *args)
-{
-   struct i915_context_param_engines __user *user;
-   struct i915_gem_engines *e;
-   size_t n, count, size;
-   bool user_engines;
-   int err = 0;
-
-   e = __context_engines_await(ctx, &user_engines);
-   if (!e)
-   return -ENOENT;
-
-   if (!user_engines) {
-   i915_sw_fence_complete(&e->fence);
-   args->size = 0;
-   return 0;
-   }
-
-   count = e->num_engines;
-
-   /* Be paranoid in case we have an impedance mismatch */
-   if (!check_struct_size(user, engines, count, &size)) {
-   err = -EINVAL;
-   goto err_free;
-   }
-   if (overflows_type(size, args->size)) {
-   err = -EINVAL;
-   goto err_free;
-   }
-
-   if (!args->size) {
-   args->size = size;
-   goto err_free;
-   }
-
-   if (args->size < size) {
-   err = -EINVAL;
-   goto err_free;
-   }
-
-   user = u64_to_user_ptr(args->value);
-   if (put_user(0, &user->extensions)) {
-   err = -EFAULT;
-   goto err_free;
-   }
-
-   for (n = 0; n < count; n++) {
-   struct i915_engine_class_instance ci = {
-   .engine_class = I915_ENGINE_CLASS_INVALID,
-   .engine_instance = I915_ENGINE_CLASS_INVALID_NONE,
-   };
-
-   if (e->engines[n]) {
-   ci.engine_class = e->engines[n]->engine->uabi_class;
-   ci.engine_instance = 
e->engines[n]->engine->uabi_instance;
-   }
-
-   if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) {
-   err = -EFAULT;
-   goto err_free;
-   }
-   }
-
-   args->size = size;
-
-err_free:
-   i915_sw_fence_complete(&e->fence);
-   return err;
-}
-
 static int
 set_persistence(struct i915_gem_context *ctx,
const struct drm_i915_gem_context_param *args)
@@ -2126,10 +2054,6 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
ret = get_ppgtt(file_priv, ctx, args);
break;
 
-   case I915_CONTEXT_PARAM_ENGINES:
-   ret = get_engines(ctx, args);
-   break;
-
case I915_CONTEXT_PARAM_PERSISTENCE:
args->size = 0;
args->value = i915_gem_context_is_persistent(ctx);
@@ -2137,6 +2061,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
 
case I915_CONTEXT_PARAM_NO_ZEROMAP:
case I915_CONTEXT_PARAM_BAN_PERIOD:
+   case I915_CONTEXT_PARAM_ENGINES:
case I915_CONTEXT_PARAM_RINGSIZE:
default:
ret = -EINVAL;
-- 
2.31.1



[PATCH 15/30] drm/i915: Add gem/i915_gem_context.h to the docs

2021-07-08 Thread Jason Ekstrand
In order to prevent kernel doc warnings, also fill out docs for any
missing fields and fix those that forgot the "@".

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 Documentation/gpu/i915.rst|  2 +
 .../gpu/drm/i915/gem/i915_gem_context_types.h | 43 ---
 2 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index e6fd9608e9c6d..204ebdaadb45a 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -422,6 +422,8 @@ Batchbuffer Parsing
 User Batchbuffer Execution
 --
 
+.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+
 .. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
:doc: User command execution
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index df76767f0c41b..5f0673a2129f9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -30,19 +30,39 @@ struct i915_address_space;
 struct intel_timeline;
 struct intel_ring;
 
+/**
+ * struct i915_gem_engines - A set of engines
+ */
 struct i915_gem_engines {
union {
+   /** @link: Link in i915_gem_context::stale::engines */
struct list_head link;
+
+   /** @rcu: RCU to use when freeing */
struct rcu_head rcu;
};
+
+   /** @fence: Fence used for delayed destruction of engines */
struct i915_sw_fence fence;
+
+   /** @ctx: i915_gem_context backpointer */
struct i915_gem_context *ctx;
+
+   /** @num_engines: Number of engines in this set */
unsigned int num_engines;
+
+   /** @engines: Array of engines */
struct intel_context *engines[];
 };
 
+/**
+ * struct i915_gem_engines_iter - Iterator for an i915_gem_engines set
+ */
 struct i915_gem_engines_iter {
+   /** @idx: Index into i915_gem_engines::engines */
unsigned int idx;
+
+   /** @engines: Engine set being iterated */
const struct i915_gem_engines *engines;
 };
 
@@ -53,10 +73,10 @@ struct i915_gem_engines_iter {
  * logical hardware state for a particular client.
  */
 struct i915_gem_context {
-   /** i915: i915 device backpointer */
+   /** @i915: i915 device backpointer */
struct drm_i915_private *i915;
 
-   /** file_priv: owning file descriptor */
+   /** @file_priv: owning file descriptor */
struct drm_i915_file_private *file_priv;
 
/**
@@ -81,7 +101,9 @@ struct i915_gem_context {
 * CONTEXT_USER_ENGINES flag is set).
 */
struct i915_gem_engines __rcu *engines;
-   struct mutex engines_mutex; /* guards writes to engines */
+
+   /** @engines_mutex: guards writes to engines */
+   struct mutex engines_mutex;
 
/**
 * @syncobj: Shared timeline syncobj
@@ -118,7 +140,7 @@ struct i915_gem_context {
 */
struct pid *pid;
 
-   /** link: place with &drm_i915_private.context_list */
+   /** @link: place with &drm_i915_private.context_list */
struct list_head link;
 
/**
@@ -153,11 +175,13 @@ struct i915_gem_context {
 #define CONTEXT_CLOSED 0
 #define CONTEXT_USER_ENGINES   1
 
+   /** @mutex: guards everything that isn't engines or handles_vma */
struct mutex mutex;
 
+   /** @sched: scheduler parameters */
struct i915_sched_attr sched;
 
-   /** guilty_count: How many times this context has caused a GPU hang. */
+   /** @guilty_count: How many times this context has caused a GPU hang. */
atomic_t guilty_count;
/**
 * @active_count: How many times this context was active during a GPU
@@ -171,15 +195,17 @@ struct i915_gem_context {
unsigned long hang_timestamp[2];
 #define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */
 
-   /** remap_slice: Bitmask of cache lines that need remapping */
+   /** @remap_slice: Bitmask of cache lines that need remapping */
u8 remap_slice;
 
/**
-* handles_vma: rbtree to look up our context specific obj/vma for
+* @handles_vma: rbtree to look up our context specific obj/vma for
 * the user handle. (user handles are per fd, but the binding is
 * per vm, which may be one per context or shared with the global GTT)
 */
struct radix_tree_root handles_vma;
+
+   /** @lut_mutex: Locks handles_vma */
struct mutex lut_mutex;
 
/**
@@ -191,8 +217,11 @@ struct i915_gem_context {
 */
char name[TASK_COMM_LEN + 8];
 
+   /** @stale: tracks stale engines to be destroyed */
struct {
+   /** @lock: guards engines */
spinlock_t lock;
+   /** @engines: list of stale engines */
struct list_head engines;
} stal

[PATCH 17/30] drm/i915/gem: Rework error handling in default_engines

2021-07-08 Thread Jason Ekstrand
Since free_engines works for partially constructed engine sets, we can
use the usual goto pattern.

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 741624da8db78..5b75f98274b9e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -366,7 +366,7 @@ static struct i915_gem_engines *default_engines(struct 
i915_gem_context *ctx)
 {
const struct intel_gt *gt = &ctx->i915->gt;
struct intel_engine_cs *engine;
-   struct i915_gem_engines *e;
+   struct i915_gem_engines *e, *err;
enum intel_engine_id id;
 
e = alloc_engines(I915_NUM_ENGINES);
@@ -384,18 +384,21 @@ static struct i915_gem_engines *default_engines(struct 
i915_gem_context *ctx)
 
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
-   __free_engines(e, e->num_engines + 1);
-   return ERR_CAST(ce);
+   err = ERR_CAST(ce);
+   goto free_engines;
}
 
intel_context_set_gem(ce, ctx);
 
e->engines[engine->legacy_idx] = ce;
-   e->num_engines = max(e->num_engines, engine->legacy_idx);
+   e->num_engines = max(e->num_engines, engine->legacy_idx + 1);
}
-   e->num_engines++;
 
return e;
+
+free_engines:
+   free_engines(e);
+   return err;
 }
 
 void i915_gem_context_release(struct kref *ref)
-- 
2.31.1



[PATCH 18/30] drm/i915/gem: Optionally set SSEU in intel_context_set_gem

2021-07-08 Thread Jason Ekstrand
For now this is a no-op because everyone passes in a null SSEU but it
lets us get some of the error handling and selftest refactoring plumbed
through.

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 41 +++
 .../gpu/drm/i915/gem/selftests/mock_context.c |  6 ++-
 2 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5b75f98274b9e..206721dccd24e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -266,9 +266,12 @@ context_get_vm_rcu(struct i915_gem_context *ctx)
} while (1);
 }
 
-static void intel_context_set_gem(struct intel_context *ce,
- struct i915_gem_context *ctx)
+static int intel_context_set_gem(struct intel_context *ce,
+struct i915_gem_context *ctx,
+struct intel_sseu sseu)
 {
+   int ret = 0;
+
GEM_BUG_ON(rcu_access_pointer(ce->gem_context));
RCU_INIT_POINTER(ce->gem_context, ctx);
 
@@ -295,6 +298,12 @@ static void intel_context_set_gem(struct intel_context *ce,
 
intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
}
+
+   /* A valid SSEU has no zero fields */
+   if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS))
+   ret = intel_context_reconfigure_sseu(ce, sseu);
+
+   return ret;
 }
 
 static void __free_engines(struct i915_gem_engines *e, unsigned int count)
@@ -362,7 +371,8 @@ static struct i915_gem_engines *alloc_engines(unsigned int 
count)
return e;
 }
 
-static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
+static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx,
+   struct intel_sseu rcs_sseu)
 {
const struct intel_gt *gt = &ctx->i915->gt;
struct intel_engine_cs *engine;
@@ -375,6 +385,8 @@ static struct i915_gem_engines *default_engines(struct 
i915_gem_context *ctx)
 
for_each_engine(engine, gt, id) {
struct intel_context *ce;
+   struct intel_sseu sseu = {};
+   int ret;
 
if (engine->legacy_idx == INVALID_ENGINE)
continue;
@@ -388,10 +400,18 @@ static struct i915_gem_engines *default_engines(struct 
i915_gem_context *ctx)
goto free_engines;
}
 
-   intel_context_set_gem(ce, ctx);
-
e->engines[engine->legacy_idx] = ce;
e->num_engines = max(e->num_engines, engine->legacy_idx + 1);
+
+   if (engine->class == RENDER_CLASS)
+   sseu = rcs_sseu;
+
+   ret = intel_context_set_gem(ce, ctx, sseu);
+   if (ret) {
+   err = ERR_PTR(ret);
+   goto free_engines;
+   }
+
}
 
return e;
@@ -705,6 +725,7 @@ __create_context(struct drm_i915_private *i915,
 {
struct i915_gem_context *ctx;
struct i915_gem_engines *e;
+   struct intel_sseu null_sseu = {};
int err;
int i;
 
@@ -722,7 +743,7 @@ __create_context(struct drm_i915_private *i915,
INIT_LIST_HEAD(&ctx->stale.engines);
 
mutex_init(&ctx->engines_mutex);
-   e = default_engines(ctx);
+   e = default_engines(ctx, null_sseu);
if (IS_ERR(e)) {
err = PTR_ERR(e);
goto err_free;
@@ -1508,6 +1529,7 @@ set_engines__load_balance(struct i915_user_extension 
__user *base, void *data)
struct intel_engine_cs *stack[16];
struct intel_engine_cs **siblings;
struct intel_context *ce;
+   struct intel_sseu null_sseu = {};
u16 num_siblings, idx;
unsigned int n;
int err;
@@ -1580,7 +1602,7 @@ set_engines__load_balance(struct i915_user_extension 
__user *base, void *data)
goto out_siblings;
}
 
-   intel_context_set_gem(ce, set->ctx);
+   intel_context_set_gem(ce, set->ctx, null_sseu);
 
if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
intel_context_put(ce);
@@ -1688,6 +1710,7 @@ set_engines(struct i915_gem_context *ctx,
struct drm_i915_private *i915 = ctx->i915;
struct i915_context_param_engines __user *user =
u64_to_user_ptr(args->value);
+   struct intel_sseu null_sseu = {};
struct set_engines set = { .ctx = ctx };
unsigned int num_engines, n;
u64 extensions;
@@ -1697,7 +1720,7 @@ set_engines(struct i915_gem_context *ctx,
if (!i915_gem_context_user_engines(ctx))
return 0;
 
-   set.engines = default_engines(ctx);
+   set.engines = default_engines(ctx, null_sseu);
if (IS_ERR(set.eng

[PATCH 16/30] drm/i915/gem: Add an intermediate proto_context struct (v5)

2021-07-08 Thread Jason Ekstrand
The current context uAPI allows for two methods of setting context
parameters: SET_CONTEXT_PARAM and CONTEXT_CREATE_EXT_SETPARAM.  The
former is allowed to be called at any time while the later happens as
part of GEM_CONTEXT_CREATE.  Currently, everything settable via one is
settable via the other.  While some params are fairly simple and setting
them on a live context is harmless such the context priority, others are
far trickier such as the VM or the set of engines.  In order to swap out
the VM, for instance, we have to delay until all current in-flight work
is complete, swap in the new VM, and then continue.  This leads to a
plethora of potential race conditions we'd really rather avoid.

Unfortunately, both methods of setting the VM and the engine set are in
active use today so we can't simply disallow setting the VM or engine
set via SET_CONTEXT_PARAM.  In order to work around this wart, this
commit adds a proto-context struct which contains all the context create
parameters.

v2 (Daniel Vetter):
 - Better commit message
 - Use __set/clear_bit instead of set/clear_bit because there's no race
   and we don't need the atomics

v3 (Daniel Vetter):
 - Use manual bitops and BIT() instead of __set_bit

v4 (Daniel Vetter):
 - Add a changelog to the commit message
 - Better hyperlinking in docs
 - Create the default PPGTT in i915_gem_create_context

v5 (Daniel Vetter):
 - Hand-roll the initialization of UCONTEXT_PERSISTENCE

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 84 +++
 .../gpu/drm/i915/gem/i915_gem_context_types.h | 22 +
 .../gpu/drm/i915/gem/selftests/mock_context.c | 16 +++-
 3 files changed, 105 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index f9a6eac78c0ae..741624da8db78 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -191,6 +191,43 @@ static int validate_priority(struct drm_i915_private *i915,
return 0;
 }
 
+static void proto_context_close(struct i915_gem_proto_context *pc)
+{
+   if (pc->vm)
+   i915_vm_put(pc->vm);
+   kfree(pc);
+}
+
+static struct i915_gem_proto_context *
+proto_context_create(struct drm_i915_private *i915, unsigned int flags)
+{
+   struct i915_gem_proto_context *pc, *err;
+
+   pc = kzalloc(sizeof(*pc), GFP_KERNEL);
+   if (!pc)
+   return ERR_PTR(-ENOMEM);
+
+   pc->user_flags = BIT(UCONTEXT_BANNABLE) |
+BIT(UCONTEXT_RECOVERABLE);
+   if (i915->params.enable_hangcheck)
+   pc->user_flags |= BIT(UCONTEXT_PERSISTENCE);
+   pc->sched.priority = I915_PRIORITY_NORMAL;
+
+   if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
+   if (!HAS_EXECLISTS(i915)) {
+   err = ERR_PTR(-EINVAL);
+   goto proto_close;
+   }
+   pc->single_timeline = true;
+   }
+
+   return pc;
+
+proto_close:
+   proto_context_close(pc);
+   return err;
+}
+
 static struct i915_address_space *
 context_get_vm_rcu(struct i915_gem_context *ctx)
 {
@@ -660,7 +697,8 @@ static int __context_set_persistence(struct 
i915_gem_context *ctx, bool state)
 }
 
 static struct i915_gem_context *
-__create_context(struct drm_i915_private *i915)
+__create_context(struct drm_i915_private *i915,
+const struct i915_gem_proto_context *pc)
 {
struct i915_gem_context *ctx;
struct i915_gem_engines *e;
@@ -673,7 +711,7 @@ __create_context(struct drm_i915_private *i915)
 
kref_init(&ctx->ref);
ctx->i915 = i915;
-   ctx->sched.priority = I915_PRIORITY_NORMAL;
+   ctx->sched = pc->sched;
mutex_init(&ctx->mutex);
INIT_LIST_HEAD(&ctx->link);
 
@@ -696,9 +734,7 @@ __create_context(struct drm_i915_private *i915)
 * is no remap info, it will be a NOP. */
ctx->remap_slice = ALL_L3_SLICES(i915);
 
-   i915_gem_context_set_bannable(ctx);
-   i915_gem_context_set_recoverable(ctx);
-   __context_set_persistence(ctx, true /* cgroup hook? */);
+   ctx->user_flags = pc->user_flags;
 
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
@@ -786,20 +822,22 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
 }
 
 static struct i915_gem_context *
-i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
+i915_gem_create_context(struct drm_i915_private *i915,
+   const struct i915_gem_proto_context *pc)
 {
struct i915_gem_context *ctx;
int ret;
 
-   if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
-   !HAS_EXECLISTS(i915))
-   return ERR_PTR(-EINVAL);
-
-   ctx = __create_context(i915);
+   ctx = __create_context(i915, pc);

[PATCH 19/30] drm/i915: Add an i915_gem_vm_lookup helper

2021-07-08 Thread Jason Ekstrand
This is the VM equivalent of i915_gem_context_lookup.  It's only used
once in this patch but future patches will need to duplicate this lookup
code so it's better to have it in a helper.

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c |  6 +-
 drivers/gpu/drm/i915/i915_drv.h | 14 ++
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 206721dccd24e..3c59d1e4080c4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1311,11 +1311,7 @@ static int set_ppgtt(struct drm_i915_file_private 
*file_priv,
if (upper_32_bits(args->value))
return -ENOENT;
 
-   rcu_read_lock();
-   vm = xa_load(&file_priv->vm_xa, args->value);
-   if (vm && !kref_get_unless_zero(&vm->ref))
-   vm = NULL;
-   rcu_read_unlock();
+   vm = i915_gem_vm_lookup(file_priv, args->value);
if (!vm)
return -ENOENT;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ae45ea7b26997..8c1994c16b920 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1867,6 +1867,20 @@ i915_gem_context_lookup(struct drm_i915_file_private 
*file_priv, u32 id)
return ctx;
 }
 
+static inline struct i915_address_space *
+i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id)
+{
+   struct i915_address_space *vm;
+
+   rcu_read_lock();
+   vm = xa_load(&file_priv->vm_xa, id);
+   if (vm && !kref_get_unless_zero(&vm->ref))
+   vm = NULL;
+   rcu_read_unlock();
+
+   return vm;
+}
+
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
  u64 min_size, u64 alignment,
-- 
2.31.1



[PATCH 20/30] drm/i915/gem: Make an alignment check more sensible

2021-07-08 Thread Jason Ekstrand
What we really want to check is that the size of the engines array, i.e.
args->size - sizeof(*user) is divisible by the element size, i.e.
sizeof(*user->engines) because that's what's required for computing the
array length right below the check.  However, we're currently not doing
this and instead doing a compile-time check that sizeof(*user) is
divisible by sizeof(*user->engines) and avoiding the subtraction.  As
far as I can tell, the only reason for the more confusing pair of checks
is to avoid a single subtraction of a constant.

The other thing the BUILD_BUG_ON might be trying to implicitly check is
that offsetof(user->engines) == sizeof(*user) and we don't have any
weird padding throwing us off.  However, that's not the check it's doing
and it's not even a reliable way to do that check.

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 3c59d1e4080c4..f135fbc97c5a7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1723,9 +1723,8 @@ set_engines(struct i915_gem_context *ctx,
goto replace;
}
 
-   BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines)));
if (args->size < sizeof(*user) ||
-   !IS_ALIGNED(args->size, sizeof(*user->engines))) {
+   !IS_ALIGNED(args->size -  sizeof(*user), sizeof(*user->engines))) {
drm_dbg(&i915->drm, "Invalid size for engine array: %d\n",
args->size);
return -EINVAL;
-- 
2.31.1



[PATCH 21/30] drm/i915/gem: Use the proto-context to handle create parameters (v5)

2021-07-08 Thread Jason Ekstrand
This means that the proto-context needs to grow support for engine
configuration information as well as setparam logic.  Fortunately, we'll
be deleting a lot of setparam logic on the primary context shortly so it
will hopefully balance out.

There's an extra bit of fun here when it comes to setting SSEU and the
way it interacts with PARAM_ENGINES.  Unfortunately, thanks to
SET_CONTEXT_PARAM and not being allowed to pick the order in which we
handle certain parameters, we have to think about those interactions.

v2 (Daniel Vetter):
 - Add a proto_context_free_user_engines helper
 - Comment on SSEU in the commit message
 - Use proto_context_set_persistence in set_proto_ctx_param

v3 (Daniel Vetter):
 - Fix a doc comment
 - Do an explicit HAS_FULL_PPGTT check in set_proto_ctx_vm instead of
   relying on pc->vm != NULL.
 - Handle errors for CONTEXT_PARAM_PERSISTENCE
 - Don't allow more resetting user engines
 - Rework initialization of UCONTEXT_PERSISTENCE

v4 (Jason Ekstrand):
 - Move hand-rolled initialization of UCONTEXT_PERSISTENCE to an
   earlier patch

v5 (Jason Ekstrand):
 - Move proto_context_set_persistence to this patch

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 577 +-
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  58 ++
 2 files changed, 618 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index f135fbc97c5a7..4972b8c91d942 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -193,11 +193,59 @@ static int validate_priority(struct drm_i915_private 
*i915,
 
 static void proto_context_close(struct i915_gem_proto_context *pc)
 {
+   int i;
+
if (pc->vm)
i915_vm_put(pc->vm);
+   if (pc->user_engines) {
+   for (i = 0; i < pc->num_user_engines; i++)
+   kfree(pc->user_engines[i].siblings);
+   kfree(pc->user_engines);
+   }
kfree(pc);
 }
 
+static int proto_context_set_persistence(struct drm_i915_private *i915,
+struct i915_gem_proto_context *pc,
+bool persist)
+{
+   if (persist) {
+   /*
+* Only contexts that are short-lived [that will expire or be
+* reset] are allowed to survive past termination. We require
+* hangcheck to ensure that the persistent requests are healthy.
+*/
+   if (!i915->params.enable_hangcheck)
+   return -EINVAL;
+
+   pc->user_flags |= BIT(UCONTEXT_PERSISTENCE);
+   } else {
+   /* To cancel a context we use "preempt-to-idle" */
+   if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+   return -ENODEV;
+
+   /*
+* If the cancel fails, we then need to reset, cleanly!
+*
+* If the per-engine reset fails, all hope is lost! We resort
+* to a full GPU reset in that unlikely case, but realistically
+* if the engine could not reset, the full reset does not fare
+* much better. The damage has been done.
+*
+* However, if we cannot reset an engine by itself, we cannot
+* cleanup a hanging persistent context without causing
+* colateral damage, and we should not pretend we can by
+* exposing the interface.
+*/
+   if (!intel_has_reset_engine(&i915->gt))
+   return -ENODEV;
+
+   pc->user_flags &= ~BIT(UCONTEXT_PERSISTENCE);
+   }
+
+   return 0;
+}
+
 static struct i915_gem_proto_context *
 proto_context_create(struct drm_i915_private *i915, unsigned int flags)
 {
@@ -207,6 +255,8 @@ proto_context_create(struct drm_i915_private *i915, 
unsigned int flags)
if (!pc)
return ERR_PTR(-ENOMEM);
 
+   pc->num_user_engines = -1;
+   pc->user_engines = NULL;
pc->user_flags = BIT(UCONTEXT_BANNABLE) |
 BIT(UCONTEXT_RECOVERABLE);
if (i915->params.enable_hangcheck)
@@ -228,6 +278,430 @@ proto_context_create(struct drm_i915_private *i915, 
unsigned int flags)
return err;
 }
 
+static int set_proto_ctx_vm(struct drm_i915_file_private *fpriv,
+   struct i915_gem_proto_context *pc,
+   const struct drm_i915_gem_context_param *args)
+{
+   struct drm_i915_private *i915 = fpriv->dev_priv;
+   struct i915_address_space *vm;
+
+   if (args->size)
+   return -EINVAL;
+
+   if (!HAS_FULL_PPGTT(i915))
+   return -ENODEV;
+
+   if (upper_32_bits(args->value))
+   return -ENOENT;
+
+   vm = i915_gem_vm_lookup

[PATCH 22/30] drm/i915/gem: Return an error ptr from context_lookup

2021-07-08 Thread Jason Ekstrand
We're about to start doing lazy context creation which means contexts
get created in i915_gem_context_lookup and we may start having more
errors than -ENOENT.

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c| 12 ++--
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c |  4 ++--
 drivers/gpu/drm/i915/i915_drv.h|  2 +-
 drivers/gpu/drm/i915/i915_perf.c   |  4 ++--
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 4972b8c91d942..7045e3afa7113 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -2636,8 +2636,8 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
int ret = 0;
 
ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
-   if (!ctx)
-   return -ENOENT;
+   if (IS_ERR(ctx))
+   return PTR_ERR(ctx);
 
switch (args->param) {
case I915_CONTEXT_PARAM_GTT_SIZE:
@@ -2705,8 +2705,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device 
*dev, void *data,
int ret;
 
ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
-   if (!ctx)
-   return -ENOENT;
+   if (IS_ERR(ctx))
+   return PTR_ERR(ctx);
 
ret = ctx_setparam(file_priv, ctx, args);
 
@@ -2725,8 +2725,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device 
*dev,
return -EINVAL;
 
ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id);
-   if (!ctx)
-   return -ENOENT;
+   if (IS_ERR(ctx))
+   return PTR_ERR(ctx);
 
/*
 * We opt for unserialised reads here. This may result in tearing
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 9aa7e10d16308..5ea8b4e23e428 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -750,8 +750,8 @@ static int eb_select_context(struct i915_execbuffer *eb)
struct i915_gem_context *ctx;
 
ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
-   if (unlikely(!ctx))
-   return -ENOENT;
+   if (unlikely(IS_ERR(ctx)))
+   return PTR_ERR(ctx);
 
eb->gem_context = ctx;
if (rcu_access_pointer(ctx->vm))
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8c1994c16b920..d9278c973a734 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1864,7 +1864,7 @@ i915_gem_context_lookup(struct drm_i915_file_private 
*file_priv, u32 id)
ctx = NULL;
rcu_read_unlock();
 
-   return ctx;
+   return ctx ? ctx : ERR_PTR(-ENOENT);
 }
 
 static inline struct i915_address_space *
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 9f94914958c39..b4ec114a4698b 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3414,10 +3414,10 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
struct drm_i915_file_private *file_priv = file->driver_priv;
 
specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
-   if (!specific_ctx) {
+   if (IS_ERR(specific_ctx)) {
DRM_DEBUG("Failed to look up context with ID %u for 
opening perf stream\n",
  ctx_handle);
-   ret = -ENOENT;
+   ret = PTR_ERR(specific_ctx);
goto err;
}
}
-- 
2.31.1



[PATCH 23/30] drm/i915/gt: Drop i915_address_space::file (v2)

2021-07-08 Thread Jason Ekstrand
There's a big comment saying how useful it is but no one is using this
for anything anymore.

It was added in 2bfa996e031b ("drm/i915: Store owning file on the
i915_address_space") and used for debugfs at the time as well as telling
the difference between the global GTT and a PPGTT.  In f6e8aa387171
("drm/i915: Report the number of closed vma held by each context in
debugfs") we removed one use of it by switching to a context walk and
comparing with the VM in the context.  Finally, VM stats for debugfs
were entirely nuked in db80a1294c23 ("drm/i915/gem: Remove per-client
stats from debugfs/i915_gem_objects")

v2 (Daniel Vetter):
 - Delete a struct drm_i915_file_private pre-declaration
 - Add a comment to the commit message about history

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c |  9 -
 drivers/gpu/drm/i915/gt/intel_gtt.h | 11 ---
 drivers/gpu/drm/i915/selftests/mock_gtt.c   |  1 -
 3 files changed, 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 7045e3afa7113..5a1402544d48d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1453,17 +1453,10 @@ static int gem_context_register(struct i915_gem_context 
*ctx,
u32 *id)
 {
struct drm_i915_private *i915 = ctx->i915;
-   struct i915_address_space *vm;
int ret;
 
ctx->file_priv = fpriv;
 
-   mutex_lock(&ctx->mutex);
-   vm = i915_gem_context_vm(ctx);
-   if (vm)
-   WRITE_ONCE(vm->file, fpriv); /* XXX */
-   mutex_unlock(&ctx->mutex);
-
ctx->pid = get_task_pid(current, PIDTYPE_PID);
snprintf(ctx->name, sizeof(ctx->name), "%s[%d]",
 current->comm, pid_nr(ctx->pid));
@@ -1562,8 +1555,6 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
 
-   ppgtt->vm.file = file_priv;
-
if (args->extensions) {
err = i915_user_extensions(u64_to_user_ptr(args->extensions),
   NULL, 0,
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h 
b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 9bd89f2a01ff1..bc7153018ebd5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -140,7 +140,6 @@ typedef u64 gen8_pte_t;
 
 enum i915_cache_level;
 
-struct drm_i915_file_private;
 struct drm_i915_gem_object;
 struct i915_fence_reg;
 struct i915_vma;
@@ -220,16 +219,6 @@ struct i915_address_space {
struct intel_gt *gt;
struct drm_i915_private *i915;
struct device *dma;
-   /*
-* Every address space belongs to a struct file - except for the global
-* GTT that is owned by the driver (and so @file is set to NULL). In
-* principle, no information should leak from one context to another
-* (or between files/processes etc) unless explicitly shared by the
-* owner. Tracking the owner is important in order to free up per-file
-* objects along with the file, to aide resource tracking, and to
-* assign blame.
-*/
-   struct drm_i915_file_private *file;
u64 total;  /* size addr space maps (ex. 2GB for ggtt) */
u64 reserved;   /* size addr space reserved */
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c 
b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index 5c7ae40bba634..cc047ec594f93 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -73,7 +73,6 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, 
const char *name)
ppgtt->vm.gt = &i915->gt;
ppgtt->vm.i915 = i915;
ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
-   ppgtt->vm.file = ERR_PTR(-ENODEV);
ppgtt->vm.dma = i915->drm.dev;
 
i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
-- 
2.31.1



[PATCH 24/30] drm/i915/gem: Delay context creation (v3)

2021-07-08 Thread Jason Ekstrand
The current context uAPI allows for two methods of setting context
parameters: SET_CONTEXT_PARAM and CONTEXT_CREATE_EXT_SETPARAM.  The
former is allowed to be called at any time while the later happens as
part of GEM_CONTEXT_CREATE.  Currently, everything settable via one is
settable via the other.  While some params are fairly simple and setting
them on a live context is harmless such as the context priority, others
are far trickier such as the VM or the set of engines.  In order to swap
out the VM, for instance, we have to delay until all current in-flight
work is complete, swap in the new VM, and then continue.  This leads to
a plethora of potential race conditions we'd really rather avoid.

In previous patches, we added a i915_gem_proto_context struct which is
capable of storing and tracking all such create parameters.  This commit
delays the creation of the actual context until after the client is done
configuring it with SET_CONTEXT_PARAM.  From the perspective of the
client, it has the same u32 context ID the whole time.  From the
perspective of i915, however, it's an i915_gem_proto_context right up
until the point where we attempt to do something which the proto-context
can't handle.  Then the real context gets created.

This is accomplished via a little xarray dance.  When GEM_CONTEXT_CREATE
is called, we create a proto-context, reserve a slot in context_xa but
leave it NULL, the proto-context in the corresponding slot in
proto_context_xa.  Then, whenever we go to look up a context, we first
check context_xa.  If it's there, we return the i915_gem_context and
we're done.  If it's not, we look in proto_context_xa and, if we find it
there, we create the actual context and kill the proto-context.

In order for this dance to work properly, everything which ever touches
a proto-context is guarded by drm_i915_file_private::proto_context_lock,
including context creation.  Yes, this means context creation now takes
a giant global lock but it can't really be helped and that should never
be on any driver's fast-path anyway.

v2 (Daniel Vetter):
 - Commit message grammatical fixes.
 - Use WARN_ON instead of GEM_BUG_ON
 - Rename lazy_create_context_locked to finalize_create_context_locked
 - Rework the control-flow logic in the setparam ioctl
 - Better documentation all around

v3 (kernel test robot):
 - Make finalize_create_context_locked static

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 203 ++
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |   3 +
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  54 +
 .../gpu/drm/i915/gem/selftests/mock_context.c |   5 +-
 drivers/gpu/drm/i915/i915_drv.h   |  76 +--
 5 files changed, 283 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5a1402544d48d..c4f89e4b1665f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -278,6 +278,42 @@ proto_context_create(struct drm_i915_private *i915, 
unsigned int flags)
return err;
 }
 
+static int proto_context_register_locked(struct drm_i915_file_private *fpriv,
+struct i915_gem_proto_context *pc,
+u32 *id)
+{
+   int ret;
+   void *old;
+
+   lockdep_assert_held(&fpriv->proto_context_lock);
+
+   ret = xa_alloc(&fpriv->context_xa, id, NULL, xa_limit_32b, GFP_KERNEL);
+   if (ret)
+   return ret;
+
+   old = xa_store(&fpriv->proto_context_xa, *id, pc, GFP_KERNEL);
+   if (xa_is_err(old)) {
+   xa_erase(&fpriv->context_xa, *id);
+   return xa_err(old);
+   }
+   WARN_ON(old);
+
+   return 0;
+}
+
+static int proto_context_register(struct drm_i915_file_private *fpriv,
+ struct i915_gem_proto_context *pc,
+ u32 *id)
+{
+   int ret;
+
+   mutex_lock(&fpriv->proto_context_lock);
+   ret = proto_context_register_locked(fpriv, pc, id);
+   mutex_unlock(&fpriv->proto_context_lock);
+
+   return ret;
+}
+
 static int set_proto_ctx_vm(struct drm_i915_file_private *fpriv,
struct i915_gem_proto_context *pc,
const struct drm_i915_gem_context_param *args)
@@ -1448,12 +1484,12 @@ void i915_gem_init__contexts(struct drm_i915_private 
*i915)
init_contexts(&i915->gem.contexts);
 }
 
-static int gem_context_register(struct i915_gem_context *ctx,
-   struct drm_i915_file_private *fpriv,
-   u32 *id)
+static void gem_context_register(struct i915_gem_context *ctx,
+struct drm_i915_file_private *fpriv,
+u32 id)
 {
struct drm_i915_private *i915 = ctx->i915;
-   int ret;
+ 

[PATCH 26/30] drm/i915/gem: Don't allow changing the engine set on running contexts (v3)

2021-07-08 Thread Jason Ekstrand
When the APIs were added to manage the engine set on a GEM context
directly from userspace, the questionable choice was made to allow
changing the engine set on a context at any time.  This is horribly racy
and there's absolutely no reason why any userspace would want to do this
outside of trying to exercise interesting race conditions.  By removing
support for CONTEXT_PARAM_ENGINES from ctx_setparam, we make it
impossible to change the engine set after the context has been fully
created.

This doesn't yet let us delete all the deferred engine clean-up code as
that's still used for handling the case where the client dies or calls
GEM_CONTEXT_DESTROY while work is in flight.  However, moving to an API
where the engine set is effectively immutable gives us more options to
potentially clean that code up a bit going forward.  It also removes a
whole class of ways in which a client can hurt itself or try to get
around kernel context banning.

v2 (Jason Ekstrand):
 - Expand the commit message

v3 (Jason Ekstrand):
 - Make it more obvious that I915_CONTEXT_PARAM_ENGINES returns -EINVAL

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 304 +---
 1 file changed, 1 insertion(+), 303 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 40acecfbbe5b5..5f5375b15c530 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1819,305 +1819,6 @@ static int set_sseu(struct i915_gem_context *ctx,
return ret;
 }
 
-struct set_engines {
-   struct i915_gem_context *ctx;
-   struct i915_gem_engines *engines;
-};
-
-static int
-set_engines__load_balance(struct i915_user_extension __user *base, void *data)
-{
-   struct i915_context_engines_load_balance __user *ext =
-   container_of_user(base, typeof(*ext), base);
-   const struct set_engines *set = data;
-   struct drm_i915_private *i915 = set->ctx->i915;
-   struct intel_engine_cs *stack[16];
-   struct intel_engine_cs **siblings;
-   struct intel_context *ce;
-   struct intel_sseu null_sseu = {};
-   u16 num_siblings, idx;
-   unsigned int n;
-   int err;
-
-   if (!HAS_EXECLISTS(i915))
-   return -ENODEV;
-
-   if (intel_uc_uses_guc_submission(&i915->gt.uc))
-   return -ENODEV; /* not implement yet */
-
-   if (get_user(idx, &ext->engine_index))
-   return -EFAULT;
-
-   if (idx >= set->engines->num_engines) {
-   drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
-   idx, set->engines->num_engines);
-   return -EINVAL;
-   }
-
-   idx = array_index_nospec(idx, set->engines->num_engines);
-   if (set->engines->engines[idx]) {
-   drm_dbg(&i915->drm,
-   "Invalid placement[%d], already occupied\n", idx);
-   return -EEXIST;
-   }
-
-   if (get_user(num_siblings, &ext->num_siblings))
-   return -EFAULT;
-
-   err = check_user_mbz(&ext->flags);
-   if (err)
-   return err;
-
-   err = check_user_mbz(&ext->mbz64);
-   if (err)
-   return err;
-
-   siblings = stack;
-   if (num_siblings > ARRAY_SIZE(stack)) {
-   siblings = kmalloc_array(num_siblings,
-sizeof(*siblings),
-GFP_KERNEL);
-   if (!siblings)
-   return -ENOMEM;
-   }
-
-   for (n = 0; n < num_siblings; n++) {
-   struct i915_engine_class_instance ci;
-
-   if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
-   err = -EFAULT;
-   goto out_siblings;
-   }
-
-   siblings[n] = intel_engine_lookup_user(i915,
-  ci.engine_class,
-  ci.engine_instance);
-   if (!siblings[n]) {
-   drm_dbg(&i915->drm,
-   "Invalid sibling[%d]: { class:%d, inst:%d }\n",
-   n, ci.engine_class, ci.engine_instance);
-   err = -EINVAL;
-   goto out_siblings;
-   }
-   }
-
-   ce = intel_execlists_create_virtual(siblings, n);
-   if (IS_ERR(ce)) {
-   err = PTR_ERR(ce);
-   goto out_siblings;
-   }
-
-   intel_context_set_gem(ce, set->ctx, null_sseu);
-
-   if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
-   intel_context_put(ce);
-   err = -EEXIST;
-   goto out_siblings;
-   }
-
-out_siblings:
-   if (siblings != stack)
-   kfree(siblings);
-
-   return err;
-}
-
-static int
-set_engin

[PATCH 25/30] drm/i915/gem: Don't allow changing the VM on running contexts (v4)

2021-07-08 Thread Jason Ekstrand
When the APIs were added to manage VMs more directly from userspace, the
questionable choice was made to allow changing out the VM on a context
at any time.  This is horribly racy and there's absolutely no reason why
any userspace would want to do this outside of testing that exact race.
By removing support for CONTEXT_PARAM_VM from ctx_setparam, we make it
impossible to change out the VM after the context has been fully
created.  This lets us delete a bunch of deferred task code as well as a
duplicated (and slightly different) copy of the code which programs the
PPGTT registers.

v2 (Jason Ekstrand):
 - Expand the commit message

v3 (Daniel Vetter):
 - Don't drop the __rcu on the vm pointer

v4 (Jason Ekstrand):
 - Make it more obvious that I915_CONTEXT_PARAM_VM returns -EINVAL

Signed-off-by: Jason Ekstrand 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 263 +-
 .../drm/i915/gem/selftests/i915_gem_context.c | 119 
 .../drm/i915/selftests/i915_mock_selftests.h  |   1 -
 3 files changed, 1 insertion(+), 382 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index c4f89e4b1665f..40acecfbbe5b5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1633,120 +1633,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, 
void *data,
return 0;
 }
 
-struct context_barrier_task {
-   struct i915_active base;
-   void (*task)(void *data);
-   void *data;
-};
-
-static void cb_retire(struct i915_active *base)
-{
-   struct context_barrier_task *cb = container_of(base, typeof(*cb), base);
-
-   if (cb->task)
-   cb->task(cb->data);
-
-   i915_active_fini(&cb->base);
-   kfree(cb);
-}
-
-I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault);
-static int context_barrier_task(struct i915_gem_context *ctx,
-   intel_engine_mask_t engines,
-   bool (*skip)(struct intel_context *ce, void 
*data),
-   int (*pin)(struct intel_context *ce, struct 
i915_gem_ww_ctx *ww, void *data),
-   int (*emit)(struct i915_request *rq, void 
*data),
-   void (*task)(void *data),
-   void *data)
-{
-   struct context_barrier_task *cb;
-   struct i915_gem_engines_iter it;
-   struct i915_gem_engines *e;
-   struct i915_gem_ww_ctx ww;
-   struct intel_context *ce;
-   int err = 0;
-
-   GEM_BUG_ON(!task);
-
-   cb = kmalloc(sizeof(*cb), GFP_KERNEL);
-   if (!cb)
-   return -ENOMEM;
-
-   i915_active_init(&cb->base, NULL, cb_retire, 0);
-   err = i915_active_acquire(&cb->base);
-   if (err) {
-   kfree(cb);
-   return err;
-   }
-
-   e = __context_engines_await(ctx, NULL);
-   if (!e) {
-   i915_active_release(&cb->base);
-   return -ENOENT;
-   }
-
-   for_each_gem_engine(ce, e, it) {
-   struct i915_request *rq;
-
-   if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
-  ce->engine->mask)) {
-   err = -ENXIO;
-   break;
-   }
-
-   if (!(ce->engine->mask & engines))
-   continue;
-
-   if (skip && skip(ce, data))
-   continue;
-
-   i915_gem_ww_ctx_init(&ww, true);
-retry:
-   err = intel_context_pin_ww(ce, &ww);
-   if (err)
-   goto err;
-
-   if (pin)
-   err = pin(ce, &ww, data);
-   if (err)
-   goto err_unpin;
-
-   rq = i915_request_create(ce);
-   if (IS_ERR(rq)) {
-   err = PTR_ERR(rq);
-   goto err_unpin;
-   }
-
-   err = 0;
-   if (emit)
-   err = emit(rq, data);
-   if (err == 0)
-   err = i915_active_add_request(&cb->base, rq);
-
-   i915_request_add(rq);
-err_unpin:
-   intel_context_unpin(ce);
-err:
-   if (err == -EDEADLK) {
-   err = i915_gem_ww_ctx_backoff(&ww);
-   if (!err)
-   goto retry;
-   }
-   i915_gem_ww_ctx_fini(&ww);
-
-   if (err)
-   break;
-   }
-   i915_sw_fence_complete(&e->fence);
-
-   cb->task = err ? NULL : task; /* caller needs to unwind instead */
-   cb->data = data;
-
-   i915_active_release(&cb->base);
-
-   return err;
-}
-
 static int get_ppgtt(struct drm_i915_file_private *file_priv,
 struct i915_gem_context *

  1   2   >