Re: [PATCH 1/7] drm/msm/dp: use dp_ctrl_off_link_stream during PHY compliance test run
Quoting Kuogee Hsieh (2021-07-06 10:20:14) > DP cable should always connect to DPU during the entire PHY compliance > testing run. Since DP PHY compliance test is executed at irq_hpd event > context, dp_ctrl_off_link_stream() should be used instead of dp_ctrl_off(). > dp_ctrl_off() is used for unplug event which is triggered when DP cable is > dis connected. > > Signed-off-by: Kuogee Hsieh > --- Is this Fixes: f21c8a276c2d ("drm/msm/dp: handle irq_hpd with sink_count = 0 correctly") or Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") ? It's not clear how dp_ctrl_off() was working for compliance tests before commit f21c8a276c2d. > drivers/gpu/drm/msm/dp/dp_ctrl.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c > b/drivers/gpu/drm/msm/dp/dp_ctrl.c > index caf71fa..27fb0f0 100644 > --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c > +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c > @@ -1530,7 +1530,7 @@ static int dp_ctrl_process_phy_test_request(struct > dp_ctrl_private *ctrl) > * running. Add the global reset just before disabling the > * link clocks and core clocks. > */ > - ret = dp_ctrl_off(&ctrl->dp_ctrl); > + ret = dp_ctrl_off_link_stream(&ctrl->dp_ctrl); > if (ret) { > DRM_ERROR("failed to disable DP controller\n"); > return ret;
[PATCH v2 0/2] Add support of HDMI for rk3568
Add a compatible and platform data to support HDMI for rk3568 SoC. version 2: - Add the clocks needed for the phy. Benjamin Gaignard (2): dt-bindings: display: rockchip: Add compatible for rk3568 HDMI drm/rockchip: dw_hdmi: add rk3568 support .../display/rockchip/rockchip,dw-hdmi.yaml| 6 +- drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 68 +++ 2 files changed, 73 insertions(+), 1 deletion(-) -- 2.25.1
[PATCH v2 2/2] drm/rockchip: dw_hdmi: add rk3568 support
Add a new dw_hdmi_plat_data struct and new compatible for rk3568. This version of the HDMI hardware block need two clocks to provide phy reference clock: hclk_vio and hclk. Signed-off-by: Benjamin Gaignard --- version 2: - Add the clocks needed for the phy. drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 68 + 1 file changed, 68 insertions(+) diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index 830bdd5e9b7ce..dc0e255e45745 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -50,6 +50,10 @@ #define RK3399_GRF_SOC_CON20 0x6250 #define RK3399_HDMI_LCDC_SEL BIT(6) +#define RK3568_GRF_VO_CON1 0x0364 +#define RK3568_HDMI_SDAIN_MSK BIT(15) +#define RK3568_HDMI_SCLIN_MSK BIT(14) + #define HIWORD_UPDATE(val, mask) (val | (mask) << 16) /** @@ -71,6 +75,8 @@ struct rockchip_hdmi { const struct rockchip_hdmi_chip_data *chip_data; struct clk *vpll_clk; struct clk *grf_clk; + struct clk *hclk_vio; + struct clk *hclk_vop; struct dw_hdmi *hdmi; struct phy *phy; }; @@ -216,6 +222,26 @@ static int rockchip_hdmi_parse_dt(struct rockchip_hdmi *hdmi) return PTR_ERR(hdmi->grf_clk); } + hdmi->hclk_vio = devm_clk_get(hdmi->dev, "hclk_vio"); + if (PTR_ERR(hdmi->hclk_vio) == -ENOENT) { + hdmi->hclk_vio = NULL; + } else if (PTR_ERR(hdmi->hclk_vio) == -EPROBE_DEFER) { + return -EPROBE_DEFER; + } else if (IS_ERR(hdmi->hclk_vio)) { + dev_err(hdmi->dev, "failed to get hclk_vio clock\n"); + return PTR_ERR(hdmi->hclk_vio); + } + + hdmi->hclk_vop = devm_clk_get(hdmi->dev, "hclk"); + if (PTR_ERR(hdmi->hclk_vop) == -ENOENT) { + hdmi->hclk_vop = NULL; + } else if (PTR_ERR(hdmi->hclk_vop) == -EPROBE_DEFER) { + return -EPROBE_DEFER; + } else if (IS_ERR(hdmi->hclk_vop)) { + dev_err(hdmi->dev, "failed to get hclk_vop clock\n"); + return PTR_ERR(hdmi->hclk_vop); + } + return 0; } @@ -467,6 +493,19 @@ static const struct dw_hdmi_plat_data rk3399_hdmi_drv_data = { .use_drm_infoframe = true, }; 
+static struct rockchip_hdmi_chip_data rk3568_chip_data = { + .lcdsel_grf_reg = -1, +}; + +static const struct dw_hdmi_plat_data rk3568_hdmi_drv_data = { + .mode_valid = dw_hdmi_rockchip_mode_valid, + .mpll_cfg = rockchip_mpll_cfg, + .cur_ctr= rockchip_cur_ctr, + .phy_config = rockchip_phy_config, + .phy_data = &rk3568_chip_data, + .use_drm_infoframe = true, +}; + static const struct of_device_id dw_hdmi_rockchip_dt_ids[] = { { .compatible = "rockchip,rk3228-dw-hdmi", .data = &rk3228_hdmi_drv_data @@ -480,6 +519,9 @@ static const struct of_device_id dw_hdmi_rockchip_dt_ids[] = { { .compatible = "rockchip,rk3399-dw-hdmi", .data = &rk3399_hdmi_drv_data }, + { .compatible = "rockchip,rk3568-dw-hdmi", + .data = &rk3568_hdmi_drv_data + }, {}, }; MODULE_DEVICE_TABLE(of, dw_hdmi_rockchip_dt_ids); @@ -536,6 +578,28 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, return ret; } + ret = clk_prepare_enable(hdmi->hclk_vio); + if (ret) { + dev_err(hdmi->dev, "Failed to enable HDMI hclk_vio: %d\n", + ret); + return ret; + } + + ret = clk_prepare_enable(hdmi->hclk_vop); + if (ret) { + dev_err(hdmi->dev, "Failed to enable HDMI hclk_vop: %d\n", + ret); + return ret; + } + + if (hdmi->chip_data == &rk3568_chip_data) { + regmap_write(hdmi->regmap, RK3568_GRF_VO_CON1, +HIWORD_UPDATE(RK3568_HDMI_SDAIN_MSK | + RK3568_HDMI_SCLIN_MSK, + RK3568_HDMI_SDAIN_MSK | + RK3568_HDMI_SCLIN_MSK)); + } + hdmi->phy = devm_phy_optional_get(dev, "hdmi"); if (IS_ERR(hdmi->phy)) { ret = PTR_ERR(hdmi->phy); @@ -559,6 +623,8 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, ret = PTR_ERR(hdmi->hdmi); drm_encoder_cleanup(encoder); clk_disable_unprepare(hdmi->vpll_clk); + clk_disable_unprepare(hdmi->hclk_vio); + clk_disable_unprepare(hdmi->hclk_vop); } return ret; @@ -571,6 +637,8 @@ static void dw_hdmi_rockchip_unbind(struct device *dev, struct device *master, dw_hdmi_unbind(hdmi->hdmi); clk_disable_unprepare(hdmi->vpll_clk); + 
clk_disable_unprepare(hdmi->hclk_vio); +
Oops in qxl_bo_move_notify()
Hi I'm getting this oops (on commit a180bd1d7e16): [ 17.711520] BUG: kernel NULL pointer dereference, address: 0010 [ 17.739451] RIP: 0010:qxl_bo_move_notify+0x35/0x80 [qxl] [ 17.827345] RSP: 0018:c9457c08 EFLAGS: 00010286 [ 17.827350] RAX: 0001 RBX: RCX: dc00 [ 17.827353] RDX: 0007 RSI: 0004 RDI: 85596feb [ 17.827356] RBP: 88800e311c00 R08: R09: [ 17.827358] R10: 8697b243 R11: fbfff0d2f648 R12: [ 17.827361] R13: 88800e311e48 R14: 88800e311e98 R15: 88800e311e90 [ 17.827364] FS: () GS:88805d80() knlGS: [ 17.861699] CS: 0010 DS: ES: CR0: 80050033 [ 17.861703] CR2: 0010 CR3: 2642c000 CR4: 00350ee0 [ 17.861707] Call Trace: [ 17.861712] ttm_bo_cleanup_memtype_use+0x4d/0xb0 [ttm] [ 17.861730] ttm_bo_release+0x42d/0x7c0 [ttm] [ 17.861746] ? ttm_bo_cleanup_refs+0x127/0x420 [ttm] [ 17.888300] ttm_bo_delayed_delete+0x289/0x390 [ttm] [ 17.888317] ? ttm_bo_cleanup_refs+0x420/0x420 [ttm] [ 17.888332] ? lock_release+0x9c/0x5c0 [ 17.901033] ? rcu_read_lock_held_common+0x1a/0x50 [ 17.905183] ttm_device_delayed_workqueue+0x18/0x50 [ttm] [ 17.909371] process_one_work+0x537/0x9f0 [ 17.913345] ? pwq_dec_nr_in_flight+0x160/0x160 [ 17.917297] ? lock_acquired+0xa4/0x580 [ 17.921168] ? worker_thread+0x169/0x600 [ 17.925034] worker_thread+0x7a/0x600 [ 17.928657] ? process_one_work+0x9f0/0x9f0 [ 17.932360] kthread+0x200/0x230 [ 17.935930] ? set_kthread_struct+0x80/0x80 [ 17.939593] ret_from_fork+0x22/0x30 [ 17.951737] CR2: 0010 [ 17.955496] ---[ end trace e30cc21c24e81ee5 ]--- I had a look at the code, and it seems that this is caused by trying to use bo->resource which is NULL. bo->resource is freed by ttm_bo_cleanup_refs() -> ttm_bo_cleanup_memtype_use() -> ttm_resource_free(). And then a notification is issued by ttm_bo_cleanup_refs() -> ttm_bo_put() -> ttm_bo_release() -> ttm_bo_cleanup_memtype_use(), this time with bo->release equal to NULL. I was thinking a proper way to fix this. Checking that bo->release is not NULL in qxl_bo_move_notify() would solve the issue. 
But maybe there is a better way, like avoiding that ttm_bo_cleanup_memtype_use() is called twice. Which way would be preferable? Thanks Roberto HUAWEI TECHNOLOGIES Duesseldorf GmbH, HRB 56063 Managing Director: Li Peng, Li Jian, Shi Yanli
Re: [PATCH v5 0/5] iommu/arm-smmu: adreno-smmu page fault handling
On Tue, Jul 6, 2021 at 10:12 PM John Stultz wrote: > > On Sun, Jul 4, 2021 at 11:16 AM Rob Clark wrote: > > > > I suspect you are getting a dpu fault, and need: > > > > https://lore.kernel.org/linux-arm-msm/CAF6AEGvTjTUQXqom-xhdh456tdLscbVFPQ+iud1H1gHc8A2=h...@mail.gmail.com/ > > > > I suppose Bjorn was expecting me to send that patch > > If it's helpful, I applied that and it got the db845c booting mainline > again for me (along with some reverts for a separate ext4 shrinker > crash). > Tested-by: John Stultz > Thanks, I'll send a patch shortly BR, -R
[PATCH 1/2] drivers/gpu/drm/i915/gt/intel_engine_cs.c: Repair typo in function name
Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/gt/intel_engine_cs.c:882: warning: expecting prototype for intel_engines_init_common(). Prototype was for engine_init_common() instead drivers/gpu/drm/i915/gt/intel_engine_cs.c:959: warning: expecting prototype for intel_engines_cleanup_common(). Prototype was for intel_engine_cleanup_common() instead Signed-off-by: zhaoxiao --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7f03df236613..01b4dc041a72 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -868,7 +868,7 @@ create_kernel_context(struct intel_engine_cs *engine) } /** - * intel_engines_init_common - initialize cengine state which might require hw access + * engine_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. * * Initializes @engine@ structure members shared between legacy and execlists @@ -949,7 +949,7 @@ int intel_engines_init(struct intel_gt *gt) } /** - * intel_engines_cleanup_common - cleans up the engine state created by + * intel_engine_cleanup_common - cleans up the engine state created by *the common initiailizers. * @engine: Engine to cleanup. * -- 2.20.1
[PATCH 2/2] drivers/gpu/drm/i915/display/intel_display_power.c: Repair typo in function name
Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/display/intel_display_power.c:2300: warning: expecting prototype for intel_display_power_put_async(). Prototype was for __intel_display_power_put_async() instead Signed-off-by: zhaoxiao --- drivers/gpu/drm/i915/display/intel_display_power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 4298ae684d7d..c37e14f2df90 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -2285,7 +2285,7 @@ intel_display_power_put_async_work(struct work_struct *work) } /** - * intel_display_power_put_async - release a power domain reference asynchronously + * __intel_display_power_put_async - release a power domain reference asynchronously * @i915: i915 device instance * @domain: power domain to reference * @wakeref: wakeref acquired for the reference that is being released -- 2.20.1
[PATCH v2 1/2] dt-bindings: display: rockchip: Add compatible for rk3568 HDMI
Define a new compatible for rk3568 HDMI. This version of HDMI hardware block needs two new clocks hclk_vio and hclk to provide phy reference clocks. Signed-off-by: Benjamin Gaignard --- version 2: - Add the clocks needed for the phy. .../bindings/display/rockchip/rockchip,dw-hdmi.yaml | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml index 75cd9c686e985..cb8643b3a8b84 100644 --- a/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml @@ -23,6 +23,7 @@ properties: - rockchip,rk3288-dw-hdmi - rockchip,rk3328-dw-hdmi - rockchip,rk3399-dw-hdmi + - rockchip,rk3568-dw-hdmi reg-io-width: const: 4 @@ -51,8 +52,11 @@ properties: - vpll - enum: - grf + - hclk_vio + - vpll + - enum: + - hclk - vpll - - const: vpll ddc-i2c-bus: $ref: /schemas/types.yaml#/definitions/phandle -- 2.25.1
[PATCH] gpu: ttm: fix GPF in ttm_bo_release
My local syzbot instance hit GPF in ttm_bo_release(). Unfortunately, syzbot didn't produce a reproducer for this, but I found out possible scenario: drm_gem_vram_create()<-- drm_gem_vram_object kzalloced (bo embedded in this object) ttm_bo_init() ttm_bo_init_reserved() ttm_resource_alloc() man->func->alloc() <-- allocation failure ttm_bo_put() ttm_bo_release() ttm_mem_io_free() <-- bo->resource == NULL passed as second argument *GPF* So, I've added check in ttm_bo_release() to avoid passing NULL as second argument to ttm_mem_io_free(). Fail log: KASAN: null-ptr-deref in range [0x0020-0x0027] CPU: 1 PID: 10419 Comm: syz-executor.3 Not tainted 5.13.0-rc7-next-20210625 #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014 RIP: 0010:ttm_mem_io_free+0x28/0x170 drivers/gpu/drm/ttm/ttm_bo_util.c:66 Code: b1 90 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 e8 cd 19 24 fd 4c 8d 6b 20 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 2a 01 00 00 4c 8b 63 20 31 ff 4c 89 e6 e8 00 1f RSP: 0018:c900141df968 EFLAGS: 00010202 RAX: dc00 RBX: RCX: c90010da RDX: 0004 RSI: 84513ea3 RDI: 888041fbc010 RBP: 888041fbc010 R08: R09: R10: 0001 R11: R12: R13: 0020 R14: 88806b258800 R15: 88806b258a38 FS: 7fa6e9845640() GS:88807ec0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 7fad61265e18 CR3: 5ad79000 CR4: 00350ee0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 Call Trace: ttm_bo_release+0xd94/0x10a0 drivers/gpu/drm/ttm/ttm_bo.c:422 kref_put include/linux/kref.h:65 [inline] ttm_bo_put drivers/gpu/drm/ttm/ttm_bo.c:470 [inline] ttm_bo_init_reserved+0x7cb/0x960 drivers/gpu/drm/ttm/ttm_bo.c:1050 ttm_bo_init+0x105/0x270 drivers/gpu/drm/ttm/ttm_bo.c:1074 drm_gem_vram_create+0x332/0x4c0 drivers/gpu/drm/drm_gem_vram_helper.c:228 Fixes: d3116756a710 ("drm/ttm: rename bo->mem and make it a pointer") Signed-off-by: Pavel Skripkin --- drivers/gpu/drm/ttm/ttm_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 1b950b45cf4b..15eb97459eab 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -419,7 +419,8 @@ static void ttm_bo_release(struct kref *kref) bo->bdev->funcs->release_notify(bo); drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node); - ttm_mem_io_free(bdev, bo->resource); + if (bo->resource) + ttm_mem_io_free(bdev, bo->resource); } if (!dma_resv_test_signaled(bo->base.resv, true) || -- 2.32.0
Re: [PATCH v2 1/2] drm/i915/opregion: add support for mailbox #5 EDID
On 6/1/21 5:43 PM, Anisse Astier wrote: Le Tue, Jun 01, 2021 at 06:50:24PM +0300, Ville Syrj?l? a ?crit : On Mon, May 31, 2021 at 10:46:41PM +0200, Anisse Astier wrote: The ACPI OpRegion Mailbox #5 ASLE extension may contain an EDID to be used for the embedded display. Add support for using it via by adding the EDID to the list of available modes on the connector, and use it for eDP when available. If a panel's EDID is broken, there may be an override EDID set in the ACPI OpRegion mailbox #5. Use it if available. Looks like Windows uses the ACPI _DDC method instead. We should probably do the same, just in case some crazy machine stores the EDID somewhere else. Thanks, I wouldn't have thought of this. It seems Daniel Dadap did a patch series to do just that, in a generic way: https://lore.kernel.org/amd-gfx/20200727205357.27839-1-dda...@nvidia.com/ I've tried patch 1 & 2, and after a fix[1] was able to call the _DDC method on most devices, but without any EDID being returned. I looked at the disassembled ACPI tables[2], and could not find any device with the _DDC method. Are you sure it's the only method the Windows driver uses to get the EDID ? _DDC only works on devices that actually implement it, and the vast majority of devices don't, because the display just provides an EDID normally. AIUI, usually a device will implement _DDC either because an embedded panel has no ROM of its own to deliver an EDID, or to allow the EDID to be read by either GPU on a system with a muxed display, regardless of which GPU happens to have the DDC lines (in TMDS) or DP AUX routed to it at the moment. (To my knowledge, nobody actually muxes DP AUX independently from the main link, but there were some older pre-DP designs where DDC could be muxed independently.) I'm not sure whether the comment about Windows using _DDC was meant for this device in particular, or just more generally, since DDC is part of the ACPI spec and some Windows GPU drivers *do* use it, where available. 
If it was meant for a particular device, then it's possible that the ACPI tables advertise different methods depending on e.g. _OSI. If you haven't already tried doing so, it might be worth overriding _OSI to spoof Windows, to see if _DDC gets advertised. I'm not sure how you were able to call _DDC without an EDID being returned as described above, if there was no _DDC method in the ACPI tables; I would expect that attempting to call _DDC would fail to locate a suitable method and do_acpi_ddc would return NULL. Regards, Anisse [1] _DOD ids should only use 16 lower bits, see table here: https://uefi.org/specs/ACPI/6.4/Apx_B_Video_Extensions/display-specific-methods.html#dod-enumerate-all-devices-attached-to-the-display-adapter Thanks; I don't see a version of your modified patch here, was the fix just to mask the _DOD IDs against 0x? [2] acpidump: https://gitlab.freedesktop.org/drm/intel/-/issues/3454#note_913970
Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init
On Thu, Jul 8, 2021 at 8:56 AM Christian König wrote: > > Am 07.07.21 um 18:32 schrieb Daniel Vetter: > > On Wed, Jul 7, 2021 at 2:58 PM Christian König > > wrote: > >> Am 07.07.21 um 14:13 schrieb Daniel Vetter: > >>> On Wed, Jul 7, 2021 at 1:57 PM Christian König > >>> wrote: > Am 07.07.21 um 13:14 schrieb Daniel Vetter: > > On Wed, Jul 7, 2021 at 11:30 AM Christian König > > wrote: > >> Am 02.07.21 um 23:38 schrieb Daniel Vetter: > >>> This is a very confusingly named function, because not just does it > >>> init an object, it arms it and provides a point of no return for > >>> pushing a job into the scheduler. It would be nice if that's a bit > >>> clearer in the interface. > >>> > >>> But the real reason is that I want to push the dependency tracking > >>> helpers into the scheduler code, and that means drm_sched_job_init > >>> must be called a lot earlier, without arming the job. > >>> > >>> v2: > >>> - don't change .gitignore (Steven) > >>> - don't forget v3d (Emma) > >>> > >>> v3: Emma noticed that I leak the memory allocated in > >>> drm_sched_job_init if we bail out before the point of no return in > >>> subsequent driver patches. To be able to fix this change > >>> drm_sched_job_cleanup() so it can handle being called both before and > >>> after drm_sched_job_arm(). > >> Thinking more about this, I'm not sure if this really works. > >> > >> See drm_sched_job_init() was also calling drm_sched_entity_select_rq() > >> to update the entity->rq association. > >> > >> And that can only be done later on when we arm the fence as well. > > Hm yeah, but that's a bug in the existing code I think: We already > > fail to clean up if we fail to allocate the fences. So I think the > > right thing to do here is to split the checks into job_init, and do > > the actual arming/rq selection in job_arm? 
I'm not entirely sure > > what's all going on there, the first check looks a bit like trying to > > schedule before the entity is set up, which is a driver bug and should > > have a WARN_ON? > No you misunderstood me, the problem is something else. > > You asked previously why the call to drm_sched_job_init() was so late in > the CS. > > The reason for this was not alone the scheduler fence init, but also the > call to drm_sched_entity_select_rq(). > >>> Ah ok, I think I can fix that. Needs a prep patch to first make > >>> drm_sched_entity_select infallible, then should be easy to do. > >>> > > The 2nd check around last_scheduled I have honeslty no idea what it's > > even trying to do. > You mean that here? > > fence = READ_ONCE(entity->last_scheduled); > if (fence && !dma_fence_is_signaled(fence)) > return; > > This makes sure that load balancing is not moving the entity to a > different scheduler while there are still jobs running from this entity > on the hardware, > >>> Yeah after a nap that idea crossed my mind too. But now I have locking > >>> questions, afaiui the scheduler thread updates this, without taking > >>> any locks - entity dequeuing is lockless. And here we read the fence > >>> and then seem to yolo check whether it's signalled? What's preventing > >>> a use-after-free here? There's no rcu or anything going on here at > >>> all, and it's outside of the spinlock section, which starts a bit > >>> further down. > >> The last_scheduled fence of an entity can only change when there are > >> jobs on the entities queued, and we have just ruled that out in the > >> check before. > > There aren't any barriers, so the cpu could easily run the two checks > > the other way round. I'll ponder this and figure out where exactly we > > need docs for the constraint and/or barriers to make this work as > > intended. As-is I'm not seeing how it does ... > > spsc_queue_count() provides the necessary barrier with the atomic_read(). 
atomic_t is fully unordered, except when it's a read-modify-write atomic op, then it's a full barrier. So yeah you need more here. But also since you only need a read barrier on one side, and a write barrier on the other, you don't actually need a cpu barriers on x86. And READ_ONCE gives you the compiler barrier on one side at least, I haven't found it on the writer side yet. > But yes a comment would be really nice here. I had to think for a while > why we don't need this as well. I'm typing a patch, which after a night's sleep I realized has the wrong barriers. And now I'm also typing some doc improvements for drm_sched_entity and related functions. > > Christian. > > > -Daniel > > > >> Christian. > >> > >> > >>> -Daniel > >>> > Regards > Christian. > > > -Daniel > > > >> Christian. > >> > >>> Also improve the kerneldoc for this. > >>> > >>> Acked-by: Steven Price (v2) > >
Re: [PATCH 5/7] drm/msm/dp: return correct edid checksum after corrupted edid checksum read
Quoting Kuogee Hsieh (2021-07-06 10:20:18) > Response with correct edid checksum saved at connector after corrupted edid > checksum read. This fixes Link Layer CTS cases 4.2.2.3, 4.2.2.6. > > Signed-off-by: Kuogee Hsieh > --- > drivers/gpu/drm/msm/dp/dp_panel.c | 9 +++-- > 1 file changed, 7 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c > b/drivers/gpu/drm/msm/dp/dp_panel.c > index 88196f7..0fdb551 100644 > --- a/drivers/gpu/drm/msm/dp/dp_panel.c > +++ b/drivers/gpu/drm/msm/dp/dp_panel.c > @@ -271,7 +271,7 @@ static u8 dp_panel_get_edid_checksum(struct edid *edid) > { > struct edid *last_block; > u8 *raw_edid; > - bool is_edid_corrupt; > + bool is_edid_corrupt = false; > > if (!edid) { > DRM_ERROR("invalid edid input\n"); > @@ -303,7 +303,12 @@ void dp_panel_handle_sink_request(struct dp_panel > *dp_panel) > panel = container_of(dp_panel, struct dp_panel_private, dp_panel); > > if (panel->link->sink_request & DP_TEST_LINK_EDID_READ) { > - u8 checksum = dp_panel_get_edid_checksum(dp_panel->edid); > + u8 checksum; > + > + if (dp_panel->edid) > + checksum = dp_panel_get_edid_checksum(dp_panel->edid); > + else > + checksum = dp_panel->connector->real_edid_checksum; > > dp_link_send_edid_checksum(panel->link, checksum); It looks like this can be drm_dp_send_real_edid_checksum()? Then we don't have to look at the connector internals sometimes and can drop dp_panel_get_edid_checksum() entirely? > dp_link_send_test_response(panel->link);
Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init
On Thu, Jul 8, 2021 at 9:09 AM Daniel Vetter wrote: > On Thu, Jul 8, 2021 at 8:56 AM Christian König > wrote: > > Am 07.07.21 um 18:32 schrieb Daniel Vetter: > > > On Wed, Jul 7, 2021 at 2:58 PM Christian König > > > wrote: > > >> Am 07.07.21 um 14:13 schrieb Daniel Vetter: > > >>> On Wed, Jul 7, 2021 at 1:57 PM Christian König > > >>> wrote: > > Am 07.07.21 um 13:14 schrieb Daniel Vetter: > > > On Wed, Jul 7, 2021 at 11:30 AM Christian König > > > wrote: > > >> Am 02.07.21 um 23:38 schrieb Daniel Vetter: > > >>> This is a very confusingly named function, because not just does it > > >>> init an object, it arms it and provides a point of no return for > > >>> pushing a job into the scheduler. It would be nice if that's a bit > > >>> clearer in the interface. > > >>> > > >>> But the real reason is that I want to push the dependency tracking > > >>> helpers into the scheduler code, and that means drm_sched_job_init > > >>> must be called a lot earlier, without arming the job. > > >>> > > >>> v2: > > >>> - don't change .gitignore (Steven) > > >>> - don't forget v3d (Emma) > > >>> > > >>> v3: Emma noticed that I leak the memory allocated in > > >>> drm_sched_job_init if we bail out before the point of no return in > > >>> subsequent driver patches. To be able to fix this change > > >>> drm_sched_job_cleanup() so it can handle being called both before > > >>> and > > >>> after drm_sched_job_arm(). > > >> Thinking more about this, I'm not sure if this really works. > > >> > > >> See drm_sched_job_init() was also calling > > >> drm_sched_entity_select_rq() > > >> to update the entity->rq association. > > >> > > >> And that can only be done later on when we arm the fence as well. > > > Hm yeah, but that's a bug in the existing code I think: We already > > > fail to clean up if we fail to allocate the fences. So I think the > > > right thing to do here is to split the checks into job_init, and do > > > the actual arming/rq selection in job_arm? 
I'm not entirely sure > > > what's all going on there, the first check looks a bit like trying to > > > schedule before the entity is set up, which is a driver bug and should > > > have a WARN_ON? > > No you misunderstood me, the problem is something else. > > > > You asked previously why the call to drm_sched_job_init() was so late > > in > > the CS. > > > > The reason for this was not alone the scheduler fence init, but also > > the > > call to drm_sched_entity_select_rq(). > > >>> Ah ok, I think I can fix that. Needs a prep patch to first make > > >>> drm_sched_entity_select infallible, then should be easy to do. > > >>> > > > The 2nd check around last_scheduled I have honeslty no idea what it's > > > even trying to do. > > You mean that here? > > > > fence = READ_ONCE(entity->last_scheduled); > > if (fence && !dma_fence_is_signaled(fence)) > > return; > > > > This makes sure that load balancing is not moving the entity to a > > different scheduler while there are still jobs running from this entity > > on the hardware, > > >>> Yeah after a nap that idea crossed my mind too. But now I have locking > > >>> questions, afaiui the scheduler thread updates this, without taking > > >>> any locks - entity dequeuing is lockless. And here we read the fence > > >>> and then seem to yolo check whether it's signalled? What's preventing > > >>> a use-after-free here? There's no rcu or anything going on here at > > >>> all, and it's outside of the spinlock section, which starts a bit > > >>> further down. > > >> The last_scheduled fence of an entity can only change when there are > > >> jobs on the entities queued, and we have just ruled that out in the > > >> check before. > > > There aren't any barriers, so the cpu could easily run the two checks > > > the other way round. I'll ponder this and figure out where exactly we > > > need docs for the constraint and/or barriers to make this work as > > > intended. As-is I'm not seeing how it does ... 
> > > > spsc_queue_count() provides the necessary barrier with the atomic_read(). > > atomic_t is fully unordered, except when it's a read-modify-write Wasn't awake yet, I think the rule is read-modify-write and return previous value gives you full barrier. So stuff like cmpxchg, but also a few others. See atomic_t.txt under ORDERING heading (yes that maintainer refuses to accept .rst so I can't just link you to the right section, it's silly). get/set and even RMW atomic ops that don't return anything are all fully unordered. -Daniel > atomic op, then it's a full barrier. So yeah you need more here. But > also since you only need a read barrier on one side, and a write > barrier on the other, you don't actually need a cpu barriers on x86. > And REA
Re: [PATCH 7/7] drm/msm/dp: retrain link when loss of symbol lock detected
Quoting Kuogee Hsieh (2021-07-06 10:20:20) > Main link symbol locked is achieved at end of link training 2. Some > dongle main link symbol may become unlocked again if host did not end > link training soon enough after completion of link training 2. Host > have to re train main link if loss of symbol lock detected before > end link training so that the coming video stream can be transmitted > to sink properly. > > Signed-off-by: Kuogee Hsieh I guess this is a fix for the original driver, so it should be tagged with Fixes appropriately. > --- > drivers/gpu/drm/msm/dp/dp_ctrl.c | 34 ++ > 1 file changed, 34 insertions(+) > > diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c > b/drivers/gpu/drm/msm/dp/dp_ctrl.c > index 0cb01a9..e616ab2 100644 > --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c > +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c > @@ -1661,6 +1661,25 @@ static bool dp_ctrl_any_lane_cr_lose(struct > dp_ctrl_private *ctrl, > return false; > } > > +static bool dp_ctrl_loss_symbol_lock(struct dp_ctrl_private *ctrl) > +{ > + u8 link_status[6]; Can we use link_status[DP_LINK_STATUS_SIZE] instead? > + u8 status; > + int i; > + int lane = ctrl->link->link_params.num_lanes; s/lane/num_lanes/ would make the code easier to read > + > + dp_ctrl_read_link_status(ctrl, link_status); > + > + for (i = 0; i < lane; i++) { > + status = link_status[i / 2]; > + status >>= ((i % 2) * 4); > + if (!(status & DP_LANE_SYMBOL_LOCKED)) > + return true; > + } > + > + return false; > +} > + > int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) > { > int rc = 0; > @@ -1777,6 +1796,17 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) > return rc; > } > > +static int dp_ctrl_link_retrain(struct dp_ctrl_private *ctrl) > +{ > + int ret = 0; Please drop init of ret. > + u8 cr_status[2]; > + int training_step = DP_TRAINING_NONE; > + > + ret = dp_ctrl_setup_main_link(ctrl, cr_status, &training_step); as it is assigned here. > + > + return ret; And indeed, it could be 'return dp_ctrl_setup_main_link()' instead. 
> +} > + > int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl) > { > int ret = 0; > @@ -1802,6 +1832,10 @@ int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl) > } > } > > + /* if loss symbol lock happen, then retaining the link */ retain or retrain? The comment seems to be saying what the code says "if loss retrain", so the comment is not very useful. > + if (dp_ctrl_loss_symbol_lock(ctrl)) > + dp_ctrl_link_retrain(ctrl); > + > /* stop txing train pattern to end link training */ > dp_ctrl_clear_training_pattern(ctrl); >
Re: BUG: MTK DRM/HDMI broken on 5.13 (mt7623/bpi-r2)
Hi Frank, On 06.07.21 11:54, Frank Wunderlich wrote: Hi, i've noticed that HDMI is broken at least on my board (Bananapi-r2,mt7623) on 5.13. after some research i noticed that it is working till commit 2e477391522354e763aa62ee3e281c1ad9e8eb1b Author: Dafna Hirschfeld Date: Tue Mar 30 13:09:02 2021 +0200 drm/mediatek: Don't support hdmi connector creation which is the last of mtk-drm-next-5.13 [1] so i guess a problem with core-patches dmesg shows the following: [7.071342] mediatek-drm mediatek-drm.1.auto: bound 14007000.ovl (ops mtk_dis p_ovl_component_ops) [7.080330] mediatek-drm mediatek-drm.1.auto: bound 14008000.rdma (ops mtk_di sp_rdma_component_ops) [7.089429] mediatek-drm mediatek-drm.1.auto: bound 1400b000.color (ops mtk_d isp_color_component_ops) [7.098689] mediatek-drm mediatek-drm.1.auto: bound 14012000.rdma (ops mtk_di sp_rdma_component_ops) [7.107814] mediatek-drm mediatek-drm.1.auto: bound 14014000.dpi (ops mtk_dpi _component_ops) [7.116338] mediatek-drm mediatek-drm.1.auto: Not creating crtc 1 because com ponent 9 is disabled or missing [ 38.403957] Console: switching to colour frame buffer device 160x64 [ 48.516398] [drm:drm_crtc_commit_wait] *ERROR* flip_done timed out [ 48.516422] [drm:drm_atomic_helper_wait_for_dependencies] *ERROR* [CRTC:41:cr tc-0] commit wait timed out [ 58.756384] [drm:drm_crtc_commit_wait] *ERROR* flip_done timed out [ 58.756399] [drm:drm_atomic_helper_wait_for_dependencies] *ERROR* [CONNECTOR: 32:HDMI-A-1] commit wait timed out [ 68.996384] [drm:drm_crtc_commit_wait] *ERROR* flip_done timed out [ 68.996399] [drm:drm_atomic_helper_wait_for_dependencies] *ERROR* [PLANE:33:p lane-0] commit wait timed out [ 68.996423] [drm:mtk_drm_crtc_atomic_begin] *ERROR* new event while there is still a pending event [ 69.106385] [ cut here ] [ 69.106392] WARNING: CPU: 2 PID: 7 at drivers/gpu/drm/drm_atomic_helper.c:151 1 drm_atomic_helper_wait_for_vblanks.part.0+0x2a0/0x2a8 [ 69.106414] [CRTC:41:crtc-0] vblank wait timed out We also 
encountered that warning on mt8173 device - Acer Chromebook R13. It happens after resuming from suspend to ram. We could not find a version that works and we were not able to find the fix of the bug. It seems like the irq isr is not called after resuming from suspend. Please share if you have new findings regarding that bug. Thanks, Dafna so i guess the breaking commit may be this: $ git log --oneline -S"drm_crtc_commit_wait" -- drivers/gpu/drm/ b99c2c95412c 2021-01-11 drm: Introduce a drm_crtc_commit_wait helper in drivers/gpu/drm/drm_atomic{,_helper}.c but i cannot confirm it because my git bisect does strange things (after defining 5.13 as bad and the 2e4773915223 as good, second step is before the good commit till the end, last steps are 5.11...). sorry, i'm still new to bisect. the fix is targeting 5.12-rc2, i guess because CK Hu's tree is based on this...but the fix was not included in 5.12-rc2 (only after 5.12.0...got it by merging 5.12.14) maybe you can help me? regards Frank [1] https://git.kernel.org/pub/scm/linux/kernel/git/chunkuang.hu/linux.git/log/?h=mediatek-drm-next-5.13 ___ Linux-mediatek mailing list linux-media...@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-mediatek
Re: [PATCH 2/7] drm/msm/dp: reduce link rate if failed at link training 1
Quoting Kuogee Hsieh (2021-07-06 10:20:15) > Reduce link rate and re start link training if link training 1 > failed due to loss of clock recovery done to fix Link Layer > CTS case 4.3.1.7. Also only update voltage and pre-emphasis > swing level after link training started to fix Link Layer CTS > case 4.3.1.6. > > Signed-off-by: Kuogee Hsieh > --- > drivers/gpu/drm/msm/dp/dp_ctrl.c | 86 > ++-- > 1 file changed, 56 insertions(+), 30 deletions(-) > > diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c > b/drivers/gpu/drm/msm/dp/dp_ctrl.c > index 27fb0f0..6f8443d 100644 > --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c > +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c > @@ -83,13 +83,6 @@ struct dp_ctrl_private { > struct completion video_comp; > }; > > -struct dp_cr_status { > - u8 lane_0_1; > - u8 lane_2_3; > -}; > - > -#define DP_LANE0_1_CR_DONE 0x11 > - > static int dp_aux_link_configure(struct drm_dp_aux *aux, > struct dp_link_info *link) > { > @@ -1080,7 +1073,7 @@ static int dp_ctrl_read_link_status(struct > dp_ctrl_private *ctrl, > } > > static int dp_ctrl_link_train_1(struct dp_ctrl_private *ctrl, > - struct dp_cr_status *cr, int *training_step) > + u8 *cr, int *training_step) > { > int tries, old_v_level, ret = 0; > u8 link_status[DP_LINK_STATUS_SIZE]; > @@ -1109,8 +1102,8 @@ static int dp_ctrl_link_train_1(struct dp_ctrl_private > *ctrl, > if (ret) > return ret; > > - cr->lane_0_1 = link_status[0]; > - cr->lane_2_3 = link_status[1]; > + cr[0] = link_status[0]; > + cr[1] = link_status[1]; > > if (drm_dp_clock_recovery_ok(link_status, > ctrl->link->link_params.num_lanes)) { > @@ -1188,7 +1181,7 @@ static void dp_ctrl_clear_training_pattern(struct > dp_ctrl_private *ctrl) > } > > static int dp_ctrl_link_train_2(struct dp_ctrl_private *ctrl, > - struct dp_cr_status *cr, int *training_step) > + u8 *cr, int *training_step) > { > int tries = 0, ret = 0; > char pattern; > @@ -1204,10 +1197,6 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private > *ctrl, > else > pattern = 
DP_TRAINING_PATTERN_2; > > - ret = dp_ctrl_update_vx_px(ctrl); > - if (ret) > - return ret; > - > ret = dp_catalog_ctrl_set_pattern(ctrl->catalog, pattern); > if (ret) > return ret; > @@ -1220,8 +1209,8 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private > *ctrl, > ret = dp_ctrl_read_link_status(ctrl, link_status); > if (ret) > return ret; > - cr->lane_0_1 = link_status[0]; > - cr->lane_2_3 = link_status[1]; > + cr[0] = link_status[0]; > + cr[1] = link_status[1]; > > if (drm_dp_channel_eq_ok(link_status, > ctrl->link->link_params.num_lanes)) { > @@ -1241,7 +1230,7 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private > *ctrl, > static int dp_ctrl_reinitialize_mainlink(struct dp_ctrl_private *ctrl); > > static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl, > - struct dp_cr_status *cr, int *training_step) > + u8 *cr, int *training_step) > { > int ret = 0; > u8 encoding = DP_SET_ANSI_8B10B; > @@ -1282,7 +1271,7 @@ static int dp_ctrl_link_train(struct dp_ctrl_private > *ctrl, > } > > static int dp_ctrl_setup_main_link(struct dp_ctrl_private *ctrl, > - struct dp_cr_status *cr, int *training_step) > + u8 *cr, int *training_step) > { > int ret = 0; > > @@ -1496,14 +1485,14 @@ static int dp_ctrl_deinitialize_mainlink(struct > dp_ctrl_private *ctrl) > static int dp_ctrl_link_maintenance(struct dp_ctrl_private *ctrl) > { > int ret = 0; > - struct dp_cr_status cr; > + u8 cr_status[2]; > int training_step = DP_TRAINING_NONE; > > dp_ctrl_push_idle(&ctrl->dp_ctrl); > > ctrl->dp_ctrl.pixel_rate = ctrl->panel->dp_mode.drm_mode.clock; > > - ret = dp_ctrl_setup_main_link(ctrl, &cr, &training_step); > + ret = dp_ctrl_setup_main_link(ctrl, cr_status, &training_step); > if (ret) > goto end; Do we need to extract the link status information from deep in these functions? Why not read it again when we need to? 
> > @@ -1634,6 +1623,41 @@ void dp_ctrl_handle_sink_request(struct dp_ctrl > *dp_ctrl) > } > } > > +static bool dp_ctrl_any_lane_cr_done(struct dp_ctrl_private *ctrl, > + u8 *cr_status) > + > +{ > + int i; > + u8 status; > + int lane = ctrl->link->link_params.num_lanes; > + > + for (i = 0; i < lane; i++) { > + status = cr_status[i / 2]; >
Re: [PATCH 3/7] drm/msm/dp: reset aux controller after dp_aux_cmd_fifo_tx() failed.
Quoting Kuogee Hsieh (2021-07-06 10:20:16) > Aux hardware calibration sequence requires resetting the aux controller > in order for the new setting to take effect. However resetting the AUX > controller will also clear HPD interrupt status which may accidentally > cause pending unplug interrupt to get lost. Therefore reset aux > controller only when link is in connection state when dp_aux_cmd_fifo_tx() > fail. This fixes Link Layer CTS cases 4.2.1.1 and 4.2.1.2. > > Signed-off-by: Kuogee Hsieh > --- > drivers/gpu/drm/msm/dp/dp_aux.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c > index 4a3293b..eb40d84 100644 > --- a/drivers/gpu/drm/msm/dp/dp_aux.c > +++ b/drivers/gpu/drm/msm/dp/dp_aux.c > @@ -353,6 +353,9 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux, > if (!(aux->retry_cnt % MAX_AUX_RETRIES)) > dp_catalog_aux_update_cfg(aux->catalog); > } > + /* reset aux if link is in connected state */ > + if (dp_catalog_link_is_connected(aux->catalog)) How do we avoid resetting aux when hpd is unplugged and then plugged back in during an aux transfer? > + dp_catalog_aux_reset(aux->catalog); > } else { > aux->retry_cnt = 0; > switch (aux->aux_error_num) { > -- > The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, > a Linux Foundation Collaborative Project >
[PATCH v3] drm/panfrost: fix the exception name always being "UNKNOWN"
From: ChunyouTang The exception_code in the register is only 8 bits, so if fault_status in panfrost_gpu_irq_handler() doesn't (& 0xFF), it can't get the correct exception reason. And it's better to show the whole register value to the customer, so it's better that fault_status doesn't (& 0xFF). Signed-off-by: ChunyouTang --- drivers/gpu/drm/panfrost/panfrost_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index 1fffb6a0b24f..d2d287bbf4e7 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -33,7 +33,7 @@ static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data) address |= gpu_read(pfdev, GPU_FAULT_ADDRESS_LO); dev_warn(pfdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx\n", -fault_status & 0xFF, panfrost_exception_name(pfdev, fault_status & 0xFF), +fault_status, panfrost_exception_name(pfdev, fault_status & 0xFF), address); if (state & GPU_IRQ_MULTIPLE_FAULT) -- 2.25.1
Re: page pools, was Re: [PATCH v9 1/5] drm: Add a sharable drm page-pool implementation
Am 08.07.21 um 06:20 schrieb Christoph Hellwig: On Wed, Jul 07, 2021 at 12:35:23PM -0700, John Stultz wrote: So, as Christian mentioned, on the TTM side it's useful, as they are trying to avoid TLB flushes when changing caching attributes. For the dmabuf system heap purposes, the main benefit is moving the page zeroing to the free path, rather than the allocation path. This on its own doesn't save much, but allows us to defer frees (and thus the zeroing) to the background, which can get that work out of the hot path. I really do not think that is worth it to fragment the free pages. And I think functionality like that should be part of the common page allocator. I mean we already have __GFP_ZERO, why not have a background kernel thread which zeros free pages when a CPU core is idle? (I'm pretty sure we already have that somehow). Christian.
Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init
Am 08.07.21 um 09:19 schrieb Daniel Vetter: On Thu, Jul 8, 2021 at 9:09 AM Daniel Vetter wrote: On Thu, Jul 8, 2021 at 8:56 AM Christian König wrote: Am 07.07.21 um 18:32 schrieb Daniel Vetter: On Wed, Jul 7, 2021 at 2:58 PM Christian König wrote: Am 07.07.21 um 14:13 schrieb Daniel Vetter: On Wed, Jul 7, 2021 at 1:57 PM Christian König wrote: Am 07.07.21 um 13:14 schrieb Daniel Vetter: On Wed, Jul 7, 2021 at 11:30 AM Christian König wrote: Am 02.07.21 um 23:38 schrieb Daniel Vetter: This is a very confusingly named function, because not just does it init an object, it arms it and provides a point of no return for pushing a job into the scheduler. It would be nice if that's a bit clearer in the interface. But the real reason is that I want to push the dependency tracking helpers into the scheduler code, and that means drm_sched_job_init must be called a lot earlier, without arming the job. v2: - don't change .gitignore (Steven) - don't forget v3d (Emma) v3: Emma noticed that I leak the memory allocated in drm_sched_job_init if we bail out before the point of no return in subsequent driver patches. To be able to fix this change drm_sched_job_cleanup() so it can handle being called both before and after drm_sched_job_arm(). Thinking more about this, I'm not sure if this really works. See drm_sched_job_init() was also calling drm_sched_entity_select_rq() to update the entity->rq association. And that can only be done later on when we arm the fence as well. Hm yeah, but that's a bug in the existing code I think: We already fail to clean up if we fail to allocate the fences. So I think the right thing to do here is to split the checks into job_init, and do the actual arming/rq selection in job_arm? I'm not entirely sure what's all going on there, the first check looks a bit like trying to schedule before the entity is set up, which is a driver bug and should have a WARN_ON? No you misunderstood me, the problem is something else. 
You asked previously why the call to drm_sched_job_init() was so late in the CS. The reason for this was not alone the scheduler fence init, but also the call to drm_sched_entity_select_rq(). Ah ok, I think I can fix that. Needs a prep patch to first make drm_sched_entity_select infallible, then should be easy to do. The 2nd check around last_scheduled I have honeslty no idea what it's even trying to do. You mean that here? fence = READ_ONCE(entity->last_scheduled); if (fence && !dma_fence_is_signaled(fence)) return; This makes sure that load balancing is not moving the entity to a different scheduler while there are still jobs running from this entity on the hardware, Yeah after a nap that idea crossed my mind too. But now I have locking questions, afaiui the scheduler thread updates this, without taking any locks - entity dequeuing is lockless. And here we read the fence and then seem to yolo check whether it's signalled? What's preventing a use-after-free here? There's no rcu or anything going on here at all, and it's outside of the spinlock section, which starts a bit further down. The last_scheduled fence of an entity can only change when there are jobs on the entities queued, and we have just ruled that out in the check before. There aren't any barriers, so the cpu could easily run the two checks the other way round. I'll ponder this and figure out where exactly we need docs for the constraint and/or barriers to make this work as intended. As-is I'm not seeing how it does ... spsc_queue_count() provides the necessary barrier with the atomic_read(). atomic_t is fully unordered, except when it's a read-modify-write Wasn't awake yet, I think the rule is read-modify-write and return previous value gives you full barrier. So stuff like cmpxchg, but also a few others. See atomic_t.txt under ODERING heading (yes that maintainer refuses to accept .rst so I can't just link you to the right section, it's silly). 
get/set and even RMW atomic ops that don't return anything are all fully unordered. As far as I know that is not completely correct. The rules around atomics i once learned are: 1. Everything which modifies something is a write barrier. 2. Everything which returns something is a read barrier. And I know a whole bunch of use cases where this is relied upon in the core kernel, so I'm pretty sure that's correct. In this case the write barrier is the atomic_dec() in spsc_queue_pop() and the read barrier is the atomic_read() in spsc_queue_count(). The READ_ONCE() is actually not even necessary as far as I can see. Christian. -Daniel atomic op, then it's a full barrier. So yeah you need more here. But also since you only need a read barrier on one side, and a write barrier on the other, you don't actually need cpu barriers on x86. And READ_ONCE gives you the compiler barrier on one side at least, I haven't found it on the writer side yet. But yes a comment would be really nice here. I had
Aw: Re: BUG: MTK DRM/HDMI broken on 5.13 (mt7623/bpi-r2)
> Gesendet: Donnerstag, 08. Juli 2021 um 09:22 Uhr > Von: "Dafna Hirschfeld" > We also encountered that warning on mt8173 device - Acer Chromebook R13. It > happen after resuming from suspend to ram. > We could not find a version that works and we were not able to find the fix > of the bug. > It seems like the irq isr is not called after resuming from suspend. > Please share if you have new findings regarding that bug. Hi, i have not yet found a way to make the commit-history flat for running bisect without the issue of disappearing childcommits when mergecommit is out of bisect scope. so i tried to start at working 5.12.0 with mtk-drm-patches and commits from drm core (i hope i have catched them all) by cherry-picking the single commits. c24e104c26aa 2021-06-09 drm: Lock pointer access in drm_master_release() (HEAD -> 5.12-drm) 2aa9212803a4 2021-06-08 drm: Fix use-after-free read in drm_getunique() 23b8d6c3be47 2021-04-08 treewide: Change list_sort to use const pointers c1e987f51f06 2021-03-26 drm/dp_mst: Drop DRM_ERROR() on kzalloc() fail in drm_dp_mst_handle_up_req() 2176a9e962be 2021-04-01 drm/drm_internal.h: Remove repeated struct declaration fc5d92c1485d 2021-04-08 drm/syncobj: use newly allocated stub fences 23a03d271e87 2021-03-29 drm/displayid: rename displayid_hdr to displayid_header 44ef605cb08f 2021-03-29 drm/displayid: allow data blocks with 0 payload length bbdc0aefd1b5 2021-03-29 drm/edid: use the new displayid iterator for tile info 1ee4a22d671e 2021-03-29 drm/edid: use the new displayid iterator for finding CEA extension d9b8c26b8ddf 2021-03-29 drm/edid: use the new displayid iterator for detailed modes d9e95df8adc8 2021-03-29 drm/displayid: add new displayid section/block iterators 2dd279949358 2021-03-29 drm/displayid: add separate drm_displayid.c bb1a3611abc1 2021-03-29 drm/edid: make a number of functions, parameters and variables const 0b18f5b98c71 2021-03-23 drm/dp_helper: Define options for FRL training for HDMI2.1 PCON 16fbc25ab84b 
2021-03-25 drm/mst: Enhance MST topology logging bb93ad6ab4e4 2021-03-26 drm: Fix 3 typos in the inline doc 27d30189b178 2021-03-22 drm/sysfs: Convert sysfs sprintf/snprintf family to sysfs_emit 04ad4ed36cf2 2021-03-18 drm: Few typo fixes b8821cac052f 2021-03-13 drm: Add GUD USB Display driver d3df1b84b9ff 2021-03-13 drm/probe-helper: Check epoch counter in output_poll_execute() 298372a0cda4 2021-03-13 drm/uapi: Add USB connector type 040c9022809d 2021-03-30 drm/mediatek: Don't support hdmi connector creation 7c6582b23551 2021-03-30 drm/mediatek: Switch the hdmi bridge ops to the atomic versions b1b43d5948b2 2021-02-03 drm/mediatek: Add missing MODULE_DEVICE_TABLE() fe5a0ff82cfb 2021-03-13 drm/mediatek: crtc: Make config-updating atomic result: it is still working. so at least they do not break ;) have you found any irq-related message in dmesg (i have not found any irq-error/warning-message)? how have you traced that? can somebody point us to the interrupts used for pageflip/vblank "requests"? in the wait-chain i do not see them, it seems it is called asynchronous and wait only looks at a state in the completion-struct i have the issue on bootup, i see only a purple screen instead of fbcon/xserver and the tracebacks on serial are very annoying as they repeating every x seconds (maybe change to WARN_ONCE?). But after a while it seems to stop. imho we need a way to make the history (temporary) flat (remove parent-information from commits to merge) so that bisect have only a list and not a "tree" regards Frank
[PATCH] video: backlight: Only set maximum brightness for gpio-backlight
The note in c2adda27d202f ("video: backlight: Add of_find_backlight helper in backlight.c") says that gpio-backlight uses brightness as power state. Other backlight drivers do not, so limit this workaround to gpio-backlight. This fixes the case where e.g. pwm-backlight can perfectly well be set to brightness 0 on boot in DT, which without this patch leads to the display brightness to be max instead of off. Fixes: c2adda27d202f ("video: backlight: Add of_find_backlight helper in backlight.c") Signed-off-by: Marek Vasut Cc: Daniel Thompson Cc: Meghana Madhyastha Cc: Noralf Trønnes Cc: Sean Paul Cc: Thierry Reding --- drivers/video/backlight/backlight.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index 537fe1b376ad7..dfb66171dec41 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -676,6 +676,7 @@ EXPORT_SYMBOL(of_find_backlight_by_node); static struct backlight_device *of_find_backlight(struct device *dev) { struct backlight_device *bd = NULL; + bool is_gpio_backlight = false; struct device_node *np; if (!dev) @@ -685,6 +686,8 @@ static struct backlight_device *of_find_backlight(struct device *dev) np = of_parse_phandle(dev->of_node, "backlight", 0); if (np) { bd = of_find_backlight_by_node(np); + is_gpio_backlight = + of_device_is_compatible(np, "gpio-backlight"); of_node_put(np); if (!bd) return ERR_PTR(-EPROBE_DEFER); @@ -692,7 +695,7 @@ static struct backlight_device *of_find_backlight(struct device *dev) * Note: gpio_backlight uses brightness as * power state during probe */ - if (!bd->props.brightness) + if (is_gpio_backlight && !bd->props.brightness) bd->props.brightness = bd->props.max_brightness; } } -- 2.30.2
[PATCH v2] drm/vkms: Creating a debug file to get/track vkms config in vkms_drv.c
Creating a vkms_config_debufs file in vkms_drv.c to get/track vkms config data, for the long-term plan of making vkms configurable and have multiple different instances. Reviewed-by: Melissa Wen Signed-off-by: Beatriz Martins de Carvalho --- Changes in v2: - corrected subject to make clear in terms of its purpose - corrected commit message --- drivers/gpu/drm/vkms/vkms_drv.c | 28 1 file changed, 28 insertions(+) diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c index 027ffe759440..c81fba6c72f0 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.c +++ b/drivers/gpu/drm/vkms/vkms_drv.c @@ -28,6 +28,9 @@ #include "vkms_drv.h" +#include +#include + #define DRIVER_NAME"vkms" #define DRIVER_DESC"Virtual Kernel Mode Setting" #define DRIVER_DATE"20180514" @@ -86,12 +89,37 @@ static void vkms_atomic_commit_tail(struct drm_atomic_state *old_state) drm_atomic_helper_cleanup_planes(dev, old_state); } +static int vkms_config_show(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct vkms_device *vkmsdev = drm_device_to_vkms_device(dev); + + seq_printf(m, "writeback=%d\n", vkmsdev->config->writeback); + seq_printf(m, "cursor=%d\n", vkmsdev->config->cursor); + seq_printf(m, "overlay=%d\n", vkmsdev->config->overlay); + + return 0; +} + +static const struct drm_info_list vkms_config_debugfs_list[] = { + { "vkms_config", vkms_config_show, 0 }, +}; + +static void vkms_config_debugfs_init(struct drm_minor *minor) +{ + drm_debugfs_create_files(vkms_config_debugfs_list, ARRAY_SIZE(vkms_config_debugfs_list), +minor->debugfs_root, minor); +} + static const struct drm_driver vkms_driver = { .driver_features= DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_GEM, .release= vkms_release, .fops = &vkms_driver_fops, DRM_GEM_SHMEM_DRIVER_OPS, + .debugfs_init = vkms_config_debugfs_init, + .name = DRIVER_NAME, .desc = DRIVER_DESC, .date = DRIVER_DATE, -- 2.25.1
Aw: Re: BUG: MTK DRM/HDMI broken on 5.13 (mt7623/bpi-r2)
Hi just a small update, added debug in the vendor-specific functions for page_flip and vblank and it seems they never get called --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -87,21 +87,25 @@ static void mtk_drm_crtc_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) { struct drm_crtc *crtc = &mtk_crtc->base; unsigned long flags; - +printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); spin_lock_irqsave(&crtc->dev->event_lock, flags); drm_crtc_send_vblank_event(crtc, mtk_crtc->event); drm_crtc_vblank_put(crtc); mtk_crtc->event = NULL; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); +printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); } static void mtk_drm_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) { +printk(KERN_ALERT "DEBUG: Passed %s %d update:%d,needsvblank:%d\n",__FUNCTION__,__LINE__,mtk_crtc->config_updating,mtk_crtc->pending_needs_vblank); drm_crtc_handle_vblank(&mtk_crtc->base); if (!mtk_crtc->config_updating && mtk_crtc->pending_needs_vblank) { +printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); mtk_drm_crtc_finish_page_flip(mtk_crtc); mtk_crtc->pending_needs_vblank = false; } +printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); } static void mtk_drm_crtc_destroy(struct drm_crtc *crtc) finish_page_flip is called by mtk_crtc_ddp_irq. this seems to be set in mtk_drm_crtc_enable_vblank with mtk_ddp_comp_enable_vblank. this is called correctly 113 static inline void mtk_ddp_comp_enable_vblank(struct mtk_ddp_comp *comp, 114 void (*vblank_cb)(void *), 115 void *vblank_cb_data) 116 { 117 printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); 118 if (comp->funcs && comp->funcs->enable_vblank) 119 { 120 comp->funcs->enable_vblank(comp->dev, vblank_cb, vblank_cb_data); 121 printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); 122 } 123 } i see both messages, but mtk_crtc_ddp_irq is never called and so the other 2 not. 
root@bpi-r2:~# dmesg | grep -i DEBUG [6.433509] DEBUG: Passed mtk_drm_crtc_enable_vblank 510 [6.433530] DEBUG: Passed mtk_ddp_comp_enable_vblank 117 [6.433537] DEBUG: Passed mtk_ddp_comp_enable_vblank 121 <<< comp->funcs->enable_vblank should be mtk_drm_crtc_enable_vblank, right? 641 static const struct drm_crtc_funcs mtk_crtc_funcs = { 642 .set_config = drm_atomic_helper_set_config, 643 .page_flip = drm_atomic_helper_page_flip, 644 .destroy= mtk_drm_crtc_destroy, 645 .reset = mtk_drm_crtc_reset, 646 .atomic_duplicate_state = mtk_drm_crtc_duplicate_state, 647 .atomic_destroy_state = mtk_drm_crtc_destroy_state, 648 .enable_vblank = mtk_drm_crtc_enable_vblank, <<< 649 .disable_vblank = mtk_drm_crtc_disable_vblank, 650 }; but it looks like a recursion: mtk_drm_crtc_enable_vblank calls mtk_ddp_comp_enable_vblank => enable_vblank (=mtk_drm_crtc_enable_vblank), but i see the messages not repeating mtk_drm_crtc_enable_vblank(struct drm_crtc *crtc) 511 mtk_ddp_comp_enable_vblank(comp, mtk_crtc_ddp_irq, &mtk_crtc->base); 113 static inline void mtk_ddp_comp_enable_vblank(struct mtk_ddp_comp *comp, 114 void (*vblank_cb)(void *), 115 void *vblank_cb_data) 116 { 118 if (comp->funcs && comp->funcs->enable_vblank) 120 comp->funcs->enable_vblank(comp->dev, vblank_cb, vblank_cb_data); but params do not match...comp->funcs->enable_vblank takes 3 arguments but comp->funcs->enable_vblank has only one.something i miss here... i guess not, but is watchdog somehow involved? i ask because i see this on reboot/poweroff: "watchdog: watchdog0: watchdog did not stop!" i see this with my 5.13, 5.12-drm (5.12.0+mtk/core drm-patches) and 5.12.14 too (hdmi is working there), but not 5.12.0! that means something in drm-patches (mtk/core) breaks watchdog. maybe the recursion mentioned above? regards Frank > Gesendet: Donnerstag, 08. 
Juli 2021 um 09:22 Uhr > Von: "Dafna Hirschfeld" > > Hi Frank, > > > On 06.07.21 11:54, Frank Wunderlich wrote: > > Hi, > > > > i've noticed that HDMI is broken at least on my board (Bananapi-r2,mt7623) > > on 5.13. > > > > after some research i noticed that it is working till > > > > commit 2e477391522354e763aa62ee3e281c1ad9e8eb1b > > Author: Dafna Hirschfeld > > We also encountered that warning on mt8173 device - Acer Chromebook R13. It > happen after resuming from suspend to ram. > We could not find a version that works and we were not able to find the fix > of the bug. > It seems like the irq isr is not called after resuming from suspend. > Please share if you have new findings regarding that bug. > > Thanks, > Dafna
[PULL] drm-intel-next for v5.15
Hi Dave & Daniel - I'll be out for a bit, so I'm sending the first batch of changes for v5.15 early. Nothing unusual here, I just don't want to have a huge pile waiting. :) Rodrigo will cover me. BR, Jani. drm-intel-next-2021-07-08: drm/i915 changes for v5.15: Features: - Enable pipe DMC loading on XE-LPD and ADL-P (Anusha) - Finally remove JSH and EHL force probe requirement (Tejas) Refactoring and cleanups: - Refactor and fix DDI buffer translations (Ville) - Clean up FBC CFB allocation code (Ville, with a fix from Matthew) - Finish INTEL_GEN() and friends macro conversions (Lucas) - Misc display cleanups (Ville) Fixes: - PSR fixes and ADL-P workarounds (José) - Fix drm infoframe state mismatch (Bhanuprakash) - Force Type-C PHY disconnect during suspend/shutdown (Imre) - Fix power sequence violation on some Chromebook models (Shawn) - Fix VGA workaround to avoid screen flicker at boot (Emil) - Fix display 12+ watermark workaround adjustment (Lucas) Misc: - Backmerge drm-next (Jani) BR, Jani. 
The following changes since commit 8a02ea42bc1d4c448caf1bab0e05899dad503f74: Merge tag 'drm-intel-next-fixes-2021-06-29' of git://anongit.freedesktop.org/drm/drm-intel into drm-next (2021-06-30 15:42:05 +1000) are available in the Git repository at: git://anongit.freedesktop.org/drm/drm-intel tags/drm-intel-next-2021-07-08 for you to fetch changes up to cd5606aa39925ad4483e96abffc9cc62bb36c640: gpu/drm/i915: nuke old GEN macros (2021-07-07 16:36:32 -0700) drm/i915 changes for v5.15: Features: - Enable pipe DMC loading on XE-LPD and ADL-P (Anusha) - Finally remove JSH and EHL force probe requirement (Tejas) Refactoring and cleanups: - Refactor and fix DDI buffer translations (Ville) - Clean up FBC CFB allocation code (Ville, with a fix from Matthew) - Finish INTEL_GEN() and friends macro conversions (Lucas) - Misc display cleanups (Ville) Fixes: - PSR fixes and ADL-P workarounds (José) - Fix drm infoframe state mismatch (Bhanuprakash) - Force Type-C PHY disconnect during suspend/shutdown (Imre) - Fix power sequence violation on some Chromebook models (Shawn) - Fix VGA workaround to avoid screen flicker at boot (Emil) - Fix display 12+ watermark workaround adjustment (Lucas) Misc: - Backmerge drm-next (Jani) Anshuman Gupta (1): drm/i915/hdcp: Nuke Platform check for mst hdcp init Anusha Srivatsa (4): drm/i915/dmc: Introduce DMC_FW_MAIN drm/i915/xelpd: Pipe A DMC plugging drm/i915/adl_p: Pipe B DMC Support drm/i915/adl_p: Load DMC Bhanuprakash Modem (1): drm/i915/display: Fix state mismatch in drm infoframe Emil Velikov (1): drm/i915: apply WaEnableVGAAccessThroughIOPort as needed Imre Deak (1): drm/i915: Force a TypeC PHY disconnect during suspend/shutdown Jani Nikula (2): drm/i915/dsc: abstract helpers to get bigjoiner primary/secondary crtc Merge drm/drm-next into drm-intel-next José Roberto de Souza (7): Revert "drm/i915/display: Drop FIXME about turn off infoframes" drm/i915/display/psr: Handle SU Y granularity drm/i915/display/adl_p: Implement Wa_22012278275 
drm/i915/display/adl_p: Implement Wa_16011168373 drm/i915/xelpd: Handle PSR2 SDP indication in the prior scanline drm/i915/display/adl_p: Implement Wa_16011303918 drm/i915/display/dg1: Correctly map DPLLs during state readout Kees Cook (1): drm/i915/display: Do not zero past infoframes.vsc Lee Shawn C (1): drm/i915: keep backlight_enable on until turn eDP display off Lucas De Marchi (5): drm/i915/xelpd: break feature inheritance drm/i915/display: fix level 0 adjustement on display ver >= 12 drm/i915/display: use max_level to control loop drm/i915: finish INTEL_GEN and friends conversion gpu/drm/i915: nuke old GEN macros Matthew Auld (1): drm/i915/display: check if compressed_llb was allocated Tejas Upadhyay (3): drm/i915/jsl: Add W/A 1409054076 for JSL drm/i915/jsl: Remove require_force_probe protection drm/i915/ehl: Remove require_force_probe protection Ville Syrjälä (32): drm/i915: s/intel/hsw/ for hsw/bdw/skl buf trans drm/i915: Introduce hsw_get_buf_trans() drm/i915: Wrap the platform specific buf trans structs into a union drm/i915: Rename dkl phy buf trans tables drm/i915: Wrap the buf trans tables into a struct drm/i915: Introduce intel_get_buf_trans() drm/i915; Return the whole buf_trans struct from get_buf_trans() drm/i915: Store the HDMI default entry in the bug trans struct drm/i915: Introduce encoder->get_buf_trans() drm/i915: Clean up hsw/bdw/skl/kbl buf trans funcs drm/i915: Introduce rkl_get_combo_buf_trans() drm/i915: Fix dg1 buf trans tables drm/i915: Deduplicate icl DP HBR2 vs. eDP HBR3 table drm/i915: Fix ehl edp hbr2 vswi
Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init
On Thu, Jul 08, 2021 at 09:53:00AM +0200, Christian König wrote: > Am 08.07.21 um 09:19 schrieb Daniel Vetter: > > On Thu, Jul 8, 2021 at 9:09 AM Daniel Vetter wrote: > > > On Thu, Jul 8, 2021 at 8:56 AM Christian König > > > wrote: > > > > Am 07.07.21 um 18:32 schrieb Daniel Vetter: > > > > > On Wed, Jul 7, 2021 at 2:58 PM Christian König > > > > > wrote: > > > > > > Am 07.07.21 um 14:13 schrieb Daniel Vetter: > > > > > > > On Wed, Jul 7, 2021 at 1:57 PM Christian König > > > > > > > wrote: > > > > > > > > Am 07.07.21 um 13:14 schrieb Daniel Vetter: > > > > > > > > > On Wed, Jul 7, 2021 at 11:30 AM Christian König > > > > > > > > > wrote: > > > > > > > > > > Am 02.07.21 um 23:38 schrieb Daniel Vetter: > > > > > > > > > > > This is a very confusingly named function, because not > > > > > > > > > > > just does it > > > > > > > > > > > init an object, it arms it and provides a point of no > > > > > > > > > > > return for > > > > > > > > > > > pushing a job into the scheduler. It would be nice if > > > > > > > > > > > that's a bit > > > > > > > > > > > clearer in the interface. > > > > > > > > > > > > > > > > > > > > > > But the real reason is that I want to push the dependency > > > > > > > > > > > tracking > > > > > > > > > > > helpers into the scheduler code, and that means > > > > > > > > > > > drm_sched_job_init > > > > > > > > > > > must be called a lot earlier, without arming the job. > > > > > > > > > > > > > > > > > > > > > > v2: > > > > > > > > > > > - don't change .gitignore (Steven) > > > > > > > > > > > - don't forget v3d (Emma) > > > > > > > > > > > > > > > > > > > > > > v3: Emma noticed that I leak the memory allocated in > > > > > > > > > > > drm_sched_job_init if we bail out before the point of no > > > > > > > > > > > return in > > > > > > > > > > > subsequent driver patches. 
To be able to fix this change > > > > > > > > > > > drm_sched_job_cleanup() so it can handle being called > > > > > > > > > > > both before and > > > > > > > > > > > after drm_sched_job_arm(). > > > > > > > > > > Thinking more about this, I'm not sure if this really works. > > > > > > > > > > > > > > > > > > > > See drm_sched_job_init() was also calling > > > > > > > > > > drm_sched_entity_select_rq() > > > > > > > > > > to update the entity->rq association. > > > > > > > > > > > > > > > > > > > > And that can only be done later on when we arm the fence as > > > > > > > > > > well. > > > > > > > > > Hm yeah, but that's a bug in the existing code I think: We > > > > > > > > > already > > > > > > > > > fail to clean up if we fail to allocate the fences. So I > > > > > > > > > think the > > > > > > > > > right thing to do here is to split the checks into job_init, > > > > > > > > > and do > > > > > > > > > the actual arming/rq selection in job_arm? I'm not entirely > > > > > > > > > sure > > > > > > > > > what's all going on there, the first check looks a bit like > > > > > > > > > trying to > > > > > > > > > schedule before the entity is set up, which is a driver bug > > > > > > > > > and should > > > > > > > > > have a WARN_ON? > > > > > > > > No you misunderstood me, the problem is something else. > > > > > > > > > > > > > > > > You asked previously why the call to drm_sched_job_init() was > > > > > > > > so late in > > > > > > > > the CS. > > > > > > > > > > > > > > > > The reason for this was not alone the scheduler fence init, but > > > > > > > > also the > > > > > > > > call to drm_sched_entity_select_rq(). > > > > > > > Ah ok, I think I can fix that. Needs a prep patch to first make > > > > > > > drm_sched_entity_select infallible, then should be easy to do. > > > > > > > > > > > > > > > > The 2nd check around last_scheduled I have honeslty no idea > > > > > > > > > what it's > > > > > > > > > even trying to do. > > > > > > > > You mean that here? 
> > > > > > > > > > > > > > > > fence = READ_ONCE(entity->last_scheduled); > > > > > > > > if (fence && !dma_fence_is_signaled(fence)) > > > > > > > > return; > > > > > > > > > > > > > > > > This makes sure that load balancing is not moving the entity to > > > > > > > > a > > > > > > > > different scheduler while there are still jobs running from > > > > > > > > this entity > > > > > > > > on the hardware, > > > > > > > Yeah after a nap that idea crossed my mind too. But now I have > > > > > > > locking > > > > > > > questions, afaiui the scheduler thread updates this, without > > > > > > > taking > > > > > > > any locks - entity dequeuing is lockless. And here we read the > > > > > > > fence > > > > > > > and then seem to yolo check whether it's signalled? What's > > > > > > > preventing > > > > > > > a use-after-free here? There's no rcu or anything going on here at > > > > > > > all, and it's outside of the spinlock section, which starts a bit > > > > > > > further down. > > > > > > The last_scheduled fence of an entity can only change when there are > > > > > > jobs on the entities q
Re: [PATCH] gpu: ttm: fix GPF in ttm_bo_release
On Thu, 8 Jul 2021 11:37:01 +0300 Pavel Skripkin wrote: > On Thu, 8 Jul 2021 08:49:48 +0200 > Christian König wrote: > > > Am 07.07.21 um 20:51 schrieb Pavel Skripkin: > > > My local syzbot instance hit GPF in ttm_bo_release(). > > > Unfortunately, syzbot didn't produce a reproducer for this, but I > > > found out possible scenario: > > > > > > drm_gem_vram_create()<-- drm_gem_vram_object kzalloced > > >(bo embedded in this object) > > >ttm_bo_init() > > > ttm_bo_init_reserved() > > >ttm_resource_alloc() > > > man->func->alloc() <-- allocation failure > > >ttm_bo_put() > > > ttm_bo_release() > > > ttm_mem_io_free() <-- bo->resource == NULL passed > > >as second argument > > >*GPF* > > > > > > So, I've added check in ttm_bo_release() to avoid passing > > > NULL as second argument to ttm_mem_io_free(). > > Hi, Christian! > > Thank you for quick feedback :) > > > > > There is another ocassion of this a bit down before we call > > ttm_bo_move_to_lru_tail() apart from that good catch. > > > > Did you mean, that ttm_bo_move_to_lru_tail() should have NULL check > too? I checked it's realization, and, I think, NULL check is necessary > there, since mem pointer is dereferenced w/o any checking > > > But I'm wondering if we should make the functions NULL save instead > > of the external check. > > > > I tried to find more possible scenarios of GPF in ttm_bo_release(), > but I didn't find one. But, yes, moving NULL check inside > ttm_mem_io_free() is more general approach and it will defend this > function from GPFs in the future. > > > > With regards, > Pavel Skripkin > I misclicked and sent this email to Christian privately :( Added all thread participants back, sorry. With regards, Pavel Skripkin
Re: Oops in qxl_bo_move_notify()
On Wed, Jul 07, 2021 at 04:36:49PM +, Roberto Sassu wrote: > Hi > > I'm getting this oops (on commit a180bd1d7e16): > > [ 17.711520] BUG: kernel NULL pointer dereference, address: > 0010 > [ 17.739451] RIP: 0010:qxl_bo_move_notify+0x35/0x80 [qxl] > [ 17.827345] RSP: 0018:c9457c08 EFLAGS: 00010286 > [ 17.827350] RAX: 0001 RBX: RCX: > dc00 > [ 17.827353] RDX: 0007 RSI: 0004 RDI: > 85596feb > [ 17.827356] RBP: 88800e311c00 R08: R09: > > [ 17.827358] R10: 8697b243 R11: fbfff0d2f648 R12: > > [ 17.827361] R13: 88800e311e48 R14: 88800e311e98 R15: > 88800e311e90 > [ 17.827364] FS: () GS:88805d80() > knlGS: > [ 17.861699] CS: 0010 DS: ES: CR0: 80050033 > [ 17.861703] CR2: 0010 CR3: 2642c000 CR4: > 00350ee0 > [ 17.861707] Call Trace: > [ 17.861712] ttm_bo_cleanup_memtype_use+0x4d/0xb0 [ttm] > [ 17.861730] ttm_bo_release+0x42d/0x7c0 [ttm] > [ 17.861746] ? ttm_bo_cleanup_refs+0x127/0x420 [ttm] > [ 17.888300] ttm_bo_delayed_delete+0x289/0x390 [ttm] > [ 17.888317] ? ttm_bo_cleanup_refs+0x420/0x420 [ttm] > [ 17.888332] ? lock_release+0x9c/0x5c0 > [ 17.901033] ? rcu_read_lock_held_common+0x1a/0x50 > [ 17.905183] ttm_device_delayed_workqueue+0x18/0x50 [ttm] > [ 17.909371] process_one_work+0x537/0x9f0 > [ 17.913345] ? pwq_dec_nr_in_flight+0x160/0x160 > [ 17.917297] ? lock_acquired+0xa4/0x580 > [ 17.921168] ? worker_thread+0x169/0x600 > [ 17.925034] worker_thread+0x7a/0x600 > [ 17.928657] ? process_one_work+0x9f0/0x9f0 > [ 17.932360] kthread+0x200/0x230 > [ 17.935930] ? set_kthread_struct+0x80/0x80 > [ 17.939593] ret_from_fork+0x22/0x30 > [ 17.951737] CR2: 0010 > [ 17.955496] ---[ end trace e30cc21c24e81ee5 ]--- > > I had a look at the code, and it seems that this is caused by > trying to use bo->resource which is NULL. > > bo->resource is freed by ttm_bo_cleanup_refs() -> > ttm_bo_cleanup_memtype_use() -> ttm_resource_free(). 
> > And then a notification is issued by ttm_bo_cleanup_refs() -> > ttm_bo_put() -> ttm_bo_release() -> > ttm_bo_cleanup_memtype_use(), this time with bo->release > equal to NULL. > > I was thinking a proper way to fix this. Checking that > bo->release is not NULL in qxl_bo_move_notify() would > solve the issue. But maybe there is a better way, like > avoiding that ttm_bo_cleanup_memtype_use() is called > twice. Which way would be preferable? Adding Christian and Dave, who've touched all this recently iirc. -Daniel > > Thanks > > Roberto > > HUAWEI TECHNOLOGIES Duesseldorf GmbH, HRB 56063 > Managing Director: Li Peng, Li Jian, Shi Yanli -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init
Am 08.07.21 um 12:02 schrieb Daniel Vetter: On Thu, Jul 08, 2021 at 09:53:00AM +0200, Christian König wrote: Am 08.07.21 um 09:19 schrieb Daniel Vetter: On Thu, Jul 8, 2021 at 9:09 AM Daniel Vetter wrote: On Thu, Jul 8, 2021 at 8:56 AM Christian König wrote: Am 07.07.21 um 18:32 schrieb Daniel Vetter: On Wed, Jul 7, 2021 at 2:58 PM Christian König wrote: Am 07.07.21 um 14:13 schrieb Daniel Vetter: On Wed, Jul 7, 2021 at 1:57 PM Christian König wrote: Am 07.07.21 um 13:14 schrieb Daniel Vetter: On Wed, Jul 7, 2021 at 11:30 AM Christian König wrote: Am 02.07.21 um 23:38 schrieb Daniel Vetter: This is a very confusingly named function, because not just does it init an object, it arms it and provides a point of no return for pushing a job into the scheduler. It would be nice if that's a bit clearer in the interface. But the real reason is that I want to push the dependency tracking helpers into the scheduler code, and that means drm_sched_job_init must be called a lot earlier, without arming the job. v2: - don't change .gitignore (Steven) - don't forget v3d (Emma) v3: Emma noticed that I leak the memory allocated in drm_sched_job_init if we bail out before the point of no return in subsequent driver patches. To be able to fix this change drm_sched_job_cleanup() so it can handle being called both before and after drm_sched_job_arm(). Thinking more about this, I'm not sure if this really works. See drm_sched_job_init() was also calling drm_sched_entity_select_rq() to update the entity->rq association. And that can only be done later on when we arm the fence as well. Hm yeah, but that's a bug in the existing code I think: We already fail to clean up if we fail to allocate the fences. So I think the right thing to do here is to split the checks into job_init, and do the actual arming/rq selection in job_arm? 
I'm not entirely sure what's all going on there, the first check looks a bit like trying to schedule before the entity is set up, which is a driver bug and should have a WARN_ON? No you misunderstood me, the problem is something else. You asked previously why the call to drm_sched_job_init() was so late in the CS. The reason for this was not alone the scheduler fence init, but also the call to drm_sched_entity_select_rq(). Ah ok, I think I can fix that. Needs a prep patch to first make drm_sched_entity_select infallible, then should be easy to do. The 2nd check around last_scheduled I have honeslty no idea what it's even trying to do. You mean that here? fence = READ_ONCE(entity->last_scheduled); if (fence && !dma_fence_is_signaled(fence)) return; This makes sure that load balancing is not moving the entity to a different scheduler while there are still jobs running from this entity on the hardware, Yeah after a nap that idea crossed my mind too. But now I have locking questions, afaiui the scheduler thread updates this, without taking any locks - entity dequeuing is lockless. And here we read the fence and then seem to yolo check whether it's signalled? What's preventing a use-after-free here? There's no rcu or anything going on here at all, and it's outside of the spinlock section, which starts a bit further down. The last_scheduled fence of an entity can only change when there are jobs on the entities queued, and we have just ruled that out in the check before. There aren't any barriers, so the cpu could easily run the two checks the other way round. I'll ponder this and figure out where exactly we need docs for the constraint and/or barriers to make this work as intended. As-is I'm not seeing how it does ... spsc_queue_count() provides the necessary barrier with the atomic_read(). atomic_t is fully unordered, except when it's a read-modify-write Wasn't awake yet, I think the rule is read-modify-write and return previous value gives you full barrier. 
So stuff like cmpxchg, but also a few others. See atomic_t.txt under ODERING heading (yes that maintainer refuses to accept .rst so I can't just link you to the right section, it's silly). get/set and even RMW atomic ops that don't return anything are all fully unordered. As far as I know that not completely correct. The rules around atomics i once learned are: 1. Everything which modifies something is a write barrier. 2. Everything which returns something is a read barrier. And I know a whole bunch of use cases where this is relied upon in the core kernel, so I'm pretty sure that's correct. That's against what the doc says, and also it would mean stuff like atomic_read_acquire or smp_mb__after/before_atomic is completely pointless. On x86 you're right, anywhere else where there's no total store ordering I you're wrong. Good to know. I always thought that atomic_read_acquire() was just for documentation purpose. If there's code that relies on this it needs to be fixed and properly documented. I did go through the squeue code a bit, and might be better to just rep
Re: [PATCH] gpu: ttm: fix GPF in ttm_bo_release
Am 08.07.21 um 12:09 schrieb Pavel Skripkin: On Thu, 8 Jul 2021 11:37:01 +0300 Pavel Skripkin wrote: On Thu, 8 Jul 2021 08:49:48 +0200 Christian König wrote: Am 07.07.21 um 20:51 schrieb Pavel Skripkin: My local syzbot instance hit GPF in ttm_bo_release(). Unfortunately, syzbot didn't produce a reproducer for this, but I found out possible scenario: drm_gem_vram_create()<-- drm_gem_vram_object kzalloced (bo embedded in this object) ttm_bo_init() ttm_bo_init_reserved() ttm_resource_alloc() man->func->alloc() <-- allocation failure ttm_bo_put() ttm_bo_release() ttm_mem_io_free() <-- bo->resource == NULL passed as second argument *GPF* So, I've added check in ttm_bo_release() to avoid passing NULL as second argument to ttm_mem_io_free(). Hi, Christian! Thank you for quick feedback :) There is another occasion of this a bit down before we call ttm_bo_move_to_lru_tail() apart from that good catch. Did you mean that ttm_bo_move_to_lru_tail() should have NULL check too? Yes, exactly that. I checked its realization, and, I think, NULL check is necessary there, since mem pointer is dereferenced w/o any checking But I'm wondering if we should make the functions NULL safe instead of the external check. I tried to find more possible scenarios of GPF in ttm_bo_release(), but I didn't find one. But, yes, moving NULL check inside ttm_mem_io_free() is more general approach and it will defend this function from GPFs in the future. With regards, Pavel Skripkin I misclicked and sent this email to Christian privately :( Added all thread participants back, sorry. No problem. Do you want to update your patch or should I take care of this? Thanks, Christian. With regards, Pavel Skripkin
Re: [PATCH] gpu: ttm: fix GPF in ttm_bo_release
On Thu, 8 Jul 2021 12:56:19 +0200 Christian König wrote: > Am 08.07.21 um 12:09 schrieb Pavel Skripkin: > > On Thu, 8 Jul 2021 11:37:01 +0300 > > Pavel Skripkin wrote: > > > >> On Thu, 8 Jul 2021 08:49:48 +0200 > >> Christian König wrote: > >> > >>> Am 07.07.21 um 20:51 schrieb Pavel Skripkin: > My local syzbot instance hit GPF in ttm_bo_release(). > Unfortunately, syzbot didn't produce a reproducer for this, but I > found out possible scenario: > > drm_gem_vram_create()<-- drm_gem_vram_object > kzalloced (bo embedded in this object) > ttm_bo_init() > ttm_bo_init_reserved() > ttm_resource_alloc() > man->func->alloc() <-- allocation failure > ttm_bo_put() > ttm_bo_release() > ttm_mem_io_free() <-- bo->resource == NULL passed > as second argument > *GPF* > > So, I've added check in ttm_bo_release() to avoid passing > NULL as second argument to ttm_mem_io_free(). > >> Hi, Christian! > >> > >> Thank you for quick feedback :) > >> > >>> There is another ocassion of this a bit down before we call > >>> ttm_bo_move_to_lru_tail() apart from that good catch. > >>> > >> Did you mean, that ttm_bo_move_to_lru_tail() should have NULL check > >> too? > > Yes, exactly that. > > >> I checked it's realization, and, I think, NULL check is necessary > >> there, since mem pointer is dereferenced w/o any checking > >> > >>> But I'm wondering if we should make the functions NULL save > >>> instead of the external check. > >>> > >> I tried to find more possible scenarios of GPF in ttm_bo_release(), > >> but I didn't find one. But, yes, moving NULL check inside > >> ttm_mem_io_free() is more general approach and it will defend this > >> function from GPFs in the future. > >> > >> > >> > >> With regards, > >> Pavel Skripkin > >> > > I misclicked and sent this email to Christian privately :( > > > > Added all thread participants back, sorry. > > No problem. > > Do you want to update your patch or should I take care of this? > Yes, I will send v2 soon. Thank you! With regards, Pavel Skripkin
[PATCH] dma-buf: fix and rework dma_buf_poll v5
Daniel pointed me towards this function and there are multiple obvious problems in the implementation. First of all the retry loop is not working as intended. In general the retry makes only sense if you grab the reference first and then check the sequence values. Then we should always also wait for the exclusive fence. It's also good practice to keep the reference around when installing callbacks to fences you don't own. And last the whole implementation was unnecessary complex and rather hard to understand which could lead to probably unexpected behavior of the IOCTL. Fix all this by reworking the implementation from scratch. Dropping the whole RCU approach and taking the lock instead. Only mildly tested and needs a thoughtful review of the code. v2: fix the reference counting as well v3: keep the excl fence handling as is for stable v4: back to testing all fences, drop RCU v5: handle in and out separately Signed-off-by: Christian König CC: sta...@vger.kernel.org --- drivers/dma-buf/dma-buf.c | 152 +- include/linux/dma-buf.h | 2 +- 2 files changed, 68 insertions(+), 86 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index eadd1eaa2fb5..439e2379e1cb 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -72,7 +72,7 @@ static void dma_buf_release(struct dentry *dentry) * If you hit this BUG() it means someone dropped their ref to the * dma-buf while still having pending operation to the buffer. 
*/ - BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active); + BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active); dmabuf->ops->release(dmabuf); @@ -202,16 +202,57 @@ static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb) wake_up_locked_poll(dcb->poll, dcb->active); dcb->active = 0; spin_unlock_irqrestore(&dcb->poll->lock, flags); + dma_fence_put(fence); +} + +static bool dma_buf_poll_shared(struct dma_resv *resv, + struct dma_buf_poll_cb_t *dcb) +{ + struct dma_resv_list *fobj = dma_resv_get_list(resv); + struct dma_fence *fence; + int i, r; + + if (!fobj) + return false; + + for (i = 0; i < fobj->shared_count; ++i) { + fence = rcu_dereference_protected(fobj->shared[i], + dma_resv_held(resv)); + dma_fence_get(fence); + r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); + if (!r) + return true; + dma_fence_put(fence); + } + + return false; +} + +static bool dma_buf_poll_excl(struct dma_resv *resv, + struct dma_buf_poll_cb_t *dcb) +{ + struct dma_fence *fence = dma_resv_get_excl(resv); + int r; + + if (!fence) + return false; + + dma_fence_get(fence); + r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); + if (!r) + return true; + dma_fence_put(fence); + + return false; } static __poll_t dma_buf_poll(struct file *file, poll_table *poll) { struct dma_buf *dmabuf; struct dma_resv *resv; - struct dma_resv_list *fobj; - struct dma_fence *fence_excl; + unsigned shared_count; __poll_t events; - unsigned shared_count, seq; + int r, i; dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv) @@ -225,101 +266,42 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0; -retry: - seq = read_seqcount_begin(&resv->seq); - rcu_read_lock(); - - fobj = rcu_dereference(resv->fence); - if (fobj) - shared_count = fobj->shared_count; - else - shared_count = 0; - fence_excl = rcu_dereference(resv->fence_excl); - if (read_seqcount_retry(&resv->seq, seq)) { - rcu_read_unlock(); - goto retry; - } - - if 
(fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) { - struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; - __poll_t pevents = EPOLLIN; + dma_resv_lock(resv, NULL); - if (shared_count == 0) - pevents |= EPOLLOUT; + if (events & EPOLLOUT) { + struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_out; + /* Check that callback isn't busy */ spin_lock_irq(&dmabuf->poll.lock); - if (dcb->active) { - dcb->active |= pevents; - events &= ~pevents; - } else - dcb->active = pevents; + if (dcb->active) + events &= ~EPOLLOUT; + else + dcb->active = EPOLLOUT;
Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init
On Thu, Jul 8, 2021 at 12:54 PM Christian König wrote: > > Am 08.07.21 um 12:02 schrieb Daniel Vetter: > > On Thu, Jul 08, 2021 at 09:53:00AM +0200, Christian König wrote: > >> Am 08.07.21 um 09:19 schrieb Daniel Vetter: > >>> On Thu, Jul 8, 2021 at 9:09 AM Daniel Vetter > >>> wrote: > On Thu, Jul 8, 2021 at 8:56 AM Christian König > wrote: > > Am 07.07.21 um 18:32 schrieb Daniel Vetter: > >> On Wed, Jul 7, 2021 at 2:58 PM Christian König > >> wrote: > >>> Am 07.07.21 um 14:13 schrieb Daniel Vetter: > On Wed, Jul 7, 2021 at 1:57 PM Christian König > wrote: > > Am 07.07.21 um 13:14 schrieb Daniel Vetter: > >> On Wed, Jul 7, 2021 at 11:30 AM Christian König > >> wrote: > >>> Am 02.07.21 um 23:38 schrieb Daniel Vetter: > This is a very confusingly named function, because not just does > it > init an object, it arms it and provides a point of no return for > pushing a job into the scheduler. It would be nice if that's a > bit > clearer in the interface. > > But the real reason is that I want to push the dependency > tracking > helpers into the scheduler code, and that means > drm_sched_job_init > must be called a lot earlier, without arming the job. > > v2: > - don't change .gitignore (Steven) > - don't forget v3d (Emma) > > v3: Emma noticed that I leak the memory allocated in > drm_sched_job_init if we bail out before the point of no return > in > subsequent driver patches. To be able to fix this change > drm_sched_job_cleanup() so it can handle being called both > before and > after drm_sched_job_arm(). > >>> Thinking more about this, I'm not sure if this really works. > >>> > >>> See drm_sched_job_init() was also calling > >>> drm_sched_entity_select_rq() > >>> to update the entity->rq association. > >>> > >>> And that can only be done later on when we arm the fence as well. > >> Hm yeah, but that's a bug in the existing code I think: We already > >> fail to clean up if we fail to allocate the fences. 
So I think the > >> right thing to do here is to split the checks into job_init, and do > >> the actual arming/rq selection in job_arm? I'm not entirely sure > >> what's all going on there, the first check looks a bit like trying > >> to > >> schedule before the entity is set up, which is a driver bug and > >> should > >> have a WARN_ON? > > No you misunderstood me, the problem is something else. > > > > You asked previously why the call to drm_sched_job_init() was so > > late in > > the CS. > > > > The reason for this was not alone the scheduler fence init, but > > also the > > call to drm_sched_entity_select_rq(). > Ah ok, I think I can fix that. Needs a prep patch to first make > drm_sched_entity_select infallible, then should be easy to do. > > >> The 2nd check around last_scheduled I have honeslty no idea what > >> it's > >> even trying to do. > > You mean that here? > > > > fence = READ_ONCE(entity->last_scheduled); > > if (fence && !dma_fence_is_signaled(fence)) > > return; > > > > This makes sure that load balancing is not moving the entity to a > > different scheduler while there are still jobs running from this > > entity > > on the hardware, > Yeah after a nap that idea crossed my mind too. But now I have > locking > questions, afaiui the scheduler thread updates this, without taking > any locks - entity dequeuing is lockless. And here we read the fence > and then seem to yolo check whether it's signalled? What's preventing > a use-after-free here? There's no rcu or anything going on here at > all, and it's outside of the spinlock section, which starts a bit > further down. > >>> The last_scheduled fence of an entity can only change when there are > >>> jobs on the entities queued, and we have just ruled that out in the > >>> check before. > >> There aren't any barriers, so the cpu could easily run the two checks > >> the other way round. 
I'll ponder this and figure out where exactly we > >> need docs for the constraint and/or barriers to make this work as > >> intended. As-is I'm not seeing how it does ... > > spsc_queue_count() provides the necessary barrier with the > > atomic_read(). > atomic_t is fully unord
[PATCH v2] gpu: ttm: add missing NULL checks
My local syzbot instance hit GPF in ttm_bo_release(). Unfortunately, syzbot didn't produce a reproducer for this, but I found out possible scenario: drm_gem_vram_create()<-- drm_gem_vram_object kzalloced (bo embedded in this object) ttm_bo_init() ttm_bo_init_reserved() ttm_resource_alloc() man->func->alloc() <-- allocation failure ttm_bo_put() ttm_bo_release() ttm_mem_io_free() <-- bo->resource == NULL passed as second argument *GPF* Added NULL check inside ttm_mem_io_free() to prevent reported GPF and make this function NULL save in future. Same problem was in ttm_bo_move_to_lru_tail() as Christian reported. ttm_bo_move_to_lru_tail() is called in ttm_bo_release() and mem pointer can be NULL as well as in ttm_mem_io_free(). Fail log: KASAN: null-ptr-deref in range [0x0020-0x0027] CPU: 1 PID: 10419 Comm: syz-executor.3 Not tainted 5.13.0-rc7-next-20210625 #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014 RIP: 0010:ttm_mem_io_free+0x28/0x170 drivers/gpu/drm/ttm/ttm_bo_util.c:66 Code: b1 90 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 e8 cd 19 24 fd 4c 8d 6b 20 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 2a 01 00 00 4c 8b 63 20 31 ff 4c 89 e6 e8 00 1f RSP: 0018:c900141df968 EFLAGS: 00010202 RAX: dc00 RBX: RCX: c90010da RDX: 0004 RSI: 84513ea3 RDI: 888041fbc010 RBP: 888041fbc010 R08: R09: R10: 0001 R11: R12: R13: 0020 R14: 88806b258800 R15: 88806b258a38 FS: 7fa6e9845640() GS:88807ec0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 7fad61265e18 CR3: 5ad79000 CR4: 00350ee0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 Call Trace: ttm_bo_release+0xd94/0x10a0 drivers/gpu/drm/ttm/ttm_bo.c:422 kref_put include/linux/kref.h:65 [inline] ttm_bo_put drivers/gpu/drm/ttm/ttm_bo.c:470 [inline] ttm_bo_init_reserved+0x7cb/0x960 drivers/gpu/drm/ttm/ttm_bo.c:1050 ttm_bo_init+0x105/0x270 drivers/gpu/drm/ttm/ttm_bo.c:1074 drm_gem_vram_create+0x332/0x4c0 drivers/gpu/drm/drm_gem_vram_helper.c:228 Fixes: 
d3116756a710 ("drm/ttm: rename bo->mem and make it a pointer") Signed-off-by: Pavel Skripkin --- Changes in v2: 1. Added NULL check in ttm_bo_move_to_lru_tail() 2. Changed subject line, since NULL check added in 2 funtions --- drivers/gpu/drm/ttm/ttm_bo.c | 3 +++ drivers/gpu/drm/ttm/ttm_bo_util.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 1b950b45cf4b..8d7fd65ccced 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -102,6 +102,9 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, return; } + if (!mem) + return; + man = ttm_manager_type(bdev, mem->mem_type); list_move_tail(&bo->lru, &man->lru[bo->priority]); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2f57f824e6db..763fa6f4e07d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -63,6 +63,9 @@ int ttm_mem_io_reserve(struct ttm_device *bdev, void ttm_mem_io_free(struct ttm_device *bdev, struct ttm_resource *mem) { + if (!mem) + return; + if (!mem->bus.offset && !mem->bus.addr) return; -- 2.32.0
Re: [PATCH v2 01/11] drm/sched: Split drm_sched_job_init
Am 08.07.21 um 13:20 schrieb Daniel Vetter: On Thu, Jul 8, 2021 at 12:54 PM Christian König wrote: [SNIP] As far as I know that's not completely correct. The rules around atomics I once learned are: 1. Everything which modifies something is a write barrier. 2. Everything which returns something is a read barrier. And I know a whole bunch of use cases where this is relied upon in the core kernel, so I'm pretty sure that's correct. That's against what the doc says, and also it would mean stuff like atomic_read_acquire or smp_mb__after/before_atomic is completely pointless. On x86 you're right, anywhere else where there's no total store ordering I think you're wrong. Good to know. I always thought that atomic_read_acquire() was just for documentation purposes. Maybe you mixed it up with C++ atomics (which I think are now also in C)? Those are strongly ordered by default (you can get the weakly ordered kernel-style one too). It's a bit unfortunate that the default semantics are exactly opposite between kernel and userspace :-/ Yeah, that's most likely it. If there's code that relies on this it needs to be fixed and properly documented. I did go through the squeue code a bit, and might be better to just replace this with a core data structure. Well the spsc was especially crafted for this use case and performed quite a bit better than a doubly linked list. Yeah double-linked list is awful. Or what core data structure do you have in mind? Hm I thought there's a ready-made queue primitive, but there's just llist.h. Which I think is roughly what the scheduler queue also does. Minus the atomic_t for counting how many there are, and aside from the tracepoints I don't think we're using those anywhere, we just check for is_empty in the code (from a quick look only). I think we just need to replace the atomic_read() with atomic_read_acquire() and the atomic_dec() with atomic_dec_return_release(). Apart from that everything should be working as far as I can see. 
And yes llist.h doesn't really do much different, it just doesn't keep a tail pointer. Christian. -Daniel Christian. -Daniel In this case the write barrier is the atomic_dec() in spsc_queue_pop() and the read barrier is the atomic_read() in spsc_queue_count(). The READ_ONCE() is actually not even necessary as far as I can see. Christian. -Daniel atomic op, then it's a full barrier. So yeah you need more here. But also since you only need a read barrier on one side, and a write barrier on the other, you don't actually need a CPU barrier on x86. And READ_ONCE gives you the compiler barrier on one side at least, I haven't found it on the writer side yet. But yes a comment would be really nice here. I had to think for a while why we don't need this as well. I'm typing a patch, which after a night's sleep I realized has the wrong barriers. And now I'm also typing some doc improvements for drm_sched_entity and related functions. Christian. -Daniel Christian. -Daniel Regards Christian. -Daniel Christian. Also improve the kerneldoc for this. 
Acked-by: Steven Price (v2) Signed-off-by: Daniel Vetter Cc: Lucas Stach Cc: Russell King Cc: Christian Gmeiner Cc: Qiang Yu Cc: Rob Herring Cc: Tomeu Vizoso Cc: Steven Price Cc: Alyssa Rosenzweig Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: Masahiro Yamada Cc: Kees Cook Cc: Adam Borowski Cc: Nick Terrell Cc: Mauro Carvalho Chehab Cc: Paul Menzel Cc: Sami Tolvanen Cc: Viresh Kumar Cc: Alex Deucher Cc: Dave Airlie Cc: Nirmoy Das Cc: Deepak R Varma Cc: Lee Jones Cc: Kevin Wang Cc: Chen Li Cc: Luben Tuikov Cc: "Marek Olšák" Cc: Dennis Li Cc: Maarten Lankhorst Cc: Andrey Grodzovsky Cc: Sonny Jiang Cc: Boris Brezillon Cc: Tian Tao Cc: Jack Zhang Cc: etna...@lists.freedesktop.org Cc: l...@lists.freedesktop.org Cc: linux-me...@vger.kernel.org Cc: linaro-mm-...@lists.linaro.org Cc: Emma Anholt --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 ++ drivers/gpu/drm/lima/lima_sched.c| 2 ++ drivers/gpu/drm/panfrost/panfrost_job.c | 2 ++ drivers/gpu/drm/scheduler/sched_entity.c | 6 ++-- drivers/gpu/drm/scheduler/sched_fence.c | 17 + drivers/gpu/drm/scheduler/sched_main.c | 46 +--- drivers/gpu/drm/v3d/v3d_gem.c| 2 ++ include/drm/gpu_scheduler.h | 7 +++- 10 files changed, 74 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c5386d13eb4a..a4ec092af9a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1226,6 +1226,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; +
Re: [PATCH v2] gpu: ttm: add missing NULL checks
Am 08.07.21 um 13:25 schrieb Pavel Skripkin: My local syzbot instance hit GPF in ttm_bo_release(). Unfortunately, syzbot didn't produce a reproducer for this, but I found out possible scenario: drm_gem_vram_create()<-- drm_gem_vram_object kzalloced (bo embedded in this object) ttm_bo_init() ttm_bo_init_reserved() ttm_resource_alloc() man->func->alloc() <-- allocation failure ttm_bo_put() ttm_bo_release() ttm_mem_io_free() <-- bo->resource == NULL passed as second argument *GPF* Added NULL check inside ttm_mem_io_free() to prevent reported GPF and make this function NULL save in future. Same problem was in ttm_bo_move_to_lru_tail() as Christian reported. ttm_bo_move_to_lru_tail() is called in ttm_bo_release() and mem pointer can be NULL as well as in ttm_mem_io_free(). Fail log: KASAN: null-ptr-deref in range [0x0020-0x0027] CPU: 1 PID: 10419 Comm: syz-executor.3 Not tainted 5.13.0-rc7-next-20210625 #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014 RIP: 0010:ttm_mem_io_free+0x28/0x170 drivers/gpu/drm/ttm/ttm_bo_util.c:66 Code: b1 90 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 e8 cd 19 24 fd 4c 8d 6b 20 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 2a 01 00 00 4c 8b 63 20 31 ff 4c 89 e6 e8 00 1f RSP: 0018:c900141df968 EFLAGS: 00010202 RAX: dc00 RBX: RCX: c90010da RDX: 0004 RSI: 84513ea3 RDI: 888041fbc010 RBP: 888041fbc010 R08: R09: R10: 0001 R11: R12: R13: 0020 R14: 88806b258800 R15: 88806b258a38 FS: 7fa6e9845640() GS:88807ec0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 7fad61265e18 CR3: 5ad79000 CR4: 00350ee0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 Call Trace: ttm_bo_release+0xd94/0x10a0 drivers/gpu/drm/ttm/ttm_bo.c:422 kref_put include/linux/kref.h:65 [inline] ttm_bo_put drivers/gpu/drm/ttm/ttm_bo.c:470 [inline] ttm_bo_init_reserved+0x7cb/0x960 drivers/gpu/drm/ttm/ttm_bo.c:1050 ttm_bo_init+0x105/0x270 drivers/gpu/drm/ttm/ttm_bo.c:1074 drm_gem_vram_create+0x332/0x4c0 
drivers/gpu/drm/drm_gem_vram_helper.c:228 Fixes: d3116756a710 ("drm/ttm: rename bo->mem and make it a pointer") Signed-off-by: Pavel Skripkin Reviewed-by: Christian König Going to push this to drm-misc-next-fixes. Thanks, Christian. --- Changes in v2: 1. Added NULL check in ttm_bo_move_to_lru_tail() 2. Changed subject line, since NULL check added in 2 funtions --- drivers/gpu/drm/ttm/ttm_bo.c | 3 +++ drivers/gpu/drm/ttm/ttm_bo_util.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 1b950b45cf4b..8d7fd65ccced 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -102,6 +102,9 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, return; } + if (!mem) + return; + man = ttm_manager_type(bdev, mem->mem_type); list_move_tail(&bo->lru, &man->lru[bo->priority]); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2f57f824e6db..763fa6f4e07d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -63,6 +63,9 @@ int ttm_mem_io_reserve(struct ttm_device *bdev, void ttm_mem_io_free(struct ttm_device *bdev, struct ttm_resource *mem) { + if (!mem) + return; + if (!mem->bus.offset && !mem->bus.addr) return;
Re: Oops in qxl_bo_move_notify()
Yeah, that's an already known issue. When the allocation fails bo->resource might be NULL now and we need to add checks for that corner case as well. Christian. Am 08.07.21 um 12:14 schrieb Daniel Vetter: On Wed, Jul 07, 2021 at 04:36:49PM +, Roberto Sassu wrote: Hi I'm getting this oops (on commit a180bd1d7e16): [ 17.711520] BUG: kernel NULL pointer dereference, address: 0010 [ 17.739451] RIP: 0010:qxl_bo_move_notify+0x35/0x80 [qxl] [ 17.827345] RSP: 0018:c9457c08 EFLAGS: 00010286 [ 17.827350] RAX: 0001 RBX: RCX: dc00 [ 17.827353] RDX: 0007 RSI: 0004 RDI: 85596feb [ 17.827356] RBP: 88800e311c00 R08: R09: [ 17.827358] R10: 8697b243 R11: fbfff0d2f648 R12: [ 17.827361] R13: 88800e311e48 R14: 88800e311e98 R15: 88800e311e90 [ 17.827364] FS: () GS:88805d80() knlGS: [ 17.861699] CS: 0010 DS: ES: CR0: 80050033 [ 17.861703] CR2: 0010 CR3: 2642c000 CR4: 00350ee0 [ 17.861707] Call Trace: [ 17.861712] ttm_bo_cleanup_memtype_use+0x4d/0xb0 [ttm] [ 17.861730] ttm_bo_release+0x42d/0x7c0 [ttm] [ 17.861746] ? ttm_bo_cleanup_refs+0x127/0x420 [ttm] [ 17.888300] ttm_bo_delayed_delete+0x289/0x390 [ttm] [ 17.888317] ? ttm_bo_cleanup_refs+0x420/0x420 [ttm] [ 17.888332] ? lock_release+0x9c/0x5c0 [ 17.901033] ? rcu_read_lock_held_common+0x1a/0x50 [ 17.905183] ttm_device_delayed_workqueue+0x18/0x50 [ttm] [ 17.909371] process_one_work+0x537/0x9f0 [ 17.913345] ? pwq_dec_nr_in_flight+0x160/0x160 [ 17.917297] ? lock_acquired+0xa4/0x580 [ 17.921168] ? worker_thread+0x169/0x600 [ 17.925034] worker_thread+0x7a/0x600 [ 17.928657] ? process_one_work+0x9f0/0x9f0 [ 17.932360] kthread+0x200/0x230 [ 17.935930] ? set_kthread_struct+0x80/0x80 [ 17.939593] ret_from_fork+0x22/0x30 [ 17.951737] CR2: 0010 [ 17.955496] ---[ end trace e30cc21c24e81ee5 ]--- I had a look at the code, and it seems that this is caused by trying to use bo->resource which is NULL. bo->resource is freed by ttm_bo_cleanup_refs() -> ttm_bo_cleanup_memtype_use() -> ttm_resource_free(). 
And then a notification is issued by ttm_bo_cleanup_refs() -> ttm_bo_put() -> ttm_bo_release() -> ttm_bo_cleanup_memtype_use(), this time with bo->release equal to NULL. I was thinking a proper way to fix this. Checking that bo->release is not NULL in qxl_bo_move_notify() would solve the issue. But maybe there is a better way, like avoiding that ttm_bo_cleanup_memtype_use() is called twice. Which way would be preferable? Adding Christian and Dave, who've touched all this recently iirc. -Daniel Thanks Roberto HUAWEI TECHNOLOGIES Duesseldorf GmbH, HRB 56063 Managing Director: Li Peng, Li Jian, Shi Yanli
Re: [PATCH] drm/msm/dpu: Add newlines to printks
On Thu, 8 Jul 2021 at 09:56, Stephen Boyd wrote: > > Add some missing newlines to the various DRM printks in this file. > Noticed while looking at logs. While we're here unbreak quoted > strings so grepping them is easier. > > Signed-off-by: Stephen Boyd Reviewed-by: Dmitry Baryshkov > --- > drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 12 +--- > 1 file changed, 5 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c > b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c > index 1c04b7cce43e..0e9d3fa1544b 100644 > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c > @@ -274,20 +274,20 @@ int dpu_encoder_helper_wait_for_irq(struct > dpu_encoder_phys *phys_enc, > > /* return EWOULDBLOCK since we know the wait isn't necessary */ > if (phys_enc->enable_state == DPU_ENC_DISABLED) { > - DRM_ERROR("encoder is disabled id=%u, intr=%d, irq=%d", > + DRM_ERROR("encoder is disabled id=%u, intr=%d, irq=%d\n", > DRMID(phys_enc->parent), intr_idx, > irq->irq_idx); > return -EWOULDBLOCK; > } > > if (irq->irq_idx < 0) { > - DRM_DEBUG_KMS("skip irq wait id=%u, intr=%d, irq=%s", > + DRM_DEBUG_KMS("skip irq wait id=%u, intr=%d, irq=%s\n", > DRMID(phys_enc->parent), intr_idx, > irq->name); > return 0; > } > > - DRM_DEBUG_KMS("id=%u, intr=%d, irq=%d, pp=%d, pending_cnt=%d", > + DRM_DEBUG_KMS("id=%u, intr=%d, irq=%d, pp=%d, pending_cnt=%d\n", > DRMID(phys_enc->parent), intr_idx, > irq->irq_idx, phys_enc->hw_pp->idx - PINGPONG_0, > atomic_read(wait_info->atomic_cnt)); > @@ -303,8 +303,7 @@ int dpu_encoder_helper_wait_for_irq(struct > dpu_encoder_phys *phys_enc, > if (irq_status) { > unsigned long flags; > > - DRM_DEBUG_KMS("irq not triggered id=%u, intr=%d, " > - "irq=%d, pp=%d, atomic_cnt=%d", > + DRM_DEBUG_KMS("irq not triggered id=%u, intr=%d, > irq=%d, pp=%d, atomic_cnt=%d\n", > DRMID(phys_enc->parent), intr_idx, > irq->irq_idx, > phys_enc->hw_pp->idx - PINGPONG_0, > @@ -315,8 +314,7 @@ int 
dpu_encoder_helper_wait_for_irq(struct > dpu_encoder_phys *phys_enc, > ret = 0; > } else { > ret = -ETIMEDOUT; > - DRM_DEBUG_KMS("irq timeout id=%u, intr=%d, " > - "irq=%d, pp=%d, atomic_cnt=%d", > + DRM_DEBUG_KMS("irq timeout id=%u, intr=%d, irq=%d, > pp=%d, atomic_cnt=%d\n", > DRMID(phys_enc->parent), intr_idx, > irq->irq_idx, > phys_enc->hw_pp->idx - PINGPONG_0, > > base-commit: e9f1cbc0c4114880090c7a578117d3b9cf184ad4 > -- > https://chromeos.dev > -- With best wishes Dmitry
Re: [PATCH] dma-buf: fix and rework dma_buf_poll v5
Sorry that was the wrong patch. Still not feeling that well :( Christian. Am 08.07.21 um 13:19 schrieb Christian König: Daniel pointed me towards this function and there are multiple obvious problems in the implementation. First of all the retry loop is not working as intended. In general the retry makes only sense if you grab the reference first and then check the sequence values. Then we should always also wait for the exclusive fence. It's also good practice to keep the reference around when installing callbacks to fences you don't own. And last the whole implementation was unnecessary complex and rather hard to understand which could lead to probably unexpected behavior of the IOCTL. Fix all this by reworking the implementation from scratch. Dropping the whole RCU approach and taking the lock instead. Only mildly tested and needs a thoughtful review of the code. v2: fix the reference counting as well v3: keep the excl fence handling as is for stable v4: back to testing all fences, drop RCU v5: handle in and out separately Signed-off-by: Christian König CC: sta...@vger.kernel.org --- drivers/dma-buf/dma-buf.c | 152 +- include/linux/dma-buf.h | 2 +- 2 files changed, 68 insertions(+), 86 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index eadd1eaa2fb5..439e2379e1cb 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -72,7 +72,7 @@ static void dma_buf_release(struct dentry *dentry) * If you hit this BUG() it means someone dropped their ref to the * dma-buf while still having pending operation to the buffer. 
*/ - BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active); + BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active); dmabuf->ops->release(dmabuf); @@ -202,16 +202,57 @@ static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb) wake_up_locked_poll(dcb->poll, dcb->active); dcb->active = 0; spin_unlock_irqrestore(&dcb->poll->lock, flags); + dma_fence_put(fence); +} + +static bool dma_buf_poll_shared(struct dma_resv *resv, + struct dma_buf_poll_cb_t *dcb) +{ + struct dma_resv_list *fobj = dma_resv_get_list(resv); + struct dma_fence *fence; + int i, r; + + if (!fobj) + return false; + + for (i = 0; i < fobj->shared_count; ++i) { + fence = rcu_dereference_protected(fobj->shared[i], + dma_resv_held(resv)); + dma_fence_get(fence); + r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); + if (!r) + return true; + dma_fence_put(fence); + } + + return false; +} + +static bool dma_buf_poll_excl(struct dma_resv *resv, + struct dma_buf_poll_cb_t *dcb) +{ + struct dma_fence *fence = dma_resv_get_excl(resv); + int r; + + if (!fence) + return false; + + dma_fence_get(fence); + r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); + if (!r) + return true; + dma_fence_put(fence); + + return false; } static __poll_t dma_buf_poll(struct file *file, poll_table *poll) { struct dma_buf *dmabuf; struct dma_resv *resv; - struct dma_resv_list *fobj; - struct dma_fence *fence_excl; + unsigned shared_count; __poll_t events; - unsigned shared_count, seq; + int r, i; dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv) @@ -225,101 +266,42 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0; -retry: - seq = read_seqcount_begin(&resv->seq); - rcu_read_lock(); - - fobj = rcu_dereference(resv->fence); - if (fobj) - shared_count = fobj->shared_count; - else - shared_count = 0; - fence_excl = rcu_dereference(resv->fence_excl); - if (read_seqcount_retry(&resv->seq, seq)) { - rcu_read_unlock(); - goto retry; - } - - if 
(fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) { - struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; - __poll_t pevents = EPOLLIN; + dma_resv_lock(resv, NULL); - if (shared_count == 0) - pevents |= EPOLLOUT; + if (events & EPOLLOUT) { + struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_out; + /* Check that callback isn't busy */ spin_lock_irq(&dmabuf->poll.lock); - if (dcb->active) { - dcb->active |= pevents; - events &= ~pevents; - } else - dcb->active = pevents; + if (dcb->active) +
[PATCH] drm/radeon: Fix NULL dereference when updating memory stats
From: Mikel Rychliski radeon_ttm_bo_destroy() is attempting to access the resource object to update memory counters. However, the resource object is already freed when ttm calls this function via the destroy callback. This causes an oops when a bo is freed: BUG: kernel NULL pointer dereference, address: 0010 RIP: 0010:radeon_ttm_bo_destroy+0x2c/0x100 [radeon] Call Trace: radeon_bo_unref+0x1a/0x30 [radeon] radeon_gem_object_free+0x33/0x50 [radeon] drm_gem_object_release_handle+0x69/0x70 [drm] drm_gem_handle_delete+0x62/0xa0 [drm] ? drm_mode_destroy_dumb+0x40/0x40 [drm] drm_ioctl_kernel+0xb2/0xf0 [drm] drm_ioctl+0x30a/0x3c0 [drm] ? drm_mode_destroy_dumb+0x40/0x40 [drm] radeon_drm_ioctl+0x49/0x80 [radeon] __x64_sys_ioctl+0x8e/0xd0 Avoid the issue by updating the counters in the delete_mem_notify callback instead. Also, fix memory statistic updating in radeon_bo_move() to identify the source type correctly. The source type needs to be saved before the move, because the moved from object may be altered by the move. Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2") Signed-off-by: Mikel Rychliski Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210624045121.15643-1-mi...@mikelr.com --- drivers/gpu/drm/radeon/radeon_object.c | 29 +++--- drivers/gpu/drm/radeon/radeon_object.h | 2 +- drivers/gpu/drm/radeon/radeon_ttm.c| 13 +--- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index bfaaa3c969a3..56ede9d63b12 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo); * function are calling it. 
*/ -static void radeon_update_memory_usage(struct radeon_bo *bo, - unsigned mem_type, int sign) +static void radeon_update_memory_usage(struct ttm_buffer_object *bo, + unsigned int mem_type, int sign) { - struct radeon_device *rdev = bo->rdev; + struct radeon_device *rdev = radeon_get_rdev(bo->bdev); switch (mem_type) { case TTM_PL_TT: if (sign > 0) - atomic64_add(bo->tbo.base.size, &rdev->gtt_usage); + atomic64_add(bo->base.size, &rdev->gtt_usage); else - atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage); + atomic64_sub(bo->base.size, &rdev->gtt_usage); break; case TTM_PL_VRAM: if (sign > 0) - atomic64_add(bo->tbo.base.size, &rdev->vram_usage); + atomic64_add(bo->base.size, &rdev->vram_usage); else - atomic64_sub(bo->tbo.base.size, &rdev->vram_usage); + atomic64_sub(bo->base.size, &rdev->vram_usage); break; } } @@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct radeon_bo, tbo); - radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1); - mutex_lock(&bo->rdev->gem.mutex); list_del_init(&bo->list); mutex_unlock(&bo->rdev->gem.mutex); @@ -727,24 +725,21 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, } void radeon_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, + unsigned int old_type, struct ttm_resource *new_mem) { struct radeon_bo *rbo; + radeon_update_memory_usage(bo, old_type, -1); + if (new_mem) + radeon_update_memory_usage(bo, new_mem->mem_type, 1); + if (!radeon_ttm_bo_is_radeon_bo(bo)) return; rbo = container_of(bo, struct radeon_bo, tbo); radeon_bo_check_tiling(rbo, 0, 1); radeon_vm_bo_invalidate(rbo->rdev, rbo); - - /* update statistics */ - if (!new_mem) - return; - - radeon_update_memory_usage(rbo, bo->resource->mem_type, -1); - radeon_update_memory_usage(rbo, new_mem->mem_type, 1); } vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 
1739c6a142cd..1afc7992ef91 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -161,7 +161,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo, extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, bool force_drop); extern void radeon_
[PATCH] drm/qxl: add NULL check for bo->resource
When allocation fails, bo->resource can be NULL now. Signed-off-by: Christian König Reported-by: Daniel Bristot de Oliveira Tested-by: Daniel Bristot de Oliveira --- drivers/gpu/drm/qxl/qxl_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 19fd39d9a00c..37a1b6a6ad6d 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -127,7 +127,7 @@ static void qxl_bo_move_notify(struct ttm_buffer_object *bo, struct qxl_bo *qbo; struct qxl_device *qdev; - if (!qxl_ttm_bo_is_qxl_bo(bo)) + if (!qxl_ttm_bo_is_qxl_bo(bo) || !bo->resource) return; qbo = to_qxl_bo(bo); qdev = to_qxl(qbo->tbo.base.dev); -- 2.25.1
Re: [PATCH] drm/msm/mdp5: fix 64-bit division in bandwidth calculation
On 22/06/2021 11:03, Dmitry Baryshkov wrote: Fix undefined symbols errors arising from 64-bit division on 32-bit arm targets. Add 64-bit version of mult_frac and use it for calculating bandwidth. ERROR: modpost: "__aeabi_ldivmod" [drivers/gpu/drm/msm/msm.ko] undefined! ERROR: modpost: "__aeabi_uldivmod" [drivers/gpu/drm/msm/msm.ko] undefined! Fixes: 7e0230fd096c ("drm/msm/mdp5: provide dynamic bandwidth management") Signed-off-by: Dmitry Baryshkov We are reworking now bandwidth management for mdp5, so both the original patch and the fix can be ignored for now. --- drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c | 2 +- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 5 - drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c | 2 +- include/linux/math.h | 13 + 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index a9332078aa13..52724d0a6fea 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -755,7 +755,7 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc, hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg); if (hw_cfg->perf.ab_inefficiency) - crtc_bw = mult_frac(crtc_bw, hw_cfg->perf.ab_inefficiency, 100); + crtc_bw = mult_frac_ull(crtc_bw, hw_cfg->perf.ab_inefficiency, 100); mdp5_cstate->new_crtc_bw = crtc_bw; /* diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 3e1b28d3e41b..85b7093a1218 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -301,6 +301,7 @@ static const struct mdp_kms_funcs kms_funcs = { void mdp5_kms_set_bandwidth(struct mdp5_kms *mdp5_kms) { int i; + u64 bw; u32 full_bw = 0; struct drm_crtc *tmp_crtc; @@ -311,7 +312,9 @@ void mdp5_kms_set_bandwidth(struct mdp5_kms *mdp5_kms) if (!tmp_crtc->enabled) continue; - full_bw += Bps_to_icc(to_mdp5_crtc_state(tmp_crtc->state)->new_crtc_bw / mdp5_kms->num_paths); + bw = 
to_mdp5_crtc_state(tmp_crtc->state)->new_crtc_bw; + do_div(bw, mdp5_kms->num_paths * 1000); /* Bps_to_icc */ + full_bw += bw; } DBG("SET BW to %d\n", full_bw); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c index 85275665558b..2ede34177a90 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c @@ -191,7 +191,7 @@ static void mdp5_plane_calc_bw(struct drm_plane_state *state, struct drm_crtc_st prefill_div = vbp + vpw + vfp; #endif - pstate->plane_bw = max(plane_bw, mult_frac(plane_bw, hw_latency_lines, prefill_div)); + pstate->plane_bw = max(plane_bw, mult_frac_ull(plane_bw, hw_latency_lines, prefill_div)); } static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state, diff --git a/include/linux/math.h b/include/linux/math.h index 53674a327e39..1327385905df 100644 --- a/include/linux/math.h +++ b/include/linux/math.h @@ -118,6 +118,19 @@ } \ ) +#define mult_frac_ull(x, numer, denom)( \ +{ \ + typeof(x) quot = (x); \ + typeof(x) rem; \ + do_div(quot, (denom)); \ + rem = (x) - quot * (denom); \ + rem = (rem * (numer)); \ + do_div(rem, (denom)); \ + (quot * (numer)) + rem; \ +} \ +) + + #define sector_div(a, b) do_div(a, b) /** -- With best wishes Dmitry
[PATCH v3] drm/panel: Add support for E Ink VB3300-KCA
Add support for the 10.3" E Ink panel described at: https://www.eink.com/product.html?type=productdetail&id=7 Signed-off-by: Alistair Francis Acked-by: Rob Herring --- .../bindings/display/panel/panel-simple.yaml | 2 ++ .../devicetree/bindings/vendor-prefixes.yaml | 2 ++ drivers/gpu/drm/panel/panel-simple.c | 29 +++ 3 files changed, 33 insertions(+) diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml index b3797ba2698b..799e20222551 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml @@ -128,6 +128,8 @@ properties: # Emerging Display Technology Corp. WVGA TFT Display with capacitive touch - edt,etm0700g0dh6 - edt,etm0700g0edh6 +# E Ink VB3300-KCA + - eink,vb3300-kca # Evervision Electronics Co. Ltd. VGG804821 5.0" WVGA TFT LCD Panel - evervision,vgg804821 # Foxlink Group 5" WVGA TFT LCD panel diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 0199728d2eaf..3612c6020fe4 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -335,6 +335,8 @@ patternProperties: description: eGalax_eMPIA Technology Inc "^einfochips,.*": description: Einfochips + "^eink,.*": +description: E Ink Corporation "^elan,.*": description: Elan Microelectronic Corp. 
"^element14,.*": diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 21939d4352cf..210377b03f6f 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2046,6 +2046,32 @@ static const struct panel_desc edt_etm0700g0bdh6 = { .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE, }; +static const struct display_timing eink_vb3300_kca_timing = { + .pixelclock = { 4000, 4000, 4000 }, + .hactive = { 334, 334, 334 }, + .hfront_porch = { 1, 1, 1 }, + .hback_porch = { 1, 1, 1 }, + .hsync_len = { 1, 1, 1 }, + .vactive = { 1405, 1405, 1405 }, + .vfront_porch = { 1, 1, 1 }, + .vback_porch = { 1, 1, 1 }, + .vsync_len = { 1, 1, 1 }, + .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW | +DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_POSEDGE, +}; + +static const struct panel_desc eink_vb3300_kca = { + .timings = &eink_vb3300_kca_timing, + .num_timings = 1, + .bpc = 6, + .size = { + .width = 157, + .height = 209, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE, +}; + static const struct display_timing evervision_vgg804821_timing = { .pixelclock = { 2760, 3330, 5000 }, .hactive = { 800, 800, 800 }, @@ -4344,6 +4370,9 @@ static const struct of_device_id platform_of_match[] = { }, { .compatible = "edt,etm0700g0dh6", .data = &edt_etm0700g0dh6, + }, { + .compatible = "eink,vb3300-kca", + .data = &eink_vb3300_kca, }, { .compatible = "edt,etm0700g0bdh6", .data = &edt_etm0700g0bdh6, -- 2.31.1
Re: [PATCH] drm/rockchip: Implement mmap as GEM object function
ping for review Am 24.06.21 um 11:55 schrieb Thomas Zimmermann: Moving the driver-specific mmap code into a GEM object function allows for using DRM helpers for various mmap callbacks. The respective rockchip functions are being removed. The file_operations structure fops is now being created by the helper macro DEFINE_DRM_GEM_FOPS(). Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 13 +- drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c | 3 +- drivers/gpu/drm/rockchip/rockchip_drm_gem.c | 44 +-- drivers/gpu/drm/rockchip/rockchip_drm_gem.h | 7 --- 4 files changed, 15 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index b730b8d5d949..2e3ab573a817 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -208,16 +208,7 @@ static void rockchip_drm_unbind(struct device *dev) drm_dev_put(drm_dev); } -static const struct file_operations rockchip_drm_driver_fops = { - .owner = THIS_MODULE, - .open = drm_open, - .mmap = rockchip_gem_mmap, - .poll = drm_poll, - .read = drm_read, - .unlocked_ioctl = drm_ioctl, - .compat_ioctl = drm_compat_ioctl, - .release = drm_release, -}; +DEFINE_DRM_GEM_FOPS(rockchip_drm_driver_fops); static const struct drm_driver rockchip_drm_driver = { .driver_features= DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, @@ -226,7 +217,7 @@ static const struct drm_driver rockchip_drm_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import_sg_table = rockchip_gem_prime_import_sg_table, - .gem_prime_mmap = rockchip_gem_mmap_buf, + .gem_prime_mmap = drm_gem_prime_mmap, .fops = &rockchip_drm_driver_fops, .name = DRIVER_NAME, .desc = DRIVER_DESC, diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c index 2fdc455c4ad7..d8418dd39d0e 100644 --- 
a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "rockchip_drm_drv.h" @@ -24,7 +25,7 @@ static int rockchip_fbdev_mmap(struct fb_info *info, struct drm_fb_helper *helper = info->par; struct rockchip_drm_private *private = to_drm_private(helper); - return rockchip_gem_mmap_buf(private->fbdev_bo, vma); + return drm_gem_prime_mmap(private->fbdev_bo, vma); } static const struct fb_ops rockchip_drm_fbdev_ops = { diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index 7971f57436dd..63eb73b624aa 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -240,12 +240,22 @@ static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj, int ret; struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj); + /* +* Set vm_pgoff (used as a fake buffer offset by DRM) to 0 and map the +* whole buffer from the start. +*/ + vma->vm_pgoff = 0; + /* * We allocated a struct page table for rk_obj, so clear * VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap(). 
*/ + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_flags &= ~VM_PFNMAP; + vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); + if (rk_obj->pages) ret = rockchip_drm_gem_object_mmap_iommu(obj, vma); else @@ -257,39 +267,6 @@ static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj, return ret; } -int rockchip_gem_mmap_buf(struct drm_gem_object *obj, - struct vm_area_struct *vma) -{ - int ret; - - ret = drm_gem_mmap_obj(obj, obj->size, vma); - if (ret) - return ret; - - return rockchip_drm_gem_object_mmap(obj, vma); -} - -/* drm driver mmap file operations */ -int rockchip_gem_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct drm_gem_object *obj; - int ret; - - ret = drm_gem_mmap(filp, vma); - if (ret) - return ret; - - /* -* Set vm_pgoff (used as a fake buffer offset by DRM) to 0 and map the -* whole buffer from the start. -*/ - vma->vm_pgoff = 0; - - obj = vma->vm_private_data; - - return rockchip_drm_gem_object_mmap(obj, vma); -} - static void rockchip_gem_release_object(struct rockchip_gem_object *rk_obj) { drm_gem_object_release(&rk_obj->base); @@ -301,6 +278,7 @@ static const str
Re: [PATCH] drm/xen: Implement mmap as GEM object function
ping for review Am 24.06.21 um 11:53 schrieb Thomas Zimmermann: Moving the driver-specific mmap code into a GEM object function allows for using DRM helpers for various mmap callbacks. The respective xen functions are being removed. The file_operations structure fops is now being created by the helper macro DEFINE_DRM_GEM_FOPS(). Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/xen/xen_drm_front.c | 16 +--- drivers/gpu/drm/xen/xen_drm_front_gem.c | 108 +--- drivers/gpu/drm/xen/xen_drm_front_gem.h | 7 -- 3 files changed, 44 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index 9f14d99c763c..434064c820e8 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -469,19 +469,7 @@ static void xen_drm_drv_release(struct drm_device *dev) kfree(drm_info); } -static const struct file_operations xen_drm_dev_fops = { - .owner = THIS_MODULE, - .open = drm_open, - .release= drm_release, - .unlocked_ioctl = drm_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = drm_compat_ioctl, -#endif - .poll = drm_poll, - .read = drm_read, - .llseek = no_llseek, - .mmap = xen_drm_front_gem_mmap, -}; +DEFINE_DRM_GEM_FOPS(xen_drm_dev_fops); static const struct drm_driver xen_drm_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, @@ -489,7 +477,7 @@ static const struct drm_driver xen_drm_driver = { .prime_handle_to_fd= drm_gem_prime_handle_to_fd, .prime_fd_to_handle= drm_gem_prime_fd_to_handle, .gem_prime_import_sg_table = xen_drm_front_gem_import_sg_table, - .gem_prime_mmap= xen_drm_front_gem_prime_mmap, + .gem_prime_mmap= drm_gem_prime_mmap, .dumb_create = xen_drm_drv_dumb_create, .fops = &xen_drm_dev_fops, .name = "xendrm-du", diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c index b293c67230ef..dd358ba2bf8e 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_gem.c +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c @@ -57,6 
+57,47 @@ static void gem_free_pages_array(struct xen_gem_object *xen_obj) xen_obj->pages = NULL; } +static int xen_drm_front_gem_object_mmap(struct drm_gem_object *gem_obj, +struct vm_area_struct *vma) +{ + struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj); + int ret; + + vma->vm_ops = gem_obj->funcs->vm_ops; + + /* +* Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the +* vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map +* the whole buffer. +*/ + vma->vm_flags &= ~VM_PFNMAP; + vma->vm_flags |= VM_MIXEDMAP; + vma->vm_pgoff = 0; + + /* +* According to Xen on ARM ABI (xen/include/public/arch-arm.h): +* all memory which is shared with other entities in the system +* (including the hypervisor and other guests) must reside in memory +* which is mapped as Normal Inner Write-Back Outer Write-Back +* Inner-Shareable. +*/ + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + + /* +* vm_operations_struct.fault handler will be called if CPU access +* to VM is here. For GPUs this isn't the case, because CPU doesn't +* touch the memory. Insert pages now, so both CPU and GPU are happy. 
+* +* FIXME: as we insert all the pages now then no .fault handler must +* be called, so don't provide one +*/ + ret = vm_map_pages(vma, xen_obj->pages, xen_obj->num_pages); + if (ret < 0) + DRM_ERROR("Failed to map pages into vma: %d\n", ret); + + return ret; +} + static const struct vm_operations_struct xen_drm_drv_vm_ops = { .open = drm_gem_vm_open, .close = drm_gem_vm_close, @@ -67,6 +108,7 @@ static const struct drm_gem_object_funcs xen_drm_front_gem_object_funcs = { .get_sg_table = xen_drm_front_gem_get_sg_table, .vmap = xen_drm_front_gem_prime_vmap, .vunmap = xen_drm_front_gem_prime_vunmap, + .mmap = xen_drm_front_gem_object_mmap, .vm_ops = &xen_drm_drv_vm_ops, }; @@ -238,58 +280,6 @@ xen_drm_front_gem_import_sg_table(struct drm_device *dev, return &xen_obj->base; } -static int gem_mmap_obj(struct xen_gem_object *xen_obj, - struct vm_area_struct *vma) -{ - int ret; - - /* -* clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the -* vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map -* the whole buffer. -*/
Re: [PATCH] drm/msm: Implement mmap as GEM object function
ping for review Am 24.06.21 um 11:03 schrieb Thomas Zimmermann: Moving the driver-specific mmap code into a GEM object function allows for using DRM helpers for various mmap callbacks. The respective msm functions are being removed. The file_operations structure fops is now being created by the helper macro DEFINE_DRM_GEM_FOPS(). Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/msm/msm_drv.c | 14 +- drivers/gpu/drm/msm/msm_drv.h | 1 - drivers/gpu/drm/msm/msm_fbdev.c | 10 + drivers/gpu/drm/msm/msm_gem.c | 67 - drivers/gpu/drm/msm/msm_gem.h | 3 -- drivers/gpu/drm/msm/msm_gem_prime.c | 11 - 6 files changed, 31 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index fe7d17cd35ec..f62eaedfc0d7 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -985,17 +985,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW), }; -static const struct file_operations fops = { - .owner = THIS_MODULE, - .open = drm_open, - .release= drm_release, - .unlocked_ioctl = drm_ioctl, - .compat_ioctl = drm_compat_ioctl, - .poll = drm_poll, - .read = drm_read, - .llseek = no_llseek, - .mmap = msm_gem_mmap, -}; +DEFINE_DRM_GEM_FOPS(fops); static const struct drm_driver msm_driver = { .driver_features= DRIVER_GEM | @@ -1015,7 +1005,7 @@ static const struct drm_driver msm_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import_sg_table = msm_gem_prime_import_sg_table, - .gem_prime_mmap = msm_gem_prime_mmap, + .gem_prime_mmap = drm_gem_prime_mmap, #ifdef CONFIG_DEBUG_FS .debugfs_init = msm_debugfs_init, #endif diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 2668941df529..8f1e0d7c8bbb 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -300,7 +300,6 @@ void msm_gem_shrinker_cleanup(struct 
drm_device *dev); struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); int msm_gem_prime_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map); -int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); int msm_gem_prime_pin(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index 227404077e39..07225907fd2d 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "msm_drv.h" #include "msm_gem.h" @@ -48,15 +49,8 @@ static int msm_fbdev_mmap(struct fb_info *info, struct vm_area_struct *vma) struct drm_fb_helper *helper = (struct drm_fb_helper *)info->par; struct msm_fbdev *fbdev = to_msm_fbdev(helper); struct drm_gem_object *bo = msm_framebuffer_bo(fbdev->fb, 0); - int ret = 0; - ret = drm_gem_mmap_obj(bo, bo->size, vma); - if (ret) { - pr_err("%s:drm_gem_mmap_obj fail\n", __func__); - return ret; - } - - return msm_gem_mmap_obj(bo, vma); + return drm_gem_prime_mmap(bo, vma); } static int msm_fbdev_create(struct drm_fb_helper *helper, diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index b61f5466e522..71d835bc575d 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -211,46 +211,6 @@ void msm_gem_put_pages(struct drm_gem_object *obj) msm_gem_unlock(obj); } -int msm_gem_mmap_obj(struct drm_gem_object *obj, - struct vm_area_struct *vma) -{ - struct msm_gem_object *msm_obj = to_msm_bo(obj); - - vma->vm_flags &= ~VM_PFNMAP; - vma->vm_flags |= VM_MIXEDMAP; - - if (msm_obj->flags & MSM_BO_WC) { - vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); - } else if (msm_obj->flags & MSM_BO_UNCACHED) { - 
vma->vm_page_prot = pgprot_noncached(vm_get_page_prot(vma->vm_flags)); - } else { - /* -* Shunt off cached objs to shmem file so they have their own -* address_space (so unmap_mapping_range does what we want, -* in particular in the case of mmap'd dmabufs) -*/ - vma->vm_pgoff = 0; - vma_set_file(vma, obj->filp); - -
Re: [PATCH] drm/mediatek: Implement mmap as GEM object function
ping for review Am 24.06.21 um 11:01 schrieb Thomas Zimmermann: Moving the driver-specific mmap code into a GEM object function allows for using DRM helpers for various mmap callbacks. The respective mediatek functions are being removed. The file_operations structure fops is now being created by the helper macro DEFINE_DRM_GEM_FOPS(). Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 13 ++-- drivers/gpu/drm/mediatek/mtk_drm_gem.c | 44 +++--- drivers/gpu/drm/mediatek/mtk_drm_gem.h | 3 -- 3 files changed, 14 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index b46bdb8985da..bbfefb29c211 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -300,16 +300,7 @@ static void mtk_drm_kms_deinit(struct drm_device *drm) component_unbind_all(drm->dev, drm); } -static const struct file_operations mtk_drm_fops = { - .owner = THIS_MODULE, - .open = drm_open, - .release = drm_release, - .unlocked_ioctl = drm_ioctl, - .mmap = mtk_drm_gem_mmap, - .poll = drm_poll, - .read = drm_read, - .compat_ioctl = drm_compat_ioctl, -}; +DEFINE_DRM_GEM_FOPS(mtk_drm_fops); /* * We need to override this because the device used to import the memory is @@ -332,7 +323,7 @@ static const struct drm_driver mtk_drm_driver = { .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = mtk_drm_gem_prime_import, .gem_prime_import_sg_table = mtk_gem_prime_import_sg_table, - .gem_prime_mmap = mtk_drm_gem_mmap_buf, + .gem_prime_mmap = drm_gem_prime_mmap, .fops = &mtk_drm_fops, .name = DRIVER_NAME, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index 280ea0d5e840..d0544962cfc1 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -14,11 +14,14 @@ #include "mtk_drm_drv.h" #include "mtk_drm_gem.h" +static int mtk_drm_gem_object_mmap(struct drm_gem_object *obj, 
struct vm_area_struct *vma); + static const struct drm_gem_object_funcs mtk_drm_gem_object_funcs = { .free = mtk_drm_gem_free_object, .get_sg_table = mtk_gem_prime_get_sg_table, .vmap = mtk_drm_gem_prime_vmap, .vunmap = mtk_drm_gem_prime_vunmap, + .mmap = mtk_drm_gem_object_mmap, .vm_ops = &drm_gem_cma_vm_ops, }; @@ -145,11 +148,19 @@ static int mtk_drm_gem_object_mmap(struct drm_gem_object *obj, struct mtk_drm_gem_obj *mtk_gem = to_mtk_gem_obj(obj); struct mtk_drm_private *priv = obj->dev->dev_private; + /* +* Set vm_pgoff (used as a fake buffer offset by DRM) to 0 and map the +* whole buffer from the start. +*/ + vma->vm_pgoff = 0; + /* * dma_alloc_attrs() allocated a struct page table for mtk_gem, so clear * VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap(). */ - vma->vm_flags &= ~VM_PFNMAP; + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); ret = dma_mmap_attrs(priv->dma_dev, vma, mtk_gem->cookie, mtk_gem->dma_addr, obj->size, mtk_gem->dma_attrs); @@ -159,37 +170,6 @@ static int mtk_drm_gem_object_mmap(struct drm_gem_object *obj, return ret; } -int mtk_drm_gem_mmap_buf(struct drm_gem_object *obj, struct vm_area_struct *vma) -{ - int ret; - - ret = drm_gem_mmap_obj(obj, obj->size, vma); - if (ret) - return ret; - - return mtk_drm_gem_object_mmap(obj, vma); -} - -int mtk_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct drm_gem_object *obj; - int ret; - - ret = drm_gem_mmap(filp, vma); - if (ret) - return ret; - - obj = vma->vm_private_data; - - /* -* Set vm_pgoff (used as a fake buffer offset by DRM) to 0 and map the -* whole buffer from the start. -*/ - vma->vm_pgoff = 0; - - return mtk_drm_gem_object_mmap(obj, vma); -} - /* * Allocate a sg_table for this GEM object. 
* Note: Both the table's contents, and the sg_table itself must be freed by diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.h b/drivers/gpu/drm/mediatek/mtk_drm_gem.h index 6da5ccb4b933..9a359a06cb73 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.h @@ -39,9 +39,6 @@ struct mtk_drm_gem_obj *mtk_drm_gem_create(struct drm_device *dev, size_t size, bool alloc_kmap); int mtk_drm_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); -
Re: [PATCH] drm/exynos: Implement mmap as GEM object function
ping for review Am 24.06.21 um 11:00 schrieb Thomas Zimmermann: Moving the driver-specific mmap code into a GEM object function allows for using DRM helpers for various mmap callbacks. The respective exynos functions are being removed. The file_operations structure exynos_drm_driver_fops is now being created by the helper macro DEFINE_DRM_GEM_FOPS(). Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/exynos/exynos_drm_drv.c | 13 ++- drivers/gpu/drm/exynos/exynos_drm_fbdev.c | 20 ++- drivers/gpu/drm/exynos/exynos_drm_gem.c | 43 +-- drivers/gpu/drm/exynos/exynos_drm_gem.h | 5 --- 4 files changed, 13 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index e60257f1f24b..1d46751cad02 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -102,16 +102,7 @@ static const struct drm_ioctl_desc exynos_ioctls[] = { DRM_RENDER_ALLOW), }; -static const struct file_operations exynos_drm_driver_fops = { - .owner = THIS_MODULE, - .open = drm_open, - .mmap = exynos_drm_gem_mmap, - .poll = drm_poll, - .read = drm_read, - .unlocked_ioctl = drm_ioctl, - .compat_ioctl = drm_compat_ioctl, - .release= drm_release, -}; +DEFINE_DRM_GEM_FOPS(exynos_drm_driver_fops); static const struct drm_driver exynos_drm_driver = { .driver_features= DRIVER_MODESET | DRIVER_GEM @@ -124,7 +115,7 @@ static const struct drm_driver exynos_drm_driver = { .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = exynos_drm_gem_prime_import, .gem_prime_import_sg_table = exynos_drm_gem_prime_import_sg_table, - .gem_prime_mmap = exynos_drm_gem_prime_mmap, + .gem_prime_mmap = drm_gem_prime_mmap, .ioctls = exynos_ioctls, .num_ioctls = ARRAY_SIZE(exynos_ioctls), .fops = &exynos_drm_driver_fops, diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c index 5147f5929be7..02c97b9ca926 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c +++ 
b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -39,25 +40,8 @@ static int exynos_drm_fb_mmap(struct fb_info *info, struct drm_fb_helper *helper = info->par; struct exynos_drm_fbdev *exynos_fbd = to_exynos_fbdev(helper); struct exynos_drm_gem *exynos_gem = exynos_fbd->exynos_gem; - unsigned long vm_size; - int ret; - - vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; - - vm_size = vma->vm_end - vma->vm_start; - - if (vm_size > exynos_gem->size) - return -EINVAL; - ret = dma_mmap_attrs(to_dma_dev(helper->dev), vma, exynos_gem->cookie, -exynos_gem->dma_addr, exynos_gem->size, -exynos_gem->dma_attrs); - if (ret < 0) { - DRM_DEV_ERROR(to_dma_dev(helper->dev), "failed to mmap.\n"); - return ret; - } - - return 0; + return drm_gem_prime_mmap(&exynos_gem->base, vma); } static const struct fb_ops exynos_drm_fb_ops = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 4396224227d1..c4b63902ee7a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -17,6 +17,8 @@ #include "exynos_drm_drv.h" #include "exynos_drm_gem.h" +static int exynos_drm_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); + static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem, bool kvmap) { struct drm_device *dev = exynos_gem->base.dev; @@ -135,6 +137,7 @@ static const struct vm_operations_struct exynos_drm_gem_vm_ops = { static const struct drm_gem_object_funcs exynos_drm_gem_object_funcs = { .free = exynos_drm_gem_free_object, .get_sg_table = exynos_drm_gem_prime_get_sg_table, + .mmap = exynos_drm_gem_mmap, .vm_ops = &exynos_drm_gem_vm_ops, }; @@ -354,12 +357,16 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, return 0; } -static int exynos_drm_gem_mmap_obj(struct drm_gem_object *obj, - struct vm_area_struct *vma) +static int exynos_drm_gem_mmap(struct drm_gem_object *obj, struct 
vm_area_struct *vma) { struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj); int ret; + if (obj->import_attach) + return dma_buf_mmap(obj->dma_buf, vma, 0); + + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; + DRM_DEV_DEBUG_KMS(to_dma_dev(obj->dev), "flags = 0x%x\n",
RE: [PATCH] drm/qxl: add NULL check for bo->resource
> From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] > Sent: Thursday, July 8, 2021 1:47 PM > When allocations fails that can be NULL now. > > Signed-off-by: Christian König > Reported-by: Daniel Bristot de Oliveira > Tested-by: Daniel Bristot de Oliveira Hi Christian thanks, it worked. Tested-by: Roberto Sassu Roberto HUAWEI TECHNOLOGIES Duesseldorf GmbH, HRB 56063 Managing Director: Li Peng, Li Jian, Shi Yanli > --- > drivers/gpu/drm/qxl/qxl_ttm.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c > index 19fd39d9a00c..37a1b6a6ad6d 100644 > --- a/drivers/gpu/drm/qxl/qxl_ttm.c > +++ b/drivers/gpu/drm/qxl/qxl_ttm.c > @@ -127,7 +127,7 @@ static void qxl_bo_move_notify(struct > ttm_buffer_object *bo, > struct qxl_bo *qbo; > struct qxl_device *qdev; > > - if (!qxl_ttm_bo_is_qxl_bo(bo)) > + if (!qxl_ttm_bo_is_qxl_bo(bo) || !bo->resource) > return; > qbo = to_qxl_bo(bo); > qdev = to_qxl(qbo->tbo.base.dev); > -- > 2.25.1
Re: [PATCH v4 5/7] drm/panfrost: Add a new ioctl to submit batches
Am 05.07.21 um 11:32 schrieb Daniel Vetter: On Mon, Jul 05, 2021 at 10:29:48AM +0200, Boris Brezillon wrote: This should help limit the number of ioctls when submitting multiple jobs. The new ioctl also supports syncobj timelines and BO access flags. v4: * Implement panfrost_ioctl_submit() as a wrapper around panfrost_submit_job() * Replace stride fields by a version field which is mapped to a tuple internally v3: * Re-use panfrost_get_job_bos() and panfrost_get_job_in_syncs() in the old submit path Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_drv.c | 562 drivers/gpu/drm/panfrost/panfrost_job.c | 3 + include/uapi/drm/panfrost_drm.h | 92 3 files changed, 479 insertions(+), 178 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 8e28ef30310b..a624e4f86aff 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -138,184 +138,6 @@ panfrost_get_job_mappings(struct drm_file *file_priv, struct panfrost_job *job) return 0; } -/** - * panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects - * referenced by the job. - * @dev: DRM device - * @file_priv: DRM file for this fd - * @args: IOCTL args - * @job: job being set up - * - * Resolve handles from userspace to BOs and attach them to job. - * - * Note that this function doesn't need to unreference the BOs on - * failure, because that will happen at panfrost_job_cleanup() time. 
- */ -static int -panfrost_lookup_bos(struct drm_device *dev, - struct drm_file *file_priv, - struct drm_panfrost_submit *args, - struct panfrost_job *job) -{ - unsigned int i; - int ret; - - job->bo_count = args->bo_handle_count; - - if (!job->bo_count) - return 0; - - job->bo_flags = kvmalloc_array(job->bo_count, - sizeof(*job->bo_flags), - GFP_KERNEL | __GFP_ZERO); - if (!job->bo_flags) - return -ENOMEM; - - for (i = 0; i < job->bo_count; i++) - job->bo_flags[i] = PANFROST_BO_REF_EXCLUSIVE; - - ret = drm_gem_objects_lookup(file_priv, -(void __user *)(uintptr_t)args->bo_handles, -job->bo_count, &job->bos); - if (ret) - return ret; - - return panfrost_get_job_mappings(file_priv, job); -} - -/** - * panfrost_copy_in_sync() - Sets up job->deps with the sync objects - * referenced by the job. - * @dev: DRM device - * @file_priv: DRM file for this fd - * @args: IOCTL args - * @job: job being set up - * - * Resolve syncobjs from userspace to fences and attach them to job. - * - * Note that this function doesn't need to unreference the fences on - * failure, because that will happen at panfrost_job_cleanup() time. 
- */ -static int -panfrost_copy_in_sync(struct drm_device *dev, - struct drm_file *file_priv, - struct drm_panfrost_submit *args, - struct panfrost_job *job) -{ - u32 *handles; - int ret = 0; - int i, in_fence_count; - - in_fence_count = args->in_sync_count; - - if (!in_fence_count) - return 0; - - handles = kvmalloc_array(in_fence_count, sizeof(u32), GFP_KERNEL); - if (!handles) { - ret = -ENOMEM; - DRM_DEBUG("Failed to allocate incoming syncobj handles\n"); - goto fail; - } - - if (copy_from_user(handles, - (void __user *)(uintptr_t)args->in_syncs, - in_fence_count * sizeof(u32))) { - ret = -EFAULT; - DRM_DEBUG("Failed to copy in syncobj handles\n"); - goto fail; - } - - for (i = 0; i < in_fence_count; i++) { - struct dma_fence *fence; - - ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0, -&fence); - if (ret) - goto fail; - - ret = drm_gem_fence_array_add(&job->deps, fence); - - if (ret) - goto fail; - } - -fail: - kvfree(handles); - return ret; -} - -static int panfrost_ioctl_submit(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct panfrost_device *pfdev = dev->dev_private; - struct drm_panfrost_submit *args = data; - struct drm_syncobj *sync_out = NULL; - struct panfrost_submitqueue *queue; - struct panfrost_job *job; - int ret = 0; - - if (!args->jc) - return -EINVAL; - - if (args->requirements && args->requirements != PANFROST_JD_REQ_FS) - return -EINVAL; - - queue = panfrost_submitqueue_get(file->driver_
Re: [PATCH] drm/msm: Implement mmap as GEM object function
Am 08.07.21 um 14:04 schrieb Thomas Zimmermann: ping for review Nevermind, there's a newer version of this patch at https://lore.kernel.org/dri-devel/20210706084753.8194-1-tzimmerm...@suse.de/ Best regards Thomas Am 24.06.21 um 11:03 schrieb Thomas Zimmermann: Moving the driver-specific mmap code into a GEM object function allows for using DRM helpers for various mmap callbacks. The respective msm functions are being removed. The file_operations structure fops is now being created by the helper macro DEFINE_DRM_GEM_FOPS(). Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/msm/msm_drv.c | 14 +- drivers/gpu/drm/msm/msm_drv.h | 1 - drivers/gpu/drm/msm/msm_fbdev.c | 10 + drivers/gpu/drm/msm/msm_gem.c | 67 - drivers/gpu/drm/msm/msm_gem.h | 3 -- drivers/gpu/drm/msm/msm_gem_prime.c | 11 - 6 files changed, 31 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index fe7d17cd35ec..f62eaedfc0d7 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -985,17 +985,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW), }; -static const struct file_operations fops = { - .owner = THIS_MODULE, - .open = drm_open, - .release = drm_release, - .unlocked_ioctl = drm_ioctl, - .compat_ioctl = drm_compat_ioctl, - .poll = drm_poll, - .read = drm_read, - .llseek = no_llseek, - .mmap = msm_gem_mmap, -}; +DEFINE_DRM_GEM_FOPS(fops); static const struct drm_driver msm_driver = { .driver_features = DRIVER_GEM | @@ -1015,7 +1005,7 @@ static const struct drm_driver msm_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import_sg_table = msm_gem_prime_import_sg_table, - .gem_prime_mmap = msm_gem_prime_mmap, + .gem_prime_mmap = drm_gem_prime_mmap, #ifdef CONFIG_DEBUG_FS .debugfs_init = msm_debugfs_init, #endif diff --git a/drivers/gpu/drm/msm/msm_drv.h 
b/drivers/gpu/drm/msm/msm_drv.h index 2668941df529..8f1e0d7c8bbb 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -300,7 +300,6 @@ void msm_gem_shrinker_cleanup(struct drm_device *dev); struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); int msm_gem_prime_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map); -int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); int msm_gem_prime_pin(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index 227404077e39..07225907fd2d 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "msm_drv.h" #include "msm_gem.h" @@ -48,15 +49,8 @@ static int msm_fbdev_mmap(struct fb_info *info, struct vm_area_struct *vma) struct drm_fb_helper *helper = (struct drm_fb_helper *)info->par; struct msm_fbdev *fbdev = to_msm_fbdev(helper); struct drm_gem_object *bo = msm_framebuffer_bo(fbdev->fb, 0); - int ret = 0; - ret = drm_gem_mmap_obj(bo, bo->size, vma); - if (ret) { - pr_err("%s:drm_gem_mmap_obj fail\n", __func__); - return ret; - } - - return msm_gem_mmap_obj(bo, vma); + return drm_gem_prime_mmap(bo, vma); } static int msm_fbdev_create(struct drm_fb_helper *helper, diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index b61f5466e522..71d835bc575d 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -211,46 +211,6 @@ void msm_gem_put_pages(struct drm_gem_object *obj) msm_gem_unlock(obj); } -int msm_gem_mmap_obj(struct drm_gem_object *obj, - struct vm_area_struct *vma) -{ - struct msm_gem_object *msm_obj = to_msm_bo(obj); - - vma->vm_flags &= 
~VM_PFNMAP; - vma->vm_flags |= VM_MIXEDMAP; - - if (msm_obj->flags & MSM_BO_WC) { - vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); - } else if (msm_obj->flags & MSM_BO_UNCACHED) { - vma->vm_page_prot = pgprot_noncached(vm_get_page_prot(vma->vm_flags)); - } else { - /* - * Shunt off cached objs to shmem file so they have their own - * address_space (so unmap_mapping_range does what we want, - * in particular in the case of mmap'd dmabufs) - */ - vma->vm_pgoff = 0; - vma_set_fil
[PATCH v1 0/7] drm/msm/dpu: add support for independent DSI config
This patchseries adds support for independent DSI config to DPU1 display subdriver. Also drop one of msm_kms_funcs callbacks, made unnecessary now. Tested on RB5 (dpu, dsi). Previous iteration was tested by Alexey Minnekhanov. Changes since v1: - renamed dual DSI to bonded DSI as suggsted by Abhinav - added comments to _dpu_kms_initialize_dsi() regarding encoders usage The following changes since commit e88bbc91849b2bf57683119c339e52916d34433f: Revert "drm/msm/mdp5: provide dynamic bandwidth management" (2021-06-23 14:06:20 -0700) are available in the Git repository at: https://git.linaro.org/people/dmitry.baryshkov/kernel.git msm-drm-drop-set-encoder-mode-1 for you to fetch changes up to 142f79dfc41271576731a49516d63ad47a56e1ca: drm/msm/kms: drop set_encoder_mode callback (2021-07-08 15:20:52 +0300) Dmitry Baryshkov (7): drm/msm/dsi: rename dual DSI to bonded DSI drm/msm/dsi: add two helper functions drm/msm/dpu: support setting up two independent DSI connectors drm/msm/mdp5: move mdp5_encoder_set_intf_mode after msm_dsi_modeset_init drm/msm/dp: stop calling set_encoder_mode callback drm/msm/dsi: stop calling set_encoder_mode callback drm/msm/kms: drop set_encoder_mode callback drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 102 +-- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 11 +--- drivers/gpu/drm/msm/dp/dp_display.c | 18 -- drivers/gpu/drm/msm/dsi/dsi.c| 9 ++- drivers/gpu/drm/msm/dsi/dsi.h| 9 ++- drivers/gpu/drm/msm/dsi/dsi_cfg.h| 2 +- drivers/gpu/drm/msm/dsi/dsi_host.c | 30 - drivers/gpu/drm/msm/dsi/dsi_manager.c| 93 drivers/gpu/drm/msm/msm_drv.h| 12 +++- drivers/gpu/drm/msm/msm_kms.h| 3 - 10 files changed, 136 insertions(+), 153 deletions(-)
[PATCH v1 1/7] drm/msm/dsi: rename dual DSI to bonded DSI
We are preparing to support two independent DSI hosts in the DSI/DPU code. To remove possible confusion (as both configurations can be referenced as dual DSI) let's rename old "dual DSI" (two DSI hosts driving single device, with clocks being locked) to "bonded DSI". Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dsi/dsi.h | 8 ++-- drivers/gpu/drm/msm/dsi/dsi_cfg.h | 2 +- drivers/gpu/drm/msm/dsi/dsi_host.c| 30 ++-- drivers/gpu/drm/msm/dsi/dsi_manager.c | 69 +-- 4 files changed, 54 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h index 9b8e9b07eced..856a532850c0 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.h +++ b/drivers/gpu/drm/msm/dsi/dsi.h @@ -109,7 +109,7 @@ int msm_dsi_host_enable(struct mipi_dsi_host *host); int msm_dsi_host_disable(struct mipi_dsi_host *host); int msm_dsi_host_power_on(struct mipi_dsi_host *host, struct msm_dsi_phy_shared_timings *phy_shared_timings, - bool is_dual_dsi); + bool is_bonded_dsi); int msm_dsi_host_power_off(struct mipi_dsi_host *host); int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host, const struct drm_display_mode *mode); @@ -123,7 +123,7 @@ int msm_dsi_host_set_src_pll(struct mipi_dsi_host *host, void msm_dsi_host_reset_phy(struct mipi_dsi_host *host); void msm_dsi_host_get_phy_clk_req(struct mipi_dsi_host *host, struct msm_dsi_phy_clk_request *clk_req, - bool is_dual_dsi); + bool is_bonded_dsi); void msm_dsi_host_destroy(struct mipi_dsi_host *host); int msm_dsi_host_modeset_init(struct mipi_dsi_host *host, struct drm_device *dev); @@ -145,8 +145,8 @@ int dsi_dma_base_get_6g(struct msm_dsi_host *msm_host, uint64_t *iova); int dsi_dma_base_get_v2(struct msm_dsi_host *msm_host, uint64_t *iova); int dsi_clk_init_v2(struct msm_dsi_host *msm_host); int dsi_clk_init_6g_v2(struct msm_dsi_host *msm_host); -int dsi_calc_clk_rate_v2(struct msm_dsi_host *msm_host, bool is_dual_dsi); -int dsi_calc_clk_rate_6g(struct msm_dsi_host *msm_host, bool is_dual_dsi); +int 
dsi_calc_clk_rate_v2(struct msm_dsi_host *msm_host, bool is_bonded_dsi); +int dsi_calc_clk_rate_6g(struct msm_dsi_host *msm_host, bool is_bonded_dsi); void msm_dsi_host_snapshot(struct msm_disp_state *disp_state, struct mipi_dsi_host *host); /* dsi phy */ struct msm_dsi_phy; diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h index ade9b609c7d9..2bce00d5a9fc 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h @@ -47,7 +47,7 @@ struct msm_dsi_host_cfg_ops { void* (*tx_buf_get)(struct msm_dsi_host *msm_host); void (*tx_buf_put)(struct msm_dsi_host *msm_host); int (*dma_base_get)(struct msm_dsi_host *msm_host, uint64_t *iova); - int (*calc_clk_rate)(struct msm_dsi_host *msm_host, bool is_dual_dsi); + int (*calc_clk_rate)(struct msm_dsi_host *msm_host, bool is_bonded_dsi); }; struct msm_dsi_cfg_handler { diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index ed504fe5074f..eb988faddbbf 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -679,7 +679,7 @@ void dsi_link_clk_disable_v2(struct msm_dsi_host *msm_host) clk_disable_unprepare(msm_host->byte_clk); } -static u32 dsi_get_pclk_rate(struct msm_dsi_host *msm_host, bool is_dual_dsi) +static u32 dsi_get_pclk_rate(struct msm_dsi_host *msm_host, bool is_bonded_dsi) { struct drm_display_mode *mode = msm_host->mode; u32 pclk_rate; @@ -692,17 +692,17 @@ static u32 dsi_get_pclk_rate(struct msm_dsi_host *msm_host, bool is_dual_dsi) * the clock rates have to be split between the two dsi controllers. * Adjust the byte and pixel clock rates for each dsi host accordingly. 
*/ - if (is_dual_dsi) + if (is_bonded_dsi) pclk_rate /= 2; return pclk_rate; } -static void dsi_calc_pclk(struct msm_dsi_host *msm_host, bool is_dual_dsi) +static void dsi_calc_pclk(struct msm_dsi_host *msm_host, bool is_bonded_dsi) { u8 lanes = msm_host->lanes; u32 bpp = dsi_get_bpp(msm_host->format); - u32 pclk_rate = dsi_get_pclk_rate(msm_host, is_dual_dsi); + u32 pclk_rate = dsi_get_pclk_rate(msm_host, is_bonded_dsi); u64 pclk_bpp = (u64)pclk_rate * bpp; if (lanes == 0) { @@ -720,28 +720,28 @@ static void dsi_calc_pclk(struct msm_dsi_host *msm_host, bool is_dual_dsi) } -int dsi_calc_clk_rate_6g(struct msm_dsi_host *msm_host, bool is_dual_dsi) +int dsi_calc_clk_rate_6g(struct msm_dsi_host *msm_host, bool is_bonded_dsi) { if (!msm_host->mode) { pr_err("%s: mode not set\n", __func__); return -EINVAL; } - dsi_calc_pclk(msm_host, is_
[PATCH v1 3/7] drm/msm/dpu: support setting up two independent DSI connectors
Move setting up encoders from set_encoder_mode to _dpu_kms_initialize_dsi() / _dpu_kms_initialize_displayport(). This allows us to support not only "single DSI" and "bonded DSI" but also "two independent DSI" configurations. In future this would also help adding support for multiple DP connectors. Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 102 +--- 1 file changed, 57 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 1d3a4f395e74..8459da36174e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -471,30 +471,68 @@ static int _dpu_kms_initialize_dsi(struct drm_device *dev, struct dpu_kms *dpu_kms) { struct drm_encoder *encoder = NULL; + struct msm_display_info info; int i, rc = 0; if (!(priv->dsi[0] || priv->dsi[1])) return rc; - /*TODO: Support two independent DSI connectors */ - encoder = dpu_encoder_init(dev, DRM_MODE_ENCODER_DSI); - if (IS_ERR(encoder)) { - DPU_ERROR("encoder init failed for dsi display\n"); - return PTR_ERR(encoder); - } - - priv->encoders[priv->num_encoders++] = encoder; - + /* +* We support following confiurations: +* - Single DSI host (dsi0 or dsi1) +* - Two independent DSI hosts +* - Bonded DSI0 and DSI1 hosts +* +* TODO: Support swapping DSI0 and DSI1 in the bonded setup. +*/ for (i = 0; i < ARRAY_SIZE(priv->dsi); i++) { if (!priv->dsi[i]) continue; + if (!encoder) { + encoder = dpu_encoder_init(dev, DRM_MODE_ENCODER_DSI); + if (IS_ERR(encoder)) { + DPU_ERROR("encoder init failed for dsi display\n"); + return PTR_ERR(encoder); + } + + priv->encoders[priv->num_encoders++] = encoder; + + memset(&info, 0, sizeof(info)); + info.intf_type = encoder->encoder_type; + info.capabilities = msm_dsi_is_cmd_mode(priv->dsi[i]) ? 
+ MSM_DISPLAY_CAP_CMD_MODE : + MSM_DISPLAY_CAP_VID_MODE; + } + rc = msm_dsi_modeset_init(priv->dsi[i], dev, encoder); if (rc) { DPU_ERROR("modeset_init failed for dsi[%d], rc = %d\n", i, rc); break; } + + info.h_tile_instance[info.num_of_h_tiles++] = i; + + /* Register non-bonded encoder here. If the encoder is bonded, +* it will be registered later, when both DSI hosts are +* initialized. +*/ + if (!msm_dsi_is_bonded_dsi(priv->dsi[i])) { + rc = dpu_encoder_setup(dev, encoder, &info); + if (rc) + DPU_ERROR("failed to setup DPU encoder %d: rc:%d\n", + encoder->base.id, rc); + encoder = NULL; + } + } + + /* Register bonded encoder here, when both DSI hosts are initialized */ + if (encoder) { + rc = dpu_encoder_setup(dev, encoder, &info); + if (rc) + DPU_ERROR("failed to setup DPU encoder %d: rc:%d\n", + encoder->base.id, rc); } return rc; @@ -505,6 +543,7 @@ static int _dpu_kms_initialize_displayport(struct drm_device *dev, struct dpu_kms *dpu_kms) { struct drm_encoder *encoder = NULL; + struct msm_display_info info; int rc = 0; if (!priv->dp) @@ -516,6 +555,7 @@ static int _dpu_kms_initialize_displayport(struct drm_device *dev, return PTR_ERR(encoder); } + memset(&info, 0, sizeof(info)); rc = msm_dp_modeset_init(priv->dp, dev, encoder); if (rc) { DPU_ERROR("modeset_init failed for DP, rc = %d\n", rc); @@ -524,6 +564,14 @@ static int _dpu_kms_initialize_displayport(struct drm_device *dev, } priv->encoders[priv->num_encoders++] = encoder; + + info.num_of_h_tiles = 1; + info.capabilities = MSM_DISPLAY_CAP_VID_MODE; + info.intf_type = encoder->encoder_type; + rc = dpu_encoder_setup(dev, encoder, &info); + if (rc) + DPU_ERROR("failed to setup DPU encoder %d: rc:%d\n", + encoder->base.id, rc); return rc; } @@ -726,41 +774,6 @@ static void dpu_kms_destroy(struct msm_kms *kms) msm_kms_destroy(&dpu_kms->base); } -sta
[PATCH v1 2/7] drm/msm/dsi: add two helper functions
Add two helper functions to be used by display drivers for setting up encoders. Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dsi/dsi.c | 7 +++ drivers/gpu/drm/msm/dsi/dsi_manager.c | 14 ++ drivers/gpu/drm/msm/msm_drv.h | 12 ++-- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c index 75afc12a7b25..5201d7eb0490 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.c +++ b/drivers/gpu/drm/msm/dsi/dsi.c @@ -13,6 +13,13 @@ struct drm_encoder *msm_dsi_get_encoder(struct msm_dsi *msm_dsi) return msm_dsi->encoder; } +bool msm_dsi_is_cmd_mode(struct msm_dsi *msm_dsi) +{ + unsigned long host_flags = msm_dsi_host_get_mode_flags(msm_dsi->host); + + return !(host_flags & MIPI_DSI_MODE_VIDEO); +} + static int dsi_get_phy(struct msm_dsi *msm_dsi) { struct platform_device *pdev = msm_dsi->pdev; diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 1173663c6d5d..a81105633d3c 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -216,12 +216,6 @@ static int dsi_mgr_bridge_get_id(struct drm_bridge *bridge) return dsi_bridge->id; } -static bool dsi_mgr_is_cmd_mode(struct msm_dsi *msm_dsi) -{ - unsigned long host_flags = msm_dsi_host_get_mode_flags(msm_dsi->host); - return !(host_flags & MIPI_DSI_MODE_VIDEO); -} - void msm_dsi_manager_setup_encoder(int id) { struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id); @@ -231,7 +225,7 @@ void msm_dsi_manager_setup_encoder(int id) if (encoder && kms->funcs->set_encoder_mode) kms->funcs->set_encoder_mode(kms, encoder, -dsi_mgr_is_cmd_mode(msm_dsi)); +msm_dsi_is_cmd_mode(msm_dsi)); } static int msm_dsi_manager_panel_init(struct drm_connector *conn, u8 id) @@ -276,7 +270,7 @@ static int msm_dsi_manager_panel_init(struct drm_connector *conn, u8 id) if (other_dsi && other_dsi->panel && kms->funcs->set_split_display) { kms->funcs->set_split_display(kms, master_dsi->encoder, slave_dsi->encoder, - 
dsi_mgr_is_cmd_mode(msm_dsi)); + msm_dsi_is_cmd_mode(msm_dsi)); } out: @@ -839,3 +833,7 @@ void msm_dsi_manager_unregister(struct msm_dsi *msm_dsi) msm_dsim->dsi[msm_dsi->id] = NULL; } +bool msm_dsi_is_bonded_dsi(struct msm_dsi *msm_dsi) +{ + return IS_BONDED_DSI(); +} diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 1a48a709ffb3..e0528dfd965e 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -350,7 +350,8 @@ void __exit msm_dsi_unregister(void); int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, struct drm_encoder *encoder); void msm_dsi_snapshot(struct msm_disp_state *disp_state, struct msm_dsi *msm_dsi); - +bool msm_dsi_is_cmd_mode(struct msm_dsi *msm_dsi); +bool msm_dsi_is_bonded_dsi(struct msm_dsi *msm_dsi); #else static inline void __init msm_dsi_register(void) { @@ -367,7 +368,14 @@ static inline int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, static inline void msm_dsi_snapshot(struct msm_disp_state *disp_state, struct msm_dsi *msm_dsi) { } - +static inline bool msm_dsi_is_cmd_mode(struct msm_dsi *msm_dsi) +{ + return false; +} +static bool msm_dsi_is_bonded_dsi(struct msm_dsi *msm_dsi) +{ + return false; +} #endif #ifdef CONFIG_DRM_MSM_DP -- 2.30.2
[PATCH v1 4/7] drm/msm/mdp5: move mdp5_encoder_set_intf_mode after msm_dsi_modeset_init
Move a call to mdp5_encoder_set_intf_mode() after msm_dsi_modeset_init(), removing set_encoder_mode callback. Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 11 +++ 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 15aed45022bc..b3b42672b2d4 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -209,13 +209,6 @@ static int mdp5_set_split_display(struct msm_kms *kms, slave_encoder); } -static void mdp5_set_encoder_mode(struct msm_kms *kms, - struct drm_encoder *encoder, - bool cmd_mode) -{ - mdp5_encoder_set_intf_mode(encoder, cmd_mode); -} - static void mdp5_kms_destroy(struct msm_kms *kms) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); @@ -287,7 +280,6 @@ static const struct mdp_kms_funcs kms_funcs = { .get_format = mdp_get_format, .round_pixclk= mdp5_round_pixclk, .set_split_display = mdp5_set_split_display, - .set_encoder_mode = mdp5_set_encoder_mode, .destroy = mdp5_kms_destroy, #ifdef CONFIG_DEBUG_FS .debugfs_init= mdp5_kms_debugfs_init, @@ -448,6 +440,9 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms, } ret = msm_dsi_modeset_init(priv->dsi[dsi_id], dev, encoder); + if (!ret) + mdp5_encoder_set_intf_mode(encoder, msm_dsi_is_cmd_mode(priv->dsi[dsi_id])); + break; } default: -- 2.30.2
[PATCH v1 7/7] drm/msm/kms: drop set_encoder_mode callback
set_encoder_mode callback is completely unused now. Drop it from msm_kms_func(). Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/msm_kms.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h index 086a2d59b8c8..9484e8b62630 100644 --- a/drivers/gpu/drm/msm/msm_kms.h +++ b/drivers/gpu/drm/msm/msm_kms.h @@ -117,9 +117,6 @@ struct msm_kms_funcs { struct drm_encoder *encoder, struct drm_encoder *slave_encoder, bool is_cmd_mode); - void (*set_encoder_mode)(struct msm_kms *kms, -struct drm_encoder *encoder, -bool cmd_mode); /* cleanup: */ void (*destroy)(struct msm_kms *kms); -- 2.30.2
[PATCH v1 6/7] drm/msm/dsi: stop calling set_encoder_mode callback
None of the display drivers now implement set_encoder_mode callback. Stop calling it from the modeset init code. Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dsi/dsi.c | 2 -- drivers/gpu/drm/msm/dsi/dsi.h | 1 - drivers/gpu/drm/msm/dsi/dsi_manager.c | 12 3 files changed, 15 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c index 5201d7eb0490..77c8dba297d8 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.c +++ b/drivers/gpu/drm/msm/dsi/dsi.c @@ -251,8 +251,6 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, goto fail; } - msm_dsi_manager_setup_encoder(msm_dsi->id); - priv->bridges[priv->num_bridges++] = msm_dsi->bridge; priv->connectors[priv->num_connectors++] = msm_dsi->connector; diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h index 856a532850c0..e0c3c4409377 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.h +++ b/drivers/gpu/drm/msm/dsi/dsi.h @@ -80,7 +80,6 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id); struct drm_connector *msm_dsi_manager_ext_bridge_init(u8 id); int msm_dsi_manager_cmd_xfer(int id, const struct mipi_dsi_msg *msg); bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 dma_base, u32 len); -void msm_dsi_manager_setup_encoder(int id); int msm_dsi_manager_register(struct msm_dsi *msm_dsi); void msm_dsi_manager_unregister(struct msm_dsi *msm_dsi); bool msm_dsi_manager_validate_current_config(u8 id); diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index a81105633d3c..e7f4e1d8978a 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -216,18 +216,6 @@ static int dsi_mgr_bridge_get_id(struct drm_bridge *bridge) return dsi_bridge->id; } -void msm_dsi_manager_setup_encoder(int id) -{ - struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id); - struct msm_drm_private *priv = msm_dsi->dev->dev_private; - struct msm_kms *kms = priv->kms; - struct drm_encoder *encoder = 
msm_dsi_get_encoder(msm_dsi); - - if (encoder && kms->funcs->set_encoder_mode) - kms->funcs->set_encoder_mode(kms, encoder, -msm_dsi_is_cmd_mode(msm_dsi)); -} - static int msm_dsi_manager_panel_init(struct drm_connector *conn, u8 id) { struct msm_drm_private *priv = conn->dev->dev_private; -- 2.30.2
[PATCH v1 5/7] drm/msm/dp: stop calling set_encoder_mode callback
None of the display drivers now implement set_encoder_mode callback. Stop calling it from the modeset init code. Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dp/dp_display.c | 18 -- 1 file changed, 18 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 051c1be1de7e..70b319a8fe83 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -102,8 +102,6 @@ struct dp_display_private { struct dp_display_mode dp_mode; struct msm_dp dp_display; - bool encoder_mode_set; - /* wait for audio signaling */ struct completion audio_comp; @@ -283,20 +281,6 @@ static void dp_display_send_hpd_event(struct msm_dp *dp_display) } -static void dp_display_set_encoder_mode(struct dp_display_private *dp) -{ - struct msm_drm_private *priv = dp->dp_display.drm_dev->dev_private; - struct msm_kms *kms = priv->kms; - - if (!dp->encoder_mode_set && dp->dp_display.encoder && - kms->funcs->set_encoder_mode) { - kms->funcs->set_encoder_mode(kms, - dp->dp_display.encoder, false); - - dp->encoder_mode_set = true; - } -} - static int dp_display_send_hpd_notification(struct dp_display_private *dp, bool hpd) { @@ -369,8 +353,6 @@ static void dp_display_host_init(struct dp_display_private *dp, int reset) if (dp->usbpd->orientation == ORIENTATION_CC2) flip = true; - dp_display_set_encoder_mode(dp); - dp_power_init(dp->power, flip); dp_ctrl_host_init(dp->ctrl, flip, reset); dp_aux_init(dp->aux); -- 2.30.2
Re: Aw: Re: BUG: MTK DRM/HDMI broken on 5.13 (mt7623/bpi-r2)
Hi On 08.07.21 11:35, Frank Wunderlich wrote: Hi just a small update, added debug in the vendor-specific functions for page_flip and vblank and it seems they never get called --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -87,21 +87,25 @@ static void mtk_drm_crtc_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) { struct drm_crtc *crtc = &mtk_crtc->base; unsigned long flags; - +printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); spin_lock_irqsave(&crtc->dev->event_lock, flags); drm_crtc_send_vblank_event(crtc, mtk_crtc->event); drm_crtc_vblank_put(crtc); mtk_crtc->event = NULL; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); +printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); } static void mtk_drm_finish_page_flip(struct mtk_drm_crtc *mtk_crtc) { +printk(KERN_ALERT "DEBUG: Passed %s %d update:%d,needsvblank:%d\n",__FUNCTION__,__LINE__,mtk_crtc->config_updating,mtk_crtc->pending_needs_vblank); drm_crtc_handle_vblank(&mtk_crtc->base); if (!mtk_crtc->config_updating && mtk_crtc->pending_needs_vblank) { +printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); mtk_drm_crtc_finish_page_flip(mtk_crtc); mtk_crtc->pending_needs_vblank = false; } +printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); } static void mtk_drm_crtc_destroy(struct drm_crtc *crtc) finish_page_flip is called by mtk_crtc_ddp_irq. this seems to be set in mtk_drm_crtc_enable_vblank with mtk_ddp_comp_enable_vblank. 
this is called correctly 113 static inline void mtk_ddp_comp_enable_vblank(struct mtk_ddp_comp *comp, 114 void (*vblank_cb)(void *), 115 void *vblank_cb_data) 116 { 117 printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); 118 if (comp->funcs && comp->funcs->enable_vblank) 119 { 120 comp->funcs->enable_vblank(comp->dev, vblank_cb, vblank_cb_data); 121 printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); 122 } 123 } i see both messages, but mtk_crtc_ddp_irq is never called and so the other 2 not. Yes, In my case the irq isr is also not called after resume which cause the warning even though "enable_vblank" do get called. Don't know why is that. root@bpi-r2:~# dmesg | grep -i DEBUG [6.433509] DEBUG: Passed mtk_drm_crtc_enable_vblank 510 [6.433530] DEBUG: Passed mtk_ddp_comp_enable_vblank 117 [6.433537] DEBUG: Passed mtk_ddp_comp_enable_vblank 121 <<< comp->funcs->enable_vblank should be mtk_drm_crtc_enable_vblank, right? No, this is a bit confusing , there are also the funcs of the components, see in file mtk_drm_ddp_comp.c so for mt7623 it is mtk_ovl_enable_vblank. 
Thanks, Dafna 641 static const struct drm_crtc_funcs mtk_crtc_funcs = { 642 .set_config = drm_atomic_helper_set_config, 643 .page_flip = drm_atomic_helper_page_flip, 644 .destroy= mtk_drm_crtc_destroy, 645 .reset = mtk_drm_crtc_reset, 646 .atomic_duplicate_state = mtk_drm_crtc_duplicate_state, 647 .atomic_destroy_state = mtk_drm_crtc_destroy_state, 648 .enable_vblank = mtk_drm_crtc_enable_vblank, <<< 649 .disable_vblank = mtk_drm_crtc_disable_vblank, 650 }; but it looks like a recursion: mtk_drm_crtc_enable_vblank calls mtk_ddp_comp_enable_vblank => enable_vblank (=mtk_drm_crtc_enable_vblank), but i see the messages not repeating mtk_drm_crtc_enable_vblank(struct drm_crtc *crtc) 511 mtk_ddp_comp_enable_vblank(comp, mtk_crtc_ddp_irq, &mtk_crtc->base); 113 static inline void mtk_ddp_comp_enable_vblank(struct mtk_ddp_comp *comp, 114 void (*vblank_cb)(void *), 115 void *vblank_cb_data) 116 { 118 if (comp->funcs && comp->funcs->enable_vblank) 120 comp->funcs->enable_vblank(comp->dev, vblank_cb, vblank_cb_data); but params do not match...comp->funcs->enable_vblank takes 3 arguments but comp->funcs->enable_vblank has only one.something i miss here... i guess not, but is watchdog somehow involved? i ask because i see this on reboot/poweroff: "watchdog: watchdog0: watchdog did not stop!" i see this with my 5.13, 5.12-drm (5.12.0+mtk/core drm-patches) and 5.12.14 too (hdmi is working there), but not 5.12.0! that means something in drm-patches (mtk/core) breaks watchdog. maybe the recursion mentioned above? regards Frank Gesendet: Donnerstag, 08. Juli 2021 um 09:22 Uhr Von: "Dafna Hirschfeld" Hi Frank, On 06.07.21 11:54, Frank Wunderlich wrote: Hi, i've noticed that HDMI is broken at least on my board (Bananapi-r2,mt7623) on 5.13. after some research i noticed that it is working till commit 2e477391522354e763aa62ee3e281c1ad9e8eb1b Author: Dafna Hirschfeld We also encountered that warning on mt8173 device - Acer Chromebook R13. It hap
Re: [PATCH 4/4] drm/msm: always wait for the exclusive fence
Am 03.07.21 um 01:01 schrieb Daniel Vetter: On Fri, Jul 02, 2021 at 01:16:42PM +0200, Christian König wrote: Drivers also need to sync to the exclusive fence when a shared one is present. Completely untested since the driver won't even compile on !ARM. It's really not that hard to set up a cross-compiler, reasonable distros have that now all packages. Does explain though why you tend to break the arm build with drm-misc patches. Well having proper COMPILE_TEST handling in kconfig would be even better. Otherwise everybody needs to cross-compile for ARM, ARM64 (with all the variants, e.g. BCM, S3C64XX, S5PV210, KEEMBAY, ZYNQMP etc etc), MIPS and so on. We have tons of non-x86 drivers, but MSM is the only one which is painful to get to compile test. Christian. Please fix this. Signed-off-by: Christian König Reviewed-by: Daniel Vetter --- drivers/gpu/drm/msm/msm_gem.c | 16 +++- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index a94a43de95ef..72a07e311de3 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -817,17 +817,15 @@ int msm_gem_sync_object(struct drm_gem_object *obj, struct dma_fence *fence; int i, ret; - fobj = dma_resv_shared_list(obj->resv); - if (!fobj || (fobj->shared_count == 0)) { - fence = dma_resv_excl_fence(obj->resv); - /* don't need to wait on our own fences, since ring is fifo */ - if (fence && (fence->context != fctx->context)) { - ret = dma_fence_wait(fence, true); - if (ret) - return ret; - } + fence = dma_resv_excl_fence(obj->resv); + /* don't need to wait on our own fences, since ring is fifo */ + if (fence && (fence->context != fctx->context)) { + ret = dma_fence_wait(fence, true); + if (ret) + return ret; } + fobj = dma_resv_shared_list(obj->resv); if (!exclusive || !fobj) return 0; -- 2.25.1
[PATCH] MAINTAINERS: Add Raphael Gallais-Pou as STM32 DRM maintainer
Add Raphael Gallais-Pou as STM32 DRM maintainer. Signed-off-by: Raphael Gallais-Pou --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0f1171ceaf8b..4fa3bfc00f57 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6165,6 +6165,7 @@ DRM DRIVERS FOR STM M: Yannick Fertre M: Philippe Cornu M: Benjamin Gaignard +M: Raphael Gallais-Pou L: dri-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc -- 2.17.1
[PATCH] dma-heap: Let dma heap use dma_map_attrs to map & unmap iova
From: Guangming Cao For dma-heap users, they can't bypass cache sync when map/unmap iova with dma heap. But they can do it by adding DMA_ATTR_SKIP_CPU_SYNC into dma_alloc_attrs. To keep alignment, at dma_heap side, also use dma_buf_attachment.dma_map_attrs to do iova map & unmap. Signed-off-by: Guangming Cao --- drivers/dma-buf/heaps/cma_heap.c| 6 -- drivers/dma-buf/heaps/system_heap.c | 6 -- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index 0c05b79870f9..2c9feb3bfc3e 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -99,9 +99,10 @@ static struct sg_table *cma_heap_map_dma_buf(struct dma_buf_attachment *attachme { struct dma_heap_attachment *a = attachment->priv; struct sg_table *table = &a->table; + int attrs = attachment->dma_map_attrs; int ret; - ret = dma_map_sgtable(attachment->dev, table, direction, 0); + ret = dma_map_sgtable(attachment->dev, table, direction, attrs); if (ret) return ERR_PTR(-ENOMEM); a->mapped = true; @@ -113,9 +114,10 @@ static void cma_heap_unmap_dma_buf(struct dma_buf_attachment *attachment, enum dma_data_direction direction) { struct dma_heap_attachment *a = attachment->priv; + int attrs = attachment->dma_map_attrs; a->mapped = false; - dma_unmap_sgtable(attachment->dev, table, direction, 0); + dma_unmap_sgtable(attachment->dev, table, direction, attrs); } static int cma_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index 23a7e74ef966..fc7b1e02988e 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -130,9 +130,10 @@ static struct sg_table *system_heap_map_dma_buf(struct dma_buf_attachment *attac { struct dma_heap_attachment *a = attachment->priv; struct sg_table *table = a->table; + int attrs = attachment->dma_map_attrs; int ret; - ret = dma_map_sgtable(attachment->dev, table, 
direction, 0); + ret = dma_map_sgtable(attachment->dev, table, direction, attrs); if (ret) return ERR_PTR(ret); @@ -145,9 +146,10 @@ static void system_heap_unmap_dma_buf(struct dma_buf_attachment *attachment, enum dma_data_direction direction) { struct dma_heap_attachment *a = attachment->priv; + int attrs = attachment->dma_map_attrs; a->mapped = false; - dma_unmap_sgtable(attachment->dev, table, direction, 0); + dma_unmap_sgtable(attachment->dev, table, direction, attrs); } static int system_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, -- 2.17.1
Re: [PATCH 06/7] drm/i915/guc: Optimize CTB writes and reads
On 08.07.2021 01:25, Matthew Brost wrote: > CTB writes are now in the path of command submission and should be > optimized for performance. Rather than reading CTB descriptor values > (e.g. head, tail) which could result in accesses across the PCIe bus, > store shadow local copies and only read/write the descriptor values when > absolutely necessary. Also store the current space in the each channel > locally. > > v2: > (Michal) > - Add additional sanity checks for head / tail pointers > - Use GUC_CTB_HDR_LEN rather than magic 1 > v3: > (Michal / John H) > - Drop redundant check of head value > v4: > (John H) > - Drop redundant checks of tail / head values > v5: > (Michal) > - Address more nits > > Signed-off-by: John Harrison > Signed-off-by: Matthew Brost > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 92 +++ > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 6 ++ > 2 files changed, 66 insertions(+), 32 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > index db3e85b89573..d552d3016779 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > @@ -130,6 +130,10 @@ static void guc_ct_buffer_desc_init(struct > guc_ct_buffer_desc *desc) > static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb) > { > ctb->broken = false; > + ctb->tail = 0; > + ctb->head = 0; > + ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size); > + > guc_ct_buffer_desc_init(ctb->desc); > } > > @@ -383,10 +387,8 @@ static int ct_write(struct intel_guc_ct *ct, > { > struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > struct guc_ct_buffer_desc *desc = ctb->desc; > - u32 head = desc->head; > - u32 tail = desc->tail; > + u32 tail = ctb->tail; > u32 size = ctb->size; > - u32 used; > u32 header; > u32 hxg; > u32 type; > @@ -396,25 +398,22 @@ static int ct_write(struct intel_guc_ct *ct, > if (unlikely(desc->status)) > goto corrupted; > > - if (unlikely((tail | head) >= size)) 
{ > - CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n", > - head, tail, size); > + GEM_BUG_ON(tail > size); > + > +#ifdef CONFIG_DRM_I915_DEBUG_GUC > + if (unlikely(tail != READ_ONCE(desc->tail))) { > + CT_ERROR(ct, "Tail was modified %u != %u\n", > + desc->tail, tail); > + desc->status |= GUC_CTB_STATUS_MISMATCH; > + goto corrupted; > + } > + if (unlikely(desc->head >= size)) { READ_ONCE wouldn't hurt > + CT_ERROR(ct, "Invalid head offset %u >= %u)\n", > + desc->head, size); > desc->status |= GUC_CTB_STATUS_OVERFLOW; > goto corrupted; > } > - > - /* > - * tail == head condition indicates empty. GuC FW does not support > - * using up the entire buffer to get tail == head meaning full. > - */ > - if (tail < head) > - used = (size - head) + tail; > - else > - used = tail - head; > - > - /* make sure there is a space including extra dw for the header */ > - if (unlikely(used + len + GUC_CTB_HDR_LEN >= size)) > - return -ENOSPC; > +#endif > > /* >* dw0: CT header (including fence) > @@ -452,6 +451,10 @@ static int ct_write(struct intel_guc_ct *ct, >*/ > write_barrier(ct); > > + /* update local copies */ > + ctb->tail = tail; > + ctb->space -= len + GUC_CTB_HDR_LEN; it looks that we rely on previous call to h2g_has_room(), but maybe for completeness we should have sanity check in this function as well: GEM_BUG_ON(ctb->space < len + HDR_LEN); not a blocker, other LGTM, Reviewed-by: Michal Wajdeczko Michal > + > /* now update descriptor */ > WRITE_ONCE(desc->tail, tail); > > @@ -469,7 +472,7 @@ static int ct_write(struct intel_guc_ct *ct, > * @req: pointer to pending request > * @status: placeholder for status > * > - * For each sent request, Guc shall send bac CT response message. > + * For each sent request, GuC shall send back CT response message. > * Our message handler will update status of tracked request once > * response message with given fence is received. Wait here and > * check for valid response status value. 
> @@ -525,24 +528,36 @@ static inline bool ct_deadlocked(struct intel_guc_ct > *ct) > return ret; > } > > -static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 len_dw) > +static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) > { > + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; > struct guc_ct_buffer_desc *desc = ctb->desc; > - u32 head = READ_ONCE(desc->head); > + u32 head; > u32 space; > > - space = CIRC_SPACE(desc->tail, head, ctb->size); > + if (ctb->space >= l
Re: [PATCH] drm/meson: Convert to Linux IRQ interfaces
Hi Thomas, On Tue, Jul 6, 2021 at 9:45 AM Thomas Zimmermann wrote: > > Drop the DRM IRQ midlayer in favor of Linux IRQ interfaces. DRM's > IRQ helpers are mostly useful for UMS drivers. Modern KMS drivers > don't benefit from using it. > > Signed-off-by: Thomas Zimmermann Tested-by: Martin Blumenstingl and also (although I am no drm subsystem expert): Reviewed-by: Martin Blumenstingl [...] > - ret = drm_irq_install(drm, priv->vsync_irq); > + ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, > drm); I'd like to use dev_name(dev) instead of drm->driver->name in the future as that'll make it much easier to identify the corresponding IRQ in /proc/interrupts for example your patch makes this possible - thanks for this! Best regards, Martin
Re: [PATCH 1/2] drm/gud: Add Raspberry Pi Pico ID
Den 03.07.2021 21.24, skrev Peter Stuge: > Hi Noralf, > > Noralf Trønnes wrote: >> Add VID/PID for the Raspberry Pi Pico implementation. >> Source: https://github.com/notro/gud-pico >> >> +++ b/drivers/gpu/drm/gud/gud_drv.c >> @@ -660,6 +660,7 @@ static int gud_resume(struct usb_interface *intf) >> >> static const struct usb_device_id gud_id_table[] = { >> { USB_DEVICE_INTERFACE_CLASS(0x1d50, 0x614d, USB_CLASS_VENDOR_SPEC) }, >> +{ USB_DEVICE_INTERFACE_CLASS(0x16d0, 0x10a9, USB_CLASS_VENDOR_SPEC) }, >> { } >> }; > > A VID/PID isn't neccessarily tied to one implementation; as long as an > implementation is in fact compatible with the driver I consider it okay > to reuse a VID/PID, and the 0x1d50 conditions are met by gud-pico too. > That said, there's no harm in adding another id. :) > > Reviewed-by: Peter Stuge > Both patches applied, thanks for reviewing. Noralf.
Re: [PATCH] drm/meson: Convert to Linux IRQ interfaces
Hi Am 08.07.21 um 15:31 schrieb Martin Blumenstingl: Hi Thomas, On Tue, Jul 6, 2021 at 9:45 AM Thomas Zimmermann wrote: Drop the DRM IRQ midlayer in favor of Linux IRQ interfaces. DRM's IRQ helpers are mostly useful for UMS drivers. Modern KMS drivers don't benefit from using it. Signed-off-by: Thomas Zimmermann Tested-by: Martin Blumenstingl and also (although I am no drm subsystem expert): Reviewed-by: Martin Blumenstingl Oh, just when I committed the patch. But thanks for your reply. [...] - ret = drm_irq_install(drm, priv->vsync_irq); + ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm); I'd like to use dev_name(dev) instead of drm->driver->name in the future as that'll make it much easier to identify the corresponding IRQ in /proc/interrupts for example your patch makes this possible - thanks for this! I also thought about this, but every driver in DRM and apparently most drivers in general pass the driver's name here. I think the change would make a lot of sense, but it's probably worth a kernel-wide effort. Best regards Thomas Best regards, Martin -- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Maxfeldstr. 5, 90409 Nürnberg, Germany (HRB 36809, AG Nürnberg) Geschäftsführer: Felix Imendörffer OpenPGP_signature Description: OpenPGP digital signature
Aw: Re: Re: BUG: MTK DRM/HDMI broken on 5.13 (mt7623/bpi-r2)
> Gesendet: Donnerstag, 08. Juli 2021 um 14:30 Uhr > Von: "Dafna Hirschfeld" > > i see both messages, but mtk_crtc_ddp_irq is never called and so the other > > 2 not. > > Yes, In my case the irq isr is also not called after resume which cause the > warning > even though "enable_vblank" do get called. Don't know why is that. > > comp->funcs->enable_vblank should be mtk_drm_crtc_enable_vblank, right? > > No, this is a bit confusing , there are also the funcs of the components, see > in file mtk_drm_ddp_comp.c > so for mt7623 it is mtk_ovl_enable_vblank. thanks for pointing to this. in this function another struct is filled with the callback+data, and this callback seems to be called mtk_disp_ovl_irq_handler which name suggests also a irq as trigger 412 ret = devm_request_irq(dev, irq, mtk_disp_ovl_irq_handler, 413IRQF_TRIGGER_NONE, dev_name(dev), priv); 414 if (ret < 0) { 415 dev_err(dev, "Failed to request irq %d: %d\n", irq, ret); 416 return ret; 417 } as i don't see this error in dmesg, i guess the registration was successful. 
added again some debug and it looks like the interrupt callback (mtk_disp_ovl_irq_handler) is not called [5.125002] DEBUG: Passed mtk_disp_ovl_probe 416 int reg:0 [6.344029] DEBUG: Passed mtk_drm_crtc_enable_vblank 510 [6.344051] DEBUG: Passed mtk_ddp_comp_enable_vblank 117 [6.344057] DEBUG: Passed mtk_ovl_enable_vblank 107 [6.344062] DEBUG: Passed mtk_ovl_enable_vblank 112 [6.344066] DEBUG: Passed mtk_ddp_comp_enable_vblank 121 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -86,6 +86,7 @@ static irqreturn_t mtk_disp_ovl_irq_handler(int irq, void *dev_id) { struct mtk_disp_ovl *priv = dev_id; +printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); /* Clear frame completion interrupt */ writel(0x0, priv->regs + DISP_REG_OVL_INTSTA); @@ -93,6 +94,7 @@ static irqreturn_t mtk_disp_ovl_irq_handler(int irq, void *dev_id) return IRQ_NONE; priv->vblank_cb(priv->vblank_cb_data); +printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); return IRQ_HANDLED; } @@ -102,11 +104,12 @@ void mtk_ovl_enable_vblank(struct device *dev, void *vblank_cb_data) { struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); - +printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); ovl->vblank_cb = vblank_cb; ovl->vblank_cb_data = vblank_cb_data; writel(0x0, ovl->regs + DISP_REG_OVL_INTSTA); writel_relaxed(OVL_FME_CPL_INT, ovl->regs + DISP_REG_OVL_INTEN); +printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); } void mtk_ovl_disable_vblank(struct device *dev) @@ -410,6 +413,7 @@ static int mtk_disp_ovl_probe(struct platform_device *pdev) ret = devm_request_irq(dev, irq, mtk_disp_ovl_irq_handler, IRQF_TRIGGER_NONE, dev_name(dev), priv); +printk(KERN_ALERT "DEBUG: Passed %s %d int reg:%d\n",__FUNCTION__,__LINE__,ret); if (ret < 0) { dev_err(dev, "Failed to request irq %d: %d\n", irq, ret); return ret; how can we trace this further? maybe watchdog related? > > > > "watchdog: watchdog0: watchdog did not stop!" 
> > > > i see this with my 5.13, 5.12-drm (5.12.0+mtk/core drm-patches) and 5.12.14 > > too (hdmi is working there), but not 5.12.0! > > that means something in drm-patches (mtk/core) breaks watchdog. maybe the > > recursion mentioned above?
[PATCH] drm/tegra: gr2d: Explicitly control module reset
From: Thierry Reding As of commit 4782c0a5dd88 ("clk: tegra: Don't deassert reset on enabling clocks"), module resets are no longer automatically deasserted when the module clock is enabled. To make sure that the gr2d module continues to work, we need to explicitly control the module reset. Fixes: 4782c0a5dd88 ("clk: tegra: Don't deassert reset on enabling clocks") Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/gr2d.c | 33 +++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c index de288cba3905..ba3722f1b865 100644 --- a/drivers/gpu/drm/tegra/gr2d.c +++ b/drivers/gpu/drm/tegra/gr2d.c @@ -4,9 +4,11 @@ */ #include +#include #include #include #include +#include #include "drm.h" #include "gem.h" @@ -19,6 +21,7 @@ struct gr2d_soc { struct gr2d { struct tegra_drm_client client; struct host1x_channel *channel; + struct reset_control *rst; struct clk *clk; const struct gr2d_soc *soc; @@ -208,6 +211,12 @@ static int gr2d_probe(struct platform_device *pdev) if (!syncpts) return -ENOMEM; + gr2d->rst = devm_reset_control_get(dev, NULL); + if (IS_ERR(gr2d->rst)) { + dev_err(dev, "cannot get reset\n"); + return PTR_ERR(gr2d->rst); + } + gr2d->clk = devm_clk_get(dev, NULL); if (IS_ERR(gr2d->clk)) { dev_err(dev, "cannot get clock\n"); @@ -220,6 +229,14 @@ static int gr2d_probe(struct platform_device *pdev) return err; } + usleep_range(2000, 4000); + + err = reset_control_deassert(gr2d->rst); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + INIT_LIST_HEAD(&gr2d->client.base.list); gr2d->client.base.ops = &gr2d_client_ops; gr2d->client.base.dev = dev; @@ -234,8 +251,7 @@ static int gr2d_probe(struct platform_device *pdev) err = host1x_client_register(&gr2d->client.base); if (err < 0) { dev_err(dev, "failed to register host1x client: %d\n", err); - clk_disable_unprepare(gr2d->clk); - return err; + goto assert_rst; } /* initialize address register map */ 
@@ -245,6 +261,13 @@ static int gr2d_probe(struct platform_device *pdev) platform_set_drvdata(pdev, gr2d); return 0; + +assert_rst: + (void)reset_control_assert(gr2d->rst); +disable_clk: + clk_disable_unprepare(gr2d->clk); + + return err; } static int gr2d_remove(struct platform_device *pdev) @@ -259,6 +282,12 @@ static int gr2d_remove(struct platform_device *pdev) return err; } + err = reset_control_assert(gr2d->rst); + if (err < 0) + dev_err(&pdev->dev, "failed to assert reset: %d\n", err); + + usleep_range(2000, 4000); + clk_disable_unprepare(gr2d->clk); return 0; -- 2.32.0
Re: [PATCH 2/2 v3] drm/panel: ws2401: Add driver for WideChips WS2401
Den 08.07.2021 01.43, skrev Linus Walleij: > This adds a driver for panels based on the WideChips WS2401 display > controller. This display controller is used in the Samsung LMS380KF01 > display found in the Samsung GT-I8160 (Codina) mobile phone and > possibly others. > > As is common with Samsung displays manufacturer commands are necessary > to configure the display to a working state. > > The display optionally supports internal backlight control, but can > also use an external backlight. > > This driver re-uses the DBI infrastructure to communicate with the > display. > > Cc: phone-de...@vger.kernel.org > Cc: Douglas Anderson > Cc: Noralf Trønnes > Signed-off-by: Linus Walleij > --- Reviewed-by: Noralf Trønnes
Re: [PATCH] drm/tegra: gr2d: Explicitly control module reset
08.07.2021 17:37, Thierry Reding пишет: > From: Thierry Reding > > As of commit 4782c0a5dd88 ("clk: tegra: Don't deassert reset on enabling > clocks"), module resets are no longer automatically deasserted when the > module clock is enabled. To make sure that the gr2d module continues to > work, we need to explicitly control the module reset. > > Fixes: 4782c0a5dd88 ("clk: tegra: Don't deassert reset on enabling clocks") > Signed-off-by: Thierry Reding On which board do see this problem? TRM says that 2d should be in reset by default, but somehow it's not a problem on devices that use fastboot.. why would it touch the 2d reset? > --- > drivers/gpu/drm/tegra/gr2d.c | 33 +++-- > 1 file changed, 31 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c > index de288cba3905..ba3722f1b865 100644 > --- a/drivers/gpu/drm/tegra/gr2d.c > +++ b/drivers/gpu/drm/tegra/gr2d.c > @@ -4,9 +4,11 @@ > */ > > #include > +#include > #include > #include > #include > +#include > > #include "drm.h" > #include "gem.h" > @@ -19,6 +21,7 @@ struct gr2d_soc { > struct gr2d { > struct tegra_drm_client client; > struct host1x_channel *channel; > + struct reset_control *rst; Unused variable? 
> struct clk *clk; > > const struct gr2d_soc *soc; > @@ -208,6 +211,12 @@ static int gr2d_probe(struct platform_device *pdev) > if (!syncpts) > return -ENOMEM; > > + gr2d->rst = devm_reset_control_get(dev, NULL); > + if (IS_ERR(gr2d->rst)) { > + dev_err(dev, "cannot get reset\n"); > + return PTR_ERR(gr2d->rst); > + } > + > gr2d->clk = devm_clk_get(dev, NULL); > if (IS_ERR(gr2d->clk)) { > dev_err(dev, "cannot get clock\n"); > @@ -220,6 +229,14 @@ static int gr2d_probe(struct platform_device *pdev) > return err; > } > > + usleep_range(2000, 4000); > + > + err = reset_control_deassert(gr2d->rst); > + if (err < 0) { > + dev_err(dev, "failed to deassert reset: %d\n", err); > + goto disable_clk; > + } > + > INIT_LIST_HEAD(&gr2d->client.base.list); > gr2d->client.base.ops = &gr2d_client_ops; > gr2d->client.base.dev = dev; > @@ -234,8 +251,7 @@ static int gr2d_probe(struct platform_device *pdev) > err = host1x_client_register(&gr2d->client.base); > if (err < 0) { > dev_err(dev, "failed to register host1x client: %d\n", err); > - clk_disable_unprepare(gr2d->clk); > - return err; > + goto assert_rst; > } > > /* initialize address register map */ > @@ -245,6 +261,13 @@ static int gr2d_probe(struct platform_device *pdev) > platform_set_drvdata(pdev, gr2d); > > return 0; > + > +assert_rst: > + (void)reset_control_assert(gr2d->rst); (void)?
Re: [PATCH] drm/tegra: gr2d: Explicitly control module reset
08.07.2021 18:13, Dmitry Osipenko пишет: >> #include "drm.h" >> #include "gem.h" >> @@ -19,6 +21,7 @@ struct gr2d_soc { >> struct gr2d { >> struct tegra_drm_client client; >> struct host1x_channel *channel; >> +struct reset_control *rst; > Unused variable? Ah, I haven't noticed that it's struct. Looks okay.
Aw: Re: BUG: MTK DRM/HDMI broken on 5.13 (mt7623/bpi-r2)
> Gesendet: Donnerstag, 08. Juli 2021 um 11:35 Uhr > Von: "Frank Wunderlich" > i guess not, but is watchdog somehow involved? i ask because i see this on > reboot/poweroff: > > "watchdog: watchdog0: watchdog did not stop!" > > i see this with my 5.13, 5.12-drm (5.12.0+mtk/core drm-patches) and 5.12.14 > too (hdmi is working there), but not 5.12.0! > that means something in drm-patches (mtk/core) breaks watchdog. maybe the > recursion mentioned above? i have to correct myself: 5.12.0 shows this error too, so the error is not caused by drm-patches, but i guess it is unrelated to the possible irq issue causing hdmi not working on 5.13 (wait-for-vblank/page_flip tracebacks) i'm not aware who else is involved in the problem, so i want to avoid sending people the wrong way :) regards Frank
[PATCH 00/30] drm/i915/gem: ioctl clean-ups (v9)
Overview: - This patch series attempts to clean up some of the IOCTL mess we've created over the last few years. The most egregious bit being context mutability. In summary, this series: 1. Drops two never-used context params: RINGSIZE and NO_ZEROMAP 2. Drops the entire CONTEXT_CLONE API 3. Implements SINGLE_TIMELINE with a syncobj instead of actually sharing intel_timeline between engines. 4. Adds a few sanity restrictions to the balancing/bonding API. 5. Implements a proto-ctx mechanism so that the engine set and VM can only be set early on in the lifetime of a context, before anything ever executes on it. This effectively makes the VM and engine set immutable. This series has been tested with IGT as well as the Iris, ANV, and the Intel media driver doing an 8K decode (this uses bonding/balancing). I've also done quite a bit of git archeology to ensure that nothing in here will break anything that's already shipped at some point in history. It's possible I've missed something, but I've dug quite a bit. Details and motivation: --- In very broad strokes, there's an effort going on right now within Intel to try and clean up and simplify i915 anywhere we can. We obviously don't want to break any shipping userspace but, as can be seen by this series, there's a lot i915 theoretically supports which userspace doesn't actually need. Some of this, like the two context params used here, were simply oversights where we went through the usual API review process and merged the i915 bits but the userspace bits never landed for some reason. Not all are so innocent, however. For instance, there's an entire context cloning API which allows one to create a context with certain parameters "cloned" from some other context. This entire API has never been used by any userspace except IGT and there were never patches to any other userspace to use it. It never should have landed. 
Also, when we added support for setting explicit engine sets and sharing VMs across contexts, people decided to do so via SET_CONTEXT_PARAM. While this allowed them to re-use existing API, it did so at the cost of making those states mutable which leads to a plethora of potential race conditions. There were even IGT tests merged to cover some of these: - gem_vm_create@async-destroy and gem_vm_create@destroy-race which test swapping out the VM on a running context. - gem_ctx_persistence@replace* which test whether a client can escape a non-persistent context by submitting a hanging batch and then swapping out the engine set before the hang is detected. - api_intel_bb@bb-with-vm which tests that intel_bb_assign_vm works properly. This API is never used by any other IGT test. There is also an entire deferred flush and set state framework in i915_gem_context.c which exists for safely swapping out the VM while there is work in-flight on a context. So, clearly people knew that this API was inherently racy and difficult to implement but they landed it anyway. Why? The best explanation I've been given is because it makes the API more "unified" or "symmetric" for this stuff to go through SET_CONTEXT_PARAM. It's not because any userspace actually wants to be able to swap out the VM or the set of engines on a running context. That would be utterly insane. This patch series cleans up this particular mess by introducing the concept of an i915_gem_proto_context data structure which contains context creation information. When you initially call GEM_CONTEXT_CREATE, a proto-context is created instead of an actual context. Then, the first time something is done on the context besides SET_CONTEXT_PARAM, an actual context is created. This allows us to keep the old drivers which use SET_CONTEXT_PARAM to set up the engine set (see also media) while ensuring that, once you have an i915_gem_context, the VM and the engine set are immutable state. 
Eventually, there are more clean-ups I'd like to do on top of this which should make working with contexts inside i915 simpler and safer: 1. Move the GEM handle -> vma LUT from i915_gem_context into either i915_ppgtt or drm_i915_file_private depending on whether or not the hardware has a full PPGTT. 2. Move the delayed context destruction code into intel_context or a per-engine wrapper struct rather than i915_gem_context. 3. Get rid of the separation between context close and context destroy 4. Get rid of the RCU on i915_gem_context However, these should probably be done as a separate patch series as this one is already starting to get longish, especially if you consider the 89 IGT patches that go along with it. Test-with: 20210707210215.351483-1-ja...@jlekstrand.net Jason Ekstrand (30): drm/i915: Drop I915_CONTEXT_PARAM_RINGSIZE drm/i915: Stop storing the ring size in the ring pointer (v3) drm/i915: Drop I915_CONTEXT_PARAM_NO_ZEROMAP drm/i915/gem: Set the watchdog timeout directly in intel_context_s
[PATCH 01/30] drm/i915: Drop I915_CONTEXT_PARAM_RINGSIZE
This reverts commit 88be76cdafc7 ("drm/i915: Allow userspace to specify ringsize on construction"). This API was originally added for OpenCL but the compute-runtime PR has sat open for a year without action so we can still pull it out if we want. I argue we should drop it for three reasons: 1. If the compute-runtime PR has sat open for a year, this clearly isn't that important. 2. It's a very leaky API. Ring size is an implementation detail of the current execlist scheduler and really only makes sense there. It can't apply to the older ring-buffer scheduler on pre-execlist hardware because that's shared across all contexts and it won't apply to the GuC scheduler that's in the pipeline. 3. Having userspace set a ring size in bytes is a bad solution to the problem of having too small a ring. There is no way that userspace has the information to know how to properly set the ring size so it's just going to detect the feature and always set it to the maximum of 512K. This is what the compute-runtime PR does. The scheduler in i915, on the other hand, does have the information to make an informed choice. It could detect if the ring size is a problem and grow it itself. Or, if that's too hard, we could just increase the default size from 16K to 32K or even 64K instead of relying on userspace to do it. Let's drop this API for now and, if someone decides they really care about solving this problem, they can do it properly. 
Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/Makefile | 1 - drivers/gpu/drm/i915/gem/i915_gem_context.c | 85 +-- drivers/gpu/drm/i915/gt/intel_context_param.c | 63 -- drivers/gpu/drm/i915/gt/intel_context_param.h | 3 - include/uapi/drm/i915_drm.h | 20 + 5 files changed, 4 insertions(+), 168 deletions(-) delete mode 100644 drivers/gpu/drm/i915/gt/intel_context_param.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 01f28ad5ea578..10b3bb6207bab 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -89,7 +89,6 @@ gt-y += \ gt/gen8_ppgtt.o \ gt/intel_breadcrumbs.o \ gt/intel_context.o \ - gt/intel_context_param.o \ gt/intel_context_sseu.o \ gt/intel_engine_cs.o \ gt/intel_engine_heartbeat.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7720b8c22c816..ddc3cc3f8f092 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1334,63 +1334,6 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, return err; } -static int __apply_ringsize(struct intel_context *ce, void *sz) -{ - return intel_context_set_ring_size(ce, (unsigned long)sz); -} - -static int set_ringsize(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915)) - return -ENODEV; - - if (args->size) - return -EINVAL; - - if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE)) - return -EINVAL; - - if (args->value < I915_GTT_PAGE_SIZE) - return -EINVAL; - - if (args->value > 128 * I915_GTT_PAGE_SIZE) - return -EINVAL; - - return context_apply_all(ctx, -__apply_ringsize, -__intel_context_ring_size(args->value)); -} - -static int __get_ringsize(struct intel_context *ce, void *arg) -{ - long sz; - - sz = intel_context_get_ring_size(ce); - GEM_BUG_ON(sz > INT_MAX); - - return sz; /* stop on first engine */ -} - -static int 
get_ringsize(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - int sz; - - if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915)) - return -ENODEV; - - if (args->size) - return -EINVAL; - - sz = context_apply_all(ctx, __get_ringsize, NULL); - if (sz < 0) - return sz; - - args->value = sz; - return 0; -} - int i915_gem_user_to_context_sseu(struct intel_gt *gt, const struct drm_i915_gem_context_param_sseu *user, @@ -2036,11 +1979,8 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_persistence(ctx, args); break; - case I915_CONTEXT_PARAM_RINGSIZE: - ret = set_ringsize(ctx, args); - break; - case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_RINGSIZE: default: ret = -EINVAL; break; @@ -2068,18 +2008,6 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data) return ctx_setparam(arg->fpr
[PATCH 02/30] drm/i915: Stop storing the ring size in the ring pointer (v3)
Previously, we were storing the ring size in the ring pointer before it was actually allocated. We would then guard setting the ring size on checking for CONTEXT_ALLOC_BIT. This is error-prone at best and really only saves us a few bytes on something that already burns at least 4K. Instead, this patch adds a new ring_size field and makes everything use that. v2 (Daniel Vetter): - Replace 512 * SZ_4K with SZ_2M v2 (Jason Ekstrand): - Rebase on top of page migration code Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 3 +-- drivers/gpu/drm/i915/gt/intel_context.c | 3 ++- drivers/gpu/drm/i915/gt/intel_context.h | 5 - drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 ++- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- drivers/gpu/drm/i915/gt/intel_migrate.c | 3 ++- drivers/gpu/drm/i915/gt/selftest_execlists.c | 2 +- drivers/gpu/drm/i915/gt/selftest_mocs.c | 2 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 7 ++- 11 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ddc3cc3f8f092..a4faf06022d5a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -211,8 +211,7 @@ static void intel_context_set_gem(struct intel_context *ce, GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); RCU_INIT_POINTER(ce->gem_context, ctx); - if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) - ce->ring = __intel_context_ring_size(SZ_16K); + ce->ring_size = SZ_16K; if (rcu_access_pointer(ctx->vm)) { struct i915_address_space *vm; diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 4033184f13b9f..bd63813c8a802 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -371,7 +371,8 @@ intel_context_init(struct 
intel_context *ce, struct intel_engine_cs *engine) ce->engine = engine; ce->ops = engine->cops; ce->sseu = engine->sseu; - ce->ring = __intel_context_ring_size(SZ_4K); + ce->ring = NULL; + ce->ring_size = SZ_4K; ewma_runtime_init(&ce->runtime.avg); diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index f83a73a2b39fc..b10cbe8fee992 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -175,11 +175,6 @@ int intel_context_prepare_remote_request(struct intel_context *ce, struct i915_request *intel_context_create_request(struct intel_context *ce); -static inline struct intel_ring *__intel_context_ring_size(u64 sz) -{ - return u64_to_ptr(struct intel_ring, sz); -} - static inline bool intel_context_is_barrier(const struct intel_context *ce) { return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index ed8c447a7346b..90026c1771055 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -82,6 +82,7 @@ struct intel_context { spinlock_t signal_lock; /* protects signals, the list of requests */ struct i915_vma *state; + u32 ring_size; struct intel_ring *ring; struct intel_timeline *timeline; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 5ca3d16643353..d561573ed98c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -807,7 +807,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine, __set_bit(CONTEXT_BARRIER_BIT, &ce->flags); ce->timeline = page_pack_bits(NULL, hwsp); - ce->ring = __intel_context_ring_size(ring_size); + ce->ring = NULL; + ce->ring_size = ring_size; i915_vm_put(ce->vm); ce->vm = i915_vm_get(vm); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c 
index a27bac0a4bfb8..8ada1afe3d229 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -845,7 +845,7 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_ERR(vma)) return PTR_ERR(vma); - ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); + ring = intel_engine_create_ring(engine, ce->ring_size); if (IS_ERR(ring)) { err = PTR_ERR(ring); goto err_vma; diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 23c59ce66cee5..f10d2335fc8c6 100644 --- a/drivers/gpu/
[PATCH 03/30] drm/i915: Drop I915_CONTEXT_PARAM_NO_ZEROMAP
The idea behind this param is to support OpenCL drivers with relocations because OpenCL reserves 0x0 for NULL and, if we placed memory there, it would confuse CL kernels. It was originally sent out as part of a patch series including libdrm [1] and Beignet [2] support. However, the libdrm and Beignet patches never landed in their respective upstream projects so this API has never been used. It's never been used in Mesa or any other driver, either. Dropping this API allows us to delete a small bit of code. [1]: https://lists.freedesktop.org/archives/intel-gfx/2015-May/067030.html [2]: https://lists.freedesktop.org/archives/intel-gfx/2015-May/067031.html Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 16 ++-- .../gpu/drm/i915/gem/i915_gem_context_types.h| 1 - drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 8 include/uapi/drm/i915_drm.h | 4 4 files changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index a4faf06022d5a..5fc0eb4beeeae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1920,15 +1920,6 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, int ret = 0; switch (args->param) { - case I915_CONTEXT_PARAM_NO_ZEROMAP: - if (args->size) - ret = -EINVAL; - else if (args->value) - set_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - else - clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: if (args->size) ret = -EINVAL; @@ -1978,6 +1969,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_persistence(ctx, args); break; + case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_RINGSIZE: default: @@ -2358,11 +2350,6 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, return -ENOENT; switch (args->param) { - case 
I915_CONTEXT_PARAM_NO_ZEROMAP: - args->size = 0; - args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; rcu_read_lock(); @@ -2410,6 +2397,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->value = i915_gem_context_is_persistent(ctx); break; + case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_RINGSIZE: default: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 340473aa70de0..5ae71ec936f7c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -129,7 +129,6 @@ struct i915_gem_context { * @user_flags: small set of booleans controlled by the user */ unsigned long user_flags; -#define UCONTEXT_NO_ZEROMAP0 #define UCONTEXT_NO_ERROR_CAPTURE 1 #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 7ff2fc3c0b2c9..73acc65d25bad 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -290,7 +290,6 @@ struct i915_execbuffer { struct intel_context *reloc_context; u64 invalid_flags; /** Set of execobj.flags that are invalid */ - u32 context_flags; /** Set of execobj.flags to insert from the ctx */ u64 batch_len; /** Length of batch within object */ u32 batch_start_offset; /** Location within object of batch */ @@ -552,9 +551,6 @@ eb_validate_vma(struct i915_execbuffer *eb, entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; } - if (!(entry->flags & EXEC_OBJECT_PINNED)) - entry->flags |= eb->context_flags; - return 0; } @@ -761,10 +757,6 @@ static int eb_select_context(struct i915_execbuffer *eb) if (rcu_access_pointer(ctx->vm)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; - eb->context_flags = 0; - if 
(test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags)) - eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS; - return 0; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f229c0abcbb54..79dcafaf476eb 100644 --- a/include/uapi/drm/i915
[PATCH 04/30] drm/i915/gem: Set the watchdog timeout directly in intel_context_set_gem (v2)
Instead of handling it like a context param, unconditionally set it when intel_contexts are created. For years we've had the idea of a watchdog uAPI floating about. The aim was for media, so that they could set very tight deadlines for their transcode jobs, so that if you have a corrupt bitstream (especially for decoding) you don't hang your desktop too hard. But it's been stuck in limbo since forever, and this simplifies things a bit in preparation for the proto-context work. If we decide to actually make said uAPI a reality, we can do it through the proto- context easily enough. This does mean that we move to reading the request_timeout_ms param once per engine when engines are created instead of once at context creation. If someone changes request_timeout_ms between creating a context and setting engines, it will mean that they get the new timeout. If someone races setting request_timeout_ms and context creation, they can theoretically end up with different timeouts. However, since both of these are fairly harmless and require changing kernel params, we don't care. 
v2 (Tvrtko Ursulin): - Add a comment about races with request_timeout_ms Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 44 +++ .../gpu/drm/i915/gem/i915_gem_context_types.h | 4 -- drivers/gpu/drm/i915/gt/intel_context_param.h | 3 +- 3 files changed, 7 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5fc0eb4beeeae..9750a1ac7023e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -232,7 +232,12 @@ static void intel_context_set_gem(struct intel_context *ce, intel_engine_has_timeslices(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); - intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us); + if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) && + ctx->i915->params.request_timeout_ms) { + unsigned int timeout_ms = ctx->i915->params.request_timeout_ms; + + intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000); + } } static void __free_engines(struct i915_gem_engines *e, unsigned int count) @@ -791,41 +796,6 @@ static void __assign_timeline(struct i915_gem_context *ctx, context_apply_all(ctx, __apply_timeline, timeline); } -static int __apply_watchdog(struct intel_context *ce, void *timeout_us) -{ - return intel_context_set_watchdog_us(ce, (uintptr_t)timeout_us); -} - -static int -__set_watchdog(struct i915_gem_context *ctx, unsigned long timeout_us) -{ - int ret; - - ret = context_apply_all(ctx, __apply_watchdog, - (void *)(uintptr_t)timeout_us); - if (!ret) - ctx->watchdog.timeout_us = timeout_us; - - return ret; -} - -static void __set_default_fence_expiry(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - int ret; - - if (!IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) || - !i915->params.request_timeout_ms) - return; - - /* Default expiry for user fences. 
*/ - ret = __set_watchdog(ctx, i915->params.request_timeout_ms * 1000); - if (ret) - drm_notice(&i915->drm, - "Failed to configure default fence expiry! (%d)", - ret); -} - static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) { @@ -870,8 +840,6 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) intel_timeline_put(timeline); } - __set_default_fence_expiry(ctx); - trace_i915_context_create(ctx); return ctx; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 5ae71ec936f7c..676592e27e7d2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -153,10 +153,6 @@ struct i915_gem_context { */ atomic_t active_count; - struct { - u64 timeout_us; - } watchdog; - /** * @hang_timestamp: The last time(s) this context caused a GPU hang */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h b/drivers/gpu/drm/i915/gt/intel_context_param.h index dffedd983693d..0c69cb42d075c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_param.h +++ b/drivers/gpu/drm/i915/gt/intel_context_param.h @@ -10,11 +10,10 @@ #include "intel_context.h" -static inline int +static inline void intel_context_set_watchdog_us(struct intel_context *ce, u64 timeout_us) { ce->watchdog.timeout_us = timeout_us; - return 0; } #endif /* INTEL_CONTEXT_PARAM_H */ -- 2.31.1
[PATCH 05/30] drm/i915/gem: Return void from context_apply_all
None of the callbacks we use with it return an error code anymore; they all return 0 unconditionally. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 26 +++-- 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 9750a1ac7023e..3503d46c88cbf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -718,32 +718,25 @@ __context_engines_await(const struct i915_gem_context *ctx, return engines; } -static int +static void context_apply_all(struct i915_gem_context *ctx, - int (*fn)(struct intel_context *ce, void *data), + void (*fn)(struct intel_context *ce, void *data), void *data) { struct i915_gem_engines_iter it; struct i915_gem_engines *e; struct intel_context *ce; - int err = 0; e = __context_engines_await(ctx, NULL); - for_each_gem_engine(ce, e, it) { - err = fn(ce, data); - if (err) - break; - } + for_each_gem_engine(ce, e, it) + fn(ce, data); i915_sw_fence_complete(&e->fence); - - return err; } -static int __apply_ppgtt(struct intel_context *ce, void *vm) +static void __apply_ppgtt(struct intel_context *ce, void *vm) { i915_vm_put(ce->vm); ce->vm = i915_vm_get(vm); - return 0; } static struct i915_address_space * @@ -783,10 +776,9 @@ static void __set_timeline(struct intel_timeline **dst, intel_timeline_put(old); } -static int __apply_timeline(struct intel_context *ce, void *timeline) +static void __apply_timeline(struct intel_context *ce, void *timeline) { __set_timeline(&ce->timeline, timeline); - return 0; } static void __assign_timeline(struct i915_gem_context *ctx, @@ -1841,19 +1833,17 @@ set_persistence(struct i915_gem_context *ctx, return __context_set_persistence(ctx, args->value); } -static int __apply_priority(struct intel_context *ce, void *arg) +static void __apply_priority(struct intel_context *ce, void *arg) { struct i915_gem_context 
*ctx = arg; if (!intel_engine_has_timeslices(ce->engine)) - return 0; + return; if (ctx->sched.priority >= I915_PRIORITY_NORMAL) intel_context_set_use_semaphores(ce); else intel_context_clear_use_semaphores(ce); - - return 0; } static int set_priority(struct i915_gem_context *ctx, -- 2.31.1
[PATCH 06/30] drm/i915: Drop the CONTEXT_CLONE API (v2)
This API allows one context to grab bits out of another context upon creation. It can be used as a short-cut for setparam(getparam()) for things like I915_CONTEXT_PARAM_VM. However, it's never been used by any real userspace. It's used by a few IGT tests and that's it. Since it doesn't add any real value (most of the stuff you can CLONE you can copy in other ways), drop it. There is one thing that this API allows you to clone which you cannot clone via getparam/setparam: timelines. However, timelines are an implementation detail of i915 and not really something that needs to be exposed to userspace. Also, sharing timelines between contexts isn't obviously useful and supporting it has the potential to complicate i915 internally. It also doesn't add any functionality that the client can't get in other ways. If a client really wants a shared timeline, they can use a syncobj and set it as an in and out fence on every submit. v2 (Jason Ekstrand): - More detailed commit message Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 199 +- .../drm/i915/gt/intel_execlists_submission.c | 28 --- .../drm/i915/gt/intel_execlists_submission.h | 3 - include/uapi/drm/i915_drm.h | 16 +- 4 files changed, 6 insertions(+), 240 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 3503d46c88cbf..9f9369d3c0004 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1957,207 +1957,14 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data) return ctx_setparam(arg->fpriv, arg->ctx, &local.param); } -static int clone_engines(struct i915_gem_context *dst, -struct i915_gem_context *src) +static int invalid_ext(struct i915_user_extension __user *ext, void *data) { - struct i915_gem_engines *clone, *e; - bool user_engines; - unsigned long n; - - e = __context_engines_await(src, &user_engines); - 
if (!e) - return -ENOENT; - - clone = alloc_engines(e->num_engines); - if (!clone) - goto err_unlock; - - for (n = 0; n < e->num_engines; n++) { - struct intel_engine_cs *engine; - - if (!e->engines[n]) { - clone->engines[n] = NULL; - continue; - } - engine = e->engines[n]->engine; - - /* -* Virtual engines are singletons; they can only exist -* inside a single context, because they embed their -* HW context... As each virtual context implies a single -* timeline (each engine can only dequeue a single request -* at any time), it would be surprising for two contexts -* to use the same engine. So let's create a copy of -* the virtual engine instead. -*/ - if (intel_engine_is_virtual(engine)) - clone->engines[n] = - intel_execlists_clone_virtual(engine); - else - clone->engines[n] = intel_context_create(engine); - if (IS_ERR_OR_NULL(clone->engines[n])) { - __free_engines(clone, n); - goto err_unlock; - } - - intel_context_set_gem(clone->engines[n], dst); - } - clone->num_engines = n; - i915_sw_fence_complete(&e->fence); - - /* Serialised by constructor */ - engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1)); - if (user_engines) - i915_gem_context_set_user_engines(dst); - else - i915_gem_context_clear_user_engines(dst); - return 0; - -err_unlock: - i915_sw_fence_complete(&e->fence); - return -ENOMEM; -} - -static int clone_flags(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - dst->user_flags = src->user_flags; - return 0; -} - -static int clone_schedattr(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - dst->sched = src->sched; - return 0; -} - -static int clone_sseu(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - struct i915_gem_engines *e = i915_gem_context_lock_engines(src); - struct i915_gem_engines *clone; - unsigned long n; - int err; - - /* no locking required; sole access under constructor*/ - clone = __context_engines_static(dst); - if (e->num_engines != clone->num_engines) { - err = 
-EINVAL; - goto unlock; - } - - for (n = 0; n < e->num_engines; n++) { - struct intel_context *ce = e->engines[n]; - -
[PATCH 07/30] drm/i915: Implement SINGLE_TIMELINE with a syncobj (v4)
This API is entirely unnecessary and I'd love to get rid of it. If userspace wants a single timeline across multiple contexts, they can either use implicit synchronization or a syncobj, both of which existed at the time this feature landed. The justification given at the time was that it would help GL drivers which are inherently single-timeline. However, neither of our GL drivers actually wanted the feature. i965 was already in maintenance mode at the time and iris uses syncobj for everything. Unfortunately, as much as I'd love to get rid of it, it is used by the media driver so we can't do that. We can, however, do the next-best thing which is to embed a syncobj in the context and do exactly what we'd expect from userspace internally. This isn't an entirely identical implementation because it's no longer atomic if userspace races with itself by calling execbuffer2 twice simultaneously from different threads. It won't crash in that case; it just doesn't guarantee any ordering between those two submits. It also means that sync files exported from different engines on a SINGLE_TIMELINE context will have different fence contexts. This is visible to userspace if it looks at the obj_name field of sync_fence_info. Moving SINGLE_TIMELINE to a syncobj emulation has a couple of technical advantages beyond mere annoyance. One is that intel_timeline is no longer an api-visible object and can remain entirely an implementation detail. This may be advantageous as we make scheduler changes going forward. Second is that, together with deleting the CLONE_CONTEXT API, we should now have a 1:1 mapping between intel_context and intel_timeline which may help us reduce locking. v2 (Tvrtko Ursulin): - Update the comment on i915_gem_context::syncobj to mention that it's an emulation and the possible race if userspace calls execbuffer2 twice on the same context concurrently. 
v2 (Jason Ekstrand): - Wrap the checks for eb.gem_context->syncobj in unlikely() - Drop the dma_fence reference - Improved commit message v3 (Jason Ekstrand): - Move the dma_fence_put() to before the error exit v4 (Tvrtko Ursulin): - Add a comment about fence contexts to the commit message Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 49 +-- .../gpu/drm/i915/gem/i915_gem_context_types.h | 14 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 16 ++ 3 files changed, 40 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 9f9369d3c0004..249bd36f14019 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -67,6 +67,8 @@ #include #include +#include + #include "gt/gen6_ppgtt.h" #include "gt/intel_context.h" #include "gt/intel_context_param.h" @@ -224,10 +226,6 @@ static void intel_context_set_gem(struct intel_context *ce, ce->vm = vm; } - GEM_BUG_ON(ce->timeline); - if (ctx->timeline) - ce->timeline = intel_timeline_get(ctx->timeline); - if (ctx->sched.priority >= I915_PRIORITY_NORMAL && intel_engine_has_timeslices(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); @@ -351,9 +349,6 @@ void i915_gem_context_release(struct kref *ref) mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); - if (ctx->timeline) - intel_timeline_put(ctx->timeline); - put_pid(ctx->pid); mutex_destroy(&ctx->mutex); @@ -570,6 +565,9 @@ static void context_close(struct i915_gem_context *ctx) if (vm) i915_vm_close(vm); + if (ctx->syncobj) + drm_syncobj_put(ctx->syncobj); + ctx->file_priv = ERR_PTR(-EBADF); /* @@ -765,33 +763,11 @@ static void __assign_ppgtt(struct i915_gem_context *ctx, i915_vm_close(vm); } -static void __set_timeline(struct intel_timeline **dst, - struct intel_timeline *src) -{ - struct intel_timeline *old = *dst; - - *dst = src ? 
intel_timeline_get(src) : NULL; - - if (old) - intel_timeline_put(old); -} - -static void __apply_timeline(struct intel_context *ce, void *timeline) -{ - __set_timeline(&ce->timeline, timeline); -} - -static void __assign_timeline(struct i915_gem_context *ctx, - struct intel_timeline *timeline) -{ - __set_timeline(&ctx->timeline, timeline); - context_apply_all(ctx, __apply_timeline, timeline); -} - static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) { struct i915_gem_context *ctx; + int ret; if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && !HAS_EXECLISTS(i915)) @@ -820,16 +796,13 @@ i915_gem_create_context(struct drm_i915_private *
[PATCH 11/30] drm/i915/request: Remove the hook from await_execution
This was only ever used for FENCE_SUBMIT automatic engine selection which was removed in the previous commit. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 3 +- drivers/gpu/drm/i915/i915_request.c | 42 --- drivers/gpu/drm/i915/i915_request.h | 4 +- 3 files changed, 9 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 30498948c83d0..9aa7e10d16308 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3492,8 +3492,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (in_fence) { if (args->flags & I915_EXEC_FENCE_SUBMIT) err = i915_request_await_execution(eb.request, - in_fence, - NULL); + in_fence); else err = i915_request_await_dma_fence(eb.request, in_fence); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c5989c0b83d3e..86b4c9f2613d5 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -49,7 +49,6 @@ struct execute_cb { struct irq_work work; struct i915_sw_fence *fence; - void (*hook)(struct i915_request *rq, struct dma_fence *signal); struct i915_request *signal; }; @@ -180,17 +179,6 @@ static void irq_execute_cb(struct irq_work *wrk) kmem_cache_free(global.slab_execute_cbs, cb); } -static void irq_execute_cb_hook(struct irq_work *wrk) -{ - struct execute_cb *cb = container_of(wrk, typeof(*cb), work); - - cb->hook(container_of(cb->fence, struct i915_request, submit), -&cb->signal->fence); - i915_request_put(cb->signal); - - irq_execute_cb(wrk); -} - static __always_inline void __notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk)) { @@ -517,17 +505,12 @@ static bool __request_in_flight(const struct i915_request *signal) static int __await_execution(struct i915_request *rq, struct i915_request *signal, - void (*hook)(struct i915_request *rq, 
- struct dma_fence *signal), gfp_t gfp) { struct execute_cb *cb; - if (i915_request_is_active(signal)) { - if (hook) - hook(rq, &signal->fence); + if (i915_request_is_active(signal)) return 0; - } cb = kmem_cache_alloc(global.slab_execute_cbs, gfp); if (!cb) @@ -537,12 +520,6 @@ __await_execution(struct i915_request *rq, i915_sw_fence_await(cb->fence); init_irq_work(&cb->work, irq_execute_cb); - if (hook) { - cb->hook = hook; - cb->signal = i915_request_get(signal); - cb->work.func = irq_execute_cb_hook; - } - /* * Register the callback first, then see if the signaler is already * active. This ensures that if we race with the @@ -1253,7 +1230,7 @@ emit_semaphore_wait(struct i915_request *to, goto await_fence; /* Only submit our spinner after the signaler is running! */ - if (__await_execution(to, from, NULL, gfp)) + if (__await_execution(to, from, gfp)) goto await_fence; if (__emit_semaphore_wait(to, from, from->fence.seqno)) @@ -1284,16 +1261,14 @@ static int intel_timeline_sync_set_start(struct intel_timeline *tl, static int __i915_request_await_execution(struct i915_request *to, - struct i915_request *from, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)) + struct i915_request *from) { int err; GEM_BUG_ON(intel_context_is_barrier(from->context)); /* Submit both requests at the same time */ - err = __await_execution(to, from, hook, I915_FENCE_GFP); + err = __await_execution(to, from, I915_FENCE_GFP); if (err) return err; @@ -1406,9 +1381,7 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) int i915_request_await_execution(struct i915_request *rq, -struct dma_fence *fence, -void (*hook)(struct i915_request *rq, - struct dma_fence *signal)) +struct dma_fence *fence) { struct dma_fence **child = &fence; unsigned int nchild = 1; @@ -1441,8 +1414,7 @@ i915_request_await_ex
[PATCH 10/30] drm/i915/gem: Remove engine auto-magic with FENCE_SUBMIT (v2)
Even though FENCE_SUBMIT is only documented to wait until the request in the in-fence starts instead of waiting until it completes, it has a bit more magic than that. If FENCE_SUBMIT is used to submit something to a balanced engine, we would wait to assign engines until the primary request was ready to start and then attempt to assign it to a different engine than the primary. There is an IGT test (the bonded-slice subtest of gem_exec_balancer) which exercises this by submitting a primary batch to a specific VCS and then using FENCE_SUBMIT to submit a secondary which can run on any VCS and have i915 figure out which VCS to run it on such that they can run in parallel. However, this functionality has never been used in the real world. The media driver (the only user of FENCE_SUBMIT) always picks exactly two physical engines to bond and never asks us to pick which to use. v2 (Daniel Vetter): - Mention the exact IGT test this breaks Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_types.h| 7 --- .../drm/i915/gt/intel_execlists_submission.c| 17 - 3 files changed, 1 insertion(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 7b7897242a837..30498948c83d0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3493,7 +3493,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (args->flags & I915_EXEC_FENCE_SUBMIT) err = i915_request_await_execution(eb.request, in_fence, - eb.engine->bond_execute); + NULL); else err = i915_request_await_dma_fence(eb.request, in_fence); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 5b91068ab2779..1cb9c3b70b29a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -416,13 
+416,6 @@ struct intel_engine_cs { */ void(*submit_request)(struct i915_request *rq); - /* -* Called on signaling of a SUBMIT_FENCE, passing along the signaling -* request down to the bonded pairs. -*/ - void(*bond_execute)(struct i915_request *rq, - struct dma_fence *signal); - void(*release)(struct intel_engine_cs *engine); struct intel_engine_execlists execlists; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 98b256352c23d..56e25090da672 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3655,22 +3655,6 @@ static void virtual_submit_request(struct i915_request *rq) spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags); } -static void -virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) -{ - intel_engine_mask_t allowed, exec; - - allowed = ~to_request(signal)->engine->mask; - - /* Restrict the bonded request to run on only the available engines */ - exec = READ_ONCE(rq->execution_mask); - while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) - ; - - /* Prevent the master from being re-run on the bonded engines */ - to_request(signal)->execution_mask &= ~allowed; -} - struct intel_context * intel_execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) @@ -3731,7 +3715,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, ve->base.sched_engine->schedule = i915_schedule; ve->base.sched_engine->kick_backend = kick_execlists; ve->base.submit_request = virtual_submit_request; - ve->base.bond_execute = virtual_bond_execute; INIT_LIST_HEAD(virtual_queue(ve)); tasklet_setup(&ve->base.sched_engine->tasklet, virtual_submission_tasklet); -- 2.31.1
[PATCH 12/30] drm/i915/gem: Disallow creating contexts with too many engines
There's no sense in allowing userspace to create more engines than it can possibly access via execbuf. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5eca91ded3423..0ba8506fb966f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1639,11 +1639,11 @@ set_engines(struct i915_gem_context *ctx, return -EINVAL; } - /* -* Note that I915_EXEC_RING_MASK limits execbuf to only using the -* first 64 engines defined here. -*/ num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); + /* RING_MASK has no shift so we can use it directly here */ + if (num_engines > I915_EXEC_RING_MASK + 1) + return -EINVAL; + set.engines = alloc_engines(num_engines); if (!set.engines) return -ENOMEM; -- 2.31.1
[PATCH 13/30] drm/i915: Stop manually RCU banging in reset_stats_ioctl (v2)
As far as I can tell, the only real reason for this is to avoid taking a reference to the i915_gem_context. The cost of those two atomics probably pales in comparison to the cost of the ioctl itself so we're really not buying ourselves anything here. We're about to make context lookup a tiny bit more complicated, so let's get rid of the one hand- rolled case. Some usermode drivers such as our Vulkan driver call GET_RESET_STATS on every execbuf so the perf here could theoretically be an issue. If this ever does become a performance issue for any such userspace drivers, they can set CONTEXT_PARAM_RECOVERABLE to false and look for -EIO coming from execbuf to check for hangs instead. v2 (Daniel Vetter): - Add a comment in the commit message about recoverable contexts Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 - drivers/gpu/drm/i915/i915_drv.h | 8 +--- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 0ba8506fb966f..61fe6d18d4068 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -2090,16 +2090,13 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_reset_stats *args = data; struct i915_gem_context *ctx; - int ret; if (args->flags || args->pad) return -EINVAL; - ret = -ENOENT; - rcu_read_lock(); - ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id); + ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id); if (!ctx) - goto out; + return -ENOENT; /* * We opt for unserialised reads here. 
This may result in tearing @@ -2116,10 +2113,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, args->batch_active = atomic_read(&ctx->guilty_count); args->batch_pending = atomic_read(&ctx->active_count); - ret = 0; -out: - rcu_read_unlock(); - return ret; + i915_gem_context_put(ctx); + return 0; } /* GEM context-engines iterator: for_each_gem_engine() */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6dff4ca012419..ae45ea7b26997 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1853,19 +1853,13 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags); -static inline struct i915_gem_context * -__i915_gem_context_lookup_rcu(struct drm_i915_file_private *file_priv, u32 id) -{ - return xa_load(&file_priv->context_xa, id); -} - static inline struct i915_gem_context * i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) { struct i915_gem_context *ctx; rcu_read_lock(); - ctx = __i915_gem_context_lookup_rcu(file_priv, id); + ctx = xa_load(&file_priv->context_xa, id); if (ctx && !kref_get_unless_zero(&ctx->ref)) ctx = NULL; rcu_read_unlock(); -- 2.31.1
[PATCH 09/30] drm/i915/gem: Disallow bonding of virtual engines (v3)
This adds a bunch of complexity which the media driver has never actually used. The media driver does technically bond a balanced engine to another engine but the balanced engine only has one engine in the sibling set. This doesn't actually result in a virtual engine. This functionality was originally added to handle cases where we may have more than two video engines and media might want to load-balance their bonded submits by, for instance, submitting to a balanced vcs0-1 as the primary and then vcs2-3 as the secondary. However, no such hardware has shipped thus far and, if we ever want to enable such use-cases in the future, we'll use the up-and-coming parallel submit API which targets GuC submission. This makes I915_CONTEXT_ENGINES_EXT_BOND a total no-op. We leave the validation code in place in case we ever decide we want to do something interesting with the bonding information. v2 (Jason Ekstrand): - Don't delete quite as much code. v3 (Tvrtko Ursulin): - Add some history to the commit message Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 18 +- .../drm/i915/gt/intel_execlists_submission.c | 69 -- .../drm/i915/gt/intel_execlists_submission.h | 5 +- drivers/gpu/drm/i915/gt/selftest_execlists.c | 229 -- 4 files changed, 8 insertions(+), 313 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index e36e3b1ae14e4..5eca91ded3423 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1552,6 +1552,12 @@ set_engines__bond(struct i915_user_extension __user *base, void *data) } virtual = set->engines->engines[idx]->engine; + if (intel_engine_is_virtual(virtual)) { + drm_dbg(&i915->drm, + "Bonding with virtual engines not allowed\n"); + return -EINVAL; + } + err = check_user_mbz(&ext->flags); if (err) return err; @@ -1592,18 +1598,6 @@ set_engines__bond(struct i915_user_extension __user *base, 
void *data) n, ci.engine_class, ci.engine_instance); return -EINVAL; } - - /* -* A non-virtual engine has no siblings to choose between; and -* a submit fence will always be directed to the one engine. -*/ - if (intel_engine_is_virtual(virtual)) { - err = intel_virtual_engine_attach_bond(virtual, - master, - bond); - if (err) - return err; - } } return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 7dd7afccb3adc..98b256352c23d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -182,18 +182,6 @@ struct virtual_engine { int prio; } nodes[I915_NUM_ENGINES]; - /* -* Keep track of bonded pairs -- restrictions upon on our selection -* of physical engines any particular request may be submitted to. -* If we receive a submit-fence from a master engine, we will only -* use one of sibling_mask physical engines. -*/ - struct ve_bond { - const struct intel_engine_cs *master; - intel_engine_mask_t sibling_mask; - } *bonds; - unsigned int num_bonds; - /* And finally, which physical engines this virtual engine maps onto. 
*/ unsigned int num_siblings; struct intel_engine_cs *siblings[]; @@ -3413,7 +3401,6 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) i915_sched_engine_put(ve->base.sched_engine); intel_engine_free_request_pool(&ve->base); - kfree(ve->bonds); kfree(ve); } @@ -3668,33 +3655,13 @@ static void virtual_submit_request(struct i915_request *rq) spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags); } -static struct ve_bond * -virtual_find_bond(struct virtual_engine *ve, - const struct intel_engine_cs *master) -{ - int i; - - for (i = 0; i < ve->num_bonds; i++) { - if (ve->bonds[i].master == master) - return &ve->bonds[i]; - } - - return NULL; -} - static void virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) { - struct virtual_engine *ve = to_virtual_engine(rq->engine); intel_engine_mask_t allowed, exec; - struct ve_bond *bond; allowed = ~to_request(signal)->engine->mask; - bond = virtual_find_bond(ve, to_req
[PATCH 14/30] drm/i915/gem: Add a separate validate_priority helper
With the proto-context stuff added later in this series, we end up having to duplicate set_priority. This lets us avoid duplicating the validation logic. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 42 + 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 61fe6d18d4068..f9a6eac78c0ae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -169,6 +169,28 @@ lookup_user_engine(struct i915_gem_context *ctx, return i915_gem_context_get_engine(ctx, idx); } +static int validate_priority(struct drm_i915_private *i915, +const struct drm_i915_gem_context_param *args) +{ + s64 priority = args->value; + + if (args->size) + return -EINVAL; + + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + return -ENODEV; + + if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + return -EINVAL; + + if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + return -EPERM; + + return 0; +} + static struct i915_address_space * context_get_vm_rcu(struct i915_gem_context *ctx) { @@ -1744,23 +1766,13 @@ static void __apply_priority(struct intel_context *ce, void *arg) static int set_priority(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) { - s64 priority = args->value; - - if (args->size) - return -EINVAL; - - if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) - return -ENODEV; - - if (priority > I915_CONTEXT_MAX_USER_PRIORITY || - priority < I915_CONTEXT_MIN_USER_PRIORITY) - return -EINVAL; + int err; - if (priority > I915_CONTEXT_DEFAULT_PRIORITY && - !capable(CAP_SYS_NICE)) - return -EPERM; + err = validate_priority(ctx->i915, args); + if (err) + return err; - ctx->sched.priority = priority; + ctx->sched.priority = args->value; context_apply_all(ctx, 
__apply_priority, ctx); return 0; -- 2.31.1
[PATCH 08/30] drm/i915: Drop getparam support for I915_CONTEXT_PARAM_ENGINES
This has never been used by any userspace except IGT and provides no real functionality beyond parroting back parameters userspace passed in as part of context creation or via setparam. If the context is in legacy mode (where you use I915_EXEC_RENDER and friends), it returns success with zero data so it's not useful for discovering what engines are in the context. It's also not a replacement for the recently removed I915_CONTEXT_CLONE_ENGINES because it doesn't return any of the balancing or bonding information. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 77 + 1 file changed, 1 insertion(+), 76 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 249bd36f14019..e36e3b1ae14e4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1724,78 +1724,6 @@ set_engines(struct i915_gem_context *ctx, return 0; } -static int -get_engines(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_context_param_engines __user *user; - struct i915_gem_engines *e; - size_t n, count, size; - bool user_engines; - int err = 0; - - e = __context_engines_await(ctx, &user_engines); - if (!e) - return -ENOENT; - - if (!user_engines) { - i915_sw_fence_complete(&e->fence); - args->size = 0; - return 0; - } - - count = e->num_engines; - - /* Be paranoid in case we have an impedance mismatch */ - if (!check_struct_size(user, engines, count, &size)) { - err = -EINVAL; - goto err_free; - } - if (overflows_type(size, args->size)) { - err = -EINVAL; - goto err_free; - } - - if (!args->size) { - args->size = size; - goto err_free; - } - - if (args->size < size) { - err = -EINVAL; - goto err_free; - } - - user = u64_to_user_ptr(args->value); - if (put_user(0, &user->extensions)) { - err = -EFAULT; - goto err_free; - } - - for (n = 0; n < count; n++) { - struct 
i915_engine_class_instance ci = { - .engine_class = I915_ENGINE_CLASS_INVALID, - .engine_instance = I915_ENGINE_CLASS_INVALID_NONE, - }; - - if (e->engines[n]) { - ci.engine_class = e->engines[n]->engine->uabi_class; - ci.engine_instance = e->engines[n]->engine->uabi_instance; - } - - if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { - err = -EFAULT; - goto err_free; - } - } - - args->size = size; - -err_free: - i915_sw_fence_complete(&e->fence); - return err; -} - static int set_persistence(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) @@ -2126,10 +2054,6 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ret = get_ppgtt(file_priv, ctx, args); break; - case I915_CONTEXT_PARAM_ENGINES: - ret = get_engines(ctx, args); - break; - case I915_CONTEXT_PARAM_PERSISTENCE: args->size = 0; args->value = i915_gem_context_is_persistent(ctx); @@ -2137,6 +2061,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_ENGINES: case I915_CONTEXT_PARAM_RINGSIZE: default: ret = -EINVAL; -- 2.31.1
[PATCH 15/30] drm/i915: Add gem/i915_gem_context.h to the docs
In order to prevent kernel doc warnings, also fill out docs for any missing fields and fix those that forgot the "@". Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- Documentation/gpu/i915.rst| 2 + .../gpu/drm/i915/gem/i915_gem_context_types.h | 43 --- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index e6fd9608e9c6d..204ebdaadb45a 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -422,6 +422,8 @@ Batchbuffer Parsing User Batchbuffer Execution -- +.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_context_types.h + .. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c :doc: User command execution diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index df76767f0c41b..5f0673a2129f9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -30,19 +30,39 @@ struct i915_address_space; struct intel_timeline; struct intel_ring; +/** + * struct i915_gem_engines - A set of engines + */ struct i915_gem_engines { union { + /** @link: Link in i915_gem_context::stale::engines */ struct list_head link; + + /** @rcu: RCU to use when freeing */ struct rcu_head rcu; }; + + /** @fence: Fence used for delayed destruction of engines */ struct i915_sw_fence fence; + + /** @ctx: i915_gem_context backpointer */ struct i915_gem_context *ctx; + + /** @num_engines: Number of engines in this set */ unsigned int num_engines; + + /** @engines: Array of engines */ struct intel_context *engines[]; }; +/** + * struct i915_gem_engines_iter - Iterator for an i915_gem_engines set + */ struct i915_gem_engines_iter { + /** @idx: Index into i915_gem_engines::engines */ unsigned int idx; + + /** @engines: Engine set being iterated */ const struct i915_gem_engines *engines; }; @@ -53,10 +73,10 @@ struct i915_gem_engines_iter { * logical hardware state for a 
particular client. */ struct i915_gem_context { - /** i915: i915 device backpointer */ + /** @i915: i915 device backpointer */ struct drm_i915_private *i915; - /** file_priv: owning file descriptor */ + /** @file_priv: owning file descriptor */ struct drm_i915_file_private *file_priv; /** @@ -81,7 +101,9 @@ struct i915_gem_context { * CONTEXT_USER_ENGINES flag is set). */ struct i915_gem_engines __rcu *engines; - struct mutex engines_mutex; /* guards writes to engines */ + + /** @engines_mutex: guards writes to engines */ + struct mutex engines_mutex; /** * @syncobj: Shared timeline syncobj @@ -118,7 +140,7 @@ struct i915_gem_context { */ struct pid *pid; - /** link: place with &drm_i915_private.context_list */ + /** @link: place with &drm_i915_private.context_list */ struct list_head link; /** @@ -153,11 +175,13 @@ struct i915_gem_context { #define CONTEXT_CLOSED 0 #define CONTEXT_USER_ENGINES 1 + /** @mutex: guards everything that isn't engines or handles_vma */ struct mutex mutex; + /** @sched: scheduler parameters */ struct i915_sched_attr sched; - /** guilty_count: How many times this context has caused a GPU hang. */ + /** @guilty_count: How many times this context has caused a GPU hang. */ atomic_t guilty_count; /** * @active_count: How many times this context was active during a GPU @@ -171,15 +195,17 @@ struct i915_gem_context { unsigned long hang_timestamp[2]; #define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */ - /** remap_slice: Bitmask of cache lines that need remapping */ + /** @remap_slice: Bitmask of cache lines that need remapping */ u8 remap_slice; /** -* handles_vma: rbtree to look up our context specific obj/vma for +* @handles_vma: rbtree to look up our context specific obj/vma for * the user handle. 
(user handles are per fd, but the binding is * per vm, which may be one per context or shared with the global GTT) */ struct radix_tree_root handles_vma; + + /** @lut_mutex: Locks handles_vma */ struct mutex lut_mutex; /** @@ -191,8 +217,11 @@ struct i915_gem_context { */ char name[TASK_COMM_LEN + 8]; + /** @stale: tracks stale engines to be destroyed */ struct { + /** @lock: guards engines */ spinlock_t lock; + /** @engines: list of stale engines */ struct list_head engines; } stal
[PATCH 17/30] drm/i915/gem: Rework error handling in default_engines
Since free_engines works for partially constructed engine sets, we can use the usual goto pattern. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 741624da8db78..5b75f98274b9e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -366,7 +366,7 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) { const struct intel_gt *gt = &ctx->i915->gt; struct intel_engine_cs *engine; - struct i915_gem_engines *e; + struct i915_gem_engines *e, *err; enum intel_engine_id id; e = alloc_engines(I915_NUM_ENGINES); @@ -384,18 +384,21 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) ce = intel_context_create(engine); if (IS_ERR(ce)) { - __free_engines(e, e->num_engines + 1); - return ERR_CAST(ce); + err = ERR_CAST(ce); + goto free_engines; } intel_context_set_gem(ce, ctx); e->engines[engine->legacy_idx] = ce; - e->num_engines = max(e->num_engines, engine->legacy_idx); + e->num_engines = max(e->num_engines, engine->legacy_idx + 1); } - e->num_engines++; return e; + +free_engines: + free_engines(e); + return err; } void i915_gem_context_release(struct kref *ref) -- 2.31.1
[PATCH 18/30] drm/i915/gem: Optionally set SSEU in intel_context_set_gem
For now this is a no-op because everyone passes in a null SSEU but it lets us get some of the error handling and selftest refactoring plumbed through. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 41 +++ .../gpu/drm/i915/gem/selftests/mock_context.c | 6 ++- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5b75f98274b9e..206721dccd24e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -266,9 +266,12 @@ context_get_vm_rcu(struct i915_gem_context *ctx) } while (1); } -static void intel_context_set_gem(struct intel_context *ce, - struct i915_gem_context *ctx) +static int intel_context_set_gem(struct intel_context *ce, +struct i915_gem_context *ctx, +struct intel_sseu sseu) { + int ret = 0; + GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); RCU_INIT_POINTER(ce->gem_context, ctx); @@ -295,6 +298,12 @@ static void intel_context_set_gem(struct intel_context *ce, intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000); } + + /* A valid SSEU has no zero fields */ + if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS)) + ret = intel_context_reconfigure_sseu(ce, sseu); + + return ret; } static void __free_engines(struct i915_gem_engines *e, unsigned int count) @@ -362,7 +371,8 @@ static struct i915_gem_engines *alloc_engines(unsigned int count) return e; } -static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx, + struct intel_sseu rcs_sseu) { const struct intel_gt *gt = &ctx->i915->gt; struct intel_engine_cs *engine; @@ -375,6 +385,8 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) for_each_engine(engine, gt, id) { struct intel_context *ce; + struct intel_sseu sseu = {}; + int ret; if 
(engine->legacy_idx == INVALID_ENGINE) continue; @@ -388,10 +400,18 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) goto free_engines; } - intel_context_set_gem(ce, ctx); - e->engines[engine->legacy_idx] = ce; e->num_engines = max(e->num_engines, engine->legacy_idx + 1); + + if (engine->class == RENDER_CLASS) + sseu = rcs_sseu; + + ret = intel_context_set_gem(ce, ctx, sseu); + if (ret) { + err = ERR_PTR(ret); + goto free_engines; + } + } return e; @@ -705,6 +725,7 @@ __create_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; + struct intel_sseu null_sseu = {}; int err; int i; @@ -722,7 +743,7 @@ __create_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->stale.engines); mutex_init(&ctx->engines_mutex); - e = default_engines(ctx); + e = default_engines(ctx, null_sseu); if (IS_ERR(e)) { err = PTR_ERR(e); goto err_free; @@ -1508,6 +1529,7 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data) struct intel_engine_cs *stack[16]; struct intel_engine_cs **siblings; struct intel_context *ce; + struct intel_sseu null_sseu = {}; u16 num_siblings, idx; unsigned int n; int err; @@ -1580,7 +1602,7 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data) goto out_siblings; } - intel_context_set_gem(ce, set->ctx); + intel_context_set_gem(ce, set->ctx, null_sseu); if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { intel_context_put(ce); @@ -1688,6 +1710,7 @@ set_engines(struct i915_gem_context *ctx, struct drm_i915_private *i915 = ctx->i915; struct i915_context_param_engines __user *user = u64_to_user_ptr(args->value); + struct intel_sseu null_sseu = {}; struct set_engines set = { .ctx = ctx }; unsigned int num_engines, n; u64 extensions; @@ -1697,7 +1720,7 @@ set_engines(struct i915_gem_context *ctx, if (!i915_gem_context_user_engines(ctx)) return 0; - set.engines = default_engines(ctx); + set.engines = default_engines(ctx, null_sseu); 
if (IS_ERR(set.eng
[PATCH 16/30] drm/i915/gem: Add an intermediate proto_context struct (v5)
While some params are fairly simple and setting them on a live context is harmless such as the context priority, others are far trickier such as the VM or the set of engines. In order to swap out the VM, for instance, we have to delay until all current in-flight work is complete, swap in the new VM, and then continue. This leads to a plethora of potential race conditions we'd really rather avoid. Unfortunately, both methods of setting the VM and the engine set are in active use today so we can't simply disallow setting the VM or engine set via SET_CONTEXT_PARAM.
validate_priority(struct drm_i915_private *i915, return 0; } +static void proto_context_close(struct i915_gem_proto_context *pc) +{ + if (pc->vm) + i915_vm_put(pc->vm); + kfree(pc); +} + +static struct i915_gem_proto_context * +proto_context_create(struct drm_i915_private *i915, unsigned int flags) +{ + struct i915_gem_proto_context *pc, *err; + + pc = kzalloc(sizeof(*pc), GFP_KERNEL); + if (!pc) + return ERR_PTR(-ENOMEM); + + pc->user_flags = BIT(UCONTEXT_BANNABLE) | +BIT(UCONTEXT_RECOVERABLE); + if (i915->params.enable_hangcheck) + pc->user_flags |= BIT(UCONTEXT_PERSISTENCE); + pc->sched.priority = I915_PRIORITY_NORMAL; + + if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { + if (!HAS_EXECLISTS(i915)) { + err = ERR_PTR(-EINVAL); + goto proto_close; + } + pc->single_timeline = true; + } + + return pc; + +proto_close: + proto_context_close(pc); + return err; +} + static struct i915_address_space * context_get_vm_rcu(struct i915_gem_context *ctx) { @@ -660,7 +697,8 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) } static struct i915_gem_context * -__create_context(struct drm_i915_private *i915) +__create_context(struct drm_i915_private *i915, +const struct i915_gem_proto_context *pc) { struct i915_gem_context *ctx; struct i915_gem_engines *e; @@ -673,7 +711,7 @@ __create_context(struct drm_i915_private *i915) kref_init(&ctx->ref); ctx->i915 = i915; - ctx->sched.priority = I915_PRIORITY_NORMAL; + ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); @@ -696,9 +734,7 @@ __create_context(struct drm_i915_private *i915) * is no remap info, it will be a NOP. */ ctx->remap_slice = ALL_L3_SLICES(i915); - i915_gem_context_set_bannable(ctx); - i915_gem_context_set_recoverable(ctx); - __context_set_persistence(ctx, true /* cgroup hook? 
*/); + ctx->user_flags = pc->user_flags; for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; @@ -786,20 +822,22 @@ static void __assign_ppgtt(struct i915_gem_context *ctx, } static struct i915_gem_context * -i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) +i915_gem_create_context(struct drm_i915_private *i915, + const struct i915_gem_proto_context *pc) { struct i915_gem_context *ctx; int ret; - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && - !HAS_EXECLISTS(i915)) - return ERR_PTR(-EINVAL); - - ctx = __create_context(i915); + ctx = __create_context(i915, pc);
[PATCH 19/30] drm/i915: Add an i915_gem_vm_lookup helper
This is the VM equivalent of i915_gem_context_lookup. It's only used once in this patch but future patches will need to duplicate this lookup code so it's better to have it in a helper. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 +- drivers/gpu/drm/i915/i915_drv.h | 14 ++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 206721dccd24e..3c59d1e4080c4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1311,11 +1311,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (upper_32_bits(args->value)) return -ENOENT; - rcu_read_lock(); - vm = xa_load(&file_priv->vm_xa, args->value); - if (vm && !kref_get_unless_zero(&vm->ref)) - vm = NULL; - rcu_read_unlock(); + vm = i915_gem_vm_lookup(file_priv, args->value); if (!vm) return -ENOENT; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ae45ea7b26997..8c1994c16b920 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1867,6 +1867,20 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) return ctx; } +static inline struct i915_address_space * +i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id) +{ + struct i915_address_space *vm; + + rcu_read_lock(); + vm = xa_load(&file_priv->vm_xa, id); + if (vm && !kref_get_unless_zero(&vm->ref)) + vm = NULL; + rcu_read_unlock(); + + return vm; +} + /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, u64 min_size, u64 alignment, -- 2.31.1
[PATCH 20/30] drm/i915/gem: Make an alignment check more sensible
What we really want to check is that the size of the engines array, i.e. args->size - sizeof(*user) is divisible by the element size, i.e. sizeof(*user->engines) because that's what's required for computing the array length right below the check.
[PATCH 21/30] drm/i915/gem: Use the proto-context to handle create parameters (v5)
Unfortunately, thanks to SET_CONTEXT_PARAM and not being allowed to pick the order in which we handle certain parameters, we have to think about those interactions.
*i915, +struct i915_gem_proto_context *pc, +bool persist) +{ + if (persist) { + /* +* Only contexts that are short-lived [that will expire or be +* reset] are allowed to survive past termination. We require +* hangcheck to ensure that the persistent requests are healthy. +*/ + if (!i915->params.enable_hangcheck) + return -EINVAL; + + pc->user_flags |= BIT(UCONTEXT_PERSISTENCE); + } else { + /* To cancel a context we use "preempt-to-idle" */ + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + return -ENODEV; + + /* +* If the cancel fails, we then need to reset, cleanly! +* +* If the per-engine reset fails, all hope is lost! We resort +* to a full GPU reset in that unlikely case, but realistically +* if the engine could not reset, the full reset does not fare +* much better. The damage has been done. +* +* However, if we cannot reset an engine by itself, we cannot +* cleanup a hanging persistent context without causing +* colateral damage, and we should not pretend we can by +* exposing the interface. 
+*/ + if (!intel_has_reset_engine(&i915->gt)) + return -ENODEV; + + pc->user_flags &= ~BIT(UCONTEXT_PERSISTENCE); + } + + return 0; +} + static struct i915_gem_proto_context * proto_context_create(struct drm_i915_private *i915, unsigned int flags) { @@ -207,6 +255,8 @@ proto_context_create(struct drm_i915_private *i915, unsigned int flags) if (!pc) return ERR_PTR(-ENOMEM); + pc->num_user_engines = -1; + pc->user_engines = NULL; pc->user_flags = BIT(UCONTEXT_BANNABLE) | BIT(UCONTEXT_RECOVERABLE); if (i915->params.enable_hangcheck) @@ -228,6 +278,430 @@ proto_context_create(struct drm_i915_private *i915, unsigned int flags) return err; } +static int set_proto_ctx_vm(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + const struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = fpriv->dev_priv; + struct i915_address_space *vm; + + if (args->size) + return -EINVAL; + + if (!HAS_FULL_PPGTT(i915)) + return -ENODEV; + + if (upper_32_bits(args->value)) + return -ENOENT; + + vm = i915_gem_vm_lookup
[PATCH 22/30] drm/i915/gem: Return an error ptr from context_lookup
We're about to start doing lazy context creation which means contexts get created in i915_gem_context_lookup and we may start having more errors than -ENOENT. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c| 12 ++-- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_perf.c | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 4972b8c91d942..7045e3afa7113 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -2636,8 +2636,8 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, int ret = 0; ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); switch (args->param) { case I915_CONTEXT_PARAM_GTT_SIZE: @@ -2705,8 +2705,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int ret; ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); ret = ctx_setparam(file_priv, ctx, args); @@ -2725,8 +2725,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, return -EINVAL; ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id); - if (!ctx) - return -ENOENT; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); /* * We opt for unserialised reads here. 
This may result in tearing diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 9aa7e10d16308..5ea8b4e23e428 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -750,8 +750,8 @@ static int eb_select_context(struct i915_execbuffer *eb) struct i915_gem_context *ctx; ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1); - if (unlikely(!ctx)) - return -ENOENT; + if (unlikely(IS_ERR(ctx))) + return PTR_ERR(ctx); eb->gem_context = ctx; if (rcu_access_pointer(ctx->vm)) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8c1994c16b920..d9278c973a734 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1864,7 +1864,7 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) ctx = NULL; rcu_read_unlock(); - return ctx; + return ctx ? ctx : ERR_PTR(-ENOENT); } static inline struct i915_address_space * diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 9f94914958c39..b4ec114a4698b 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3414,10 +3414,10 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, struct drm_i915_file_private *file_priv = file->driver_priv; specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle); - if (!specific_ctx) { + if (IS_ERR(specific_ctx)) { DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n", ctx_handle); - ret = -ENOENT; + ret = PTR_ERR(specific_ctx); goto err; } } -- 2.31.1
[PATCH 23/30] drm/i915/gt: Drop i915_address_space::file (v2)
There's a big comment saying how useful it is but no one is using this for anything anymore. It was added in 2bfa996e031b ("drm/i915: Store owning file on the i915_address_space") and used for debugfs at the time as well as telling the difference between the global GTT and a PPGTT. In f6e8aa387171 ("drm/i915: Report the number of closed vma held by each context in debugfs") we removed one use of it by switching to a context walk and comparing with the VM in the context. Finally, VM stats for debugfs were entirely nuked in db80a1294c23 ("drm/i915/gem: Remove per-client stats from debugfs/i915_gem_objects") v2 (Daniel Vetter): - Delete a struct drm_i915_file_private pre-declaration - Add a comment to the commit message about history Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 9 - drivers/gpu/drm/i915/gt/intel_gtt.h | 11 --- drivers/gpu/drm/i915/selftests/mock_gtt.c | 1 - 3 files changed, 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7045e3afa7113..5a1402544d48d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1453,17 +1453,10 @@ static int gem_context_register(struct i915_gem_context *ctx, u32 *id) { struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm; int ret; ctx->file_priv = fpriv; - mutex_lock(&ctx->mutex); - vm = i915_gem_context_vm(ctx); - if (vm) - WRITE_ONCE(vm->file, fpriv); /* XXX */ - mutex_unlock(&ctx->mutex); - ctx->pid = get_task_pid(current, PIDTYPE_PID); snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", current->comm, pid_nr(ctx->pid)); @@ -1562,8 +1555,6 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); - ppgtt->vm.file = file_priv; - if (args->extensions) { err = i915_user_extensions(u64_to_user_ptr(args->extensions), NULL, 0, diff --git 
a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 9bd89f2a01ff1..bc7153018ebd5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -140,7 +140,6 @@ typedef u64 gen8_pte_t; enum i915_cache_level; -struct drm_i915_file_private; struct drm_i915_gem_object; struct i915_fence_reg; struct i915_vma; @@ -220,16 +219,6 @@ struct i915_address_space { struct intel_gt *gt; struct drm_i915_private *i915; struct device *dma; - /* -* Every address space belongs to a struct file - except for the global -* GTT that is owned by the driver (and so @file is set to NULL). In -* principle, no information should leak from one context to another -* (or between files/processes etc) unless explicitly shared by the -* owner. Tracking the owner is important in order to free up per-file -* objects along with the file, to aide resource tracking, and to -* assign blame. -*/ - struct drm_i915_file_private *file; u64 total; /* size addr space maps (ex. 2GB for ggtt) */ u64 reserved; /* size addr space reserved */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 5c7ae40bba634..cc047ec594f93 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -73,7 +73,6 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) ppgtt->vm.gt = &i915->gt; ppgtt->vm.i915 = i915; ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); - ppgtt->vm.file = ERR_PTR(-ENODEV); ppgtt->vm.dma = i915->drm.dev; i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); -- 2.31.1
[PATCH 24/30] drm/i915/gem: Delay context creation (v3)
The current context uAPI allows for two methods of setting context parameters: SET_CONTEXT_PARAM and CONTEXT_CREATE_EXT_SETPARAM. The former is allowed to be called at any time while the later happens as part of GEM_CONTEXT_CREATE. Currently, everything settable via one is settable via the other. While some params are fairly simple and setting them on a live context is harmless such as the context priority, others are far trickier such as the VM or the set of engines. In order to swap out the VM, for instance, we have to delay until all current in-flight work is complete, swap in the new VM, and then continue. This leads to a plethora of potential race conditions we'd really rather avoid. In previous patches, we added a i915_gem_proto_context struct which is capable of storing and tracking all such create parameters. This commit delays the creation of the actual context until after the client is done configuring it with SET_CONTEXT_PARAM. From the perspective of the client, it has the same u32 context ID the whole time. From the perspective of i915, however, it's an i915_gem_proto_context right up until the point where we attempt to do something which the proto-context can't handle. Then the real context gets created. This is accomplished via a little xarray dance. When GEM_CONTEXT_CREATE is called, we create a proto-context, reserve a slot in context_xa but leave it NULL, the proto-context in the corresponding slot in proto_context_xa. Then, whenever we go to look up a context, we first check context_xa. If it's there, we return the i915_gem_context and we're done. If it's not, we look in proto_context_xa and, if we find it there, we create the actual context and kill the proto-context. In order for this dance to work properly, everything which ever touches a proto-context is guarded by drm_i915_file_private::proto_context_lock, including context creation. 
Yes, this means context creation now takes a giant global lock but it can't really be helped and that should never be on any driver's fast-path anyway. v2 (Daniel Vetter): - Commit message grammatical fixes. - Use WARN_ON instead of GEM_BUG_ON - Rename lazy_create_context_locked to finalize_create_context_locked - Rework the control-flow logic in the setparam ioctl - Better documentation all around v3 (kernel test robot): - Make finalize_create_context_locked static Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 203 ++ drivers/gpu/drm/i915/gem/i915_gem_context.h | 3 + .../gpu/drm/i915/gem/i915_gem_context_types.h | 54 + .../gpu/drm/i915/gem/selftests/mock_context.c | 5 +- drivers/gpu/drm/i915/i915_drv.h | 76 +-- 5 files changed, 283 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5a1402544d48d..c4f89e4b1665f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -278,6 +278,42 @@ proto_context_create(struct drm_i915_private *i915, unsigned int flags) return err; } +static int proto_context_register_locked(struct drm_i915_file_private *fpriv, +struct i915_gem_proto_context *pc, +u32 *id) +{ + int ret; + void *old; + + lockdep_assert_held(&fpriv->proto_context_lock); + + ret = xa_alloc(&fpriv->context_xa, id, NULL, xa_limit_32b, GFP_KERNEL); + if (ret) + return ret; + + old = xa_store(&fpriv->proto_context_xa, *id, pc, GFP_KERNEL); + if (xa_is_err(old)) { + xa_erase(&fpriv->context_xa, *id); + return xa_err(old); + } + WARN_ON(old); + + return 0; +} + +static int proto_context_register(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + u32 *id) +{ + int ret; + + mutex_lock(&fpriv->proto_context_lock); + ret = proto_context_register_locked(fpriv, pc, id); + mutex_unlock(&fpriv->proto_context_lock); + + return ret; +} + static int 
set_proto_ctx_vm(struct drm_i915_file_private *fpriv, struct i915_gem_proto_context *pc, const struct drm_i915_gem_context_param *args) @@ -1448,12 +1484,12 @@ void i915_gem_init__contexts(struct drm_i915_private *i915) init_contexts(&i915->gem.contexts); } -static int gem_context_register(struct i915_gem_context *ctx, - struct drm_i915_file_private *fpriv, - u32 *id) +static void gem_context_register(struct i915_gem_context *ctx, +struct drm_i915_file_private *fpriv, +u32 id) { struct drm_i915_private *i915 = ctx->i915; - int ret; +
[PATCH 26/30] drm/i915/gem: Don't allow changing the engine set on running contexts (v3)
When the APIs were added to manage the engine set on a GEM context directly from userspace, the questionable choice was made to allow changing the engine set on a context at any time. This is horribly racy and there's absolutely no reason why any userspace would want to do this outside of trying to exercise interesting race conditions. By removing support for CONTEXT_PARAM_ENGINES from ctx_setparam, we make it impossible to change the engine set after the context has been fully created. This doesn't yet let us delete all the deferred engine clean-up code as that's still used for handling the case where the client dies or calls GEM_CONTEXT_DESTROY while work is in flight. However, moving to an API where the engine set is effectively immutable gives us more options to potentially clean that code up a bit going forward. It also removes a whole class of ways in which a client can hurt itself or try to get around kernel context banning. v2 (Jason Ekstrand): - Expand the commit mesage v3 (Jason Ekstrand): - Make it more obvious that I915_CONTEXT_PARAM_ENGINES returns -EINVAL Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 304 +--- 1 file changed, 1 insertion(+), 303 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 40acecfbbe5b5..5f5375b15c530 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1819,305 +1819,6 @@ static int set_sseu(struct i915_gem_context *ctx, return ret; } -struct set_engines { - struct i915_gem_context *ctx; - struct i915_gem_engines *engines; -}; - -static int -set_engines__load_balance(struct i915_user_extension __user *base, void *data) -{ - struct i915_context_engines_load_balance __user *ext = - container_of_user(base, typeof(*ext), base); - const struct set_engines *set = data; - struct drm_i915_private *i915 = set->ctx->i915; - struct intel_engine_cs 
*stack[16]; - struct intel_engine_cs **siblings; - struct intel_context *ce; - struct intel_sseu null_sseu = {}; - u16 num_siblings, idx; - unsigned int n; - int err; - - if (!HAS_EXECLISTS(i915)) - return -ENODEV; - - if (intel_uc_uses_guc_submission(&i915->gt.uc)) - return -ENODEV; /* not implement yet */ - - if (get_user(idx, &ext->engine_index)) - return -EFAULT; - - if (idx >= set->engines->num_engines) { - drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", - idx, set->engines->num_engines); - return -EINVAL; - } - - idx = array_index_nospec(idx, set->engines->num_engines); - if (set->engines->engines[idx]) { - drm_dbg(&i915->drm, - "Invalid placement[%d], already occupied\n", idx); - return -EEXIST; - } - - if (get_user(num_siblings, &ext->num_siblings)) - return -EFAULT; - - err = check_user_mbz(&ext->flags); - if (err) - return err; - - err = check_user_mbz(&ext->mbz64); - if (err) - return err; - - siblings = stack; - if (num_siblings > ARRAY_SIZE(stack)) { - siblings = kmalloc_array(num_siblings, -sizeof(*siblings), -GFP_KERNEL); - if (!siblings) - return -ENOMEM; - } - - for (n = 0; n < num_siblings; n++) { - struct i915_engine_class_instance ci; - - if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { - err = -EFAULT; - goto out_siblings; - } - - siblings[n] = intel_engine_lookup_user(i915, - ci.engine_class, - ci.engine_instance); - if (!siblings[n]) { - drm_dbg(&i915->drm, - "Invalid sibling[%d]: { class:%d, inst:%d }\n", - n, ci.engine_class, ci.engine_instance); - err = -EINVAL; - goto out_siblings; - } - } - - ce = intel_execlists_create_virtual(siblings, n); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out_siblings; - } - - intel_context_set_gem(ce, set->ctx, null_sseu); - - if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { - intel_context_put(ce); - err = -EEXIST; - goto out_siblings; - } - -out_siblings: - if (siblings != stack) - kfree(siblings); - - return err; -} - -static int -set_engin
[PATCH 25/30] drm/i915/gem: Don't allow changing the VM on running contexts (v4)
When the APIs were added to manage VMs more directly from userspace, the questionable choice was made to allow changing out the VM on a context at any time. This is horribly racy and there's absolutely no reason why any userspace would want to do this outside of testing that exact race. By removing support for CONTEXT_PARAM_VM from ctx_setparam, we make it impossible to change out the VM after the context has been fully created. This lets us delete a bunch of deferred task code as well as a duplicated (and slightly different) copy of the code which programs the PPGTT registers. v2 (Jason Ekstrand): - Expand the commit message v3 (Daniel Vetter): - Don't drop the __rcu on the vm pointer v4 (Jason Ekstrand): - Make it more obvious that I915_CONTEXT_PARAM_VM returns -EINVAL Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 263 +- .../drm/i915/gem/selftests/i915_gem_context.c | 119 .../drm/i915/selftests/i915_mock_selftests.h | 1 - 3 files changed, 1 insertion(+), 382 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index c4f89e4b1665f..40acecfbbe5b5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1633,120 +1633,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, return 0; } -struct context_barrier_task { - struct i915_active base; - void (*task)(void *data); - void *data; -}; - -static void cb_retire(struct i915_active *base) -{ - struct context_barrier_task *cb = container_of(base, typeof(*cb), base); - - if (cb->task) - cb->task(cb->data); - - i915_active_fini(&cb->base); - kfree(cb); -} - -I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); -static int context_barrier_task(struct i915_gem_context *ctx, - intel_engine_mask_t engines, - bool (*skip)(struct intel_context *ce, void *data), - int (*pin)(struct intel_context *ce, struct 
i915_gem_ww_ctx *ww, void *data), - int (*emit)(struct i915_request *rq, void *data), - void (*task)(void *data), - void *data) -{ - struct context_barrier_task *cb; - struct i915_gem_engines_iter it; - struct i915_gem_engines *e; - struct i915_gem_ww_ctx ww; - struct intel_context *ce; - int err = 0; - - GEM_BUG_ON(!task); - - cb = kmalloc(sizeof(*cb), GFP_KERNEL); - if (!cb) - return -ENOMEM; - - i915_active_init(&cb->base, NULL, cb_retire, 0); - err = i915_active_acquire(&cb->base); - if (err) { - kfree(cb); - return err; - } - - e = __context_engines_await(ctx, NULL); - if (!e) { - i915_active_release(&cb->base); - return -ENOENT; - } - - for_each_gem_engine(ce, e, it) { - struct i915_request *rq; - - if (I915_SELFTEST_ONLY(context_barrier_inject_fault & - ce->engine->mask)) { - err = -ENXIO; - break; - } - - if (!(ce->engine->mask & engines)) - continue; - - if (skip && skip(ce, data)) - continue; - - i915_gem_ww_ctx_init(&ww, true); -retry: - err = intel_context_pin_ww(ce, &ww); - if (err) - goto err; - - if (pin) - err = pin(ce, &ww, data); - if (err) - goto err_unpin; - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = 0; - if (emit) - err = emit(rq, data); - if (err == 0) - err = i915_active_add_request(&cb->base, rq); - - i915_request_add(rq); -err_unpin: - intel_context_unpin(ce); -err: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - - if (err) - break; - } - i915_sw_fence_complete(&e->fence); - - cb->task = err ? NULL : task; /* caller needs to unwind instead */ - cb->data = data; - - i915_active_release(&cb->base); - - return err; -} - static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_gem_context *