Re: [PATCH v3 06/12] drm/ttm: add TTM_PAGE_FLAG_EXTERNAL_MAPPABLE
Am 15.09.21 um 20:59 schrieb Matthew Auld: In commit: commit 667a50db0477d47fdff01c666f5ee1ce26b5264c Author: Thomas Hellstrom Date: Fri Jan 3 11:17:18 2014 +0100 drm/ttm: Refuse to fault (prime-) imported pages we introduced the restriction that imported pages should not be directly mappable through TTM(this also extends to userptr). In the next patch we want to introduce a shmem_tt backend, which should follow all the existing rules with TTM_PAGE_FLAG_EXTERNAL, since it will need to handle swapping itself, but with the above mapping restriction lifted. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Christian König --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 -- include/drm/ttm/ttm_tt.h| 7 +++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 708390588c7c..fd6e18f12f50 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -163,8 +163,10 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, * (if at all) by redirecting mmap to the exporter. */ if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL)) { - dma_resv_unlock(bo->base.resv); - return VM_FAULT_SIGBUS; + if (!(bo->ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL_MAPPABLE)) { + dma_resv_unlock(bo->base.resv); + return VM_FAULT_SIGBUS; + } } return 0; diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 7f54a83c95ef..800c9edb3e10 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -66,11 +66,18 @@ struct ttm_tt { * Note that enum ttm_bo_type.ttm_bo_type_sg objects will always enable * this flag. * +* TTM_PAGE_FLAG_EXTERNAL_MAPPABLE: Same behaviour as +* TTM_PAGE_FLAG_EXTERNAL, but with the reduced restriction that it is +* still valid to use TTM to map the pages directly. This is useful when +* implementing a ttm_tt backend which still allocates driver owned +* pages underneath(say with shmem). +* * TTM_PAGE_FLAG_PRIV_POPULATED: TTM internal only. 
DO NOT USE. */ #define TTM_PAGE_FLAG_SWAPPED (1 << 0) #define TTM_PAGE_FLAG_ZERO_ALLOC (1 << 1) #define TTM_PAGE_FLAG_EXTERNAL(1 << 2) +#define TTM_PAGE_FLAG_EXTERNAL_MAPPABLE(1 << 3 | TTM_PAGE_FLAG_EXTERNAL) That's really bad practice because an "if (!(flags & TTM_PAGE_FLAG_EXTERNAL_MAPPABLE))" has different semantics than an "if (flags & TTM_PAGE_FLAG_EXTERNAL_MAPPABLE)". Rather add a TTM_PAGE_FLAG_UNMAPPABLE and make sure that it is set as appropriate. Regards, Christian. #define TTM_PAGE_FLAG_PRIV_POPULATED (1 << 31) uint32_t page_flags;
Re: [PATCH v3 05/12] drm/ttm: add some kernel-doc for TTM_PAGE_FLAG_*
Am 15.09.21 um 20:59 schrieb Matthew Auld: Move it to inline kernel-doc, otherwise we can't add empty lines it seems. Also drop the kernel-doc for pages_list, which doesn't seem to exist, and get rid of all the strange holes. As suggested on the other patch I would do the rename and renumbering in there and only the documentation change here. Christian. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Christian König --- include/drm/ttm/ttm_tt.h | 57 ++-- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index a6c284c21e72..7f54a83c95ef 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -38,35 +38,54 @@ struct ttm_resource; struct ttm_buffer_object; struct ttm_operation_ctx; -#define TTM_PAGE_FLAG_SWAPPED (1 << 4) -#define TTM_PAGE_FLAG_ZERO_ALLOC (1 << 6) -#define TTM_PAGE_FLAG_EXTERNAL (1 << 8) - -#define TTM_PAGE_FLAG_PRIV_POPULATED (1 << 31) - /** - * struct ttm_tt - * - * @pages: Array of pages backing the data. - * @page_flags: see TTM_PAGE_FLAG_* - * @num_pages: Number of pages in the page array. - * @sg: for SG objects via dma-buf - * @dma_address: The DMA (bus) addresses of the pages - * @swap_storage: Pointer to shmem struct file for swap storage. - * @pages_list: used by some page allocation backend - * @caching: The current caching state of the pages, see enum ttm_caching. - * - * This is a structure holding the pages, caching- and aperture binding - * status for a buffer object that isn't backed by fixed (VRAM / AGP) + * struct ttm_tt - This is a structure holding the pages, caching- and aperture + * binding status for a buffer object that isn't backed by fixed (VRAM / AGP) * memory. */ struct ttm_tt { + /** @pages: Array of pages backing the data. */ struct page **pages; + /** +* @page_flags: The page flags. +* +* Supported values: +* +* TTM_PAGE_FLAG_SWAPPED: Set if the pages have been swapped out. 
+* Calling ttm_tt_populate() will swap the pages back in, and unset the +* flag. +* +* TTM_PAGE_FLAG_ZERO_ALLOC: Set if the pages will be zeroed on +* allocation. +* +* TTM_PAGE_FLAG_EXTERNAL: Set if the underlying pages were allocated +* externally, like with dma-buf or userptr. This effectively disables +* TTM swapping out such pages. Also important is to prevent TTM from +* ever directly mapping these pages. +* +* Note that enum ttm_bo_type.ttm_bo_type_sg objects will always enable +* this flag. +* +* TTM_PAGE_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. +*/ +#define TTM_PAGE_FLAG_SWAPPED (1 << 0) +#define TTM_PAGE_FLAG_ZERO_ALLOC (1 << 1) +#define TTM_PAGE_FLAG_EXTERNAL (1 << 2) + +#define TTM_PAGE_FLAG_PRIV_POPULATED (1 << 31) uint32_t page_flags; + /** @num_pages: Number of pages in the page array. */ uint32_t num_pages; + /** @sg: for SG objects via dma-buf. */ struct sg_table *sg; + /** @dma_address: The DMA (bus) addresses of the pages. */ dma_addr_t *dma_address; + /** @swap_storage: Pointer to shmem struct file for swap storage. */ struct file *swap_storage; + /** +* @caching: The current caching state of the pages, see enum +* ttm_caching. +*/ enum ttm_caching caching; };
Re: [PATCH v3 04/12] drm/ttm: s/FLAG_SG/FLAG_EXTERNAL/
Am 15.09.21 um 20:59 schrieb Matthew Auld: It covers more than just ttm_bo_type_sg usage, like with say dma-buf, since one other user is userptr in amdgpu, and in the future we might have some more. Hence EXTERNAL is likely a more suitable name. Suggested-by: Christian König Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 +- drivers/gpu/drm/nouveau/nouveau_bo.c| 4 ++-- drivers/gpu/drm/radeon/radeon_ttm.c | 8 drivers/gpu/drm/ttm/ttm_bo.c| 2 +- drivers/gpu/drm/ttm/ttm_bo_vm.c | 2 +- drivers/gpu/drm/ttm/ttm_tt.c| 10 +- include/drm/ttm/ttm_tt.h| 6 +++--- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c5fa6e62f6ca..a6d606f91dfd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -894,7 +894,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, DRM_ERROR("failed to pin userptr\n"); return r; } - } else if (ttm->page_flags & TTM_PAGE_FLAG_SG) { + } else if (ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL) { if (!ttm->sg) { struct dma_buf_attachment *attach; struct sg_table *sgt; @@ -1147,7 +1147,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, return 0; } - if (ttm->page_flags & TTM_PAGE_FLAG_SG) + if (ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL) return 0; ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx); @@ -1179,7 +1179,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, return; } - if (ttm->page_flags & TTM_PAGE_FLAG_SG) + if (ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL) return; adev = amdgpu_ttm_adev(bdev); @@ -1210,8 +1210,8 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, return -ENOMEM; } - /* Set TTM_PAGE_FLAG_SG before populate but after create. */ - bo->ttm->page_flags |= TTM_PAGE_FLAG_SG; + /* Set TTM_PAGE_FLAG_EXTERNAL before populate but after create. 
*/ + bo->ttm->page_flags |= TTM_PAGE_FLAG_EXTERNAL; gtt = (void *)bo->ttm; gtt->userptr = addr; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 33dca2565cca..ba0fec252df7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1249,7 +1249,7 @@ nouveau_ttm_tt_populate(struct ttm_device *bdev, struct ttm_tt *ttm_dma = (void *)ttm; struct nouveau_drm *drm; struct device *dev; - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL); if (ttm_tt_is_populated(ttm)) return 0; @@ -1272,7 +1272,7 @@ nouveau_ttm_tt_unpopulate(struct ttm_device *bdev, { struct nouveau_drm *drm; struct device *dev; - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL); if (slave) return; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 7793249bc549..d891491b6da8 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -545,14 +545,14 @@ static int radeon_ttm_tt_populate(struct ttm_device *bdev, { struct radeon_device *rdev = radeon_get_rdev(bdev); struct radeon_ttm_tt *gtt = radeon_ttm_tt_to_gtt(rdev, ttm); - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL); if (gtt && gtt->userptr) { ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) return -ENOMEM; - ttm->page_flags |= TTM_PAGE_FLAG_SG; + ttm->page_flags |= TTM_PAGE_FLAG_EXTERNAL; return 0; } @@ -569,13 +569,13 @@ static void radeon_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm { struct radeon_device *rdev = radeon_get_rdev(bdev); struct radeon_ttm_tt *gtt = radeon_ttm_tt_to_gtt(rdev, ttm); - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL); radeon_ttm_tt_unbind(bdev, ttm); if (gtt && gtt->userptr) 
{ kfree(ttm->sg); - ttm->page_flags &= ~TTM_PAGE_FLAG_SG; + ttm->page_flags &= ~TTM_PAGE_FLAG_EXTERNAL; return; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3b22c0013dbf..642dc7ce3081 100644 --- a/
Re: [RFC PATCH v3 1/6] drm/doc: Color Management and HDR10 RFC
On Wed, 2021-09-15 at 17:01 +0300, Pekka Paalanen wrote: > On Fri, 30 Jul 2021 16:41:29 -0400 > Harry Wentland wrote: > > > Use the new DRM RFC doc section to capture the RFC previously only > > described in the cover letter at > > https://patchwork.freedesktop.org/series/89506/ > > > > v3: > > * Add sections on single-plane and multi-plane HDR > > * Describe approach to define HW details vs approach to define SW > > intentions > > * Link Jeremy Cline's excellent HDR summaries > > * Outline intention behind overly verbose doc > > * Describe FP16 use-case > > * Clean up links > > > > v2: create this doc > > > > v1: n/a > > > > Signed-off-by: Harry Wentland > > Hi Harry, > > I finally managed to go through this, comments below. Excellent to > have > pictures included. I wrote this reply over several days, sorry if > it's > not quite coherent. > > > > > > + > > + > > +Overview and background > > +=== > > + > > +I highly recommend you read `Jeremy Cline's HDR primer`_ > > + > > +Jeremy Cline did a much better job describing this. I highly > > recommend > > +you read it at [1]: > > + > > +.. _Jeremy Cline's HDR primer: > > https://www.jcline.org/blog/fedora/graphics/hdr/2021/05/07/hdr-in-linux-p1.html > > That's a nice write-up I didn't know about, thanks. > > I just wish such write-ups would be somehow peer-reviewed for > correctness and curated for proper referencing. Perhaps like we > develop > code: at least some initial peer review and then fixes when anyone > notices something to improve. Like... what you are doing here! :-) > > The post is perhaps a bit too narrow with OETF/EOTF terms, > accidentally > implying that OETF = EOTF^-1 which is not generally true, but that > all > depends on which O-to-E or E-to-O functions one is talking about. 
> Particularly there is a difference between functions used for signal > compression which needs an exact matching inverse function, and > functions containing tone-mapping and artistic effects that when > concatenated result in the (non-identity) OOTF. > > Nothing in the post seems to disagree with my current understanding > FWIW. I'm more than happy to update things that are incorrect or misleading since the last thing I want to do is muddy the waters. Personally, I would much prefer that any useful content from it be peer-reviewed and included directly in the documentation since, well, it's being hosted out of my laundry room and the cats have a habit of turning off the UPS... Do let me know if I can be of any assistance there; I'm no longer employed to do anything HDR-related, but I do like clear documentation so I could dedicate a bit of free time to it. - Jeremy
Re: [PATCH v3 01/12] drm/ttm: stop setting page->index for the ttm_tt
Am 15.09.21 um 20:59 schrieb Matthew Auld: In commit: commit 58aa6622d32af7d2c08d45085f44c54554a16ed7 Author: Thomas Hellstrom Date: Fri Jan 3 11:47:23 2014 +0100 drm/ttm: Correctly set page mapping and -index members we started setting the page->mapping and page->index to point to the virtual address space, if the pages were faulted with TTM. Apparently this was needed for core-mm to be able to reverse lookup the virtual address given the struct page, and potentially unmap it from the page tables. However as pointed out by Thomas, since we are now using PFN_MAP, instead of say PFN_MIXED, this should no longer be the case. There was also apparently some use case in vmwgfx which needed this for dirty tracking, but that also doesn't appear to be the case anymore, as pointed out by Thomas. We still need to keep the page->mapping for now, since that is still needed for different reasons, but we try to address that in the next patch. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Christian König Reviewed-by: Christian König Fingers crossed that this really works as documented. 
--- drivers/gpu/drm/ttm/ttm_bo_vm.c | 2 -- drivers/gpu/drm/ttm/ttm_tt.c| 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index f56be5bc0861..906ec8a1bf5a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -346,8 +346,6 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, } else if (unlikely(!page)) { break; } - page->index = drm_vma_node_start(&bo->base.vma_node) + - page_offset; pfn = page_to_pfn(page); } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index dae52433beeb..1cc04c224988 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -367,10 +367,8 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm) if (ttm->page_flags & TTM_PAGE_FLAG_SG) return; - for (i = 0; i < ttm->num_pages; ++i) { + for (i = 0; i < ttm->num_pages; ++i) (*page)->mapping = NULL; - (*page++)->index = 0; - } } void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
Re: [PATCH v3 03/12] drm/ttm: remove TTM_PAGE_FLAG_NO_RETRY
Am 15.09.21 um 20:59 schrieb Matthew Auld: No longer used it seems. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Christian König Reviewed-by: Christian König --- include/drm/ttm/ttm_tt.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 89b15d673b22..842ce756213c 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -41,7 +41,6 @@ struct ttm_operation_ctx; #define TTM_PAGE_FLAG_SWAPPED (1 << 4) #define TTM_PAGE_FLAG_ZERO_ALLOC (1 << 6) #define TTM_PAGE_FLAG_SG (1 << 8) -#define TTM_PAGE_FLAG_NO_RETRY (1 << 9) #define TTM_PAGE_FLAG_PRIV_POPULATED (1 << 31)
Re: [PATCH v3 02/12] drm/ttm: move ttm_tt_{add,clear}_mapping into amdgpu
Am 15.09.21 um 20:59 schrieb Matthew Auld: Now that setting page->index shouldn't be needed anymore, we are just left with setting page->mapping, and here it looks like amdgpu is the only user, where pointing the page->mapping at the dev_mapping is used to verify that the pages do indeed belong to the device, if userspace later tries to touch them. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 27 - drivers/gpu/drm/ttm/ttm_tt.c| 25 --- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1129e17e9f09..c5fa6e62f6ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1107,6 +1107,24 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, return >t->ttm; } +static void amdgpu_ttm_tt_add_mapping(struct ttm_device *bdev, + struct ttm_tt *ttm) +{ + pgoff_t i; + + for (i = 0; i < ttm->num_pages; ++i) + ttm->pages[i]->mapping = bdev->dev_mapping; +} + +static void amdgpu_ttm_tt_clear_mapping(struct ttm_tt *ttm) +{ + struct page **page = ttm->pages; + pgoff_t i; + + for (i = 0; i < ttm->num_pages; ++i) + (*page)->mapping = NULL; +} + /* * amdgpu_ttm_tt_populate - Map GTT pages visible to the device * @@ -1119,6 +1137,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; + int ret; /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */ if (gtt->userptr) { @@ -1131,7 +1150,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, if (ttm->page_flags & TTM_PAGE_FLAG_SG) return 0; - return ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx); + ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx); + if (ret) + return ret; + + amdgpu_ttm_tt_add_mapping(bdev, ttm); I don't really see why this needs to be a separate function. 
Just inline the loop here. + return 0; } /* @@ -1159,6 +1183,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, return; adev = amdgpu_ttm_adev(bdev); + amdgpu_ttm_tt_clear_mapping(ttm); Same here of course, apart from that looks good to me. Christian. return ttm_pool_free(&adev->mman.bdev.pool, ttm); } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 1cc04c224988..980ecb079b2c 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -289,17 +289,6 @@ int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm, return ret; } -static void ttm_tt_add_mapping(struct ttm_device *bdev, struct ttm_tt *ttm) -{ - pgoff_t i; - - if (ttm->page_flags & TTM_PAGE_FLAG_SG) - return; - - for (i = 0; i < ttm->num_pages; ++i) - ttm->pages[i]->mapping = bdev->dev_mapping; -} - int ttm_tt_populate(struct ttm_device *bdev, struct ttm_tt *ttm, struct ttm_operation_ctx *ctx) { @@ -336,7 +325,6 @@ int ttm_tt_populate(struct ttm_device *bdev, if (ret) goto error; - ttm_tt_add_mapping(bdev, ttm); ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED; if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) { ret = ttm_tt_swapin(ttm); @@ -359,24 +347,11 @@ int ttm_tt_populate(struct ttm_device *bdev, } EXPORT_SYMBOL(ttm_tt_populate); -static void ttm_tt_clear_mapping(struct ttm_tt *ttm) -{ - pgoff_t i; - struct page **page = ttm->pages; - - if (ttm->page_flags & TTM_PAGE_FLAG_SG) - return; - - for (i = 0; i < ttm->num_pages; ++i) - (*page)->mapping = NULL; -} - void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) { if (!ttm_tt_is_populated(ttm)) return; - ttm_tt_clear_mapping(ttm); if (bdev->funcs->ttm_tt_unpopulate) bdev->funcs->ttm_tt_unpopulate(bdev, ttm); else
Re: [virtio-dev] Re: [PATCH v1 08/12] drm/virtio: implement context init: stop using drv->context when creating fence
Hi, > > I guess you need to also update virtio_gpu_fence_event_process() > > then? It currently has the strict ordering logic baked in ... > > The update to virtio_gpu_fence_event_process was done as a preparation a > few months back: > > https://cgit.freedesktop.org/drm/drm-misc/commit/drivers/gpu/drm/virtio/virtgpu_fence.c?id=36549848ed27c22bb2ffd5d1468efc6505b05f97 Ah, ok, missed the detail that the context check is already there. thanks, Gerd
Re: [PATCH] drm/exynos: Make use of the helper function devm_platform_ioremap_resource()
21. 8. 31. 오후 4:49에 Cai Huoqing 이(가) 쓴 글: > Use the devm_platform_ioremap_resource() helper instead of > calling platform_get_resource() and devm_ioremap_resource() > separately > Picked it up. Thanks, Inki Dae > Signed-off-by: Cai Huoqing > --- > drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 +--- > drivers/gpu/drm/exynos/exynos_drm_dsi.c | 4 +--- > drivers/gpu/drm/exynos/exynos_drm_fimc.c | 5 + > drivers/gpu/drm/exynos/exynos_drm_fimd.c | 4 +--- > drivers/gpu/drm/exynos/exynos_drm_g2d.c | 5 + > drivers/gpu/drm/exynos/exynos_drm_gsc.c | 6 +- > drivers/gpu/drm/exynos/exynos_drm_rotator.c | 4 +--- > drivers/gpu/drm/exynos/exynos_drm_scaler.c| 4 +--- > drivers/gpu/drm/exynos/exynos_hdmi.c | 4 +--- > 9 files changed, 9 insertions(+), 31 deletions(-) > > diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c > b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c > index 9870c4e6af36..b5001db7a95c 100644 > --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c > +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c > @@ -793,7 +793,6 @@ static int exynos5433_decon_probe(struct platform_device > *pdev) > { > struct device *dev = &pdev->dev; > struct decon_context *ctx; > - struct resource *res; > int ret; > int i; > > @@ -818,8 +817,7 @@ static int exynos5433_decon_probe(struct platform_device > *pdev) > ctx->clks[i] = clk; > } > > - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > - ctx->addr = devm_ioremap_resource(dev, res); > + ctx->addr = devm_platform_ioremap_resource(pdev, 0); > if (IS_ERR(ctx->addr)) > return PTR_ERR(ctx->addr); > > diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c > b/drivers/gpu/drm/exynos/exynos_drm_dsi.c > index e39fac889edc..8d137857818c 100644 > --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c > +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c > @@ -1738,7 +1738,6 @@ static const struct component_ops > exynos_dsi_component_ops = { > static int exynos_dsi_probe(struct platform_device *pdev) > { > struct device *dev = &pdev->dev; > - struct 
resource *res; > struct exynos_dsi *dsi; > int ret, i; > > @@ -1789,8 +1788,7 @@ static int exynos_dsi_probe(struct platform_device > *pdev) > } > } > > - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > - dsi->reg_base = devm_ioremap_resource(dev, res); > + dsi->reg_base = devm_platform_ioremap_resource(pdev, 0); > if (IS_ERR(dsi->reg_base)) > return PTR_ERR(dsi->reg_base); > > diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c > b/drivers/gpu/drm/exynos/exynos_drm_fimc.c > index a3c718148c45..ecfd82d0afb7 100644 > --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c > +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c > @@ -85,7 +85,6 @@ struct fimc_scaler { > /* > * A structure of fimc context. > * > - * @regs_res: register resources. > * @regs: memory mapped io registers. > * @lock: locking of operations. > * @clocks: fimc clocks. > @@ -103,7 +102,6 @@ struct fimc_context { > struct exynos_drm_ipp_formats *formats; > unsigned intnum_formats; > > - struct resource *regs_res; > void __iomem*regs; > spinlock_t lock; > struct clk *clocks[FIMC_CLKS_MAX]; > @@ -1327,8 +1325,7 @@ static int fimc_probe(struct platform_device *pdev) > ctx->num_formats = num_formats; > > /* resource memory */ > - ctx->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > - ctx->regs = devm_ioremap_resource(dev, ctx->regs_res); > + ctx->regs = devm_platform_ioremap_resource(pdev, 0); > if (IS_ERR(ctx->regs)) > return PTR_ERR(ctx->regs); > > diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c > b/drivers/gpu/drm/exynos/exynos_drm_fimd.c > index 700ca4fa6665..c735e53939d8 100644 > --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c > +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c > @@ -1202,9 +1202,7 @@ static int fimd_probe(struct platform_device *pdev) > return PTR_ERR(ctx->lcd_clk); > } > > - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > - > - ctx->regs = devm_ioremap_resource(dev, res); > + ctx->regs = devm_platform_ioremap_resource(pdev, 0); > if (IS_ERR(ctx->regs)) > 
return PTR_ERR(ctx->regs); > > diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c > b/drivers/gpu/drm/exynos/exynos_drm_g2d.c > index b00230626c6a..471fd6c8135f 100644 > --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c > +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c > @@ -1449,7 +1449,6 @@ static const struct component_ops g2d_component_ops = { > static int g2d_probe(struct platform_device *pdev) > { > struct device *dev = &pdev->dev; > - struct resource *res; > struct g2d_data *g2d; > int ret; > > @@ -1491,9 +1490,7 @@ static int g2d
Re: [PATCH 8/8] usb: typec: altmodes/displayport: Notify drm subsys of hotplug events
Quoting Hans de Goede (2021-08-17 14:52:01) > diff --git a/drivers/usb/typec/altmodes/displayport.c > b/drivers/usb/typec/altmodes/displayport.c > index aa669b9cf70e..c1d8c23baa39 100644 > --- a/drivers/usb/typec/altmodes/displayport.c > +++ b/drivers/usb/typec/altmodes/displayport.c > @@ -125,6 +129,7 @@ static int dp_altmode_configure(struct dp_altmode *dp, u8 > con) > static int dp_altmode_status_update(struct dp_altmode *dp) > { > bool configured = !!DP_CONF_GET_PIN_ASSIGN(dp->data.conf); > + bool hpd = !!(dp->data.status & DP_STATUS_HPD_STATE); > u8 con = DP_STATUS_CONNECTION(dp->data.status); > int ret = 0; > > @@ -137,6 +142,11 @@ static int dp_altmode_status_update(struct dp_altmode > *dp) > ret = dp_altmode_configure(dp, con); > if (!ret) > dp->state = DP_STATE_CONFIGURE; > + } else { > + if (dp->hpd != hpd) { > + drm_connector_oob_hotplug_event(dp->connector_fwnode); > + dp->hpd = hpd; > + } > } > > return ret; > @@ -512,6 +522,7 @@ static const struct attribute_group dp_altmode_group = { > int dp_altmode_probe(struct typec_altmode *alt) > { > const struct typec_altmode *port = typec_altmode_get_partner(alt); > + struct fwnode_handle *fwnode; > struct dp_altmode *dp; > int ret; > > @@ -540,6 +551,11 @@ int dp_altmode_probe(struct typec_altmode *alt) > alt->desc = "DisplayPort"; > alt->ops = &dp_altmode_ops; > > + fwnode = dev_fwnode(alt->dev.parent->parent); /* typec_port fwnode */ > + dp->connector_fwnode = fwnode_find_reference(fwnode, "displayport", > 0); I'm trying to figure out how to translate this over to DT bindings. Is there a binding document for this fwnode reference? If not, can you please update Documentation/devicetree/bindings/connector/usb-connector.yaml with this property? I think this means that the type-c node would have a 'displayport = <&some_phandle>' property in it that points to the display port hardware device that's pumping out the DisplayPort data? 
> + if (IS_ERR(dp->connector_fwnode)) > + dp->connector_fwnode = NULL; > + > typec_altmode_set_drvdata(alt, dp); > > dp->state = DP_STATE_ENTER; > @@ -555,6 +571,13 @@ void dp_altmode_remove(struct typec_altmode *alt) > > sysfs_remove_group(&alt->dev.kobj, &dp_altmode_group); > cancel_work_sync(&dp->work); > + > + if (dp->connector_fwnode) { > + if (dp->hpd) > + drm_connector_oob_hotplug_event(dp->connector_fwnode); I was hoping that we could make a type-c connector into a drm_bridge. I'm thinking that it would be a DP-to-panel bridge. Then a panel could be created as well on the end of the type-c connector and the bridge would report hpd whenever the type-c logic figures out the cable has been connected and hpd is asserted. The actual DisplayPort hardware that's encoding data would then find the bridge through the graph binding connected to the output node. I'm not sure how MST is handled though. In that scenario maybe there's more than one panel? If you're interested the dts file that I'm trying to make this work for is sc7180-trogdor.dtsi and I need to hook up mdss_dp's output port to the two type-c connectors, usb_c0 and usb_c1, somehow. The two ports are actually muxed by the EC (parent node) so only one type-c port can be connected to the DP hardware at a time. > + > + fwnode_handle_put(dp->connector_fwnode); > + }
Re: [PATCH v5 11/16] drm/mediatek: add display MDP RDMA support for MT8195
Hi Chun-Kuang, Thanks for the review. On Thu, 2021-09-09 at 07:54 +0800, Chun-Kuang Hu wrote: > Hi, Nancy: > > Nancy.Lin 於 2021年9月6日 週一 下午3:15寫道: > > > > Add MDP_RDMA driver for MT8195. MDP_RDMA is the DMA engine of > > the ovl_adaptor component. > > > > Signed-off-by: Nancy.Lin > > --- > > drivers/gpu/drm/mediatek/Makefile | 3 +- > > drivers/gpu/drm/mediatek/mtk_disp_drv.h | 7 + > > drivers/gpu/drm/mediatek/mtk_mdp_rdma.c | 301 > > > > drivers/gpu/drm/mediatek/mtk_mdp_rdma.h | 37 +++ > > 4 files changed, 347 insertions(+), 1 deletion(-) > > create mode 100644 drivers/gpu/drm/mediatek/mtk_mdp_rdma.c > > create mode 100644 drivers/gpu/drm/mediatek/mtk_mdp_rdma.h > > > > diff --git a/drivers/gpu/drm/mediatek/Makefile > > b/drivers/gpu/drm/mediatek/Makefile > > index a38e88e82d12..6e604a933ed0 100644 > > --- a/drivers/gpu/drm/mediatek/Makefile > > +++ b/drivers/gpu/drm/mediatek/Makefile > > @@ -13,7 +13,8 @@ mediatek-drm-y := mtk_disp_aal.o \ > > mtk_drm_gem.o \ > > mtk_drm_plane.o \ > > mtk_dsi.o \ > > - mtk_dpi.o > > + mtk_dpi.o \ > > + mtk_mdp_rdma.o > > > > obj-$(CONFIG_DRM_MEDIATEK) += mediatek-drm.o > > > > diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h > > b/drivers/gpu/drm/mediatek/mtk_disp_drv.h > > index a33b13fe2b6e..b3a372cab0bd 100644 > > --- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h > > +++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h > > @@ -8,6 +8,7 @@ > > > > #include > > #include "mtk_drm_plane.h" > > +#include "mtk_mdp_rdma.h" > > > > int mtk_aal_clk_enable(struct device *dev); > > void mtk_aal_clk_disable(struct device *dev); > > @@ -106,4 +107,10 @@ void mtk_rdma_enable_vblank(struct device > > *dev, > > void *vblank_cb_data); > > void mtk_rdma_disable_vblank(struct device *dev); > > > > +int mtk_mdp_rdma_clk_enable(struct device *dev); > > +void mtk_mdp_rdma_clk_disable(struct device *dev); > > +void mtk_mdp_rdma_start(struct device *dev, struct cmdq_pkt > > *cmdq_pkt); > > +void mtk_mdp_rdma_stop(struct device *dev, struct cmdq_pkt > > 
*cmdq_pkt); > > +void mtk_mdp_rdma_config(struct device *dev, struct > > mtk_mdp_rdma_cfg *cfg, > > +struct cmdq_pkt *cmdq_pkt); > > #endif > > diff --git a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c > > b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c > > new file mode 100644 > > index ..052434d960b9 > > --- /dev/null > > +++ b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c > > @@ -0,0 +1,301 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +/* > > + * Copyright (c) 2021 MediaTek Inc. > > + */ > > + > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > + > > +#include "mtk_drm_drv.h" > > +#include "mtk_disp_drv.h" > > +#include "mtk_mdp_rdma.h" > > + > > +#define > > MDP_RDMA_EN0x000 > > + #define FLD_ROT_ENABLEBIT(0) > > Maybe my description is not good, I like the style of rdma driver > [1]. > > [1] > https://urldefense.com/v3/__https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/mediatek/mtk_disp_rdma.c?h=v5.14__;!!CTRNKA9wMg0ARbw!0WCmLLqZ2IclvjA-NLthJ-PGuByyzj_ImXoeNh2mvZ7Is9NFLTb37Pzr3jr4fK4j$ > > OK, I will change the alignment. 
> > + > > +#define > > MDP_RDMA_RESET 0x008 > > + > > +#define > > MDP_RDMA_CON 0x020 > > + #define FLD_OUTPUT_10BBIT(5) > > + #define FLD_SIMPLE_MODE BIT(4) > > + > > +#define > > MDP_RDMA_GMCIF_CON 0x028 > > + #define FLD_COMMAND_DIV BIT(0) > > + #define FLD_EXT_PREULTRA_EN BIT(3) > > + #define > > FLD_RD_REQ_TYPE GENMASK(7, 4) > > + #define VAL_RD_REQ_TYPE_BURST_8_ACCESS7 > > + #define > > FLD_ULTRA_EN GENMASK(13, 12) > > + #define VAL_ULTRA_EN_ENABLE 1 > > + #define > > FLD_PRE_ULTRA_EN GENMASK(17, 16) > > + #define VAL_PRE_ULTRA_EN_ENABLE 1 > > + #define FLD_EXT_ULTRA_EN BIT(18) > > + > > +#define > > MDP_RDMA_SRC_CON 0x030 > > + #define FLD_OUTPUT_ARGB BIT(25) > > + #define > > FLD_BIT_NUMBERGENMASK(19, 18) > > + #define FLD_UNIFORM_CONFIGBIT(17) > > + #define FLD_SWAP BIT(14) > > + #define > > FLD_SRC_FORMATGENMASK(3, 0) > > + > > +#define > > MDP_RDMA_COMP_CON
Re: [PATCH v5 08/16] soc: mediatek: add cmdq support of mtk-mmsys config API for mt8195 vdosys1
Hi Chun-Kuang, Thanks for the review. On Wed, 2021-09-08 at 00:29 +0800, Chun-Kuang Hu wrote: > Hi, Nancy: > > Nancy.Lin 於 2021年9月6日 週一 下午3:15寫道: > > > > Add cmdq support for mtk-mmsys config API. > > The mmsys config register settings need to take effect with the > > other > > HW settings(like OVL_ADAPTOR...) at the same vblanking time. > > > > If we use CPU to write the mmsys reg, we can't guarantee all the > > settings can be written in the same vblanking time. > > Cmdq is used for this purpose. We prepare all the related HW > > settings > > in one cmdq packet. The first command in the packet is "wait stream > > done", > > and then following with all the HW settings. After the cmdq packet > > is > > flush to GCE HW. The GCE waits for the "stream done event" to > > coming > > and then starts flushing all the HW settings. This can guarantee > > all > > the settings flush in the same vblanking. > > > > Signed-off-by: Nancy.Lin > > --- > > drivers/soc/mediatek/mtk-mmsys.c | 28 +- > > > > include/linux/soc/mediatek/mtk-mmsys.h | 6 +- > > 2 files changed, 28 insertions(+), 6 deletions(-) > > > > diff --git a/drivers/soc/mediatek/mtk-mmsys.c > > b/drivers/soc/mediatek/mtk-mmsys.c > > index 3a38b8269c71..060065501b8a 100644 > > --- a/drivers/soc/mediatek/mtk-mmsys.c > > +++ b/drivers/soc/mediatek/mtk-mmsys.c > > @@ -81,6 +81,7 @@ struct mtk_mmsys { > > const struct mtk_mmsys_driver_data *data; > > spinlock_t lock; /* protects mmsys_sw_rst_b reg */ > > struct reset_controller_dev rcdev; > > + struct cmdq_client_reg cmdq_base; > > }; > > > > void mtk_mmsys_ddp_connect(struct device *dev, > > @@ -174,7 +175,7 @@ static const struct reset_control_ops > > mtk_mmsys_reset_ops = { > > }; > > > > void mtk_mmsys_ddp_config(struct device *dev, enum > > mtk_mmsys_config_type config, > > - u32 id, u32 val) > > + u32 id, u32 val, struct cmdq_pkt > > *cmdq_pkt) > > { > > struct mtk_mmsys *mmsys = dev_get_drvdata(dev); > > const struct mtk_mmsys_config *mmsys_config = mmsys->data- > > 
>config; > > @@ -197,10 +198,20 @@ void mtk_mmsys_ddp_config(struct device *dev, > > enum mtk_mmsys_config_type config, > > mask = mmsys_config[i].mask; > > reg_val = val << mmsys_config[i].shift; > > > > - u32 tmp = readl(mmsys->regs + offset); > > - > > - tmp = (tmp & ~mask) | reg_val; > > - writel(tmp, mmsys->regs + offset); > > +#if IS_REACHABLE(CONFIG_MTK_CMDQ) > > + if (cmdq_pkt && mmsys->cmdq_base.size) { > > + cmdq_pkt_write_mask(cmdq_pkt, mmsys- > > >cmdq_base.subsys, > > + mmsys->cmdq_base.offset + > > offset, reg_val, > > + mask); > > + } else { > > +#endif > > + u32 tmp = readl(mmsys->regs + offset); > > + > > + tmp = (tmp & ~mask) | reg_val; > > + writel(tmp, mmsys->regs + offset); > > +#if IS_REACHABLE(CONFIG_MTK_CMDQ) > > + } > > +#endif > > } > > EXPORT_SYMBOL_GPL(mtk_mmsys_ddp_config); > > > > @@ -236,6 +247,13 @@ static int mtk_mmsys_probe(struct > > platform_device *pdev) > > } > > > > mmsys->data = of_device_get_match_data(&pdev->dev); > > + > > +#if IS_REACHABLE(CONFIG_MTK_CMDQ) > > + ret = cmdq_dev_get_client_reg(dev, &mmsys->cmdq_base, 0); > > Define mediatek,gce-client-reg in binding document first. > > Regards, > Chun-Kuang. > OK, I will add binding document in the next revision. 
Regards, Nancy Lin > > + if (ret) > > + dev_dbg(dev, "No mediatek,gce-client-reg!\n"); > > +#endif > > + > > platform_set_drvdata(pdev, mmsys); > > > > clks = platform_device_register_data(&pdev->dev, mmsys- > > >data->clk_driver, > > diff --git a/include/linux/soc/mediatek/mtk-mmsys.h > > b/include/linux/soc/mediatek/mtk-mmsys.h > > index ef2a6d9a834b..9705d242849a 100644 > > --- a/include/linux/soc/mediatek/mtk-mmsys.h > > +++ b/include/linux/soc/mediatek/mtk-mmsys.h > > @@ -6,6 +6,10 @@ > > #ifndef __MTK_MMSYS_H > > #define __MTK_MMSYS_H > > > > +#include > > +#include > > +#include > > + > > enum mtk_ddp_comp_id; > > struct device; > > > > @@ -75,6 +79,6 @@ void mtk_mmsys_ddp_disconnect(struct device *dev, > > enum mtk_ddp_comp_id next); > > > > void mtk_mmsys_ddp_config(struct device *dev, enum > > mtk_mmsys_config_type config, > > - u32 id, u32 val); > > + u32 id, u32 val, struct cmdq_pkt > > *cmdq_pkt); > > > > #endif /* __MTK_MMSYS_H */ > > -- > > 2.18.0 > >
Re: [PATCH v5 04/16] dt-bindings: reset: mt8195: add vdosys1 reset control bit
Hi Chun-Kuang, Thanks for the review. On Wed, 2021-09-08 at 00:06 +0800, Chun-Kuang Hu wrote: > Hi, Nancy: > > Nancy.Lin 於 2021年9月6日 週一 下午3:15寫道: > > > > Add vdosys1 reset control bit for MT8195 platform. > > > > Signed-off-by: Nancy.Lin > > --- > > include/dt-bindings/reset/mt8195-resets.h | 12 > > 1 file changed, 12 insertions(+) > > > > diff --git a/include/dt-bindings/reset/mt8195-resets.h > > b/include/dt-bindings/reset/mt8195-resets.h > > index a26bccc8b957..eaaa882c09bd 100644 > > --- a/include/dt-bindings/reset/mt8195-resets.h > > +++ b/include/dt-bindings/reset/mt8195-resets.h > > @@ -26,4 +26,16 @@ > > > > #define MT8195_TOPRGU_SW_RST_NUM 16 > > > > +/* VDOSYS1 */ > > +#define MT8195_VDOSYS1_SW0_RST_B_MERGE0_DL_ASYNC 25 > > +#define MT8195_VDOSYS1_SW0_RST_B_MERGE1_DL_ASYNC 26 > > +#define MT8195_VDOSYS1_SW0_RST_B_MERGE2_DL_ASYNC 27 > > +#define MT8195_VDOSYS1_SW0_RST_B_MERGE3_DL_ASYNC 28 > > +#define MT8195_VDOSYS1_SW0_RST_B_MERGE4_DL_ASYNC 29 > > +#define MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_FE0_DL_ASYNC 51 > > +#define MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_FE1_DL_ASYNC 52 > > +#define MT8195_VDOSYS1_SW1_RST_B_HDR_GFX_FE0_DL_ASYNC 53 > > +#define MT8195_VDOSYS1_SW1_RST_B_HDR_GFX_FE1_DL_ASYNC 54 > > +#define MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_BE_DL_ASYNC 55 > > Maybe you should align the indent style with TOPRGU. > > Regards, > Chun-Kuang. > OK, I will modify it in the next revision. Regards, Nancy Lin > > + > > #endif /* _DT_BINDINGS_RESET_CONTROLLER_MT8195 */ > > -- > > 2.18.0 > >
Re: [PATCH v5 01/16] dt-bindings: mediatek: add vdosys1 RDMA definition for mt8195
Hi Chun-Kuang, Thanks for the review. On Tue, 2021-09-07 at 07:42 +0800, Chun-Kuang Hu wrote: > Hi, Nancy: > > Nancy.Lin 於 2021年9月6日 週一 下午3:15寫道: > > > > Add vdosys1 RDMA definition. > > > > Signed-off-by: Nancy.Lin > > --- > > .../display/mediatek/mediatek,mdp-rdma.yaml | 77 > > +++ > > 1 file changed, 77 insertions(+) > > create mode 100644 > > Documentation/devicetree/bindings/display/mediatek/mediatek,mdp- > > rdma.yaml > > > > diff --git > > a/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp- > > rdma.yaml > > b/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp- > > rdma.yaml > > new file mode 100644 > > index ..3610093848e1 > > --- /dev/null > > +++ > > b/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp- > > rdma.yaml > > I've compared the rdma driver in mdp [1] with the rdma driver in > display [2], both are similar. The differences are like merge0 versus > merge5. So I would like both binding documents to be placed together. In > display folder? In media folder? In SoC folder? I've no idea which > one > is better, but at least put together. > > [1] > https://urldefense.com/v3/__https://patchwork.kernel.org/project/linux-mediatek/patch/20210824100027.25989-6-moudy...@mediatek.com/__;!!CTRNKA9wMg0ARbw!1MjfK1sAMDvP9fU1GX6QvfLEfapYEcLmsYP2AhkAOZ6LVaLTLi6vAnJMMqH3vrJ3$ > > [2] > https://urldefense.com/v3/__https://patchwork.kernel.org/project/linux-mediatek/patch/20210906071539.12953-12-nancy@mediatek.com/__;!!CTRNKA9wMg0ARbw!1MjfK1sAMDvP9fU1GX6QvfLEfapYEcLmsYP2AhkAOZ6LVaLTLi6vAnJMMuM29V9T$ > > > Regards, > Chun-Kuang. > OK, I will discuss this with Moudy. 
Regards, Nancy Lin > > @@ -0,0 +1,77 @@ > > +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) > > +%YAML 1.2 > > +--- > > +$id: > > https://urldefense.com/v3/__http://devicetree.org/schemas/display/mediatek/mediatek,mdp-rdma.yaml*__;Iw!!CTRNKA9wMg0ARbw!1MjfK1sAMDvP9fU1GX6QvfLEfapYEcLmsYP2AhkAOZ6LVaLTLi6vAnJMMheRB2bL$ > > > > +$schema: > > https://urldefense.com/v3/__http://devicetree.org/meta-schemas/core.yaml*__;Iw!!CTRNKA9wMg0ARbw!1MjfK1sAMDvP9fU1GX6QvfLEfapYEcLmsYP2AhkAOZ6LVaLTLi6vAnJMMkoF4_Zs$ > > > > + > > +title: mediatek display MDP RDMA > > + > > +maintainers: > > + - CK Hu > > + > > +description: | > > + The mediatek display MDP RDMA stands for Read Direct Memory > > Access. > > + It provides real time data to the back-end panel driver, such as > > DSI, > > + DPI and DP_INTF. > > + It contains one line buffer to store the sufficient pixel data. > > + RDMA device node must be siblings to the central MMSYS_CONFIG > > node. > > + For a description of the MMSYS_CONFIG binding, see > > + Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.ya > > ml for details. > > + > > +properties: > > + compatible: > > +oneOf: > > + - items: > > + - const: mediatek,mt8195-vdo1-rdma > > + > > + reg: > > +maxItems: 1 > > + > > + interrupts: > > +maxItems: 1 > > + > > + power-domains: > > +description: A phandle and PM domain specifier as defined by > > bindings of > > + the power controller specified by phandle. See > > + Documentation/devicetree/bindings/power/power-domain.yaml > > for details. > > + > > + clocks: > > +items: > > + - description: RDMA Clock > > + > > + iommus: > > +description: > > + This property should point to the respective IOMMU block > > with master port as argument, > > + see > > Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for > > details. > > + > > + mediatek,gce-client-reg: > > +description: > > + The register of display function block to be set by gce. 
> > There are 4 arguments, > > + such as gce node, subsys id, offset and register size. The > > subsys id that is > > + mapping to the register of display function blocks is > > defined in the gce header > > + include/include/dt-bindings/gce/-gce.h of each chips. > > +$ref: /schemas/types.yaml#/definitions/phandle-array > > +maxItems: 1 > > + > > +required: > > + - compatible > > + - reg > > + - power-domains > > + - clocks > > + - iommus > > + > > +additionalProperties: false > > + > > +examples: > > + - | > > + > > +vdo1_rdma0: vdo1_rdma@1c104000 { > > +compatible = "mediatek,mt8195-vdo1-rdma"; > > +reg = <0 0x1c104000 0 0x1000>; > > +interrupts = ; > > +clocks = <&vdosys1 CLK_VDO1_MDP_RDMA0>; > > +power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>; > > +iommus = <&iommu_vdo M4U_PORT_L2_MDP_RDMA0>; > > +mediatek,gce-client-reg = <&gce1 SUBSYS_1c10 0x4000 > > 0x1000>; > > +}; > > + > > -- > > 2.18.0 > >
Re: [PATCH v5 09/16] soc: mediatek: mmsys: modify reset controller for MT8195 vdosys1
Dear Philipp, Thanks for the review. On Mon, 2021-09-06 at 09:29 +0200, Philipp Zabel wrote: > Hi Nancy, > > On Mon, 2021-09-06 at 15:15 +0800, Nancy.Lin wrote: > > MT8195 vdosys1 has more than 32 reset bits and a different reset > > base > > than other chips. Modify mmsys for support 64 bit and different > > reset > > base. > > > > Signed-off-by: Nancy.Lin > > --- > > drivers/soc/mediatek/mt8195-mmsys.h | 1 + > > drivers/soc/mediatek/mtk-mmsys.c| 15 --- > > drivers/soc/mediatek/mtk-mmsys.h| 1 + > > 3 files changed, 14 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/soc/mediatek/mt8195-mmsys.h > > b/drivers/soc/mediatek/mt8195-mmsys.h > > index 648baaec112b..f67801c42fd9 100644 > > --- a/drivers/soc/mediatek/mt8195-mmsys.h > > +++ b/drivers/soc/mediatek/mt8195-mmsys.h > > @@ -123,6 +123,7 @@ > > #define MT8195_VDO1_MIXER_SOUT_SEL_IN > > 0xf68 > > #define MT8195_MIXER_SOUT_SEL_IN_FROM_DISP_MIXER (0 << > > 0) > > > > +#define MT8195_VDO1_SW0_RST_B 0x1d0 > > #define MT8195_VDO1_MERGE0_ASYNC_CFG_WD0xe30 > > #define MT8195_VDO1_MERGE1_ASYNC_CFG_WD0xe40 > > #define MT8195_VDO1_MERGE2_ASYNC_CFG_WD0xe50 > > diff --git a/drivers/soc/mediatek/mtk-mmsys.c > > b/drivers/soc/mediatek/mtk-mmsys.c > > index 060065501b8a..97cb26339ef6 100644 > > --- a/drivers/soc/mediatek/mtk-mmsys.c > > +++ b/drivers/soc/mediatek/mtk-mmsys.c > > @@ -18,6 +18,8 @@ > > #include "mt8365-mmsys.h" > > #include "mt8195-mmsys.h" > > > > +#define MMSYS_SW_RESET_PER_REG 32 > > + > > static const struct mtk_mmsys_driver_data mt2701_mmsys_driver_data > > = { > > .clk_driver = "clk-mt2701-mm", > > .routes = mmsys_default_routing_table, > > @@ -48,12 +50,14 @@ static const struct mtk_mmsys_driver_data > > mt8173_mmsys_driver_data = { > > .clk_driver = "clk-mt8173-mm", > > .routes = mmsys_default_routing_table, > > .num_routes = ARRAY_SIZE(mmsys_default_routing_table), > > + .sw_reset_start = MMSYS_SW0_RST_B, > > }; > > > > static const struct mtk_mmsys_driver_data mt8183_mmsys_driver_data > > = { > 
> .clk_driver = "clk-mt8183-mm", > > .routes = mmsys_mt8183_routing_table, > > .num_routes = ARRAY_SIZE(mmsys_mt8183_routing_table), > > + .sw_reset_start = MMSYS_SW0_RST_B, > > }; > > > > static const struct mtk_mmsys_driver_data mt8365_mmsys_driver_data > > = { > > @@ -74,6 +78,7 @@ static const struct mtk_mmsys_driver_data > > mt8195_vdosys1_driver_data = { > > .num_routes = ARRAY_SIZE(mmsys_mt8195_routing_table), > > .config = mmsys_mt8195_config_table, > > .num_configs = ARRAY_SIZE(mmsys_mt8195_config_table), > > + .sw_reset_start = MT8195_VDO1_SW0_RST_B, > > }; > > > > struct mtk_mmsys { > > @@ -126,19 +131,23 @@ static int mtk_mmsys_reset_update(struct > > reset_controller_dev *rcdev, unsigned l > > { > > struct mtk_mmsys *mmsys = container_of(rcdev, struct mtk_mmsys, > > rcdev); > > unsigned long flags; > > + u32 offset; > > u32 reg; > > int i; > > > > + offset = (id / MMSYS_SW_RESET_PER_REG) * sizeof(u32); > > + id = id % MMSYS_SW_RESET_PER_REG; > > + > > spin_lock_irqsave(&mmsys->lock, flags); > > > > - reg = readl_relaxed(mmsys->regs + MMSYS_SW0_RST_B); > > + reg = readl_relaxed(mmsys->regs + mmsys->data->sw_reset_start + > > offset); > > > > if (assert) > > reg &= ~BIT(id); > > else > > reg |= BIT(id); > > > > - writel_relaxed(reg, mmsys->regs + MMSYS_SW0_RST_B); > > + writel_relaxed(reg, mmsys->regs + mmsys->data->sw_reset_start + > > offset); > > > > spin_unlock_irqrestore(&mmsys->lock, flags); > > > > @@ -237,7 +246,7 @@ static int mtk_mmsys_probe(struct > > platform_device *pdev) > > spin_lock_init(&mmsys->lock); > > > > mmsys->rcdev.owner = THIS_MODULE; > > - mmsys->rcdev.nr_resets = 32; > > + mmsys->rcdev.nr_resets = 64; > > If only MT8195 vdosys1 has more than 32 reset bits, this should be > kept > at 32 for the others. > > regards OK, I will modify it in the next revision. > Philipp
RE: [RFC v1 4/6] drm/virtio: Probe and implement VIRTIO_GPU_F_RELEASE_FENCE feature
Hi Gerd, > Hi, > > > --- a/include/uapi/linux/virtio_gpu.h > > +++ b/include/uapi/linux/virtio_gpu.h > > @@ -60,6 +60,8 @@ > > */ > > #define VIRTIO_GPU_F_RESOURCE_BLOB 3 > > > > +#define VIRTIO_GPU_F_RELEASE_FENCE 4 > > + > > enum virtio_gpu_ctrl_type { > > VIRTIO_GPU_UNDEFINED = 0, > > Where is the virtio-spec update for that? [Kasireddy, Vivek] I was going to do that if there'd be a consensus over DRM_CAP_RELEASE_FENCE. Otherwise, I don't think VIRTIO_GPU_F_RELEASE_FENCE is needed. Thanks, Vivek > > thanks, > Gerd
Re: [Freedreno] [PATCH] drm/msm: Do not run snapshot on non-DPU devices
Hi Fabio On 2021-09-14 10:48, Fabio Estevam wrote: Since commit 98659487b845 ("drm/msm: add support to take dpu snapshot") the following NULL pointer dereference is seen on i.MX53: [ 3.275493] msm msm: bound 3000.gpu (ops a3xx_ops) [ 3.287174] [drm] Initialized msm 1.8.0 20130625 for msm on minor 0 [ 3.293915] 8<--- cut here --- [ 3.297012] Unable to handle kernel NULL pointer dereference at virtual address 0028 [ 3.305244] pgd = (ptrval) [ 3.307989] [0028] *pgd= [ 3.311624] Internal error: Oops: 805 [#1] SMP ARM [ 3.316430] Modules linked in: [ 3.319503] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0+g682d702b426b #1 [ 3.326652] Hardware name: Freescale i.MX53 (Device Tree Support) [ 3.332754] PC is at __mutex_init+0x14/0x54 [ 3.336969] LR is at msm_disp_snapshot_init+0x24/0xa0 i.MX53 does not use the DPU controller. Fix the problem by only calling msm_disp_snapshot_init() on platforms that use the DPU controller. Cc: sta...@vger.kernel.org Fixes: 98659487b845 ("drm/msm: add support to take dpu snapshot") Signed-off-by: Fabio Estevam --- drivers/gpu/drm/msm/msm_drv.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 2e6fc185e54d..2aa2266454b7 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -630,10 +630,11 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) if (ret) goto err_msm_uninit; - ret = msm_disp_snapshot_init(ddev); - if (ret) - DRM_DEV_ERROR(dev, "msm_disp_snapshot_init failed ret = %d\n", ret); - + if (kms) { + ret = msm_disp_snapshot_init(ddev); + if (ret) + DRM_DEV_ERROR(dev, "msm_disp_snapshot_init failed ret = %d\n", ret); + } Are you not using DPU or are you not using mdp4/mdp5 as well? Even if you are using any of mdps, kms should not be NULL. Hence wanted to check the test case. drm_mode_config_reset(ddev); #ifdef CONFIG_DRM_FBDEV_EMULATION
Re: [virtio-dev] [PATCH v1 09/12] drm/virtio: implement context init: allocate an array of fence contexts
i On Tue, Sep 14, 2021 at 6:26 PM Gurchetan Singh wrote: > > > > On Tue, Sep 14, 2021 at 10:53 AM Chia-I Wu wrote: >> >> ,On Mon, Sep 13, 2021 at 6:57 PM Gurchetan Singh >> wrote: >> > >> > >> > >> > >> > On Mon, Sep 13, 2021 at 11:52 AM Chia-I Wu wrote: >> >> >> >> . >> >> >> >> On Mon, Sep 13, 2021 at 10:48 AM Gurchetan Singh >> >> wrote: >> >> > >> >> > >> >> > >> >> > On Fri, Sep 10, 2021 at 12:33 PM Chia-I Wu wrote: >> >> >> >> >> >> On Wed, Sep 8, 2021 at 6:37 PM Gurchetan Singh >> >> >> wrote: >> >> >> > >> >> >> > We don't want fences from different 3D contexts (virgl, gfxstream, >> >> >> > venus) to be on the same timeline. With explicit context creation, >> >> >> > we can specify the number of ring each context wants. >> >> >> > >> >> >> > Execbuffer can specify which ring to use. >> >> >> > >> >> >> > Signed-off-by: Gurchetan Singh >> >> >> > Acked-by: Lingfeng Yang >> >> >> > --- >> >> >> > drivers/gpu/drm/virtio/virtgpu_drv.h | 3 +++ >> >> >> > drivers/gpu/drm/virtio/virtgpu_ioctl.c | 34 >> >> >> > -- >> >> >> > 2 files changed, 35 insertions(+), 2 deletions(-) >> >> >> > >> >> >> > diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h >> >> >> > b/drivers/gpu/drm/virtio/virtgpu_drv.h >> >> >> > index a5142d60c2fa..cca9ab505deb 100644 >> >> >> > --- a/drivers/gpu/drm/virtio/virtgpu_drv.h >> >> >> > +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h >> >> >> > @@ -56,6 +56,7 @@ >> >> >> > #define STATE_ERR 2 >> >> >> > >> >> >> > #define MAX_CAPSET_ID 63 >> >> >> > +#define MAX_RINGS 64 >> >> >> > >> >> >> > struct virtio_gpu_object_params { >> >> >> > unsigned long size; >> >> >> > @@ -263,6 +264,8 @@ struct virtio_gpu_fpriv { >> >> >> > uint32_t ctx_id; >> >> >> > uint32_t context_init; >> >> >> > bool context_created; >> >> >> > + uint32_t num_rings; >> >> >> > + uint64_t base_fence_ctx; >> >> >> > struct mutex context_lock; >> >> >> > }; >> >> >> > >> >> >> > diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c >> >> >> > b/drivers/gpu/drm/virtio/virtgpu_ioctl.c 
>> >> >> > index f51f3393a194..262f79210283 100644 >> >> >> > --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c >> >> >> > +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c >> >> >> > @@ -99,6 +99,11 @@ static int virtio_gpu_execbuffer_ioctl(struct >> >> >> > drm_device *dev, void *data, >> >> >> > int in_fence_fd = exbuf->fence_fd; >> >> >> > int out_fence_fd = -1; >> >> >> > void *buf; >> >> >> > + uint64_t fence_ctx; >> >> >> > + uint32_t ring_idx; >> >> >> > + >> >> >> > + fence_ctx = vgdev->fence_drv.context; >> >> >> > + ring_idx = 0; >> >> >> > >> >> >> > if (vgdev->has_virgl_3d == false) >> >> >> > return -ENOSYS; >> >> >> > @@ -106,6 +111,17 @@ static int virtio_gpu_execbuffer_ioctl(struct >> >> >> > drm_device *dev, void *data, >> >> >> > if ((exbuf->flags & ~VIRTGPU_EXECBUF_FLAGS)) >> >> >> > return -EINVAL; >> >> >> > >> >> >> > + if ((exbuf->flags & VIRTGPU_EXECBUF_RING_IDX)) { >> >> >> > + if (exbuf->ring_idx >= vfpriv->num_rings) >> >> >> > + return -EINVAL; >> >> >> > + >> >> >> > + if (!vfpriv->base_fence_ctx) >> >> >> > + return -EINVAL; >> >> >> > + >> >> >> > + fence_ctx = vfpriv->base_fence_ctx; >> >> >> > + ring_idx = exbuf->ring_idx; >> >> >> > + } >> >> >> > + >> >> >> > exbuf->fence_fd = -1; >> >> >> > >> >> >> > virtio_gpu_create_context(dev, file); >> >> >> > @@ -173,7 +189,7 @@ static int virtio_gpu_execbuffer_ioctl(struct >> >> >> > drm_device *dev, void *data, >> >> >> > goto out_memdup; >> >> >> > } >> >> >> > >> >> >> > - out_fence = virtio_gpu_fence_alloc(vgdev, >> >> >> > vgdev->fence_drv.context, 0); >> >> >> > + out_fence = virtio_gpu_fence_alloc(vgdev, fence_ctx, >> >> >> > ring_idx); >> >> >> > if(!out_fence) { >> >> >> > ret = -ENOMEM; >> >> >> > goto out_unresv; >> >> >> > @@ -691,7 +707,7 @@ static int virtio_gpu_context_init_ioctl(struct >> >> >> > drm_device *dev, >> >> >> > return -EINVAL; >> >> >> > >> >> >> > /* Number of unique parameters supported at this time. 
*/ >> >> >> > - if (num_params > 1) >> >> >> > + if (num_params > 2) >> >> >> > return -EINVAL; >> >> >> > >> >> >> > ctx_set_params = >> >> >> > memdup_user(u64_to_user_ptr(args->ctx_set_params), >> >> >> > @@ -731,6 +747,20 @@ static int virtio_gpu_context_init_ioctl(struct >> >> >> > drm_device *dev, >> >> >> > >> >> >> > vfpriv->context_init |= value; >> >> >> > break; >> >> >> > + case VIRTGPU_CONTEXT_PARAM_NUM_RINGS: >> >> >> > + if (vfpriv->base_fence_ctx) { >> >> >> > +
[PATCH 2/2] drm/i915/uapi: Add query for hwconfig table
From: Rodrigo Vivi GuC contains a consolidated table with a bunch of information about the current device. Previously, this information was spread and hardcoded to all the components including GuC, i915 and various UMDs. The goal here is to consolidate the data into GuC in a way that all interested components can grab the very latest and synchronized information using a simple query. As per most of the other queries, this one can be called twice. Once with item.length=0 to determine the exact buffer size, then allocate the user memory and call it again to retrieve the table data. For example: struct drm_i915_query_item item = { .query_id = DRM_I915_QUERY_HWCONFIG_TABLE, }; query.items_ptr = (int64_t) &item; query.num_items = 1; ioctl(fd, DRM_IOCTL_I915_QUERY, query, sizeof(query)); if (item.length <= 0) return -ENOENT; data = malloc(item.length); item.data_ptr = (int64_t) data; ioctl(fd, DRM_IOCTL_I915_QUERY, query, sizeof(query)); // Parse the data as appropriate... The returned array is a simple and flexible KLV (Key/Length/Value) formatted table. For example, it could be just: enum device_attr { ATTR_SOME_VALUE = 0, ATTR_SOME_MASK = 1, }; static const u32 hwconfig[] = { ATTR_SOME_VALUE, 1, // Value Length in DWords 8, // Value ATTR_SOME_MASK, 3, 0x00, 0x, 0xFF00, }; The attribute ids are defined in a hardware spec. 
Cc: Tvrtko Ursulin Cc: Kenneth Graunke Cc: Michal Wajdeczko Cc: Slawomir Milczarek Signed-off-by: Rodrigo Vivi Signed-off-by: John Harrison Reviewed-by: Matthew Brost --- drivers/gpu/drm/i915/i915_query.c | 23 +++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index 5e2b909827f4..96989a37453c 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -477,12 +477,35 @@ static int query_memregion_info(struct drm_i915_private *i915, return total_length; } +static int query_hwconfig_table(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + struct intel_gt *gt = &i915->gt; + struct intel_guc_hwconfig *hwconfig = >->uc.guc.hwconfig; + + if (!hwconfig->size || !hwconfig->ptr) + return -ENODEV; + + if (query_item->length == 0) + return hwconfig->size; + + if (query_item->length < hwconfig->size) + return -EINVAL; + + if (copy_to_user(u64_to_user_ptr(query_item->data_ptr), +hwconfig->ptr, hwconfig->size)) + return -EFAULT; + + return hwconfig->size; +} + static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, struct drm_i915_query_item *query_item) = { query_topology_info, query_engine_info, query_perf_config, query_memregion_info, + query_hwconfig_table, }; int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index bde5860b3686..a1281f35b190 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2499,6 +2499,7 @@ struct drm_i915_query_item { #define DRM_I915_QUERY_ENGINE_INFO 2 #define DRM_I915_QUERY_PERF_CONFIG 3 #define DRM_I915_QUERY_MEMORY_REGIONS 4 +#define DRM_I915_QUERY_HWCONFIG_TABLE 5 /* Must be kept compact -- no holes and well documented */ /** -- 2.25.1
[PATCH 0/2] Add support for querying hw info that UMDs need
From: John Harrison Various UMDs require hardware configuration information about the current platform. A bunch of static information is available in a fixed table that can be retrieved from the GuC. Test-with: 20210915215558.2473428-2-john.c.harri...@intel.com UMD: https://github.com/intel/compute-runtime/pull/432/files UMD: https://github.com/intel/media-driver/pull/1239/files CC: Katarzyna Cencelewska CC: Tony Ye CC: Jason Ekstrand Signed-off-by: John Harrison Reviewed-by: Matthew Brost John Harrison (1): drm/i915/guc: Add fetch of hwconfig table Rodrigo Vivi (1): drm/i915/uapi: Add query for hwconfig table drivers/gpu/drm/i915/Makefile | 1 + .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 1 + .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h | 4 + drivers/gpu/drm/i915/gt/uc/intel_guc.c| 3 +- drivers/gpu/drm/i915/gt/uc/intel_guc.h| 2 + .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.c | 156 ++ .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.h | 19 +++ drivers/gpu/drm/i915/gt/uc/intel_uc.c | 6 + drivers/gpu/drm/i915/i915_query.c | 23 +++ include/uapi/drm/i915_drm.h | 1 + 10 files changed, 215 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h -- 2.25.1
[PATCH 1/2] drm/i915/guc: Add fetch of hwconfig table
From: John Harrison Implement support for fetching the hardware description table from the GuC. The call is made twice - once without a destination buffer to query the size and then a second time to fill in the buffer. Note that the table is only available on ADL-P and later platforms. Cc: Michal Wajdeczko Signed-off-by: Rodrigo Vivi Signed-off-by: John Harrison Reviewed-by: Matthew Brost --- drivers/gpu/drm/i915/Makefile | 1 + .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 1 + .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h | 4 + drivers/gpu/drm/i915/gt/uc/intel_guc.c| 3 +- drivers/gpu/drm/i915/gt/uc/intel_guc.h| 2 + .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.c | 156 ++ .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.h | 19 +++ drivers/gpu/drm/i915/gt/uc/intel_uc.c | 6 + 8 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index c1e9f7369fb5..3789f03a1021 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -190,6 +190,7 @@ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_guc_rc.o \ gt/uc/intel_guc_slpc.o \ gt/uc/intel_guc_submission.o \ + gt/uc/intel_guc_hwconfig.o \ gt/uc/intel_huc.o \ gt/uc/intel_huc_debugfs.o \ gt/uc/intel_huc_fw.o diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 8ff58aff..72fd492b726a 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -137,6 +137,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, + INTEL_GUC_ACTION_GET_HWCONFIG = 0x4100, INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502, INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 
0x4505, diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h index 488b6061ee89..f9e2a6aaef4a 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h @@ -8,6 +8,10 @@ enum intel_guc_response_status { INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0, + INTEL_GUC_RESPONSE_NOT_SUPPORTED = 0x20, + INTEL_GUC_RESPONSE_NO_ATTRIBUTE_TABLE = 0x201, + INTEL_GUC_RESPONSE_NO_DECRYPTION_KEY = 0x202, + INTEL_GUC_RESPONSE_DECRYPTION_FAILED = 0x204, INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index fbfcae727d7f..82c0ce0090c6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -422,13 +422,14 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *request, u32 len, /* * No GuC command should ever take longer than 10ms. * Fast commands should still complete in 10us. +* Except for the hwconfig table query, which takes ~50ms. 
*/ ret = __intel_wait_for_register_fw(uncore, guc_send_reg(guc, 0), GUC_HXG_MSG_0_ORIGIN, FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC), - 10, 10, &header); + 10, 100, &header); if (unlikely(ret)) { timeout: drm_err(&i915->drm, "mmio request %#x: no reply %x\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 5dd174babf7a..ec38a69ca3fe 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -13,6 +13,7 @@ #include "intel_guc_fw.h" #include "intel_guc_fwif.h" #include "intel_guc_ct.h" +#include "intel_guc_hwconfig.h" #include "intel_guc_log.h" #include "intel_guc_reg.h" #include "intel_guc_slpc_types.h" @@ -37,6 +38,7 @@ struct intel_guc { struct intel_guc_ct ct; /** @slpc: sub-structure containing SLPC related data and objects */ struct intel_guc_slpc slpc; + struct intel_guc_hwconfig hwconfig; /** @sched_engine: Global engine used to submit requests to GuC */ struct i915_sched_engine *sched_engine; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c new file mode 100644 index ..af4fc9fdbaaf --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c @@
Re: [virtio-dev] Re: [PATCH v1 08/12] drm/virtio: implement context init: stop using drv->context when creating fence
On Tue, Sep 14, 2021 at 10:53 PM Gerd Hoffmann wrote: > On Wed, Sep 08, 2021 at 06:37:13PM -0700, Gurchetan Singh wrote: > > The plumbing is all here to do this. Since we always use the > > default fence context when allocating a fence, this makes no > > functional difference. > > > > We can't process just the largest fence id anymore, since > > it's associated with different timelines. It's fine for fence_id > > 260 to signal before 259. As such, process each fence_id > > individually. > > I guess you need to also update virtio_gpu_fence_event_process() > then? It currently has the strict ordering logic baked in ... > The update to virtio_gpu_fence_event_process was done as a preparation a few months back: https://cgit.freedesktop.org/drm/drm-misc/commit/drivers/gpu/drm/virtio/virtgpu_fence.c?id=36549848ed27c22bb2ffd5d1468efc6505b05f97 > > take care, > Gerd > > > - > To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org > For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org > >
Re: [PATCH v2] drm/panfrost: Calculate lock region size correctly
Took me a careful read, but this is Reviewed-by: Alyssa Rosenzweig Thanks for hunting this down!
Re: [PATCH V6 2/2] drm/vkms: Add support for virtual hardware mode
On 09/01, Sumera Priyadarsini wrote: > Add a virtual hardware or vblank-less mode as a module > to enable VKMS to emulate virtual hardware drivers. This means > no vertical blanking events occur and pageflips are completed > arbitrarily and when required for updating the frame. > > Add a new drm_crtc_funcs struct, vkms_vblankless_crtc_funcs and a > drm_crtc_helper_funcs struct, vkms_vblankless_crtc_helper_funcs() > which hold the atomic helpers for virtual hardware mode. > The existing vkms_crtc_funcs struct and vkms_crtc_helper_funcs > struct hold atomic helpers for the default vblank mode. > This makes the code flow clearer and testing > virtual hardware mode easier. > > Add a function vkms_crtc_composer() which calls the helper function, > vkms_composer_common() for plane composition in vblank-less mode. > vkms_crtc_composer() is directly called in the atomic hook in > vkms_crtc_atomic_begin(). > > However, some crc captures still use vblanks which causes the crc-based > igt tests to crash. So, no CRC functions are called in vblankless mode > for now and will be implemented in a later patch. Hi Sumera, this approach lgtm. something on the writeback engine needs to be adjusted, as I checked a leak when unloading the driver (after a kms_writeback testcase). One fix is pointed below. As far as I checked to the other issue, a wb job is prepared and enqueued, but never cleaned up. Not sure if wb ops have any dependency on vblanks (and we should also skip) or something wrong when getting crtc state during the composer work (wb_pending condition is not working as expected when in the vhw mode). The error log is here: https://paste.debian.net/hidden/54bf7945/ Daniel, do you have any idea from the top of your head? > > This patchset has been tested with the igt tests- kms_writeback, kms_atomic > , kms_lease, kms_flip, kms_pipe_get_crc and preserves results except for > subtests related to crc reads and vertical blanking, in which case, > tests are skipped. 
> > The patch is based on Rodrigo Siqueira's > patch(https://patchwork.freedesktop.org/patch/316851/?series=48469&rev=3) > and the ensuing review. > > Signed-off-by: Sumera Priyadarsini > --- > Changes in V6: > - Skip CRC functions in vblankless mode > - Refactor helper function names(Melissa) > Changes in V5: > - Move vkms_crtc_composer() to this patch(Melissa) > - Add more clarification for "vblank-less" mode(Pekka) > - Replace kzalloc() with kvmalloc() in compose_active_planes() > to fix memory allocation error for output frame > - Fix checkpatch warnings (Melissa) > Changes in V3: > - Refactor patchset(Melissa) > Changes in V2: > - Add atomic helper functions in a separate struct for virtual hardware > mode (Daniel) > - Remove spinlock across 'vkms_output->lock' in vkms_crtc.c(Daniel) > - Add vkms_composer_common() (Daniel) > --- > drivers/gpu/drm/vkms/vkms_composer.c | 21 +++-- > drivers/gpu/drm/vkms/vkms_crtc.c | 43 +-- > drivers/gpu/drm/vkms/vkms_drv.c | 16 +++--- > drivers/gpu/drm/vkms/vkms_drv.h | 2 ++ > drivers/gpu/drm/vkms/vkms_writeback.c | 3 +- > 5 files changed, 74 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/vkms/vkms_composer.c > b/drivers/gpu/drm/vkms/vkms_composer.c > index bca746fb5b53..a009589b2c3a 100644 > --- a/drivers/gpu/drm/vkms/vkms_composer.c > +++ b/drivers/gpu/drm/vkms/vkms_composer.c > @@ -176,11 +176,12 @@ static int compose_active_planes(void **vaddr_out, > { > struct drm_framebuffer *fb = &primary_composer->fb; > struct drm_gem_object *gem_obj = drm_gem_fb_get_obj(fb, 0); > + > const void *vaddr; > int i; > > if (!*vaddr_out) { > - *vaddr_out = kzalloc(gem_obj->size, GFP_KERNEL); > + *vaddr_out = kvmalloc(gem_obj->size, GFP_KERNEL); > if (!*vaddr_out) { > DRM_ERROR("Cannot allocate memory for output frame."); > return -ENOMEM; > @@ -229,7 +230,7 @@ int vkms_composer_common(struct vkms_crtc_state > *crtc_state, > > if (ret) { > if ((ret == -EINVAL || ret == -ENOMEM) && !wb_pending) > - kfree(vaddr_out); > + 
kvfree(vaddr_out); > return ret; > } > > @@ -241,7 +242,7 @@ int vkms_composer_common(struct vkms_crtc_state > *crtc_state, > crtc_state->wb_pending = false; > spin_unlock_irq(&out->composer_lock); > } else { > - kfree(vaddr_out); > + kvfree(vaddr_out); > } > > return 0; > @@ -296,6 +297,20 @@ void vkms_composer_worker(struct work_struct *work) > drm_crtc_add_crc_entry(crtc, true, frame_start++, &crc32); > } > > +void vkms_crtc_composer(struct vkms_crtc_state *crtc_state) > +{ > + struct drm_crtc *crtc = crtc_state->base.crtc; > + struct vkms_output *out = drm_crtc_to_vkms_output(crtc); > + u32 crc32 = 0; > + int ret; > + > +
[PATCH v7 2/3] drm/mediatek: implement the dsi hs packets aligned
Some dsi devices require the packets on lanes aligned at the end, or the screen will shift or scroll. Signed-off-by: Jitao Shi --- drivers/gpu/drm/mediatek/mtk_dsi.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 93b40c245f00..9d72e6dce0bf 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -194,6 +194,8 @@ struct mtk_dsi { struct clk *hs_clk; u32 data_rate; + /* force dsi line end without dsi_null data */ + bool hs_packet_end_aligned; unsigned long mode_flags; enum mipi_dsi_pixel_format format; @@ -499,6 +501,13 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi) DRM_WARN("HFP + HBP less than d-phy, FPS will under 60Hz\n"); } + if (dsi->hs_packet_end_aligned) { + horizontal_sync_active_byte = roundup(horizontal_sync_active_byte, dsi->lanes) - 2; + horizontal_frontporch_byte = roundup(horizontal_frontporch_byte, dsi->lanes) - 2; + horizontal_backporch_byte = roundup(horizontal_backporch_byte, dsi->lanes) - 2; + horizontal_backporch_byte -= (vm->hactive * dsi_tmp_buf_bpp + 2) % dsi->lanes; + } + writel(horizontal_sync_active_byte, dsi->regs + DSI_HSA_WC); writel(horizontal_backporch_byte, dsi->regs + DSI_HBP_WC); writel(horizontal_frontporch_byte, dsi->regs + DSI_HFP_WC); @@ -793,6 +802,7 @@ static int mtk_dsi_host_attach(struct mipi_dsi_host *host, dsi->lanes = device->lanes; dsi->format = device->format; dsi->mode_flags = device->mode_flags; + dsi->hs_packet_end_aligned = device->hs_packet_end_aligned; return 0; } -- 2.25.1
[PATCH v7 3/3] drm/bridge: anx7625: config hs packets end aligned to avoid screen shift
This device requires the packets on lanes aligned at the end to fix screen shift or scroll. Signed-off-by: Jitao Shi --- drivers/gpu/drm/bridge/analogix/anx7625.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 14d73fb1dd15..d76fb63fa9f7 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -1327,6 +1327,7 @@ static int anx7625_attach_dsi(struct anx7625_data *ctx) MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_NO_EOT_PACKET | MIPI_DSI_MODE_VIDEO_HSE; + dsi->hs_packet_end_aligned = true; if (mipi_dsi_attach(dsi) < 0) { DRM_DEV_ERROR(dev, "fail to attach dsi to host.\n"); -- 2.25.1
[PATCH v7 1/3] drm/dsi: transfer dsi hs packet aligned
Some DSI devices require the HS packets to start and end at the same time on all DSI lanes, so add a flag for those devices. Signed-off-by: Jitao Shi --- include/drm/drm_mipi_dsi.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index af7ba8071eb0..8e8563792682 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -177,6 +177,7 @@ struct mipi_dsi_device_info { * @lp_rate: maximum lane frequency for low power mode in hertz, this should * be set to the real limits of the hardware, zero is only accepted for * legacy drivers + * @hs_packet_end_aligned: transfer dsi hs packet ending aligned */ struct mipi_dsi_device { struct mipi_dsi_host *host; @@ -189,6 +190,7 @@ struct mipi_dsi_device { unsigned long mode_flags; unsigned long hs_rate; unsigned long lp_rate; + bool hs_packet_end_aligned; }; #define MIPI_DSI_MODULE_PREFIX "mipi-dsi:" -- 2.25.1
[PATCH v7 0/3] force hsa hbp hfp packets multiple of lanenum to avoid screen shift
Changes since v6: - Add "bool hs_packet_end_aligned" in "struct mipi_dsi_device" to control the DSI packet end alignment. - Config the "hs_packet_end_aligned" in ANX7625 .attach(). Changes since v5: - Search the anx7625 compatible as flag to control dsi output aligned. Changes since v4: - Move "dt-bindings: drm/bridge: anx7625: add force_dsi_end_without_null" before "drm/mediatek: force hsa hbp hfp packets multiple of lanenum to avoid". - Retitle "dt-bindings: drm/bridge: anx7625: add force_dsi_end_without_null". Jitao Shi (3): drm/dsi: transfer dsi hs packet aligned drm/mediatek: implement the dsi hs packets aligned drm/bridge: anx7625: config hs packets end aligned to avoid screen shift drivers/gpu/drm/bridge/analogix/anx7625.c | 1 + drivers/gpu/drm/mediatek/mtk_dsi.c| 10 ++ include/drm/drm_mipi_dsi.h| 2 ++ 3 files changed, 13 insertions(+) -- 2.25.1
Re: [RFC PATCH v2 2/2] drm/bridge: parade-ps8640: Add support for AUX channel
Hi, On Tue, Sep 14, 2021 at 5:28 PM Philip Chen wrote: > > > > Changes in v2: > > > - Handle the case where an AUX transaction has no payload > > > - Add a reg polling for p0.0x83 to confirm AUX cmd is issued and > > > read data is returned > > > - Replace regmap_noinc_read/write with looped regmap_read/write, > > > as regmap_noinc_read/write doesn't read one byte at a time unless > > > max_raw_read/write is set to 1. > > > > What about if you set val_bytes? I think you just need to set that to > > "1" and it'll work? > I think val_bytes is already set to 1 as we set val_bits to 8. See: > map->format.val_bytes = DIV_ROUND_UP(config->val_bits, 8); To me that feels like a bug in the regmap API, then. I can't see how it would make any sense for this function not to take val_bytes into account... I wonder if other users are somehow getting lucky today. Maybe users that are using this for MMIO get lucky because max_raw_read is set properly. ...and maybe other i2c users get lucky because some peripherals are OK w/ this bug? AKA, maybe this actually works in most cases for FIFOs: write address of bridge chip on i2c bus write R/W bit on i2c bus write FIFO register address on i2c bus read byte read byte read byte ... read byte read byte end transaction Normally for i2c you assume that the other side will read from subsequent register addresses for each "read byte", but I suppose it's possible that some i2c devices are setup to realize that if the register address was the address of a FIFO that it shouldn't read from the next register address but should just read the next byte in the FIFO? In any case, it's fine to do it with a loop like you're doing but it still seems weird that you'd need to. -Doug
Re: [PATCH v3 3/3] drm/bridge: parade-ps8640: Add support for AUX channel
Hi, On Tue, Sep 14, 2021 at 5:57 PM Stephen Boyd wrote: > > Quoting Philip Chen (2021-09-14 16:28:45) > > diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c > > b/drivers/gpu/drm/bridge/parade-ps8640.c > > index 8d3e7a147170..dc349d729f5a 100644 > > --- a/drivers/gpu/drm/bridge/parade-ps8640.c > > +++ b/drivers/gpu/drm/bridge/parade-ps8640.c > > @@ -117,6 +144,129 @@ static inline struct ps8640 *bridge_to_ps8640(struct > > drm_bridge *e) > [...] > > + case DP_AUX_I2C_WRITE: > > + case DP_AUX_I2C_READ: > > + break; > > + default: > > + return -EINVAL; > > + } > > + > > + ret = regmap_write(map, PAGE0_AUXCH_CFG3, AUXCH_CFG3_RESET); > > + if (ret) { > > + dev_err(dev, "failed to write PAGE0_AUXCH_CFG3: %d\n", ret); > > Can we use DRM_DEV_ERROR()? I've never gotten clear guidance here. For instance, in some other review I suggested using the DRM wrapper and got told "no" [1]. ;-) The driver landed without the DRM_ERROR versions. I don't really care lots so it's fine with me to use use DRM_DEV_ERROR, I just wish I understood the rules... [1] https://lore.kernel.org/all/49db7ef3-fa53-a274-7c69-c2d840b13...@denx.de/ > > + return ret; > > + } > > + > > + /* Assume it's good */ > > + msg->reply = 0; > > + > > + addr_len[0] = msg->address & 0xff; > > + addr_len[1] = (msg->address >> 8) & 0xff; > > + addr_len[2] = ((msg->request << 4) & SWAUX_CMD_MASK) | > > + ((msg->address >> 16) & SWAUX_ADDR_19_16_MASK); > > It really feels like this out to be possible with some sort of > cpu_to_le32() API. We're shoving msg->address into 3 bytes and then > adding in the request and some length. 
So we could do something like: > > u32 addr_len; > > addr_len = FIELD_PREP(SWAUX_ADDR_MASK, msg->address); > addr_len |= FIELD_PREP(SWAUX_CMD_MASK, msg->request); > if (len) > addr_len |= FIELD_PREP(LEN_MASK, len - 1); > else > addr_len |= FIELD_PREP(LEN_MASK, SWAUX_NO_PAYLOAD ); > > cpu_to_le32s(&addr_len); > > regmap_bulk_write(map, PAGE0_SWAUX_ADDR_7_0, &addr_len, > sizeof(addr_len)); You're arguing that your version of the code is more efficient? Easier to understand? Something else? To me, Philip's initial version is crystal clear and easy to map to the bridge datasheet but I need to think more to confirm that your version is right. Thinking is hard and I like to avoid it when possible. In any case, it's definitely bikeshedding and I'll yield if everyone likes the other version better. ;-) > > + return ret; > > + } > > + > > + switch (data & SWAUX_STATUS_MASK) { > > + /* Ignore the DEFER cases as they are already handled in hardware */ > > + case SWAUX_STATUS_NACK: > > + case SWAUX_STATUS_I2C_NACK: > > + /* > > +* The programming guide is not clear about whether a I2C > > NACK > > +* would trigger SWAUX_STATUS_NACK or > > SWAUX_STATUS_I2C_NACK. So > > +* we handle both cases together. > > +*/ > > + if (is_native_aux) > > + msg->reply |= DP_AUX_NATIVE_REPLY_NACK; > > + else > > + msg->reply |= DP_AUX_I2C_REPLY_NACK; > > + > > + len = data & SWAUX_M_MASK; > > + return len; > > Why no 'return data & SWAUX_M_MASK;' and skip the assignment? Actually, I think it's the "return" that's a bug, isn't it? If we're doing a "read" and we're returning a positive number of bytes then we need to actually _read_ them. Reading happens below, doesn't it? -Doug
Re: [PATCH v3 3/3] drm/bridge: parade-ps8640: Add support for AUX channel
Hi Fabio On Wed, Sep 15, 2021 at 2:00 PM Fabio Estevam wrote: > > On Wed, Sep 15, 2021 at 5:41 PM Philip Chen wrote: > > > As regmap_read() should always read 1 byte at a time, should I just do: > > regmap_read(map, PAGE0_SWAUX_RDATA, (unsigned int*)(buf + i)) > > There is also regmap_bulk_read() if you need to read more data. Thanks for the review. PAGE0_SWAUX_RDATA is a single-byte FIFO buffer. So I'll need to read one byte at a time cyclically.
Re: [PATCH 0/9] drm: Add privacy-screen class and connector properties
OK! Looked over all of these patches. Patches 2 and 4 have some comments that should be addressed, but otherwise this series is: Reviewed-by: Lyude Paul Let me know when/if you need help pushing this upstream On Mon, 2021-09-06 at 09:35 +0200, Hans de Goede wrote: > Hi all, > > Here is the privacy-screen related code which I last posted in April 2021 > To the best of my knowledge there is consensus about / everyone is in > agreement with the new userspace API (2 connector properties) this > patch-set add (patch 1 of the series). > > This is unchanged (except for a rebase on drm-tip), what has changed is > that the first userspace consumer of the new properties is now fully ready > for merging (it is just waiting for the kernel bits to land first): > > - > https://gitlab.gnome.org/GNOME/gsettings-desktop-schemas/-/merge_requests/49 > - https://gitlab.gnome.org/GNOME/mutter/-/merge_requests/1952 > - https://gitlab.gnome.org/GNOME/gnome-control-center/-/merge_requests/1032 > > Having a userspace-consumer of the API fully ready for merging, clears the > last blocker for this series. It has already has been reviewed before > by Emil Velikov, but it could really do with another review. > > The new API works as designed and add the following features to GNOME: > > 1. Showing an OSD notification when the privacy-screen is toggled on/off > through hotkeys handled by the embedded-controller > 2. Allowing control of the privacy-screen from the GNOME control-panel, > including the on/off slider shown there updating to match the hw-setting > when the setting is changed with the control-panel open. > 3. Restoring the last user-setting at login > > This series consists of a number of different parts: > > 1. A new version of Rajat's privacy-screen connector properties patch, > this adds new userspace API in the form of new properties > > 2. 
Since on most devices the privacy screen is actually controlled by > some vendor specific ACPI/WMI interface which has a driver under > drivers/platform/x86, we need some "glue" code to make this functionality > available to KMS drivers. Patches 2-4 add a new privacy-screen class for > this, which allows non KMS drivers (and possibly KMS drivers too) to > register a privacy-screen device and also adds an interface for KMS drivers > to get access to the privacy-screen associated with a specific connector. > This is modelled similar to how we deal with e.g. PWMs and GPIOs in the > kernel, including separate includes for consumers and providers(drivers). > > 3. Some drm_connector helper functions to keep the actual changes needed > for this in individual KMS drivers as small as possible (patch 5). > > 4. Make the thinkpad_acpi code register a privacy-screen device on > ThinkPads with a privacy-screen (patches 6-8) > > 5. Make the i915 driver export the privacy-screen functionality through > the connector properties on the eDP connector. > > I believe that it would be best to merge the entire series, including > the thinkpad_acpi changes through drm-misc in one go. As the pdx86 > subsys maintainer I hereby give my ack for merging the thinkpad_acpi > changes through drm-misc. > > There is one small caveat with this series, which it is good to be > aware of. The i915 driver will now return -EPROBE_DEFER on Thinkpads > with an eprivacy screen, until the thinkpad_acpi driver is loaded. > This means that initrd generation tools will need to be updated to > include thinkpad_acpi when the i915 driver is added to the initrd. > Without this the loading of the i915 driver will be delayed to after > the switch to real rootfs. 
> > Regards, > > Hans > > > Hans de Goede (8): > drm: Add privacy-screen class (v3) > drm/privacy-screen: Add X86 specific arch init code > drm/privacy-screen: Add notifier support > drm/connector: Add a drm_connector privacy-screen helper functions > platform/x86: thinkpad_acpi: Add hotkey_notify_extended_hotkey() > helper > platform/x86: thinkpad_acpi: Get privacy-screen / lcdshadow ACPI > handles only once > platform/x86: thinkpad_acpi: Register a privacy-screen device > drm/i915: Add privacy-screen support > > Rajat Jain (1): > drm/connector: Add support for privacy-screen properties (v4) > > Documentation/gpu/drm-kms-helpers.rst | 15 + > Documentation/gpu/drm-kms.rst | 2 + > MAINTAINERS | 8 + > drivers/gpu/drm/Kconfig | 4 + > drivers/gpu/drm/Makefile | 1 + > drivers/gpu/drm/drm_atomic_uapi.c | 4 + > drivers/gpu/drm/drm_connector.c | 214 + > drivers/gpu/drm/drm_drv.c | 4 + > drivers/gpu/drm/drm_privacy_screen.c | 468 +++ > drivers/gpu/drm/drm_privacy_screen_x86.c | 86 > drivers/gpu/drm/i915/display/intel_display.c | 5 + > drivers/gpu/drm/i915/display/intel_dp.c | 10 + > drivers/gpu/drm/i915/i915_pci.c
Re: [PATCH 9/9] drm/i915: Add privacy-screen support
On Mon, 2021-09-06 at 09:35 +0200, Hans de Goede wrote: > Add support for eDP panels with a built-in privacy screen using the > new drm_privacy_screen class. > > One thing which stands out here is the addition of these 2 lines to > intel_atomic_commit_tail: > > for_each_new_connector_in_state(&state->base, connector, ... > drm_connector_update_privacy_screen(connector, state); > > It may seem more logical to instead take care of updating the > privacy-screen state by marking the crtc as needing a modeset and then > do this in both the encoder update_pipe (for fast-sets) and enable > (for full modesets) callbacks. But ATM these callbacks only get passed > the new connector_state and these callbacks are all called after > drm_atomic_helper_swap_state() at which point there is no way to get > the old state from the new state. I was going to suggest that you workaround this simply by adding a variable that corresponds to the most recently committed privacy screen state somewhere in a driver private structure. But, then I realized that's basically the same as what you're doing now except that your current solution stores said state in a shared struct. So, I think you probably do have the right idea here as long as we don't get any non-ACPI providers in the future. This also seems like something that wouldn't be difficult to fixup down the line if that ends up changing. > > Without access to the old state, we do not know if the sw_state of > the privacy-screen has changes so we would need to call > drm_privacy_screen_set_sw_state() unconditionally. This is undesirable > since all current known privacy-screen providers use ACPI calls which > are somewhat expensive to make. > > Also, as all providers use ACPI calls, rather then poking GPU registers, > there is no need to order this together with other encoder operations. > Since no GPU poking is involved having this as a separate step of the > commit process actually is the logical thing to do. 
> > Reviewed-by: Emil Velikov > Signed-off-by: Hans de Goede > --- > drivers/gpu/drm/i915/display/intel_display.c | 5 + > drivers/gpu/drm/i915/display/intel_dp.c | 10 ++ > drivers/gpu/drm/i915/i915_pci.c | 12 > 3 files changed, 27 insertions(+) > > diff --git a/drivers/gpu/drm/i915/display/intel_display.c > b/drivers/gpu/drm/i915/display/intel_display.c > index 5560d2f4c352..7285873d329a 100644 > --- a/drivers/gpu/drm/i915/display/intel_display.c > +++ b/drivers/gpu/drm/i915/display/intel_display.c > @@ -10140,6 +10140,8 @@ static void intel_atomic_commit_tail(struct > intel_atomic_state *state) > struct drm_device *dev = state->base.dev; > struct drm_i915_private *dev_priv = to_i915(dev); > struct intel_crtc_state *new_crtc_state, *old_crtc_state; > + struct drm_connector_state *new_connector_state; > + struct drm_connector *connector; > struct intel_crtc *crtc; > u64 put_domains[I915_MAX_PIPES] = {}; > intel_wakeref_t wakeref = 0; > @@ -10237,6 +10239,9 @@ static void intel_atomic_commit_tail(struct > intel_atomic_state *state) > intel_color_load_luts(new_crtc_state); > } > > + for_each_new_connector_in_state(&state->base, connector, > new_connector_state, i) > + drm_connector_update_privacy_screen(connector, &state- > >base); > + > /* > * Now that the vblank has passed, we can go ahead and program the > * optimal watermarks on platforms that need two-step watermark > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c > b/drivers/gpu/drm/i915/display/intel_dp.c > index 7f8e8865048f..3aa2072cccf6 100644 > --- a/drivers/gpu/drm/i915/display/intel_dp.c > +++ b/drivers/gpu/drm/i915/display/intel_dp.c > @@ -37,6 +37,7 @@ > #include > #include > #include > +#include > #include > > #include "g4x_dp.h" > @@ -5217,6 +5218,7 @@ static bool intel_edp_init_connector(struct intel_dp > *intel_dp, > struct drm_connector *connector = &intel_connector->base; > struct drm_display_mode *fixed_mode = NULL; > struct drm_display_mode *downclock_mode = NULL; > + struct 
drm_privacy_screen *privacy_screen; > bool has_dpcd; > enum pipe pipe = INVALID_PIPE; > struct edid *edid; > @@ -5308,6 +5310,14 @@ static bool intel_edp_init_connector(struct intel_dp > *intel_dp, > fixed_mode->hdisplay, fixed_mode->vdisplay); > } > > + privacy_screen = drm_privacy_screen_get(dev->dev, NULL); > + if (!IS_ERR(privacy_screen)) { > + drm_connector_attach_privacy_screen_provider(connector, > + > privacy_screen); > + } else if (PTR_ERR(privacy_screen) != -ENODEV) { > + drm_warn(&dev_priv->drm, "Error getting privacy-screen\n"); > + } > + > return true; > > out_vdd_off: >
[Bug 214425] New: [drm][amdgpu][TTM] Page pool memory never gets freed
https://bugzilla.kernel.org/show_bug.cgi?id=214425 Bug ID: 214425 Summary: [drm][amdgpu][TTM] Page pool memory never gets freed Product: Drivers Version: 2.5 Kernel Version: 5.14.3 Hardware: x86-64 OS: Linux Tree: Mainline Status: NEW Severity: normal Priority: P1 Component: Video(DRI - non Intel) Assignee: drivers_video-...@kernel-bugs.osdl.org Reporter: dou...@swarmtech.cz Regression: No Hello, while playing certain webGL games, I've noticed what appears to be a memory leak in the kernel. Further investigation revealed that after about an hour of gameplay, over 3GB of memory (half of all available RAM on my machine) will be taken by the TTM page pool. While the excessive allocation may be caused by a resource leak in the game itself (I need to investigate that further), the larger problem is that TTM never releases the memory even after I quit the game. Closing the game only moves the allocated memory from active buffer objects to idle memory pool where it'll get stuck until I reboot the system. Shutting down X server doesn't release the memory either. System specs: HP Probook 455 G7 AMD Ryzen 5 4500U CPU AMD Renoir GPU (Mesa 21.2.1, LLVM 12.0) Gentoo Linux TTM statistics before quitting the game: /sys/kernel/debug/ttm/buffer_objects: 3116 /sys/kernel/debug/ttm/page_pool: --- 0--- --- 1--- --- 2--- --- 3--- --- 4--- --- 5--- --- 6--- --- 7--- --- 8--- --- 9--- ---10--- wc :2211820 1212 uc :0000000 0000 wc 32 :0000000 0000 uc 32 :0000000 0000 total : 3410 of 939433 /sys/kernel/debug/ttm/page_pool_shrink: 2898/512 === TTM statistics after quitting the game (until reboot): /sys/kernel/debug/ttm/buffer_objects: 403 /sys/kernel/debug/ttm/page_pool: --- 0--- --- 1--- --- 2--- --- 3--- --- 4--- --- 5--- --- 6--- --- 7--- --- 8--- --- 9--- ---10--- wc : 151 134 205 255 241 790 193 416 1121 83 uc :0000000 0000 wc 32 :0000000 0000 uc 32 :0000000 0000 total : 853035 of 939433 /sys/kernel/debug/ttm/page_pool_shrink: 853034/1 -- You may reply to this email to add a comment. 
You are receiving this mail because: You are watching the assignee of the bug.
Re: [Intel-gfx] [PATCH v9 04/17] drm/i915/pxp: allocate a vcs context for pxp usage
On Wed, Sep 15, 2021 at 04:53:35PM +0300, Jani Nikula wrote: > On Fri, 10 Sep 2021, Daniele Ceraolo Spurio > wrote: > > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h > > b/drivers/gpu/drm/i915/pxp/intel_pxp.h > > new file mode 100644 > > index ..e87550fb9821 > > --- /dev/null > > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h > > @@ -0,0 +1,35 @@ > > +/* SPDX-License-Identifier: MIT */ > > +/* > > + * Copyright(c) 2020, Intel Corporation. All rights reserved. > > + */ > > + > > +#ifndef __INTEL_PXP_H__ > > +#define __INTEL_PXP_H__ > > + > > +#include "gt/intel_gt_types.h" > > I've been trying to promote the idea that we don't include headers from > headers, unless really necessary. It helps with build times by reducing > rebuilds due to changes, but more importantly, it helps with coming up > with abstractions that don't need to look at the guts of other > components. > > The above include line pulls in 67 other includes. And it has to look at > the same files a *lot* more times to know not to include them again. > > Maybe we need to start being more aggressive about hiding the > abstractions behind the interfaces and headers. Static inlines are > nothing but micro-optimizations that leak abstractions. Do we need > these? Yeap, we have a few cases where this is already happening... Should we start using the container_of more directly and avoid the a_to_b() helpers? Should we create the a_to_b() helpers only inside .c files like we have in a few other cases? In this pxp case here it looks like using the container of directly is everywhere is better... is this your recommendation? > > > +#include "intel_pxp_types.h" > > + > > +static inline struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp) > > +{ > > + return container_of(pxp, struct intel_gt, pxp); > > +} > > I think it's questionable to claim the parameter is const, when you can > do: > > const struct intel_pxp *const_pxp = something; > struct intel_pxp *pxp = &pxp_to_gt(const_pxp)->pxp; > > BR, > Jani. 
> > > + > > +static inline bool intel_pxp_is_enabled(const struct intel_pxp *pxp) > > +{ > > + return pxp->ce; > > +} > > + > > +#ifdef CONFIG_DRM_I915_PXP > > +void intel_pxp_init(struct intel_pxp *pxp); > > +void intel_pxp_fini(struct intel_pxp *pxp); > > +#else > > +static inline void intel_pxp_init(struct intel_pxp *pxp) > > +{ > > +} > > + > > +static inline void intel_pxp_fini(struct intel_pxp *pxp) > > +{ > > +} > > +#endif > > + > > +#endif /* __INTEL_PXP_H__ */ > > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h > > b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h > > new file mode 100644 > > index ..bd12c520e60a > > --- /dev/null > > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h > > @@ -0,0 +1,15 @@ > > +/* SPDX-License-Identifier: MIT */ > > +/* > > + * Copyright(c) 2020, Intel Corporation. All rights reserved. > > + */ > > + > > +#ifndef __INTEL_PXP_TYPES_H__ > > +#define __INTEL_PXP_TYPES_H__ > > + > > +struct intel_context; > > + > > +struct intel_pxp { > > + struct intel_context *ce; > > +}; > > + > > +#endif /* __INTEL_PXP_TYPES_H__ */ > > -- > Jani Nikula, Intel Open Source Graphics Center
Re: [PATCH v3 3/3] drm/bridge: parade-ps8640: Add support for AUX channel
On Wed, Sep 15, 2021 at 5:41 PM Philip Chen wrote: > As regmap_read() should always read 1 byte at a time, should I just do: > regmap_read(map, PAGE0_SWAUX_RDATA, (unsigned int*)(buf + i)) There is also regmap_bulk_read() if you need to read more data.
Re: [Intel-gfx] [PATCH 14/27] drm/i915/guc: Assign contexts in parent-child relationship consecutive guc_ids
On Wed, Sep 15, 2021 at 01:04:45PM -0700, John Harrison wrote: > On 8/20/2021 15:44, Matthew Brost wrote: > > Assign contexts in parent-child relationship consecutive guc_ids. This > > is accomplished by partitioning guc_id space between ones that need to > > be consecutive (1/16 available guc_ids) and ones that do not (15/16 of > > available guc_ids). The consecutive search is implemented via the bitmap > > API. > > > > This is a precursor to the full GuC multi-lrc implementation but aligns > > to how GuC mutli-lrc interface is defined - guc_ids must be consecutive > > when using the GuC multi-lrc interface. > > > > v2: > > (Daniel Vetter) > >- Explictly state why we assign consecutive guc_ids > > > > Signed-off-by: Matthew Brost > > --- > > drivers/gpu/drm/i915/gt/uc/intel_guc.h| 6 +- > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 107 +- > > 2 files changed, 86 insertions(+), 27 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > index 023953e77553..3f95b1b4f15c 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > @@ -61,9 +61,13 @@ struct intel_guc { > > */ > > spinlock_t lock; > > /** > > -* @guc_ids: used to allocate new guc_ids > > +* @guc_ids: used to allocate new guc_ids, single-lrc > > */ > > struct ida guc_ids; > > + /** > > +* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc > > +*/ > > + unsigned long *guc_ids_bitmap; > > /** @num_guc_ids: number of guc_ids that can be used */ > > u32 num_guc_ids; > > /** @max_guc_ids: max number of guc_ids that can be used */ > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > index 00d54bb00bfb..e9dfd43d29a0 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > @@ -125,6 +125,18 @@ guc_create_virtual(struct intel_engine_cs 
**siblings, > > unsigned int count); > > #define GUC_REQUEST_SIZE 64 /* bytes */ > > +/* > > + * We reserve 1/16 of the guc_ids for multi-lrc as these need to be > > contiguous > > + * per the GuC submission interface. A different allocation algorithm is > > used > > + * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to > The 'hence' clause seems to be attached to the wrong reason. The id space is > partition because of the contiguous vs random requirements of multi vs > single LRC, not because a different allocator is used in one partion vs the > other. > Kinda? The reason I partitioned it because to algorithms are different, we could a unified space with a single algorithm, right? It was just easier split the space and use 2 already existing data structures rather cook up an algorithm in a unified space. There isn't a requirement from the GuC that the space is partitioned, the only requirement is multi-lrc IDs are contiguous. All this being said, I think comment is correct. > > + * partition the guc_id space. We believe the number of multi-lrc contexts > > in > > + * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids > > for > > + * multi-lrc. > > + */ > > +#define NUMBER_MULTI_LRC_GUC_ID(guc) \ > > + ((guc)->submission_state.num_guc_ids / 16 > 32 ? \ > > +(guc)->submission_state.num_guc_ids / 16 : 32) > > + > > /* > >* Below is a set of functions which control the GuC scheduling state > > which > >* require a lock. 
> > @@ -1176,6 +1188,10 @@ int intel_guc_submission_init(struct intel_guc *guc) > > INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); > > intel_gt_pm_unpark_work_init(&guc->submission_state.destroyed_worker, > > destroyed_worker_func); > > + guc->submission_state.guc_ids_bitmap = > > + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); > > + if (!guc->submission_state.guc_ids_bitmap) > > + return -ENOMEM; > > return 0; > > } > > @@ -1188,6 +1204,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) > > guc_lrc_desc_pool_destroy(guc); > > guc_flush_destroyed_contexts(guc); > > i915_sched_engine_put(guc->sched_engine); > > + bitmap_free(guc->submission_state.guc_ids_bitmap); > > } > > static void queue_request(struct i915_sched_engine *sched_engine, > > @@ -1239,18 +1256,43 @@ static void guc_submit_request(struct i915_request > > *rq) > > spin_unlock_irqrestore(&sched_engine->lock, flags); > > } > > -static int new_guc_id(struct intel_guc *guc) > > +static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) > > { > > - return ida_simple_get(&guc->submission_state.guc_ids, 0, > > - guc->submission_state.num_guc_ids, G
Re: [PATCH 8/9] platform/x86: thinkpad_acpi: Register a privacy-screen device
On Mon, 2021-09-06 at 09:35 +0200, Hans de Goede wrote: > Register a privacy-screen device on laptops with a privacy-screen, > this exports the PrivacyGuard features to user-space using a > standardized vendor-agnostic sysfs interface. Note the sysfs interface > is read-only. > > Registering a privacy-screen device with the new privacy-screen class > code will also allow the GPU driver to get a handle to it and export > the privacy-screen setting as a property on the DRM connector object > for the LCD panel. This DRM connector property is news standardized Looks like a typo here --^ > interface which all user-space code should use to query and control > the privacy-screen. > > Reviewed-by: Emil Velikov > Signed-off-by: Hans de Goede > --- > Changes in v2: > - Make the new lcdshadow_set_sw_state, lcdshadow_get_hw_state and > lcdshadow_ops symbols static > - Update state and call drm_privacy_screen_call_notifier_chain() > when the state is changed by pressing the Fn + D hotkey combo > --- > drivers/platform/x86/Kconfig | 2 + > drivers/platform/x86/thinkpad_acpi.c | 91 > 2 files changed, 68 insertions(+), 25 deletions(-) > > diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig > index d12db6c316ea..ae00a27f9f95 100644 > --- a/drivers/platform/x86/Kconfig > +++ b/drivers/platform/x86/Kconfig > @@ -509,7 +509,9 @@ config THINKPAD_ACPI > depends on ACPI_VIDEO || ACPI_VIDEO = n > depends on BACKLIGHT_CLASS_DEVICE > depends on I2C > + depends on DRM > select ACPI_PLATFORM_PROFILE > + select DRM_PRIVACY_SCREEN > select HWMON > select NVRAM > select NEW_LEDS > diff --git a/drivers/platform/x86/thinkpad_acpi.c > b/drivers/platform/x86/thinkpad_acpi.c > index b8f2556c4797..044b238730ba 100644 > --- a/drivers/platform/x86/thinkpad_acpi.c > +++ b/drivers/platform/x86/thinkpad_acpi.c > @@ -73,6 +73,7 @@ > #include > #include > #include > +#include > #include "dual_accel_detect.h" > > /* ThinkPad CMOS commands */ > @@ -157,6 +158,7 @@ enum tpacpi_hkey_event_t { 
> TP_HKEY_EV_VOL_UP = 0x1015, /* Volume up or unmute */ > TP_HKEY_EV_VOL_DOWN = 0x1016, /* Volume down or unmute > */ > TP_HKEY_EV_VOL_MUTE = 0x1017, /* Mixer output mute */ > + TP_HKEY_EV_PRIVACYGUARD_TOGGLE = 0x130f, /* Toggle priv.guard > on/off */ > > /* Reasons for waking up from S3/S4 */ > TP_HKEY_EV_WKUP_S3_UNDOCK = 0x2304, /* undock requested, S3 */ > @@ -3889,6 +3891,12 @@ static bool hotkey_notify_extended_hotkey(const u32 > hkey) > { > unsigned int scancode; > > + switch (hkey) { > + case TP_HKEY_EV_PRIVACYGUARD_TOGGLE: > + tpacpi_driver_event(hkey); > + return true; > + } > + > /* Extended keycodes start at 0x300 and our offset into the map > * TP_ACPI_HOTKEYSCAN_EXTENDED_START. The calculated scancode > * will be positive, but might not be in the correct range. > @@ -9819,30 +9827,40 @@ static struct ibm_struct battery_driver_data = { > * LCD Shadow subdriver, for the Lenovo PrivacyGuard feature > */ > > +static struct drm_privacy_screen *lcdshadow_dev; > static acpi_handle lcdshadow_get_handle; > static acpi_handle lcdshadow_set_handle; > -static int lcdshadow_state; > > -static int lcdshadow_on_off(bool state) > +static int lcdshadow_set_sw_state(struct drm_privacy_screen *priv, > + enum drm_privacy_screen_status state) > { > int output; > > + if (WARN_ON(!mutex_is_locked(&priv->lock))) > + return -EIO; > + > if (!acpi_evalf(lcdshadow_set_handle, &output, NULL, "dd", > (int)state)) > return -EIO; > > - lcdshadow_state = state; > + priv->hw_state = priv->sw_state = state; > return 0; > } > > -static int lcdshadow_set(bool on) > +static void lcdshadow_get_hw_state(struct drm_privacy_screen *priv) > { > - if (lcdshadow_state < 0) > - return lcdshadow_state; > - if (lcdshadow_state == on) > - return 0; > - return lcdshadow_on_off(on); > + int output; > + > + if (!acpi_evalf(lcdshadow_get_handle, &output, NULL, "dd", 0)) > + return; > + > + priv->hw_state = priv->sw_state = output & 0x1; > } > > +static const struct drm_privacy_screen_ops lcdshadow_ops = 
{ > + .set_sw_state = lcdshadow_set_sw_state, > + .get_hw_state = lcdshadow_get_hw_state, > +}; > + > static int tpacpi_lcdshadow_init(struct ibm_init_struct *iibm) > { > acpi_status status1, status2; > @@ -9850,36 +9868,44 @@ static int tpacpi_lcdshadow_init(struct > ibm_init_struct *iibm) > > status1 = acpi_get_handle(hkey_handle, "GSSS", > &lcdshadow_get_handle); > status2 = acpi_get_handle(hkey_handle, "", > &
[PATCH] drm/rockchip: remove unused psr_list{,_lock}
Some leftover cleanup from commit 6c836d965bad ("drm/rockchip: Use the helpers for PSR"). Cc: Heiko Stuebner Cc: Sean Paul Signed-off-by: Brian Norris --- drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 3 --- drivers/gpu/drm/rockchip/rockchip_drm_drv.h | 2 -- 2 files changed, 5 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index bfba9793d238..6fa686e6c7e9 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -138,9 +138,6 @@ static int rockchip_drm_bind(struct device *dev) drm_dev->dev_private = private; - INIT_LIST_HEAD(&private->psr_list); - mutex_init(&private->psr_list_lock); - ret = rockchip_drm_init_iommu(drm_dev); if (ret) goto err_free; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h index e33c2dcd0d4b..aa0909e8edf9 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h @@ -48,8 +48,6 @@ struct rockchip_drm_private { struct iommu_domain *domain; struct mutex mm_lock; struct drm_mm mm; - struct list_head psr_list; - struct mutex psr_list_lock; }; int rockchip_drm_dma_attach_device(struct drm_device *drm_dev, -- 2.33.0.309.g3052b89438-goog
Re: [PATCH v3 3/3] drm/bridge: parade-ps8640: Add support for AUX channel
Hi On Tue, Sep 14, 2021 at 5:57 PM Stephen Boyd wrote: > > Quoting Philip Chen (2021-09-14 16:28:45) > > diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c > > b/drivers/gpu/drm/bridge/parade-ps8640.c > > index 8d3e7a147170..dc349d729f5a 100644 > > --- a/drivers/gpu/drm/bridge/parade-ps8640.c > > +++ b/drivers/gpu/drm/bridge/parade-ps8640.c > > @@ -117,6 +144,129 @@ static inline struct ps8640 *bridge_to_ps8640(struct > > drm_bridge *e) > [...] > > + case DP_AUX_I2C_WRITE: > > + case DP_AUX_I2C_READ: > > + break; > > + default: > > + return -EINVAL; > > + } > > + > > + ret = regmap_write(map, PAGE0_AUXCH_CFG3, AUXCH_CFG3_RESET); > > + if (ret) { > > + dev_err(dev, "failed to write PAGE0_AUXCH_CFG3: %d\n", ret); > > Can we use DRM_DEV_ERROR()? Sure. > > > + return ret; > > + } > > + > > + /* Assume it's good */ > > + msg->reply = 0; > > + > > + addr_len[0] = msg->address & 0xff; > > + addr_len[1] = (msg->address >> 8) & 0xff; > > + addr_len[2] = ((msg->request << 4) & SWAUX_CMD_MASK) | > > + ((msg->address >> 16) & SWAUX_ADDR_19_16_MASK); > > It really feels like this ought to be possible with some sort of > cpu_to_le32() API. We're shoving msg->address into 3 bytes and then > adding in the request and some length. So we could do something like: > > u32 addr_len; > > addr_len = FIELD_PREP(SWAUX_ADDR_MASK, msg->address); > addr_len |= FIELD_PREP(SWAUX_CMD_MASK, msg->request); > if (len) > addr_len |= FIELD_PREP(LEN_MASK, len - 1); > else > addr_len |= FIELD_PREP(LEN_MASK, SWAUX_NO_PAYLOAD ); > > cpu_to_le32s(&addr_len); > > regmap_bulk_write(map, PAGE0_SWAUX_ADDR_7_0, &addr_len, > sizeof(addr_len)); > Yes, thanks for the advice. Will add this change to v4. > > + addr_len[3] = (len == 0) ? 
SWAUX_NO_PAYLOAD : > > + ((len - 1) & SWAUX_LENGTH_MASK); > > + > > + regmap_bulk_write(map, PAGE0_SWAUX_ADDR_7_0, addr_len, > > + ARRAY_SIZE(addr_len)); > > + > > + if (len && (request == DP_AUX_NATIVE_WRITE || > > + request == DP_AUX_I2C_WRITE)) { > > + /* Write to the internal FIFO buffer */ > > + for (i = 0; i < len; i++) { > > + ret = regmap_write(map, PAGE0_SWAUX_WDATA, buf[i]); > > + if (ret) { > > + dev_err(dev, "failed to write WDATA: %d\n", > > DRM_DEV_ERROR? Sure. > > > + ret); > > + return ret; > > + } > > + } > > + } > > + > > + regmap_write(map, PAGE0_SWAUX_CTRL, SWAUX_SEND); > > + > > + /* Zero delay loop because i2c transactions are slow already */ > > + regmap_read_poll_timeout(map, PAGE0_SWAUX_CTRL, data, > > +!(data & SWAUX_SEND), 0, 50 * 1000); > > + > > + regmap_read(map, PAGE0_SWAUX_STATUS, &data); > > + if (ret) { > > + dev_err(dev, "failed to read PAGE0_SWAUX_STATUS: %d\n", > > ret); > > DRM_DEV_ERROR? Sure. > > > + return ret; > > + } > > + > > + switch (data & SWAUX_STATUS_MASK) { > > + /* Ignore the DEFER cases as they are already handled in hardware */ > > + case SWAUX_STATUS_NACK: > > + case SWAUX_STATUS_I2C_NACK: > > + /* > > +* The programming guide is not clear about whether a I2C > > NACK > > +* would trigger SWAUX_STATUS_NACK or > > SWAUX_STATUS_I2C_NACK. So > > +* we handle both cases together. > > +*/ > > + if (is_native_aux) > > + msg->reply |= DP_AUX_NATIVE_REPLY_NACK; > > + else > > + msg->reply |= DP_AUX_I2C_REPLY_NACK; > > + > > + len = data & SWAUX_M_MASK; > > + return len; > > Why no 'return data & SWAUX_M_MASK;' and skip the assignment? I want to make it clear that we are returning the number of bytes that we have read/written instead of some error code. If you think it's not super helpful, I can just return data & SWAUX_M_MASK. > > > + case SWAUX_STATUS_ACKM: > > Move this up and add fallthrough? Thanks. Will add this change to v4. 
> > > + len = data & SWAUX_M_MASK; > > + return len; > > + case SWAUX_STATUS_INVALID: > > + return -EOPNOTSUPP; > > + case SWAUX_STATUS_TIMEOUT: > > + return -ETIMEDOUT; > > + } > > + > > + if (len && (request == DP_AUX_NATIVE_READ || > > + request == DP_AUX_I2C_READ)) { > > + /* Read from the internal FIFO buffer */ > > + for (i = 0; i < len; i++) { > > + ret = regmap_read(map, PAGE0_S
[PATCH v2 13/13] drm/msm: Implement HDCP 1.x using the new drm HDCP helpers
From: Sean Paul This patch adds HDCP 1.x support to msm DP connectors using the new HDCP helpers. Cc: Stephen Boyd Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-15-s...@poorly.run #v1 Changes in v2: -Squash [1] into this patch with the following changes (Stephen) -Update the sc7180 dtsi file -Remove resource names and just use index (Stephen) [1] https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-14-s...@poorly.run --- arch/arm64/boot/dts/qcom/sc7180.dtsi | 4 +- drivers/gpu/drm/msm/Makefile | 1 + drivers/gpu/drm/msm/dp/dp_debug.c| 49 ++- drivers/gpu/drm/msm/dp/dp_debug.h| 6 +- drivers/gpu/drm/msm/dp/dp_display.c | 45 ++- drivers/gpu/drm/msm/dp/dp_display.h | 5 + drivers/gpu/drm/msm/dp/dp_drm.c | 68 - drivers/gpu/drm/msm/dp/dp_drm.h | 5 + drivers/gpu/drm/msm/dp/dp_hdcp.c | 433 +++ drivers/gpu/drm/msm/dp/dp_hdcp.h | 27 ++ drivers/gpu/drm/msm/dp/dp_parser.c | 22 +- drivers/gpu/drm/msm/dp/dp_parser.h | 4 + drivers/gpu/drm/msm/dp/dp_reg.h | 44 ++- drivers/gpu/drm/msm/msm_atomic.c | 15 + 14 files changed, 709 insertions(+), 19 deletions(-) create mode 100644 drivers/gpu/drm/msm/dp/dp_hdcp.c create mode 100644 drivers/gpu/drm/msm/dp/dp_hdcp.h diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index c8921e2d6480..3ae6fc7a2c01 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -3088,7 +3088,9 @@ mdss_dp: displayport-controller@ae9 { compatible = "qcom,sc7180-dp"; status = "disabled"; - reg = <0 0x0ae9 0 0x1400>; + reg = <0 0x0ae9 0 0x1400>, + <0 0x0aed1000 0 0x174>, + <0 0x0aee1000 0 0x2c>; interrupt-parent = <&mdss>; interrupts = <12>; diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 904535eda0c4..98731fd262d6 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -109,6 +109,7 @@ msm-$(CONFIG_DRM_MSM_DP)+= dp/dp_aux.o \ dp/dp_ctrl.o \ dp/dp_display.o \ dp/dp_drm.o \ + 
dp/dp_hdcp.o \ dp/dp_hpd.o \ dp/dp_link.o \ dp/dp_panel.o \ diff --git a/drivers/gpu/drm/msm/dp/dp_debug.c b/drivers/gpu/drm/msm/dp/dp_debug.c index 2f6247e80e9d..de16fca8782a 100644 --- a/drivers/gpu/drm/msm/dp/dp_debug.c +++ b/drivers/gpu/drm/msm/dp/dp_debug.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "dp_parser.h" #include "dp_catalog.h" @@ -15,6 +16,7 @@ #include "dp_ctrl.h" #include "dp_debug.h" #include "dp_display.h" +#include "dp_hdcp.h" #define DEBUG_NAME "msm_dp" @@ -24,6 +26,7 @@ struct dp_debug_private { struct dp_usbpd *usbpd; struct dp_link *link; struct dp_panel *panel; + struct dp_hdcp *hdcp; struct drm_connector **connector; struct device *dev; struct drm_device *drm_dev; @@ -349,6 +352,38 @@ static int dp_test_active_open(struct inode *inode, inode->i_private); } +static ssize_t dp_hdcp_key_write(struct file *file, const char __user *ubuf, +size_t len, loff_t *offp) +{ + char *input_buffer; + int ret = 0; + struct dp_debug_private *debug = file->private_data; + struct drm_device *dev; + + dev = debug->drm_dev; + + if (len != (DRM_HDCP_KSV_LEN + DP_HDCP_NUM_KEYS * DP_HDCP_KEY_LEN)) + return -EINVAL; + + if (!debug->hdcp) + return -ENOENT; + + input_buffer = memdup_user_nul(ubuf, len); + if (IS_ERR(input_buffer)) + return PTR_ERR(input_buffer); + + ret = dp_hdcp_ingest_key(debug->hdcp, input_buffer, len); + + kfree(input_buffer); + if (ret < 0) { + DRM_ERROR("Could not ingest HDCP key, ret=%d\n", ret); + return ret; + } + + *offp += len; + return len; +} + static const struct file_operations dp_debug_fops = { .open = simple_open, .read = dp_debug_read_info, @@ -363,6 +398,12 @@ static const struct file_operations test_active_fops = { .write = dp_test_active_write }; +static const struct file_operations dp_hdcp_key_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = dp_hdcp_key_write, +}; + static int dp_debug_init(struct dp_debug *dp_debug, struct drm_minor *minor) { int rc = 0; @@ -384,6 +425,10 @@ static int 
dp_debug_init(struct dp_debug *dp_debug, struct drm_minor *minor) minor->debugfs_root, debug, &dp_test_type_fops); + debugfs_create_file("msm
[PATCH v2 12/13] dt-bindings: msm/dp: Add bindings for HDCP registers
From: Sean Paul This patch adds the bindings for the MSM DisplayPort HDCP registers which are required to write the HDCP key into the display controller as well as the registers to enable HDCP authentication/key exchange/encryption. Cc: Rob Herring Cc: Stephen Boyd Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-13-s...@poorly.run #v1 Changes in v2: -Drop register range names (Stephen) -Fix yaml errors (Rob) --- .../devicetree/bindings/display/msm/dp-controller.yaml | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml index 64d8d9e5e47a..80a55e9ff532 100644 --- a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml +++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml @@ -19,7 +19,7 @@ properties: - qcom,sc7180-dp reg: -maxItems: 1 +maxItems: 3 interrupts: maxItems: 1 @@ -99,8 +99,9 @@ examples: #include displayport-controller@ae9 { -compatible = "qcom,sc7180-dp"; -reg = <0xae9 0x1400>; +reg = <0 0x0ae9 0 0x1400>, + <0 0x0aed1000 0 0x174>, + <0 0x0aee1000 0 0x2c>; interrupt-parent = <&mdss>; interrupts = <12>; clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, -- Sean Paul, Software Engineer, Google / Chromium OS
[PATCH v2 11/13] drm/msm/dp: Re-order dp_audio_put in deinit_sub_modules
From: Sean Paul Audio is initialized last, so it should be de-initialized first to match the order in dp_init_sub_modules(). Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-12-s...@poorly.run #v1 Changes in v2: -None --- drivers/gpu/drm/msm/dp/dp_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index fbe4c2cd52a3..19946024e235 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -714,9 +714,9 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) static void dp_display_deinit_sub_modules(struct dp_display_private *dp) { dp_debug_put(dp->debug); + dp_audio_put(dp->audio); dp_panel_put(dp->panel); dp_aux_put(dp->aux); - dp_audio_put(dp->audio); } static int dp_init_sub_modules(struct dp_display_private *dp) -- Sean Paul, Software Engineer, Google / Chromium OS
[PATCH v2 10/13] drm/msm/dpu: Remove encoder->enable() hack
From: Sean Paul encoder->commit() was being misused because there were some global resources which needed to be tweaked in encoder->enable() which were not accessible in dpu_encoder.c. That is no longer true and the redirect serves no purpose any longer. So remove the indirection. Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-11-s...@poorly.run #v1 Changes in v2: -None --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 5 + drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 22 - drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h | 2 -- drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h | 4 4 files changed, 1 insertion(+), 32 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 984f8a59cb73..ddc542a0d41f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -2122,11 +2122,8 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t) static const struct drm_encoder_helper_funcs dpu_encoder_helper_funcs = { .mode_set = dpu_encoder_virt_mode_set, .disable = dpu_encoder_virt_disable, - .enable = dpu_kms_encoder_enable, + .enable = dpu_encoder_virt_enable, .atomic_check = dpu_encoder_virt_atomic_check, - - /* This is called by dpu_kms_encoder_enable */ - .commit = dpu_encoder_virt_enable, }; static const struct drm_encoder_funcs dpu_encoder_funcs = { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index fb0d9f781c66..4a0b55d145ad 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -381,28 +381,6 @@ static void dpu_kms_flush_commit(struct msm_kms *kms, unsigned crtc_mask) } } -/* - * Override the encoder enable since we need to setup the inline rotator and do - * some crtc magic before enabling any bridge that might be present. 
- */ -void dpu_kms_encoder_enable(struct drm_encoder *encoder) -{ - const struct drm_encoder_helper_funcs *funcs = encoder->helper_private; - struct drm_device *dev = encoder->dev; - struct drm_crtc *crtc; - - /* Forward this enable call to the commit hook */ - if (funcs && funcs->commit) - funcs->commit(encoder); - - drm_for_each_crtc(crtc, dev) { - if (!(crtc->state->encoder_mask & drm_encoder_mask(encoder))) - continue; - - trace_dpu_kms_enc_enable(DRMID(crtc)); - } -} - static void dpu_kms_complete_commit(struct msm_kms *kms, unsigned crtc_mask) { struct dpu_kms *dpu_kms = to_dpu_kms(kms); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index 323a6bce9e64..f1ebb60dacab 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -248,8 +248,6 @@ void *dpu_debugfs_get_root(struct dpu_kms *dpu_kms); int dpu_enable_vblank(struct msm_kms *kms, struct drm_crtc *crtc); void dpu_disable_vblank(struct msm_kms *kms, struct drm_crtc *crtc); -void dpu_kms_encoder_enable(struct drm_encoder *encoder); - /** * dpu_kms_get_clk_rate() - get the clock rate * @dpu_kms: pointer to dpu_kms structure diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h index 37bba57675a8..54d74341e690 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h @@ -266,10 +266,6 @@ DEFINE_EVENT(dpu_drm_obj_template, dpu_crtc_complete_commit, TP_PROTO(uint32_t drm_id), TP_ARGS(drm_id) ); -DEFINE_EVENT(dpu_drm_obj_template, dpu_kms_enc_enable, - TP_PROTO(uint32_t drm_id), - TP_ARGS(drm_id) -); DEFINE_EVENT(dpu_drm_obj_template, dpu_kms_commit, TP_PROTO(uint32_t drm_id), TP_ARGS(drm_id) -- Sean Paul, Software Engineer, Google / Chromium OS
[PATCH v2 09/13] drm/msm/dpu: Remove useless checks in dpu_encoder
From: Sean Paul A couple more useless checks to remove in dpu_encoder. Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-10-s...@poorly.run #v1 Changes in v2: -None --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 12 1 file changed, 12 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 0e9d3fa1544b..984f8a59cb73 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1153,10 +1153,6 @@ static void dpu_encoder_virt_enable(struct drm_encoder *drm_enc) struct msm_drm_private *priv; struct drm_display_mode *cur_mode = NULL; - if (!drm_enc) { - DPU_ERROR("invalid encoder\n"); - return; - } dpu_enc = to_dpu_encoder_virt(drm_enc); mutex_lock(&dpu_enc->enc_lock); @@ -1203,14 +1199,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc) struct msm_drm_private *priv; int i = 0; - if (!drm_enc) { - DPU_ERROR("invalid encoder\n"); - return; - } else if (!drm_enc->dev) { - DPU_ERROR("invalid dev\n"); - return; - } - dpu_enc = to_dpu_encoder_virt(drm_enc); DPU_DEBUG_ENC(dpu_enc, "\n"); -- Sean Paul, Software Engineer, Google / Chromium OS
[PATCH v2 08/13] drm/msm/dpu_kms: Re-order dpu includes
From: Sean Paul Make includes alphabetical in dpu_kms.c Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-9-s...@poorly.run #v1 Changes in v2: -None --- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index ae48f41821cf..fb0d9f781c66 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -21,14 +21,14 @@ #include "msm_gem.h" #include "disp/msm_disp_snapshot.h" -#include "dpu_kms.h" #include "dpu_core_irq.h" +#include "dpu_crtc.h" +#include "dpu_encoder.h" #include "dpu_formats.h" #include "dpu_hw_vbif.h" -#include "dpu_vbif.h" -#include "dpu_encoder.h" +#include "dpu_kms.h" #include "dpu_plane.h" -#include "dpu_crtc.h" +#include "dpu_vbif.h" #define CREATE_TRACE_POINTS #include "dpu_trace.h" -- Sean Paul, Software Engineer, Google / Chromium OS
[PATCH v2 07/13] drm/i915/hdcp: Use HDCP helpers for i915
From: Sean Paul Now that all of the HDCP 1.x logic has been migrated to the central HDCP helpers, use it in the i915 driver. The majority of the driver code for HDCP 1.x will live in intel_hdcp.c, however there are a few helper hooks which are connector-specific and need to be partially or fully implemented in the intel_dp_hdcp.c or intel_hdmi.c. We'll leave most of the HDCP 2.x code alone since we don't have another implementation of HDCP 2.x to use as reference for what should and should not live in the drm helpers. The helper will call the overly general enable/disable/is_capable HDCP 2.x callbacks and leave the interesting stuff for the driver. Once we have another HDCP 2.x implementation, we should do a similar migration. Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-8-s...@poorly.run #v1 Changes in v2: -Fix mst helper function pointer reported by 0-day --- drivers/gpu/drm/i915/display/intel_ddi.c | 29 +- .../drm/i915/display/intel_display_debugfs.c | 6 +- .../drm/i915/display/intel_display_types.h| 58 +- drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 345 +++ drivers/gpu/drm/i915/display/intel_dp_mst.c | 17 +- drivers/gpu/drm/i915/display/intel_hdcp.c | 935 +++--- drivers/gpu/drm/i915/display/intel_hdcp.h | 30 +- drivers/gpu/drm/i915/display/intel_hdmi.c | 256 ++--- 8 files changed, 417 insertions(+), 1259 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 23ef291f7b30..8bdf41593174 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -26,6 +26,7 @@ */ #include +#include #include "i915_drv.h" #include "intel_audio.h" @@ -3131,6 +3132,9 @@ static void intel_enable_ddi(struct intel_atomic_state *state, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); + struct 
intel_digital_port *dig_port = enc_to_dig_port(encoder); + drm_WARN_ON(state->base.dev, crtc_state->has_pch_encoder); if (!crtc_state->bigjoiner_slave) @@ -3147,12 +3151,10 @@ static void intel_enable_ddi(struct intel_atomic_state *state, else intel_enable_ddi_dp(state, encoder, crtc_state, conn_state); - /* Enable hdcp if it's desired */ - if (conn_state->content_protection == - DRM_MODE_CONTENT_PROTECTION_DESIRED) - intel_hdcp_enable(to_intel_connector(conn_state->connector), - crtc_state, - (u8)conn_state->hdcp_content_type); + if (connector->hdcp_helper_data) + drm_hdcp_helper_atomic_commit(connector->hdcp_helper_data, + &state->base, + &dig_port->hdcp_mutex); } static void intel_disable_ddi_dp(struct intel_atomic_state *state, @@ -3212,7 +3214,13 @@ static void intel_disable_ddi(struct intel_atomic_state *state, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - intel_hdcp_disable(to_intel_connector(old_conn_state->connector)); + struct intel_connector *connector = to_intel_connector(old_conn_state->connector); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + + if (connector->hdcp_helper_data) + drm_hdcp_helper_atomic_commit(connector->hdcp_helper_data, + &state->base, + &dig_port->hdcp_mutex); if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) intel_disable_ddi_hdmi(state, encoder, old_crtc_state, @@ -3243,13 +3251,18 @@ void intel_ddi_update_pipe(struct intel_atomic_state *state, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { + struct intel_connector *connector = to_intel_connector(conn_state->connector); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) && !intel_encoder_is_mst(encoder)) intel_ddi_update_pipe_dp(state, encoder, crtc_state, conn_state); - intel_hdcp_update_pipe(state, encoder, crtc_state, conn_state); + if (connector->hdcp_helper_data) + 
drm_hdcp_helper_atomic_commit(connector->hdcp_helper_data, + &state->base, + &dig_port->hdcp_mutex); } static void diff --git a/drivers/gpu/drm/i915/display/intel_d
[PATCH v2 06/13] drm/i915/hdcp: Retain hdcp_capable return codes
From: Sean Paul The shim functions return error codes, but they are discarded in intel_hdcp.c. This patch plumbs the return codes through so they are properly handled. Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-7-s...@poorly.run #v1 Changes in v2: -None --- .../drm/i915/display/intel_display_debugfs.c | 9 +++- drivers/gpu/drm/i915/display/intel_hdcp.c | 51 ++- drivers/gpu/drm/i915/display/intel_hdcp.h | 4 +- 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 68f4ba8c46e7..5ffd31e9908f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -644,6 +644,7 @@ static void intel_panel_info(struct seq_file *m, struct intel_panel *panel) static void intel_hdcp_info(struct seq_file *m, struct intel_connector *intel_connector) { + int ret; bool hdcp_cap, hdcp2_cap; if (!intel_connector->hdcp.shim) { @@ -651,8 +652,12 @@ static void intel_hdcp_info(struct seq_file *m, goto out; } - hdcp_cap = intel_hdcp_capable(intel_connector); - hdcp2_cap = intel_hdcp2_capable(intel_connector); + ret = intel_hdcp_capable(intel_connector, &hdcp_cap); + if (ret) + hdcp_cap = false; + ret = intel_hdcp2_capable(intel_connector, &hdcp2_cap); + if (ret) + hdcp2_cap = false; if (hdcp_cap) seq_puts(m, "HDCP1.4 "); diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index af166baf8c71..59275919e7b9 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -153,50 +153,49 @@ int intel_hdcp_read_valid_bksv(struct intel_digital_port *dig_port, } /* Is HDCP1.4 capable on Platform and Sink */ -bool intel_hdcp_capable(struct intel_connector *connector) +int intel_hdcp_capable(struct intel_connector *connector, bool *capable) { struct intel_digital_port *dig_port 
= intel_attached_dig_port(connector); const struct intel_hdcp_shim *shim = connector->hdcp.shim; - bool capable = false; u8 bksv[5]; + *capable = false; + if (!shim) - return capable; + return 0; - if (shim->hdcp_capable) { - shim->hdcp_capable(dig_port, &capable); - } else { - if (!intel_hdcp_read_valid_bksv(dig_port, shim, bksv)) - capable = true; - } + if (shim->hdcp_capable) + return shim->hdcp_capable(dig_port, capable); + + if (!intel_hdcp_read_valid_bksv(dig_port, shim, bksv)) + *capable = true; - return capable; + return 0; } /* Is HDCP2.2 capable on Platform and Sink */ -bool intel_hdcp2_capable(struct intel_connector *connector) +int intel_hdcp2_capable(struct intel_connector *connector, bool *capable) { struct intel_digital_port *dig_port = intel_attached_dig_port(connector); struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; - bool capable = false; + + *capable = false; /* I915 support for HDCP2.2 */ if (!hdcp->hdcp2_supported) - return false; + return 0; /* MEI interface is solid */ mutex_lock(&dev_priv->hdcp_comp_mutex); if (!dev_priv->hdcp_comp_added || !dev_priv->hdcp_master) { mutex_unlock(&dev_priv->hdcp_comp_mutex); - return false; + return 0; } mutex_unlock(&dev_priv->hdcp_comp_mutex); /* Sink's capability for HDCP2.2 */ - hdcp->shim->hdcp_2_2_capable(dig_port, &capable); - - return capable; + return hdcp->shim->hdcp_2_2_capable(dig_port, capable); } static bool intel_hdcp_in_use(struct drm_i915_private *dev_priv, @@ -2332,6 +2331,7 @@ int intel_hdcp_enable(struct intel_connector *connector, struct intel_digital_port *dig_port = intel_attached_dig_port(connector); struct intel_hdcp *hdcp = &connector->hdcp; unsigned long check_link_interval = DRM_HDCP_CHECK_PERIOD_MS; + bool capable; int ret = -EINVAL; if (!hdcp->shim) @@ -2350,21 +2350,27 @@ int intel_hdcp_enable(struct intel_connector *connector, * Considering that HDCP2.2 is more secure than HDCP1.4, If the setup * is capable of 
HDCP2.2, it is preferred to use HDCP2.2. */ - if (intel_hdcp2_capable(connector)) { + ret = intel_hdcp2_capable(connector, &capable); + if (capable) { ret = _intel_hdcp2_enable(connector); - if (!ret) + if (!ret) { chec
[PATCH v2 05/13] drm/i915/hdcp: Consolidate HDCP setup/state cache
From: Sean Paul Stick all of the setup for HDCP into a dedicated function. No functional change, but this will facilitate moving HDCP logic into helpers. Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-6-s...@poorly.run #v1 Changes in v2: -None --- drivers/gpu/drm/i915/display/intel_hdcp.c | 52 +++ 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index feebafead046..af166baf8c71 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -2167,6 +2167,37 @@ static enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder) } } +static int +_intel_hdcp_setup(struct intel_connector *connector, + const struct intel_crtc_state *pipe_config, u8 content_type) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct intel_hdcp *hdcp = &connector->hdcp; + int ret = 0; + + if (!connector->encoder) { + drm_err(&dev_priv->drm, "[%s:%d] encoder is not initialized\n", + connector->base.name, connector->base.base.id); + return -ENODEV; + } + + hdcp->content_type = content_type; + + if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)) { + hdcp->cpu_transcoder = pipe_config->mst_master_transcoder; + hdcp->stream_transcoder = pipe_config->cpu_transcoder; + } else { + hdcp->cpu_transcoder = pipe_config->cpu_transcoder; + hdcp->stream_transcoder = INVALID_TRANSCODER; + } + + if (DISPLAY_VER(dev_priv) >= 12) + dig_port->hdcp_port_data.fw_tc = intel_get_mei_fw_tc(hdcp->cpu_transcoder); + + return ret; +} + static int initialize_hdcp_port_data(struct intel_connector *connector, struct intel_digital_port *dig_port, const struct intel_hdcp_shim *shim) @@ -2306,28 +2337,14 @@ int intel_hdcp_enable(struct intel_connector *connector, if (!hdcp->shim) return -ENOENT; - if 
(!connector->encoder) { - drm_err(&dev_priv->drm, "[%s:%d] encoder is not initialized\n", - connector->base.name, connector->base.base.id); - return -ENODEV; - } - mutex_lock(&hdcp->mutex); mutex_lock(&dig_port->hdcp_mutex); drm_WARN_ON(&dev_priv->drm, hdcp->value == DRM_MODE_CONTENT_PROTECTION_ENABLED); - hdcp->content_type = content_type; - - if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)) { - hdcp->cpu_transcoder = pipe_config->mst_master_transcoder; - hdcp->stream_transcoder = pipe_config->cpu_transcoder; - } else { - hdcp->cpu_transcoder = pipe_config->cpu_transcoder; - hdcp->stream_transcoder = INVALID_TRANSCODER; - } - if (DISPLAY_VER(dev_priv) >= 12) - dig_port->hdcp_port_data.fw_tc = intel_get_mei_fw_tc(hdcp->cpu_transcoder); + ret = _intel_hdcp_setup(connector, pipe_config, content_type); + if (ret) + goto out; /* * Considering that HDCP2.2 is more secure than HDCP1.4, If the setup @@ -2355,6 +2372,7 @@ int intel_hdcp_enable(struct intel_connector *connector, true); } +out: mutex_unlock(&dig_port->hdcp_mutex); mutex_unlock(&hdcp->mutex); return ret; -- Sean Paul, Software Engineer, Google / Chromium OS
[PATCH v2 04/13] drm/hdcp: Expand HDCP helper library for enable/disable/check
From: Sean Paul This patch expands upon the HDCP helper library to manage HDCP enable, disable, and check. Prior to this patch, the majority of the state management and sink interaction was tucked inside the Intel driver with the understanding that once a new platform supported HDCP we could make good decisions about what should be centralized. With the addition of HDCP support for Qualcomm, it's time to migrate the protocol-specific bits of HDCP authentication, key exchange, and link checks to the HDCP helper. In terms of functionality, this migration is 1:1 with the Intel driver, however things are laid out a bit differently than with intel_hdcp.c, which is why this is a separate patch from the i915 transition to the helper. On i915, the "shim" vtable is used to account for HDMI vs. DP vs. DP-MST differences whereas the helper library uses a LUT to account for the register offsets and a remote read function to route the messages. On i915, storing the sink information in the source is done inline whereas now we use the new drm_hdcp_helper_funcs vtable to store and fetch information to/from source hw. Finally, instead of calling enable/disable directly from the driver, we'll leave that decision to the helper by calling drm_hdcp_helper_atomic_commit() from the driver. All told, this will centralize the protocol and state handling in the helper, ensuring we collect all of our bugs^Wlogic in one place. 
Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-5-s...@poorly.run #v1 Changes in v2: -Fixed set-but-unused variable identified by 0-day --- drivers/gpu/drm/drm_hdcp.c | 1103 include/drm/drm_hdcp.h | 191 +++ 2 files changed, 1294 insertions(+) diff --git a/drivers/gpu/drm/drm_hdcp.c b/drivers/gpu/drm/drm_hdcp.c index 742313ce8f6f..47c6e6923a76 100644 --- a/drivers/gpu/drm/drm_hdcp.c +++ b/drivers/gpu/drm/drm_hdcp.c @@ -6,15 +6,20 @@ * Ramalingam C */ +#include #include #include #include +#include +#include #include #include #include +#include #include #include +#include #include #include #include @@ -513,3 +518,1101 @@ bool drm_hdcp_atomic_check(struct drm_connector *connector, return old_hdcp != new_hdcp; } EXPORT_SYMBOL(drm_hdcp_atomic_check); + +struct drm_hdcp_helper_data { + struct mutex mutex; + struct mutex *driver_mutex; + + struct drm_connector *connector; + const struct drm_hdcp_helper_funcs *funcs; + + u64 value; + unsigned int enabled_type; + + struct delayed_work check_work; + struct work_struct prop_work; + + struct drm_dp_aux *aux; + const struct drm_hdcp_hdcp1_receiver_reg_lut *hdcp1_lut; +}; + +struct drm_hdcp_hdcp1_receiver_reg_lut { + unsigned int bksv; + unsigned int ri; + unsigned int aksv; + unsigned int an; + unsigned int ainfo; + unsigned int v[5]; + unsigned int bcaps; + unsigned int bcaps_mask_repeater_present; + unsigned int bstatus; +}; + +static const struct drm_hdcp_hdcp1_receiver_reg_lut drm_hdcp_hdcp1_ddc_lut = { + .bksv = DRM_HDCP_DDC_BKSV, + .ri = DRM_HDCP_DDC_RI_PRIME, + .aksv = DRM_HDCP_DDC_AKSV, + .an = DRM_HDCP_DDC_AN, + .ainfo = DRM_HDCP_DDC_AINFO, + .v = { DRM_HDCP_DDC_V_PRIME(0), DRM_HDCP_DDC_V_PRIME(1), + DRM_HDCP_DDC_V_PRIME(2), DRM_HDCP_DDC_V_PRIME(3), + DRM_HDCP_DDC_V_PRIME(4) }, + .bcaps = DRM_HDCP_DDC_BCAPS, + .bcaps_mask_repeater_present = DRM_HDCP_DDC_BCAPS_REPEATER_PRESENT, + .bstatus = DRM_HDCP_DDC_BSTATUS, +}; + +static const struct drm_hdcp_hdcp1_receiver_reg_lut 
drm_hdcp_hdcp1_dpcd_lut = { + .bksv = DP_AUX_HDCP_BKSV, + .ri = DP_AUX_HDCP_RI_PRIME, + .aksv = DP_AUX_HDCP_AKSV, + .an = DP_AUX_HDCP_AN, + .ainfo = DP_AUX_HDCP_AINFO, + .v = { DP_AUX_HDCP_V_PRIME(0), DP_AUX_HDCP_V_PRIME(1), + DP_AUX_HDCP_V_PRIME(2), DP_AUX_HDCP_V_PRIME(3), + DP_AUX_HDCP_V_PRIME(4) }, + .bcaps = DP_AUX_HDCP_BCAPS, + .bcaps_mask_repeater_present = DP_BCAPS_REPEATER_PRESENT, + + /* +* For some reason the HDMI and DP HDCP specs call this register +* definition by different names. In the HDMI spec, it's called BSTATUS, +* but in DP it's called BINFO. +*/ + .bstatus = DP_AUX_HDCP_BINFO, +}; + +static int drm_hdcp_remote_ddc_read(struct i2c_adapter *i2c, + unsigned int offset, u8 *value, size_t len) +{ + int ret; + u8 start = offset & 0xff; + struct i2c_msg msgs[] = { + { + .addr = DRM_HDCP_DDC_ADDR, + .flags = 0, + .len = 1, + .buf = &start, + }, + { + .addr = DRM_HDCP_DDC_ADDR, + .flags = I2C_M_RD, +
[PATCH v2 03/13] drm/hdcp: Update property value on content type and user changes
From: Sean Paul This patch updates the connector's property value in 2 cases which were previously missed: 1- Content type changes. The value should revert back to DESIRED from ENABLED in case the driver must re-authenticate the link due to the new content type. 2- Userspace sets value to DESIRED while ENABLED. In this case, the value should be reset immediately to ENABLED since the link is actively being encrypted. To accommodate these changes, I've split up the conditionals to make things a bit more clear (as much as one can with this mess of state). Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-4-s...@poorly.run #v1 Changes in v2: -None --- drivers/gpu/drm/drm_hdcp.c | 26 +- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/drm_hdcp.c b/drivers/gpu/drm/drm_hdcp.c index dd8fa91c51d6..742313ce8f6f 100644 --- a/drivers/gpu/drm/drm_hdcp.c +++ b/drivers/gpu/drm/drm_hdcp.c @@ -487,21 +487,29 @@ bool drm_hdcp_atomic_check(struct drm_connector *connector, return true; /* -* Nothing to do if content type is unchanged and one of: -* - state didn't change +* Content type changes require an HDCP disable/enable cycle. 
+*/ + if (new_conn_state->hdcp_content_type != old_conn_state->hdcp_content_type) { + new_conn_state->content_protection = + DRM_MODE_CONTENT_PROTECTION_DESIRED; + return true; + } + + /* +* Ignore meaningless state changes: * - HDCP was activated since the last commit -* - attempting to set to desired while already enabled +* - Attempting to set to desired while already enabled */ - if (old_hdcp == new_hdcp || - (old_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED && + if ((old_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED && new_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED) || (old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED && new_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED)) { - if (old_conn_state->hdcp_content_type == - new_conn_state->hdcp_content_type) - return false; + new_conn_state->content_protection = + DRM_MODE_CONTENT_PROTECTION_ENABLED; +return false; } - return true; + /* Finally, if state changes, we need action */ + return old_hdcp != new_hdcp; } EXPORT_SYMBOL(drm_hdcp_atomic_check); -- Sean Paul, Software Engineer, Google / Chromium OS
[PATCH v2 02/13] drm/hdcp: Avoid changing crtc state in hdcp atomic check
From: Sean Paul Instead of forcing a modeset in the hdcp atomic check, simply return true if the content protection value is changing and let the driver decide whether a modeset is required or not. Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-3-s...@poorly.run #v1 Changes in v2: -None --- drivers/gpu/drm/drm_hdcp.c | 33 +++-- drivers/gpu/drm/i915/display/intel_atomic.c | 5 ++-- include/drm/drm_hdcp.h | 2 +- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/drm_hdcp.c b/drivers/gpu/drm/drm_hdcp.c index 522326b03e66..dd8fa91c51d6 100644 --- a/drivers/gpu/drm/drm_hdcp.c +++ b/drivers/gpu/drm/drm_hdcp.c @@ -430,11 +430,14 @@ EXPORT_SYMBOL(drm_hdcp_update_content_protection); * @connector: drm_connector on which content protection state needs an update * * This function can be used by display drivers to perform an atomic check on the - * hdcp state elements. If hdcp state has changed, this function will set - * mode_changed on the crtc driving the connector so it can update its hardware - * to match the hdcp state. + * hdcp state elements. If hdcp state has changed in a manner which requires the + * driver to enable or disable content protection, this function will return + * true. 
+ * + * Returns: + * true if the driver must enable/disable hdcp, false otherwise */ -void drm_hdcp_atomic_check(struct drm_connector *connector, +bool drm_hdcp_atomic_check(struct drm_connector *connector, struct drm_atomic_state *state) { struct drm_connector_state *new_conn_state, *old_conn_state; @@ -452,10 +455,12 @@ void drm_hdcp_atomic_check(struct drm_connector *connector, * If the connector is being disabled with CP enabled, mark it * desired so it's re-enabled when the connector is brought back */ - if (old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED) + if (old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED) { new_conn_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED; - return; + return true; + } + return false; } new_crtc_state = drm_atomic_get_new_crtc_state(state, @@ -467,9 +472,19 @@ void drm_hdcp_atomic_check(struct drm_connector *connector, */ if (drm_atomic_crtc_needs_modeset(new_crtc_state) && (old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED && -new_hdcp != DRM_MODE_CONTENT_PROTECTION_UNDESIRED)) +new_hdcp != DRM_MODE_CONTENT_PROTECTION_UNDESIRED)) { new_conn_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED; + return true; + } + + /* +* Coming back from disable or changing CRTC with DESIRED state requires +* that the driver try CP enable. 
+*/ + if (new_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED && + new_conn_state->crtc != old_conn_state->crtc) + return true; /* * Nothing to do if content type is unchanged and one of: @@ -484,9 +499,9 @@ void drm_hdcp_atomic_check(struct drm_connector *connector, new_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED)) { if (old_conn_state->hdcp_content_type == new_conn_state->hdcp_content_type) - return; + return false; } - new_crtc_state->mode_changed = true; + return true; } EXPORT_SYMBOL(drm_hdcp_atomic_check); diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 1e306e8427ec..c7b5470c40aa 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -122,8 +122,6 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn, to_intel_digital_connector_state(old_state); struct drm_crtc_state *crtc_state; - drm_hdcp_atomic_check(conn, state); - if (!new_state->crtc) return 0; @@ -139,7 +137,8 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn, new_conn_state->base.picture_aspect_ratio != old_conn_state->base.picture_aspect_ratio || new_conn_state->base.content_type != old_conn_state->base.content_type || new_conn_state->base.scaling_mode != old_conn_state->base.scaling_mode || - !drm_connector_atomic_hdr_metadata_equal(old_state, new_state)) + !drm_connector_atomic_hdr_metadata_equal(old_state, new_state) || + drm_hdcp_atomic_check(conn, state)) crtc_state->mode_changed = true; return 0; diff --git a/include/drm/drm_hdcp.h b/include/drm/drm_hdcp.h index d
[PATCH v2 01/13] drm/hdcp: Add drm_hdcp_atomic_check()
From: Sean Paul This patch moves the hdcp atomic check from i915 to drm_hdcp so other drivers can use it. No functional changes, just cleaned up some of the code when moving it over. Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-2-s...@poorly.run #v1 Changes in v2: -None --- drivers/gpu/drm/drm_hdcp.c | 71 - drivers/gpu/drm/i915/display/intel_atomic.c | 4 +- drivers/gpu/drm/i915/display/intel_hdcp.c | 47 -- drivers/gpu/drm/i915/display/intel_hdcp.h | 3 - include/drm/drm_hdcp.h | 3 + 5 files changed, 75 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/drm_hdcp.c b/drivers/gpu/drm/drm_hdcp.c index ca9b8f697202..522326b03e66 100644 --- a/drivers/gpu/drm/drm_hdcp.c +++ b/drivers/gpu/drm/drm_hdcp.c @@ -13,13 +13,14 @@ #include #include +#include +#include #include #include #include #include #include #include -#include #include "drm_internal.h" @@ -421,3 +422,71 @@ void drm_hdcp_update_content_protection(struct drm_connector *connector, dev->mode_config.content_protection_property); } EXPORT_SYMBOL(drm_hdcp_update_content_protection); + +/** + * drm_hdcp_atomic_check - Helper for drivers to call during connector->atomic_check + * + * @state: pointer to the atomic state being checked + * @connector: drm_connector on which content protection state needs an update + * + * This function can be used by display drivers to perform an atomic check on the + * hdcp state elements. If hdcp state has changed, this function will set + * mode_changed on the crtc driving the connector so it can update its hardware + * to match the hdcp state. 
+ */ +void drm_hdcp_atomic_check(struct drm_connector *connector, + struct drm_atomic_state *state) +{ + struct drm_connector_state *new_conn_state, *old_conn_state; + struct drm_crtc_state *new_crtc_state; + u64 old_hdcp, new_hdcp; + + old_conn_state = drm_atomic_get_old_connector_state(state, connector); + old_hdcp = old_conn_state->content_protection; + + new_conn_state = drm_atomic_get_new_connector_state(state, connector); + new_hdcp = new_conn_state->content_protection; + + if (!new_conn_state->crtc) { + /* +* If the connector is being disabled with CP enabled, mark it +* desired so it's re-enabled when the connector is brought back +*/ + if (old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED) + new_conn_state->content_protection = + DRM_MODE_CONTENT_PROTECTION_DESIRED; + return; + } + + new_crtc_state = drm_atomic_get_new_crtc_state(state, + new_conn_state->crtc); + /* + * Fix the HDCP uapi content protection state in case of modeset. + * FIXME: As per HDCP content protection property uapi doc, an uevent() + * need to be sent if there is transition from ENABLED->DESIRED. 
+ */ + if (drm_atomic_crtc_needs_modeset(new_crtc_state) && + (old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED && +new_hdcp != DRM_MODE_CONTENT_PROTECTION_UNDESIRED)) + new_conn_state->content_protection = + DRM_MODE_CONTENT_PROTECTION_DESIRED; + + /* +* Nothing to do if content type is unchanged and one of: +* - state didn't change +* - HDCP was activated since the last commit +* - attempting to set to desired while already enabled +*/ + if (old_hdcp == new_hdcp || + (old_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED && +new_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED) || + (old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED && +new_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED)) { + if (old_conn_state->hdcp_content_type == + new_conn_state->hdcp_content_type) + return; + } + + new_crtc_state->mode_changed = true; +} +EXPORT_SYMBOL(drm_hdcp_atomic_check); diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index b4e7ac51aa31..1e306e8427ec 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -32,13 +32,13 @@ #include #include #include +#include #include #include "intel_atomic.h" #include "intel_cdclk.h" #include "intel_display_types.h" #include "intel_global_state.h" -#include "intel_hdcp.h" #include "intel_psr.h" #include "skl_universal_plane.h" @@ -122,7 +122,7 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn, to_intel_digital_connector_state(old_state); struct drm_crtc_state *crtc_state; -
Re: [Intel-gfx] [PATCH 12/27] drm/i915/guc: Add multi-lrc context registration
On Wed, Sep 15, 2021 at 01:23:19PM -0700, John Harrison wrote: > On 9/15/2021 12:31, Matthew Brost wrote: > > On Wed, Sep 15, 2021 at 12:21:35PM -0700, John Harrison wrote: > > > On 8/20/2021 15:44, Matthew Brost wrote: > > > > Add multi-lrc context registration H2G. In addition a workqueue and > > > > process descriptor are setup during multi-lrc context registration as > > > > these data structures are needed for multi-lrc submission. > > > > > > > > Signed-off-by: Matthew Brost > > > > --- > > > >drivers/gpu/drm/i915/gt/intel_context_types.h | 12 ++ > > > >drivers/gpu/drm/i915/gt/intel_lrc.c | 5 + > > > >drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 2 +- > > > >.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 109 > > > > +- > > > >4 files changed, 126 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h > > > > b/drivers/gpu/drm/i915/gt/intel_context_types.h > > > > index 0fafc178cf2c..6f567ebeb039 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h > > > > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h > > > > @@ -232,8 +232,20 @@ struct intel_context { > > > > /** @parent: pointer to parent if child */ > > > > struct intel_context *parent; > > > > + > > > > + /** @guc_wqi_head: head pointer in work queue */ > > > > + u16 guc_wqi_head; > > > > + /** @guc_wqi_tail: tail pointer in work queue */ > > > > + u16 guc_wqi_tail; > > > > + > > > These should be in the 'guc_state' sub-struct? Would be good to keep all > > > GuC > > > specific content in one self-contained struct. Especially given the other > > > child/parent fields are no going to be guc_ prefixed any more. > > > > > Right now I have everything in guc_state protected by guc_state.lock, > > these fields are not protected by this lock. IMO it is better to use a > > different sub-structure for the parallel fields (even if anonymous). > Hmm, I still think it is bad to be scattering back-end specific fields > amongst regular fields. 
The GuC patches include a whole bunch of complaints > about execlist back-end specific stuff leaking through to the higher levels, > we really shouldn't be guilty of doing the same with GuC if at all possible. > At the very least, the GuC specific fields should be grouped together at the > end of the struct rather than inter-mingled. > How about 2 different sub-structures - parallel (shared) & guc_parallel (guc specific)? > > > > > > /** @guc_number_children: number of children if parent > > > > */ > > > > u8 guc_number_children; > > > > + > > > > + /** > > > > +* @parent_page: page in context used by parent for > > > > work queue, > > > Maybe 'page in context record'? Otherwise, exactly what 'context' is meant > > > here? It isn't the 'struct intel_context'. The context record is saved as > > > 'ce->state' / 'ce->lrc_reg_state', yes? Is it possible to link to either > > > of > > It is the page in ce->state / page minus LRC reg offset in > > ce->lrc_reg_state. Will update the commit to make that clear. > > > > > those fields? Probably not given that they don't appear to have any > > > kerneldoc > > > description :(. Maybe add that in too :). > > > > > > > +* work queue descriptor > > > Later on, it is described as 'process descriptor and work queue'. It would > > > be good to be consistent. > > > > > Yep. Will fix. 
> > > > > > +*/ > > > > + u8 parent_page; > > > > }; > > > >#ifdef CONFIG_DRM_I915_SELFTEST > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > b/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > index bb4af4977920..0ddbad4e062a 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > > > @@ -861,6 +861,11 @@ __lrc_alloc_state(struct intel_context *ce, struct > > > > intel_engine_cs *engine) > > > > context_size += PAGE_SIZE; > > > > } > > > > + if (intel_context_is_parent(ce)) { > > > > + ce->parent_page = context_size / PAGE_SIZE; > > > > + context_size += PAGE_SIZE; > > > > + } > > > > + > > > > obj = i915_gem_object_create_lmem(engine->i915, context_size, > > > > 0); > > > > if (IS_ERR(obj)) > > > > obj = i915_gem_object_create_shmem(engine->i915, > > > > context_size); > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > > > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > > > > index fa4be13c8854..0e600a3b8f1e 100644 > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > > > > @@ -52,7 +52,7 @@ > > > >#define GUC_DOORBELL_INVALID 256 > > > > -#define GUC_WQ_SIZE(PAGE_SIZE * 2) > > > > +#define GUC_WQ
[PATCH v2 00/13] drm/hdcp: Pull HDCP auth/exchange/check into helpers
From: Sean Paul Hello again, This is the second version of the HDCP helper patchset. See version 1 here: https://patchwork.freedesktop.org/series/94623/ In this second version, I've fixed up the oopsies exposed by 0-day and yamllint and incorporated early review feedback from the dt/dts reviews. Please take a look, Sean Sean Paul (13): drm/hdcp: Add drm_hdcp_atomic_check() drm/hdcp: Avoid changing crtc state in hdcp atomic check drm/hdcp: Update property value on content type and user changes drm/hdcp: Expand HDCP helper library for enable/disable/check drm/i915/hdcp: Consolidate HDCP setup/state cache drm/i915/hdcp: Retain hdcp_capable return codes drm/i915/hdcp: Use HDCP helpers for i915 drm/msm/dpu_kms: Re-order dpu includes drm/msm/dpu: Remove useless checks in dpu_encoder drm/msm/dpu: Remove encoder->enable() hack drm/msm/dp: Re-order dp_audio_put in deinit_sub_modules dt-bindings: msm/dp: Add bindings for HDCP registers drm/msm: Implement HDCP 1.x using the new drm HDCP helpers .../bindings/display/msm/dp-controller.yaml |7 +- arch/arm64/boot/dts/qcom/sc7180.dtsi |4 +- drivers/gpu/drm/drm_hdcp.c| 1197 - drivers/gpu/drm/i915/display/intel_atomic.c |7 +- drivers/gpu/drm/i915/display/intel_ddi.c | 29 +- .../drm/i915/display/intel_display_debugfs.c | 11 +- .../drm/i915/display/intel_display_types.h| 58 +- drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 345 ++--- drivers/gpu/drm/i915/display/intel_dp_mst.c | 17 +- drivers/gpu/drm/i915/display/intel_hdcp.c | 1011 +++--- drivers/gpu/drm/i915/display/intel_hdcp.h | 35 +- drivers/gpu/drm/i915/display/intel_hdmi.c | 256 ++-- drivers/gpu/drm/msm/Makefile |1 + drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 17 +- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 30 +- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h |2 - drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h |4 - drivers/gpu/drm/msm/dp/dp_debug.c | 49 +- drivers/gpu/drm/msm/dp/dp_debug.h |6 +- drivers/gpu/drm/msm/dp/dp_display.c | 47 +- drivers/gpu/drm/msm/dp/dp_display.h |5 + 
drivers/gpu/drm/msm/dp/dp_drm.c | 68 +- drivers/gpu/drm/msm/dp/dp_drm.h |5 + drivers/gpu/drm/msm/dp/dp_hdcp.c | 433 ++ drivers/gpu/drm/msm/dp/dp_hdcp.h | 27 + drivers/gpu/drm/msm/dp/dp_parser.c| 22 +- drivers/gpu/drm/msm/dp/dp_parser.h|4 + drivers/gpu/drm/msm/dp/dp_reg.h | 44 +- drivers/gpu/drm/msm/msm_atomic.c | 15 + include/drm/drm_hdcp.h| 194 +++ 30 files changed, 2561 insertions(+), 1389 deletions(-) create mode 100644 drivers/gpu/drm/msm/dp/dp_hdcp.c create mode 100644 drivers/gpu/drm/msm/dp/dp_hdcp.h -- Sean Paul, Software Engineer, Google / Chromium OS
Re: [PATCH 4/9] drm/privacy-screen: Add notifier support
On Mon, 2021-09-06 at 09:35 +0200, Hans de Goede wrote: > Add support for privacy-screen consumers to register a notifier to > be notified of external (e.g. done by the hw itself on a hotkey press) > state changes. > > Reviewed-by: Emil Velikov > Signed-off-by: Hans de Goede > --- > drivers/gpu/drm/drm_privacy_screen.c | 67 +++ > include/drm/drm_privacy_screen_consumer.h | 15 + > include/drm/drm_privacy_screen_driver.h | 4 ++ > 3 files changed, 86 insertions(+) > > diff --git a/drivers/gpu/drm/drm_privacy_screen.c > b/drivers/gpu/drm/drm_privacy_screen.c > index 294a09194bfb..7a5f878c3171 100644 > --- a/drivers/gpu/drm/drm_privacy_screen.c > +++ b/drivers/gpu/drm/drm_privacy_screen.c > @@ -255,6 +255,49 @@ void drm_privacy_screen_get_state(struct > drm_privacy_screen *priv, > } > EXPORT_SYMBOL(drm_privacy_screen_get_state); > > +/** > + * drm_privacy_screen_register_notifier - register a notifier > + * @priv: Privacy screen to register the notifier with > + * @nb: Notifier-block for the notifier to register > + * > + * Register a notifier with the privacy-screen to be notified of changes > made > + * to the privacy-screen state from outside of the privacy-screen class. > + * E.g. the state may be changed by the hardware itself in response to a > + * hotkey press. > + * > + * The notifier is called with no locks held. The new hw_state and sw_state > + * can be retrieved using the drm_privacy_screen_get_state() function. > + * A pointer to the drm_privacy_screen's struct is passed as the void *data > + * argument of the notifier_block's notifier_call. > + * > + * The notifier will NOT be called when changes are made through > + * drm_privacy_screen_set_sw_state(). It is only called for external > changes. > + * > + * Return: 0 on success, negative error code on failure. 
> + */ > +int drm_privacy_screen_register_notifier(struct drm_privacy_screen *priv, > + struct notifier_block *nb) > +{ > + return blocking_notifier_chain_register(&priv->notifier_head, nb); > +} > +EXPORT_SYMBOL(drm_privacy_screen_register_notifier); > + > +/** > + * drm_privacy_screen_unregister_notifier - unregister a notifier > + * @priv: Privacy screen to register the notifier with > + * @nb: Notifier-block for the notifier to register > + * > + * Unregister a notifier registered with > drm_privacy_screen_register_notifier(). > + * > + * Return: 0 on success, negative error code on failure. > + */ > +int drm_privacy_screen_unregister_notifier(struct drm_privacy_screen *priv, > + struct notifier_block *nb) > +{ > + return blocking_notifier_chain_unregister(&priv->notifier_head, nb); > +} > +EXPORT_SYMBOL(drm_privacy_screen_unregister_notifier); > + > /*** drm_privacy_screen_driver.h functions ***/ > > static ssize_t sw_state_show(struct device *dev, > @@ -352,6 +395,7 @@ struct drm_privacy_screen *drm_privacy_screen_register( > return ERR_PTR(-ENOMEM); > > mutex_init(&priv->lock); > + BLOCKING_INIT_NOTIFIER_HEAD(&priv->notifier_head); > > priv->dev.class = drm_class; > priv->dev.type = &drm_privacy_screen_type; > @@ -399,3 +443,26 @@ void drm_privacy_screen_unregister(struct > drm_privacy_screen *priv) > device_unregister(&priv->dev); > } > EXPORT_SYMBOL(drm_privacy_screen_unregister); > + > +/** > + * drm_privacy_screen_call_notifier_chain - notify consumers of state > change > + * @priv: Privacy screen to register the notifier with > + * > + * A privacy-screen provider driver can call this functions upon external > + * changes to the privacy-screen state. E.g. the state may be changed by > the > + * hardware itself in response to a hotkey press. > + * This function must be called without holding the privacy-screen lock. > + * the driver must update sw_state and hw_state to reflect the new state > before > + * calling this function. 
> + * The expected behavior from the driver upon receiving an external state > + * change event is: 1. Take the lock; 2. Update sw_state and hw_state; > + * 3. Release the lock. 4. Call drm_privacy_screen_call_notifier_chain(). > + */ > +void drm_privacy_screen_call_notifier_chain(struct drm_privacy_screen > *priv) > +{ > + if (WARN_ON(mutex_is_locked(&priv->lock))) > + return; Are we sure about this check? mutex_is_locked() checks whether a mutex is locked by anyone, not just us. So this seems like it would cause us to WARN_ON() and abort if anyone else (not just ourselves) is holding the lock to read the privacy screen state. > + > + blocking_notifier_call_chain(&priv->notifier_head, 0, priv); > +} > +EXPORT_SYMBOL(drm_privacy_screen_call_notifier_chain); > diff --git a/include/drm/drm_privacy_screen_consumer.h > b/include/drm/drm_privacy_screen_consumer.h > index 0cbd23b0453d..7f66a90d15b7 100644 > --- a/include/drm/drm_privacy_screen_consumer.h > +++ b/include/drm/drm_privacy_scree
Re: [Intel-gfx] [PATCH 12/27] drm/i915/guc: Add multi-lrc context registration
On 9/15/2021 12:31, Matthew Brost wrote: On Wed, Sep 15, 2021 at 12:21:35PM -0700, John Harrison wrote: On 8/20/2021 15:44, Matthew Brost wrote: Add multi-lrc context registration H2G. In addition a workqueue and process descriptor are setup during multi-lrc context registration as these data structures are needed for multi-lrc submission. Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gt/intel_context_types.h | 12 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 5 + drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 2 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 109 +- 4 files changed, 126 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 0fafc178cf2c..6f567ebeb039 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -232,8 +232,20 @@ struct intel_context { /** @parent: pointer to parent if child */ struct intel_context *parent; + + /** @guc_wqi_head: head pointer in work queue */ + u16 guc_wqi_head; + /** @guc_wqi_tail: tail pointer in work queue */ + u16 guc_wqi_tail; + These should be in the 'guc_state' sub-struct? Would be good to keep all GuC specific content in one self-contained struct. Especially given the other child/parent fields are not going to be guc_ prefixed any more. Right now I have everything in guc_state protected by guc_state.lock, these fields are not protected by this lock. IMO it is better to use a different sub-structure for the parallel fields (even if anonymous). Hmm, I still think it is bad to be scattering back-end specific fields amongst regular fields. The GuC patches include a whole bunch of complaints about execlist back-end specific stuff leaking through to the higher levels, we really shouldn't be guilty of doing the same with GuC if at all possible. At the very least, the GuC specific fields should be grouped together at the end of the struct rather than inter-mingled. 
/** @guc_number_children: number of children if parent */ u8 guc_number_children; + + /** +* @parent_page: page in context used by parent for work queue, Maybe 'page in context record'? Otherwise, exactly what 'context' is meant here? It isn't the 'struct intel_context'. The context record is saved as 'ce->state' / 'ce->lrc_reg_state', yes? Is it possible to link to either of It is the page in ce->state / page minus LRC reg offset in ce->lrc_reg_state. Will update the commit to make that clear. those fields? Probably not given that they don't appear to have any kerneldoc description :(. Maybe add that in too :). +* work queue descriptor Later on, it is described as 'process descriptor and work queue'. It would be good to be consistent. Yep. Will fix. +*/ + u8 parent_page; }; #ifdef CONFIG_DRM_I915_SELFTEST diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bb4af4977920..0ddbad4e062a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -861,6 +861,11 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) context_size += PAGE_SIZE; } + if (intel_context_is_parent(ce)) { + ce->parent_page = context_size / PAGE_SIZE; + context_size += PAGE_SIZE; + } + obj = i915_gem_object_create_lmem(engine->i915, context_size, 0); if (IS_ERR(obj)) obj = i915_gem_object_create_shmem(engine->i915, context_size); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index fa4be13c8854..0e600a3b8f1e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -52,7 +52,7 @@ #define GUC_DOORBELL_INVALID 256 -#define GUC_WQ_SIZE(PAGE_SIZE * 2) +#define GUC_WQ_SIZE(PAGE_SIZE / 2) Is this size actually dictated by the GuC API? Or is it just a driver level decision? If the latter, shouldn't this be below instead? Driver level decision. What exactly do you mean by below? 
The next chunk of the patch - where WQ_OFFSET is defined and the whole caboodle is described. /* Work queue item header definitions */ #define WQ_STATUS_ACTIVE 1 diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 14b24298cdd7..dbcb9ab28a9a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -340,6 +340,39 @@ static struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node)
Re: [Intel-gfx] [PATCH 14/27] drm/i915/guc: Assign contexts in parent-child relationship consecutive guc_ids
On 8/20/2021 15:44, Matthew Brost wrote: Assign contexts in parent-child relationship consecutive guc_ids. This is accomplished by partitioning guc_id space between ones that need to be consecutive (1/16 available guc_ids) and ones that do not (15/16 of available guc_ids). The consecutive search is implemented via the bitmap API. This is a precursor to the full GuC multi-lrc implementation but aligns to how GuC mutli-lrc interface is defined - guc_ids must be consecutive when using the GuC multi-lrc interface. v2: (Daniel Vetter) - Explictly state why we assign consecutive guc_ids Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gt/uc/intel_guc.h| 6 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 107 +- 2 files changed, 86 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 023953e77553..3f95b1b4f15c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -61,9 +61,13 @@ struct intel_guc { */ spinlock_t lock; /** -* @guc_ids: used to allocate new guc_ids +* @guc_ids: used to allocate new guc_ids, single-lrc */ struct ida guc_ids; + /** +* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc +*/ + unsigned long *guc_ids_bitmap; /** @num_guc_ids: number of guc_ids that can be used */ u32 num_guc_ids; /** @max_guc_ids: max number of guc_ids that can be used */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 00d54bb00bfb..e9dfd43d29a0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -125,6 +125,18 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); #define GUC_REQUEST_SIZE 64 /* bytes */ +/* + * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous + * per the GuC submission interface. A different allocation algorithm is used + * (bitmap vs. 
ida) between multi-lrc and single-lrc hence the reason to The 'hence' clause seems to be attached to the wrong reason. The id space is partitioned because of the contiguous vs random requirements of multi vs single LRC, not because a different allocator is used in one partition vs the other. + * partition the guc_id space. We believe the number of multi-lrc contexts in + * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for + * multi-lrc. + */ +#define NUMBER_MULTI_LRC_GUC_ID(guc) \ + ((guc)->submission_state.num_guc_ids / 16 > 32 ? \ +(guc)->submission_state.num_guc_ids / 16 : 32) + /* * Below is a set of functions which control the GuC scheduling state which * require a lock. @@ -1176,6 +1188,10 @@ int intel_guc_submission_init(struct intel_guc *guc) INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); intel_gt_pm_unpark_work_init(&guc->submission_state.destroyed_worker, destroyed_worker_func); + guc->submission_state.guc_ids_bitmap = + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); + if (!guc->submission_state.guc_ids_bitmap) + return -ENOMEM; return 0; } @@ -1188,6 +1204,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) guc_lrc_desc_pool_destroy(guc); guc_flush_destroyed_contexts(guc); i915_sched_engine_put(guc->sched_engine); + bitmap_free(guc->submission_state.guc_ids_bitmap); } static void queue_request(struct i915_sched_engine *sched_engine, @@ -1239,18 +1256,43 @@ static void guc_submit_request(struct i915_request *rq) spin_unlock_irqrestore(&sched_engine->lock, flags); } -static int new_guc_id(struct intel_guc *guc) +static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) { - return ida_simple_get(&guc->submission_state.guc_ids, 0, - guc->submission_state.num_guc_ids, GFP_KERNEL | - __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + int ret; + + GEM_BUG_ON(intel_context_is_child(ce)); + + if (intel_context_is_parent(ce)) + ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, + 
NUMBER_MULTI_LRC_GUC_ID(guc), + order_base_2(ce->guc_number_children + + 1)); + else + ret = ida_simple_get(&guc->submission_state.guc_ids, +NUMBER_MULTI_LRC_GUC_ID(guc), +guc->submission_state.num_guc_ids, +
Re: [PATCH 2/9] drm: Add privacy-screen class (v3)
On Mon, 2021-09-06 at 09:35 +0200, Hans de Goede wrote: > On some new laptops the LCD panel has a builtin electronic privacy-screen. > We want to export this functionality as a property on the drm connector > object. But often this functionality is not exposed on the GPU but on some > other (ACPI) device. > > This commit adds a privacy-screen class allowing the driver for these > other devices to register themselves as a privacy-screen provider; and > allowing the drm/kms code to get a privacy-screen provider associated > with a specific GPU/connector combo. > > Changes in v2: > - Make CONFIG_DRM_PRIVACY_SCREEN a bool which controls if the drm_privacy > code gets built as part of the main drm module rather then making it > a tristate which builds its own module. > - Add a #if IS_ENABLED(CONFIG_DRM_PRIVACY_SCREEN) check to > drm_privacy_screen_consumer.h and define stubs when the check fails. > Together these 2 changes fix several dependency issues. > - Remove module related code now that this is part of the main drm.ko > - Use drm_class as class for the privacy-screen devices instead of > adding a separate class for this > > Changes in v3: > - Make the static inline drm_privacy_screen_get_state() stub set sw_state > and hw_state to PRIVACY_SCREEN_DISABLED to squelch an uninitialized > variable warning when CONFIG_DRM_PRIVICAY_SCREEN is not set > > Reviewed-by: Emil Velikov > Signed-off-by: Hans de Goede > --- > Documentation/gpu/drm-kms-helpers.rst | 15 + > MAINTAINERS | 8 + > drivers/gpu/drm/Kconfig | 4 + > drivers/gpu/drm/Makefile | 1 + > drivers/gpu/drm/drm_drv.c | 4 + > drivers/gpu/drm/drm_privacy_screen.c | 401 ++ > include/drm/drm_privacy_screen_consumer.h | 50 +++ > include/drm/drm_privacy_screen_driver.h | 80 + > include/drm/drm_privacy_screen_machine.h | 41 +++ > 9 files changed, 604 insertions(+) > create mode 100644 drivers/gpu/drm/drm_privacy_screen.c > create mode 100644 include/drm/drm_privacy_screen_consumer.h > create mode 100644 
include/drm/drm_privacy_screen_driver.h > create mode 100644 include/drm/drm_privacy_screen_machine.h > > diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm- > kms-helpers.rst > index 389892f36185..5d8715d2f998 100644 > --- a/Documentation/gpu/drm-kms-helpers.rst > +++ b/Documentation/gpu/drm-kms-helpers.rst > @@ -423,3 +423,18 @@ Legacy CRTC/Modeset Helper Functions Reference > > .. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c > :export: > + > +Privacy-screen class > + > + > +.. kernel-doc:: drivers/gpu/drm/drm_privacy_screen.c > + :doc: overview > + > +.. kernel-doc:: include/drm/drm_privacy_screen_driver.h > + :internal: > + > +.. kernel-doc:: include/drm/drm_privacy_screen_machine.h > + :internal: > + > +.. kernel-doc:: drivers/gpu/drm/drm_privacy_screen.c > + :export: > diff --git a/MAINTAINERS b/MAINTAINERS > index ede4a37a53b3..a272ca600f98 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -6376,6 +6376,14 @@ F: drivers/gpu/drm/drm_panel.c > F: drivers/gpu/drm/panel/ > F: include/drm/drm_panel.h > > +DRM PRIVACY-SCREEN CLASS > +M: Hans de Goede > +L: dri-devel@lists.freedesktop.org > +S: Maintained > +T: git git://anongit.freedesktop.org/drm/drm-misc > +F: drivers/gpu/drm/drm_privacy_screen* > +F: include/drm/drm_privacy_screen* > + > DRM TTM SUBSYSTEM > M: Christian Koenig > M: Huang Rui > diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig > index b17e231ca6f7..7249b010ab90 100644 > --- a/drivers/gpu/drm/Kconfig > +++ b/drivers/gpu/drm/Kconfig > @@ -481,3 +481,7 @@ config DRM_PANEL_ORIENTATION_QUIRKS > config DRM_LIB_RANDOM > bool > default n > + > +config DRM_PRIVACY_SCREEN > + bool > + default n This is probably worth documenting for folks configuring their kernels to explain what this actually does (something simple like "Controls programmable privacy screens found on some devices, if unsure select Y" would probably be fine) > diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile > index 
0dff40bb863c..788fc37096f6 100644 > --- a/drivers/gpu/drm/Makefile > +++ b/drivers/gpu/drm/Makefile > @@ -32,6 +32,7 @@ drm-$(CONFIG_OF) += drm_of.o > drm-$(CONFIG_PCI) += drm_pci.o > drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o > drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o > +drm-$(CONFIG_DRM_PRIVACY_SCREEN) += drm_privacy_screen.o > > obj-$(CONFIG_DRM_DP_AUX_BUS) += drm_dp_aux_bus.o > > diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c > index 7a5097467ba5..dc293b771c3f 100644 > --- a/drivers/gpu/drm/drm_drv.c > +++ b/drivers/gpu/drm/drm_drv.c > @@ -43,6 +43,7 @@ > #include > #include > #include > +#include > > #include "drm_crtc_internal.h" > #include "drm_internal.h" > @@ -102
Re: [PATCH 1/9] drm/connector: Add support for privacy-screen properties (v4)
On Mon, 2021-09-06 at 09:35 +0200, Hans de Goede wrote: > From: Rajat Jain > > Add support for generic electronic privacy screen properties, that > can be added by systems that have an integrated EPS. > > Changes in v2 (Hans de Goede) > - Create 2 properties, "privacy-screen sw-state" and > "privacy-screen hw-state", to deal with devices where the OS might be > locked out of making state changes > - Write kerneldoc explaining how the 2 properties work together, what > happens when changes to the state are made outside of the DRM code's > control, etc. > > Changes in v3 (Hans de Goede) > - Some small tweaks to the kerneldoc describing the 2 properties > > Changes in v4 (Hans de Goede) > - Change the "Enabled, locked" and "Disabled, locked" hw-state enum value > names to "Enabled-locked" and "Disabled-locked". The xrandr command shows > all possible enum values separated by commas in its output, so having a > comma in an enum name is not a good idea. > - Do not add a privacy_screen_hw_state member to drm_connector_state > since this property is immutable its value must be directly stored in the > obj->properties->values array > > Signed-off-by: Rajat Jain > Co-authored-by: Hans de Goede > Acked-by: Pekka Paalanen > Reviewed-by: Mario Limonciello > Reviewed-by: Emil Velikov > Signed-off-by: Hans de Goede > --- > Documentation/gpu/drm-kms.rst | 2 + > drivers/gpu/drm/drm_atomic_uapi.c | 4 ++ > drivers/gpu/drm/drm_connector.c | 101 ++ > include/drm/drm_connector.h | 44 + > 4 files changed, 151 insertions(+) > > diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst > index 1ef7951ded5e..d14bf1c35d7e 100644 > --- a/Documentation/gpu/drm-kms.rst > +++ b/Documentation/gpu/drm-kms.rst > @@ -506,6 +506,8 @@ Property Types and Blob Property Support > .. kernel-doc:: drivers/gpu/drm/drm_property.c > :export: > > +.. 
_standard_connector_properties: > + > Standard Connector Properties > - > > diff --git a/drivers/gpu/drm/drm_atomic_uapi.c > b/drivers/gpu/drm/drm_atomic_uapi.c > index 909f31833181..cdd31fc78bfc 100644 > --- a/drivers/gpu/drm/drm_atomic_uapi.c > +++ b/drivers/gpu/drm/drm_atomic_uapi.c > @@ -797,6 +797,8 @@ static int drm_atomic_connector_set_property(struct > drm_connector *connector, > fence_ptr); > } else if (property == connector->max_bpc_property) { > state->max_requested_bpc = val; > + } else if (property == connector->privacy_screen_sw_state_property) > { > + state->privacy_screen_sw_state = val; > } else if (connector->funcs->atomic_set_property) { > return connector->funcs->atomic_set_property(connector, > state, property, val); > @@ -874,6 +876,8 @@ drm_atomic_connector_get_property(struct drm_connector > *connector, > *val = 0; > } else if (property == connector->max_bpc_property) { > *val = state->max_requested_bpc; > + } else if (property == connector->privacy_screen_sw_state_property) > { > + *val = state->privacy_screen_sw_state; > } else if (connector->funcs->atomic_get_property) { > return connector->funcs->atomic_get_property(connector, > state, property, val); > diff --git a/drivers/gpu/drm/drm_connector.c > b/drivers/gpu/drm/drm_connector.c > index e0a30e0ee86a..dd1ca68881ba 100644 > --- a/drivers/gpu/drm/drm_connector.c > +++ b/drivers/gpu/drm/drm_connector.c > @@ -1264,6 +1264,46 @@ static const struct drm_prop_enum_list > dp_colorspaces[] = { > * For DVI-I and TVout there is also a matching property "select > subconnector" > * allowing to switch between signal types. > * DP subconnector corresponds to a downstream port. > + * > + * privacy-screen sw-state, privacy-screen hw-state: > + * These 2 optional properties can be used to query the state of the > + * electronic privacy screen that is available on some displays; and in > + * some cases also control the state. 
If a driver implements these > + * properties then both properties must be present. > + * > + * "privacy-screen hw-state" is read-only and reflects the actual state > + * of the privacy-screen, possible values: "Enabled", "Disabled, > + * "Enabled-locked", "Disabled-locked". The locked states indicate > + * that the state cannot be changed through the DRM API. E.g. there > + * might be devices where the firmware-setup options, or a hardware > + * slider-switch, offer always on / off modes. > + * > + * "privacy-screen sw-state" can be set to change the privacy-screen > state > + * when not locked. In this case the driver must update the hw-state > + * property to reflect the new state on completion of the commit of the > + * sw-state propert
Re: [Intel-gfx] [PATCH 13/27] drm/i915/guc: Ensure GuC schedule operations do not operate on child contexts
On Wed, Sep 15, 2021 at 12:24:41PM -0700, John Harrison wrote: > On 8/20/2021 15:44, Matthew Brost wrote: > > In GuC parent-child contexts the parent context controls the scheduling, > > ensure only the parent does the scheduling operations. > > > > Signed-off-by: Matthew Brost > > --- > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 24 ++- > > 1 file changed, 18 insertions(+), 6 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > index dbcb9ab28a9a..00d54bb00bfb 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > @@ -320,6 +320,12 @@ static void decr_context_committed_requests(struct > > intel_context *ce) > > GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); > > } > > +static struct intel_context * > > +request_to_scheduling_context(struct i915_request *rq) > > +{ > > + return intel_context_to_parent(rq->context); > > +} > > + > > static bool context_guc_id_invalid(struct intel_context *ce) > > { > > return ce->guc_id.id == GUC_INVALID_LRC_ID; > > @@ -1684,6 +1690,7 @@ static void __guc_context_sched_disable(struct > > intel_guc *guc, > > GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID); > > + GEM_BUG_ON(intel_context_is_child(ce)); > > trace_intel_context_sched_disable(ce); > > guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), > > @@ -1898,6 +1905,8 @@ static void guc_context_sched_disable(struct > > intel_context *ce) > > u16 guc_id; > > bool enabled; > > + GEM_BUG_ON(intel_context_is_child(ce)); > > + > > if (submission_disabled(guc) || context_guc_id_invalid(ce) || > > !lrc_desc_registered(guc, ce->guc_id.id)) { > > spin_lock_irqsave(&ce->guc_state.lock, flags); > > @@ -2286,6 +2295,8 @@ static void guc_signal_context_fence(struct > > intel_context *ce) > > { > > unsigned long flags; > > + GEM_BUG_ON(intel_context_is_child(ce)); > > + > > spin_lock_irqsave(&ce->guc_state.lock, 
flags); > > clr_context_wait_for_deregister_to_register(ce); > > __guc_signal_context_fence(ce); > > @@ -2315,7 +2326,7 @@ static void guc_context_init(struct intel_context *ce) > > static int guc_request_alloc(struct i915_request *rq) > > { > > - struct intel_context *ce = rq->context; > > + struct intel_context *ce = request_to_scheduling_context(rq); > > struct intel_guc *guc = ce_to_guc(ce); > > unsigned long flags; > > int ret; > > @@ -2358,11 +2369,12 @@ static int guc_request_alloc(struct i915_request > > *rq) > > * exhausted and return -EAGAIN to the user indicating that they can try > > * again in the future. > > * > > -* There is no need for a lock here as the timeline mutex ensures at > > -* most one context can be executing this code path at once. The > > -* guc_id_ref is incremented once for every request in flight and > > -* decremented on each retire. When it is zero, a lock around the > > -* increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. > > +* There is no need for a lock here as the timeline mutex (or > > +* parallel_submit mutex in the case of multi-lrc) ensures at most one > > +* context can be executing this code path at once. The guc_id_ref is > Isn't that now two? One uni-LRC holding the timeline mutex and one multi-LRC > holding the parallel submit mutex? > This is actually a stale comment and need scrub this. The parallel_submit mutex is gone, now we grab the ce->timeline locks starting at the parent and then all children in a loop. I think the original comment is sufficient. Matt > John. > > > +* incremented once for every request in flight and decremented on each > > +* retire. When it is zero, a lock around the increment (in pin_guc_id) > > +* is needed to seal a race with unpin_guc_id. > > */ > > if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) > > goto out; >
Re: [Intel-gfx] [PATCH 12/27] drm/i915/guc: Add multi-lrc context registration
On Wed, Sep 15, 2021 at 12:21:35PM -0700, John Harrison wrote: > On 8/20/2021 15:44, Matthew Brost wrote: > > Add multi-lrc context registration H2G. In addition a workqueue and > > process descriptor are setup during multi-lrc context registration as > > these data structures are needed for multi-lrc submission. > > > > Signed-off-by: Matthew Brost > > --- > > drivers/gpu/drm/i915/gt/intel_context_types.h | 12 ++ > > drivers/gpu/drm/i915/gt/intel_lrc.c | 5 + > > drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 2 +- > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 109 +- > > 4 files changed, 126 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h > > b/drivers/gpu/drm/i915/gt/intel_context_types.h > > index 0fafc178cf2c..6f567ebeb039 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h > > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h > > @@ -232,8 +232,20 @@ struct intel_context { > > /** @parent: pointer to parent if child */ > > struct intel_context *parent; > > + > > + /** @guc_wqi_head: head pointer in work queue */ > > + u16 guc_wqi_head; > > + /** @guc_wqi_tail: tail pointer in work queue */ > > + u16 guc_wqi_tail; > > + > These should be in the 'guc_state' sub-struct? Would be good to keep all GuC > specific content in one self-contained struct. Especially given the other > child/parent fields are no going to be guc_ prefixed any more. > Right now I have everything in guc_state protected by guc_state.lock, these fields are not protected by this lock. IMO it is better to use a different sub-structure for the parallel fields (even if anonymous). > > > /** @guc_number_children: number of children if parent */ > > u8 guc_number_children; > > + > > + /** > > +* @parent_page: page in context used by parent for work queue, > Maybe 'page in context record'? Otherwise, exactly what 'context' is meant > here? It isn't the 'struct intel_context'. 
The context record is saved as > 'ce->state' / 'ce->lrc_reg_state', yes? Is it possible to link to either of It is the page in ce->state / page minus LRC reg offset in ce->lrc_reg_state. Will update the commit to make that clear. > those fields? Probably not given that they don't appear to have any kerneldoc > description :(. Maybe add that in too :). > > > +* work queue descriptor > Later on, it is described as 'process descriptor and work queue'. It would > be good to be consistent. > Yep. Will fix. > > +*/ > > + u8 parent_page; > > }; > > #ifdef CONFIG_DRM_I915_SELFTEST > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c > > b/drivers/gpu/drm/i915/gt/intel_lrc.c > > index bb4af4977920..0ddbad4e062a 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > @@ -861,6 +861,11 @@ __lrc_alloc_state(struct intel_context *ce, struct > > intel_engine_cs *engine) > > context_size += PAGE_SIZE; > > } > > + if (intel_context_is_parent(ce)) { > > + ce->parent_page = context_size / PAGE_SIZE; > > + context_size += PAGE_SIZE; > > + } > > + > > obj = i915_gem_object_create_lmem(engine->i915, context_size, 0); > > if (IS_ERR(obj)) > > obj = i915_gem_object_create_shmem(engine->i915, context_size); > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > > index fa4be13c8854..0e600a3b8f1e 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > > @@ -52,7 +52,7 @@ > > #define GUC_DOORBELL_INVALID 256 > > -#define GUC_WQ_SIZE(PAGE_SIZE * 2) > > +#define GUC_WQ_SIZE(PAGE_SIZE / 2) > Is this size actually dictated by the GuC API? Or is it just a driver level > decision? If the latter, shouldn't this be below instead? > Driver level decision. What exactly do you mean by below? 
> > /* Work queue item header definitions */ > > #define WQ_STATUS_ACTIVE 1 > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > index 14b24298cdd7..dbcb9ab28a9a 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > @@ -340,6 +340,39 @@ static struct i915_priolist *to_priolist(struct > > rb_node *rb) > > return rb_entry(rb, struct i915_priolist, node); > > } > > +/* > > + * When using multi-lrc submission an extra page in the context state is > > + * reserved for the process descriptor and work queue. > > + * > > + * The layout of this page is below: > > + * 0 guc_process_desc > > + * ... unused > > + * PAGE_
[PATCH] drm/i915: zero fill vma name buffer
In capture_vma() Coverity complains of a possible buffer overrun. Even though this is a static function where all call sites can be checked, limiting the copy length could save some future grief. CID 93300 (#1 of 1): Copy into fixed size buffer (STRING_OVERFLOW) 4. fixed_size_dest: You might overrun the 16-character fixed-size string c->name by copying name without checking the length. 5. parameter_as_source: Note: This defect has an elevated risk because the source argument is a parameter of the current function. 1326strcpy(c->name, name); Fix any possible overflows by using strncpy(). Zero fill the name buffer to guarantee ASCII string NULL termination. Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: David Airlie Cc: Daniel Vetter Cc: intel-...@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Tim Gardner --- drivers/gpu/drm/i915/i915_gpu_error.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9cf6ac575de1..154df174e2d7 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1297,10 +1297,11 @@ static bool record_context(struct i915_gem_context_coredump *e, return simulated; } +#define VMA_NAME_LEN 16 struct intel_engine_capture_vma { struct intel_engine_capture_vma *next; struct i915_vma *vma; - char name[16]; + char name[VMA_NAME_LEN]; }; static struct intel_engine_capture_vma * @@ -1314,7 +1315,7 @@ capture_vma(struct intel_engine_capture_vma *next, if (!vma) return next; - c = kmalloc(sizeof(*c), gfp); + c = kzalloc(sizeof(*c), gfp); if (!c) return next; @@ -1323,7 +1324,7 @@ capture_vma(struct intel_engine_capture_vma *next, return next; } - strcpy(c->name, name); + strncpy(c->name, name, VMA_NAME_LEN-1); c->vma = vma; /* reference held while active */ c->next = next; -- 2.33.0
Re: [PATCH] drm/i915/guc/slpc: remove unneeded clflush calls
On 9/15/2021 12:24, Belgaumkar, Vinay wrote: On 9/14/2021 12:51 PM, Lucas De Marchi wrote: The clflush calls here aren't doing anything since we are not writing something and flushing the cache lines to be visible to GuC. Here the intention seems to be to make sure whatever GuC has written is visible to the CPU before we read them. However a clflush from the CPU side is the wrong instruction to use. Is there a right instruction to use? Either we need to verify that no flush/invalidate is required or we need to add in a replacement that does the correct thing? John. From code inspection on the other clflush() calls in i915/gt/uc/ these are the only ones with this behavior. The others are apparently making sure what we write is visible to GuC. Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 65a3e7fdb2b2..2e996b77df80 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -108,7 +108,6 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) GEM_BUG_ON(!slpc->vma); - drm_clflush_virt_range(slpc->vaddr, sizeof(u32)); data = slpc->vaddr; return data->header.global_state; @@ -172,8 +171,6 @@ static int slpc_query_task_state(struct intel_guc_slpc *slpc) drm_err(&i915->drm, "Failed to query task state (%pe)\n", ERR_PTR(ret)); - drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES); - LGTM. Reviewed-by: Vinay Belgaumkar return ret; }
Re: [Intel-gfx] [PATCH 13/27] drm/i915/guc: Ensure GuC schedule operations do not operate on child contexts
On 8/20/2021 15:44, Matthew Brost wrote: In GuC parent-child contexts the parent context controls the scheduling, ensure only the parent does the scheduling operations. Signed-off-by: Matthew Brost --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 24 ++- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index dbcb9ab28a9a..00d54bb00bfb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -320,6 +320,12 @@ static void decr_context_committed_requests(struct intel_context *ce) GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); } +static struct intel_context * +request_to_scheduling_context(struct i915_request *rq) +{ + return intel_context_to_parent(rq->context); +} + static bool context_guc_id_invalid(struct intel_context *ce) { return ce->guc_id.id == GUC_INVALID_LRC_ID; @@ -1684,6 +1690,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID); + GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_sched_disable(ce); guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), @@ -1898,6 +1905,8 @@ static void guc_context_sched_disable(struct intel_context *ce) u16 guc_id; bool enabled; + GEM_BUG_ON(intel_context_is_child(ce)); + if (submission_disabled(guc) || context_guc_id_invalid(ce) || !lrc_desc_registered(guc, ce->guc_id.id)) { spin_lock_irqsave(&ce->guc_state.lock, flags); @@ -2286,6 +2295,8 @@ static void guc_signal_context_fence(struct intel_context *ce) { unsigned long flags; + GEM_BUG_ON(intel_context_is_child(ce)); + spin_lock_irqsave(&ce->guc_state.lock, flags); clr_context_wait_for_deregister_to_register(ce); __guc_signal_context_fence(ce); @@ -2315,7 +2326,7 @@ static void guc_context_init(struct intel_context *ce) static int guc_request_alloc(struct i915_request *rq) { - struct intel_context *ce = 
rq->context; + struct intel_context *ce = request_to_scheduling_context(rq); struct intel_guc *guc = ce_to_guc(ce); unsigned long flags; int ret; @@ -2358,11 +2369,12 @@ static int guc_request_alloc(struct i915_request *rq) * exhausted and return -EAGAIN to the user indicating that they can try * again in the future. * -* There is no need for a lock here as the timeline mutex ensures at -* most one context can be executing this code path at once. The -* guc_id_ref is incremented once for every request in flight and -* decremented on each retire. When it is zero, a lock around the -* increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. +* There is no need for a lock here as the timeline mutex (or +* parallel_submit mutex in the case of multi-lrc) ensures at most one +* context can be executing this code path at once. The guc_id_ref is Isn't that now two? One uni-LRC holding the timeline mutex and one multi-LRC holding the parallel submit mutex? John. +* incremented once for every request in flight and decremented on each +* retire. When it is zero, a lock around the increment (in pin_guc_id) +* is needed to seal a race with unpin_guc_id. */ if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) goto out;
Re: [PATCH] drm/i915/guc/slpc: remove unneeded clflush calls
On 9/14/2021 12:51 PM, Lucas De Marchi wrote: The clflush calls here aren't doing anything since we are not writing something and flushing the cache lines to be visible to GuC. Here the intention seems to be to make sure whatever GuC has written is visible to the CPU before we read them. However a clflush from the CPU side is the wrong instruction to use. From code inspection on the other clflush() calls in i915/gt/uc/ these are the only ones with this behavior. The others are apparently making sure what we write is visible to GuC. Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 65a3e7fdb2b2..2e996b77df80 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -108,7 +108,6 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) GEM_BUG_ON(!slpc->vma); - drm_clflush_virt_range(slpc->vaddr, sizeof(u32)); data = slpc->vaddr; return data->header.global_state; @@ -172,8 +171,6 @@ static int slpc_query_task_state(struct intel_guc_slpc *slpc) drm_err(&i915->drm, "Failed to query task state (%pe)\n", ERR_PTR(ret)); - drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES); - LGTM. Reviewed-by: Vinay Belgaumkar return ret; }
Re: [Intel-gfx] [PATCH 12/27] drm/i915/guc: Add multi-lrc context registration
On 8/20/2021 15:44, Matthew Brost wrote: Add multi-lrc context registration H2G. In addition a workqueue and process descriptor are setup during multi-lrc context registration as these data structures are needed for multi-lrc submission. Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gt/intel_context_types.h | 12 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 5 + drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 2 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 109 +- 4 files changed, 126 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 0fafc178cf2c..6f567ebeb039 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -232,8 +232,20 @@ struct intel_context { /** @parent: pointer to parent if child */ struct intel_context *parent; + + /** @guc_wqi_head: head pointer in work queue */ + u16 guc_wqi_head; + /** @guc_wqi_tail: tail pointer in work queue */ + u16 guc_wqi_tail; + These should be in the 'guc_state' sub-struct? Would be good to keep all GuC specific content in one self-contained struct. Especially given the other child/parent fields are not going to be guc_ prefixed any more. /** @guc_number_children: number of children if parent */ u8 guc_number_children; + + /** +* @parent_page: page in context used by parent for work queue, Maybe 'page in context record'? Otherwise, exactly what 'context' is meant here? It isn't the 'struct intel_context'. The context record is saved as 'ce->state' / 'ce->lrc_reg_state', yes? Is it possible to link to either of those fields? Probably not given that they don't appear to have any kerneldoc description :(. Maybe add that in too :). +* work queue descriptor Later on, it is described as 'process descriptor and work queue'. It would be good to be consistent. 
+*/ + u8 parent_page; }; #ifdef CONFIG_DRM_I915_SELFTEST diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bb4af4977920..0ddbad4e062a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -861,6 +861,11 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) context_size += PAGE_SIZE; } + if (intel_context_is_parent(ce)) { + ce->parent_page = context_size / PAGE_SIZE; + context_size += PAGE_SIZE; + } + obj = i915_gem_object_create_lmem(engine->i915, context_size, 0); if (IS_ERR(obj)) obj = i915_gem_object_create_shmem(engine->i915, context_size); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index fa4be13c8854..0e600a3b8f1e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -52,7 +52,7 @@ #define GUC_DOORBELL_INVALID 256 -#define GUC_WQ_SIZE (PAGE_SIZE * 2) +#define GUC_WQ_SIZE(PAGE_SIZE / 2) Is this size actually dictated by the GuC API? Or is it just a driver level decision? If the latter, shouldn't this be below instead? /* Work queue item header definitions */ #define WQ_STATUS_ACTIVE 1 diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 14b24298cdd7..dbcb9ab28a9a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -340,6 +340,39 @@ static struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } +/* + * When using multi-lrc submission an extra page in the context state is + * reserved for the process descriptor and work queue. + * + * The layout of this page is below: + * 0 guc_process_desc + * ... unused + * PAGE_SIZE / 2 work queue start + * ... 
work queue + * PAGE_SIZE - 1 work queue end + */ +#define WQ_OFFSET (PAGE_SIZE / 2) Can this not be derived from GUC_WQ_SIZE given that the two are fundamentally linked? E.g. '#define WQ_OFFSET (PAGE_SIZE - GUC_WQ_SIZE)'? And maybe have a '#define WQ_TOTAL_SIZE PAGE_SIZE' and use that in all of WQ_OFFSET, GUC_WQ_SIZE and the allocation itself in intel_lrc.c? Also, the process descriptor is actually an array of descriptors sized by the number of children? Or am I misunderstanding the code below? If so, shouldn't there be a 'COMPILE_BUG_ON((MAX_ENGINE_INSTANCE * sizeof(descriptor)) < (WQ_
[PATCH v3 12/12] drm/i915/ttm: enable shmem tt backend
Enable shmem tt backend, and enable shrinking. Signed-off-by: Matthew Auld Cc: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index e758de336b96..6199e8c067ff 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1050,6 +1050,7 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { .name = "i915_gem_object_ttm", + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, .get_pages = i915_ttm_get_pages, .put_pages = i915_ttm_put_pages, -- 2.26.3
[PATCH v3 11/12] drm/i915/ttm: make evicted shmem pages visible to the shrinker
We currently just evict lmem objects to system memory when under memory pressure. For this case we lack the usual object mm.pages, which effectively hides the pages from the i915-gem shrinker, until we actually "attach" the TT to the object, or in the case of lmem-only objects it just gets migrated back to lmem when touched again. For such cases we can make the object visible as soon as we populate the TT with shmem pages, and then hide it again when doing the unpopulate. Signed-off-by: Matthew Auld Cc: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 29 +++- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 11 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 561d6bd0a5c9..28b831c78c47 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -540,6 +540,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj); void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); +void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj); static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 6b38e4414c5a..02175e8ad069 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -482,13 +482,12 @@ void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } -static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, - struct list_head *head) +static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object 
*obj, + struct list_head *head) { struct drm_i915_private *i915 = obj_to_i915(obj); unsigned long flags; - GEM_BUG_ON(!i915_gem_object_has_pages(obj)); if (!i915_gem_object_is_shrinkable(obj)) return; @@ -507,6 +506,21 @@ static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } +/** + * __i915_gem_object_make_shrinkable - Move the object to the tail of the + * shrinkable list. Objects on this list might be swapped out. Used with + * WILLNEED objects. + * @obj: The GEM object. + * + * DO NOT USE. This is intended to be called on very special objects that don't + * yet have mm.pages, but are guaranteed to have potentially reclaimable pages + * underneath. + */ +void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) +{ + ___i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.shrink_list); +} /** * i915_gem_object_make_shrinkable - Move the object to the tail of the @@ -518,8 +532,8 @@ static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, */ void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) { - __i915_gem_object_make_shrinkable(obj, - &obj_to_i915(obj)->mm.shrink_list); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + __i915_gem_object_make_shrinkable(obj); } /** @@ -533,6 +547,7 @@ void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) */ void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj) { - __i915_gem_object_make_shrinkable(obj, - &obj_to_i915(obj)->mm.purge_list); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + ___i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.purge_list); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 079a7a655ede..e758de336b96 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -234,6 +234,15 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device 
*bdev, if (ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) ttm->page_flags &= ~TTM_PAGE_FLAG_SWAPPED; + /* +* Even if we lack mm.pages for this object(which will be the case when +* something is evicted to system memory by TTM), we still want to make +* this object visible to the shrinker, since the underlying ttm_tt +* still has the real shmem pages. When unpopulating the tt(possibly due +* to shrinking) we hide it again from the shrinker. +*/ + __i915_gem_object_make_shrinkabl
[PATCH v3 08/12] drm/i915/ttm: add tt shmem backend
For cached objects we can allocate our pages directly in shmem. This should make it possible(in a later patch) to utilise the existing i915-gem shrinker code for such objects. For now this is still disabled. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Christian König --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 8 + drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 14 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c| 217 ++--- 3 files changed, 209 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 48112b9d76df..561d6bd0a5c9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -618,6 +618,14 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, enum intel_memory_type type); +struct sg_table *shmem_alloc_st(struct drm_i915_private *i915, + size_t size, struct intel_memory_region *mr, + struct address_space *mapping, + unsigned int max_segment); +void shmem_free_st(struct sg_table *st, struct address_space *mapping, + bool dirty, bool backup); +void __shmem_writeback(size_t size, struct address_space *mapping); + #ifdef CONFIG_MMU_NOTIFIER static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 36b711ae9e28..19e55cc29a15 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -25,8 +25,8 @@ static void check_release_pagevec(struct pagevec *pvec) cond_resched(); } -static void shmem_free_st(struct sg_table *st, struct address_space *mapping, - bool dirty, bool backup) +void shmem_free_st(struct sg_table *st, struct address_space *mapping, + bool dirty, bool backup) { struct sgt_iter sgt_iter; struct pagevec pvec; @@ -52,10 +52,10 @@ static void shmem_free_st(struct 
sg_table *st, struct address_space *mapping, kfree(st); } -static struct sg_table *shmem_alloc_st(struct drm_i915_private *i915, - size_t size, struct intel_memory_region *mr, - struct address_space *mapping, - unsigned int max_segment) +struct sg_table *shmem_alloc_st(struct drm_i915_private *i915, + size_t size, struct intel_memory_region *mr, + struct address_space *mapping, + unsigned int max_segment) { const unsigned long page_count = size / PAGE_SIZE; unsigned long i; @@ -300,7 +300,7 @@ shmem_truncate(struct drm_i915_gem_object *obj) obj->mm.pages = ERR_PTR(-EFAULT); } -static void __shmem_writeback(size_t size, struct address_space *mapping) +void __shmem_writeback(size_t size, struct address_space *mapping) { struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index aefaf9293005..a93e3a9ef698 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -32,11 +32,17 @@ */ #define I915_TTM_MAX_PLACEMENTS INTEL_REGION_UNKNOWN +/* In our ttm backend external objects translate to the shmem_tt backend */ +#define I915_TTM_TT_SHMEM TTM_PAGE_FLAG_EXTERNAL_MAPPABLE + /** * struct i915_ttm_tt - TTM page vector with additional private information * @ttm: The base TTM page vector. * @dev: The struct device used for dma mapping and unmapping. * @cached_st: The cached scatter-gather table. + * @obj: The GEM object. Should be valid while we have a valid bo->ttm. + * @filp: The shmem file, if using shmem backend. + * @backup: Swap out the pages when unpopulating, if using shmem backend. * * Note that DMA may be going on right up to the point where the page- * vector is unpopulated in delayed destroy. 
Hence keep the @@ -48,6 +54,9 @@ struct i915_ttm_tt { struct ttm_tt ttm; struct device *dev; struct sg_table *cached_st; + struct drm_i915_gem_object *obj; + struct file *filp; + bool backup; }; static const struct ttm_place sys_placement_flags = { @@ -167,12 +176,105 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, placement->busy_placement = busy; } +static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, + struct ttm_tt *ttm, + struct ttm_operation_ctx *ctx) +{ + struct drm_i915_private *i915 = container_of(bdev, typ
[PATCH v3 10/12] drm/i915: try to simplify make_{un}shrinkable
Drop the atomic shrink_pin stuff, and just have make_{un}shrinkable update the shrinker visible lists immediately. This at least simplifies the next patch, and does make the behaviour more obvious. The potential downside is that make_unshrinkable now grabs a global lock even when the object itself is no longer shrinkable(transitioning from purgeable <-> shrinkable doesn't seem to be a thing), for example in the ppGTT insertion paths we should now be careful not to needlessly call make_unshrinkable multiple times. Outside of that there is some fallout in intel_context which relies on nesting calls to shrink_pin. Signed-off-by: Matthew Auld Cc: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 9 .../gpu/drm/i915/gem/i915_gem_object_types.h | 3 +- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 16 +- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 52 +-- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 1 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 1 - drivers/gpu/drm/i915/gt/intel_context.c | 9 +--- 7 files changed, 41 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 6fb9afb65034..e8265a432fcb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -305,15 +305,6 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) */ atomic_inc(&i915->mm.free_count); - /* -* This serializes freeing with the shrinker. Since the free -* is delayed, first by RCU then by the workqueue, we want the -* shrinker to be able to free pages of unreferenced objects, -* or else we may oom whilst there are plenty of deferred -* freed objects. 
-*/ - i915_gem_object_make_unshrinkable(obj); - /* * Since we require blocking on struct_mutex to unbind the freed * object from the GPU before releasing resources back to the diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2471f36aaff3..a035ac26a090 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -453,7 +453,6 @@ struct drm_i915_gem_object { * instead go through the pin/unpin interfaces. */ atomic_t pages_pin_count; - atomic_t shrink_pin; /** * Priority list of potential placements for this object. @@ -514,7 +513,7 @@ struct drm_i915_gem_object { struct i915_gem_object_page_iter get_dma_page; /** -* Element within i915->mm.unbound_list or i915->mm.bound_list, +* Element within i915->mm.shrink_list or i915->mm.purge_list, * locked by i915->mm.obj_lock. */ struct list_head link; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 8eb1c3a6fc9c..f0df1394d7f6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -64,28 +64,16 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj)); i915_gem_object_set_tiling_quirk(obj); GEM_BUG_ON(!list_empty(&obj->mm.link)); - atomic_inc(&obj->mm.shrink_pin); shrinkable = false; } if (shrinkable) { - struct list_head *list; - unsigned long flags; - assert_object_held(obj); - spin_lock_irqsave(&i915->mm.obj_lock, flags); - - i915->mm.shrink_count++; - i915->mm.shrink_memory += obj->base.size; if (obj->mm.madv != I915_MADV_WILLNEED) - list = &i915->mm.purge_list; + i915_gem_object_make_purgeable(obj); else - list = &i915->mm.shrink_list; - list_add_tail(&obj->mm.link, list); - - atomic_set(&obj->mm.shrink_pin, 0); - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + i915_gem_object_make_shrinkable(obj); } } diff --git 
a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index e382b7f2353b..6b38e4414c5a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -455,23 +455,26 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, #define obj_to_i915(obj__) to_i915((obj__)->base.dev) +/** + * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By + * default all object types that support shrinking(see IS_SHRINKABLE), will also + * make the
[PATCH v3 06/12] drm/ttm: add TTM_PAGE_FLAG_EXTERNAL_MAPPABLE
In commit: commit 667a50db0477d47fdff01c666f5ee1ce26b5264c Author: Thomas Hellstrom Date: Fri Jan 3 11:17:18 2014 +0100 drm/ttm: Refuse to fault (prime-) imported pages we introduced the restriction that imported pages should not be directly mappable through TTM(this also extends to userptr). In the next patch we want to introduce a shmem_tt backend, which should follow all the existing rules with TTM_PAGE_FLAG_EXTERNAL, since it will need to handle swapping itself, but with the above mapping restriction lifted. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Christian König --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 -- include/drm/ttm/ttm_tt.h| 7 +++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 708390588c7c..fd6e18f12f50 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -163,8 +163,10 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, * (if at all) by redirecting mmap to the exporter. */ if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL)) { - dma_resv_unlock(bo->base.resv); - return VM_FAULT_SIGBUS; + if (!(bo->ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL_MAPPABLE)) { + dma_resv_unlock(bo->base.resv); + return VM_FAULT_SIGBUS; + } } return 0; diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 7f54a83c95ef..800c9edb3e10 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -66,11 +66,18 @@ struct ttm_tt { * Note that enum ttm_bo_type.ttm_bo_type_sg objects will always enable * this flag. * +* TTM_PAGE_FLAG_EXTERNAL_MAPPABLE: Same behaviour as +* TTM_PAGE_FLAG_EXTERNAL, but with the reduced restriction that it is +* still valid to use TTM to map the pages directly. This is useful when +* implementing a ttm_tt backend which still allocates driver owned +* pages underneath(say with shmem). +* * TTM_PAGE_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. 
*/ #define TTM_PAGE_FLAG_SWAPPED (1 << 0) #define TTM_PAGE_FLAG_ZERO_ALLOC (1 << 1) #define TTM_PAGE_FLAG_EXTERNAL (1 << 2) +#define TTM_PAGE_FLAG_EXTERNAL_MAPPABLE(1 << 3 | TTM_PAGE_FLAG_EXTERNAL) #define TTM_PAGE_FLAG_PRIV_POPULATED (1 << 31) uint32_t page_flags; -- 2.26.3
[PATCH v3 09/12] drm/i915/ttm: use cached system pages when evicting lmem
This should let us do an accelerated copy directly to the shmem pages when temporarily moving lmem-only objects, where the i915-gem shrinker can later kick in to swap out the pages, if needed. Signed-off-by: Matthew Auld Cc: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index a93e3a9ef698..079a7a655ede 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -126,11 +126,11 @@ static enum ttm_caching i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) { /* -* Objects only allowed in system get cached cpu-mappings. -* Other objects get WC mapping for now. Even if in system. +* Objects only allowed in system get cached cpu-mappings, or when +* evicting lmem-only buffers to system for swapping. Other objects get +* WC mapping for now. Even if in system. */ - if (obj->mm.region->type == INTEL_MEMORY_SYSTEM && - obj->mm.n_placements <= 1) + if (obj->mm.n_placements <= 1) return ttm_cached; return ttm_write_combined; -- 2.26.3
[PATCH v3 07/12] drm/i915/gem: Break out some shmem backend utils
From: Thomas Hellström Break out some shmem backend utils for future reuse by the TTM backend: shmem_alloc_st(), shmem_free_st() and __shmem_writeback() which we can use to provide a shmem-backed TTM page pool for cached-only TTM buffer objects. Main functional change here is that we now compute the page sizes using the dma segments rather than using the physical page address segments. v2(Reported-by: kernel test robot ) - Make sure we initialise the mapping on the error path in shmem_get_pages() Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 181 +- 1 file changed, 106 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 11f072193f3b..36b711ae9e28 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -25,46 +25,61 @@ static void check_release_pagevec(struct pagevec *pvec) cond_resched(); } -static int shmem_get_pages(struct drm_i915_gem_object *obj) +static void shmem_free_st(struct sg_table *st, struct address_space *mapping, + bool dirty, bool backup) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct intel_memory_region *mem = obj->mm.region; - const unsigned long page_count = obj->base.size / PAGE_SIZE; + struct sgt_iter sgt_iter; + struct pagevec pvec; + struct page *page; + + mapping_clear_unevictable(mapping); + + pagevec_init(&pvec); + for_each_sgt_page(page, sgt_iter, st) { + if (dirty) + set_page_dirty(page); + + if (backup) + mark_page_accessed(page); + + if (!pagevec_add(&pvec, page)) + check_release_pagevec(&pvec); + } + if (pagevec_count(&pvec)) + check_release_pagevec(&pvec); + + sg_free_table(st); + kfree(st); +} + +static struct sg_table *shmem_alloc_st(struct drm_i915_private *i915, + size_t size, struct intel_memory_region *mr, + struct address_space *mapping, + unsigned int max_segment) +{ + const unsigned 
long page_count = size / PAGE_SIZE; unsigned long i; - struct address_space *mapping; struct sg_table *st; struct scatterlist *sg; - struct sgt_iter sgt_iter; struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ - unsigned int max_segment = i915_sg_segment_size(); - unsigned int sg_page_sizes; gfp_t noreclaim; int ret; - /* -* Assert that the object is not currently in any GPU domain. As it -* wasn't in the GTT, there shouldn't be any way it could have been in -* a GPU cache -*/ - GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); - GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); - /* * If there's no chance of allocating enough pages for the whole * object, bail early. */ - if (obj->base.size > resource_size(&mem->region)) - return -ENOMEM; + if (size > resource_size(&mr->region)) + return ERR_PTR(-ENOMEM); st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) - return -ENOMEM; + return ERR_PTR(-ENOMEM); -rebuild_st: if (sg_alloc_table(st, page_count, GFP_KERNEL)) { kfree(st); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } /* @@ -73,14 +88,12 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) * * Fail silently without starting the shrinker */ - mapping = obj->base.filp->f_mapping; mapping_set_unevictable(mapping); noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); noreclaim |= __GFP_NORETRY | __GFP_NOWARN; sg = st->sgl; st->nents = 0; - sg_page_sizes = 0; for (i = 0; i < page_count; i++) { const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND, @@ -135,10 +148,9 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) if (!i || sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { - if (i) { - sg_page_sizes |= sg->length; + if (i) sg = sg_next(sg); - } + st->nents++; sg_set_page(sg, page, PAGE_SIZE, 0); } else { @@ -149,14 +161,65 @@ static int shmem_get_pages(struct drm_i915_gem_object *
[PATCH v3 05/12] drm/ttm: add some kernel-doc for TTM_PAGE_FLAG_*
Move it to inline kernel-doc, otherwise we can't add empty lines it seems. Also drop the kernel-doc for pages_list, which doesn't seem to exist, and get rid of all the strange holes. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Christian König --- include/drm/ttm/ttm_tt.h | 57 ++-- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index a6c284c21e72..7f54a83c95ef 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -38,35 +38,54 @@ struct ttm_resource; struct ttm_buffer_object; struct ttm_operation_ctx; -#define TTM_PAGE_FLAG_SWAPPED (1 << 4) -#define TTM_PAGE_FLAG_ZERO_ALLOC (1 << 6) -#define TTM_PAGE_FLAG_EXTERNAL (1 << 8) - -#define TTM_PAGE_FLAG_PRIV_POPULATED (1 << 31) - /** - * struct ttm_tt - * - * @pages: Array of pages backing the data. - * @page_flags: see TTM_PAGE_FLAG_* - * @num_pages: Number of pages in the page array. - * @sg: for SG objects via dma-buf - * @dma_address: The DMA (bus) addresses of the pages - * @swap_storage: Pointer to shmem struct file for swap storage. - * @pages_list: used by some page allocation backend - * @caching: The current caching state of the pages, see enum ttm_caching. - * - * This is a structure holding the pages, caching- and aperture binding - * status for a buffer object that isn't backed by fixed (VRAM / AGP) + * struct ttm_tt - This is a structure holding the pages, caching- and aperture + * binding status for a buffer object that isn't backed by fixed (VRAM / AGP) * memory. */ struct ttm_tt { + /** @pages: Array of pages backing the data. */ struct page **pages; + /** +* @page_flags: The page flags. +* +* Supported values: +* +* TTM_PAGE_FLAG_SWAPPED: Set if the pages have been swapped out. +* Calling ttm_tt_populate() will swap the pages back in, and unset the +* flag. +* +* TTM_PAGE_FLAG_ZERO_ALLOC: Set if the pages will be zeroed on +* allocation. 
+* +* TTM_PAGE_FLAG_EXTERNAL: Set if the underlying pages were allocated +* externally, like with dma-buf or userptr. This effectively disables +* TTM swapping out such pages. Also important is to prevent TTM from +* ever directly mapping these pages. +* +* Note that enum ttm_bo_type.ttm_bo_type_sg objects will always enable +* this flag. +* +* TTM_PAGE_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. +*/ +#define TTM_PAGE_FLAG_SWAPPED (1 << 0) +#define TTM_PAGE_FLAG_ZERO_ALLOC (1 << 1) +#define TTM_PAGE_FLAG_EXTERNAL (1 << 2) + +#define TTM_PAGE_FLAG_PRIV_POPULATED (1 << 31) uint32_t page_flags; + /** @num_pages: Number of pages in the page array. */ uint32_t num_pages; + /** @sg: for SG objects via dma-buf. */ struct sg_table *sg; + /** @dma_address: The DMA (bus) addresses of the pages. */ dma_addr_t *dma_address; + /** @swap_storage: Pointer to shmem struct file for swap storage. */ struct file *swap_storage; + /** +* @caching: The current caching state of the pages, see enum +* ttm_caching. +*/ enum ttm_caching caching; }; -- 2.26.3
[PATCH v3 04/12] drm/ttm: s/FLAG_SG/FLAG_EXTERNAL/
It covers more than just ttm_bo_type_sg usage, like with say dma-buf, since one other user is userptr in amdgpu, and in the future we might have some more. Hence EXTERNAL is likely a more suitable name. Suggested-by: Christian König Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 +- drivers/gpu/drm/nouveau/nouveau_bo.c| 4 ++-- drivers/gpu/drm/radeon/radeon_ttm.c | 8 drivers/gpu/drm/ttm/ttm_bo.c| 2 +- drivers/gpu/drm/ttm/ttm_bo_vm.c | 2 +- drivers/gpu/drm/ttm/ttm_tt.c| 10 +- include/drm/ttm/ttm_tt.h| 6 +++--- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c5fa6e62f6ca..a6d606f91dfd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -894,7 +894,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, DRM_ERROR("failed to pin userptr\n"); return r; } - } else if (ttm->page_flags & TTM_PAGE_FLAG_SG) { + } else if (ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL) { if (!ttm->sg) { struct dma_buf_attachment *attach; struct sg_table *sgt; @@ -1147,7 +1147,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, return 0; } - if (ttm->page_flags & TTM_PAGE_FLAG_SG) + if (ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL) return 0; ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx); @@ -1179,7 +1179,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, return; } - if (ttm->page_flags & TTM_PAGE_FLAG_SG) + if (ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL) return; adev = amdgpu_ttm_adev(bdev); @@ -1210,8 +1210,8 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, return -ENOMEM; } - /* Set TTM_PAGE_FLAG_SG before populate but after create. */ - bo->ttm->page_flags |= TTM_PAGE_FLAG_SG; + /* Set TTM_PAGE_FLAG_EXTERNAL before populate but after create. 
*/ + bo->ttm->page_flags |= TTM_PAGE_FLAG_EXTERNAL; gtt = (void *)bo->ttm; gtt->userptr = addr; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 33dca2565cca..ba0fec252df7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1249,7 +1249,7 @@ nouveau_ttm_tt_populate(struct ttm_device *bdev, struct ttm_tt *ttm_dma = (void *)ttm; struct nouveau_drm *drm; struct device *dev; - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL); if (ttm_tt_is_populated(ttm)) return 0; @@ -1272,7 +1272,7 @@ nouveau_ttm_tt_unpopulate(struct ttm_device *bdev, { struct nouveau_drm *drm; struct device *dev; - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL); if (slave) return; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 7793249bc549..d891491b6da8 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -545,14 +545,14 @@ static int radeon_ttm_tt_populate(struct ttm_device *bdev, { struct radeon_device *rdev = radeon_get_rdev(bdev); struct radeon_ttm_tt *gtt = radeon_ttm_tt_to_gtt(rdev, ttm); - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL); if (gtt && gtt->userptr) { ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) return -ENOMEM; - ttm->page_flags |= TTM_PAGE_FLAG_SG; + ttm->page_flags |= TTM_PAGE_FLAG_EXTERNAL; return 0; } @@ -569,13 +569,13 @@ static void radeon_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm { struct radeon_device *rdev = radeon_get_rdev(bdev); struct radeon_ttm_tt *gtt = radeon_ttm_tt_to_gtt(rdev, ttm); - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL); radeon_ttm_tt_unbind(bdev, ttm); if (gtt && gtt->userptr) 
{ kfree(ttm->sg); - ttm->page_flags &= ~TTM_PAGE_FLAG_SG; + ttm->page_flags &= ~TTM_PAGE_FLAG_EXTERNAL; return; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3b22c0013dbf..642dc7ce3081 100644 --- a/drivers/gpu/dr
[PATCH v3 03/12] drm/ttm: remove TTM_PAGE_FLAG_NO_RETRY
No longer used it seems. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Christian König --- include/drm/ttm/ttm_tt.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 89b15d673b22..842ce756213c 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -41,7 +41,6 @@ struct ttm_operation_ctx; #define TTM_PAGE_FLAG_SWAPPED (1 << 4) #define TTM_PAGE_FLAG_ZERO_ALLOC (1 << 6) #define TTM_PAGE_FLAG_SG (1 << 8) -#define TTM_PAGE_FLAG_NO_RETRY (1 << 9) #define TTM_PAGE_FLAG_PRIV_POPULATED (1 << 31) -- 2.26.3
[PATCH v3 02/12] drm/ttm: move ttm_tt_{add, clear}_mapping into amdgpu
Now that setting page->index shouldn't be needed anymore, we are just left with setting page->mapping, and here it looks like amdgpu is the only user, where pointing the page->mapping at the dev_mapping is used to verify that the pages do indeed belong to the device, if userspace later tries to touch them. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 27 - drivers/gpu/drm/ttm/ttm_tt.c| 25 --- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1129e17e9f09..c5fa6e62f6ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1107,6 +1107,24 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, return &gtt->ttm; } +static void amdgpu_ttm_tt_add_mapping(struct ttm_device *bdev, + struct ttm_tt *ttm) +{ + pgoff_t i; + + for (i = 0; i < ttm->num_pages; ++i) + ttm->pages[i]->mapping = bdev->dev_mapping; +} + +static void amdgpu_ttm_tt_clear_mapping(struct ttm_tt *ttm) +{ + struct page **page = ttm->pages; + pgoff_t i; + + for (i = 0; i < ttm->num_pages; ++i) + (*page)->mapping = NULL; +} + /* * amdgpu_ttm_tt_populate - Map GTT pages visible to the device * @@ -1119,6 +1137,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; + int ret; /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */ if (gtt->userptr) { @@ -1131,7 +1150,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, if (ttm->page_flags & TTM_PAGE_FLAG_SG) return 0; - return ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx); + ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx); + if (ret) + return ret; + + amdgpu_ttm_tt_add_mapping(bdev, ttm); + return 0; } /* @@ -1159,6 +1183,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, return; adev = 
amdgpu_ttm_adev(bdev); + amdgpu_ttm_tt_clear_mapping(ttm); return ttm_pool_free(&adev->mman.bdev.pool, ttm); } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 1cc04c224988..980ecb079b2c 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -289,17 +289,6 @@ int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm, return ret; } -static void ttm_tt_add_mapping(struct ttm_device *bdev, struct ttm_tt *ttm) -{ - pgoff_t i; - - if (ttm->page_flags & TTM_PAGE_FLAG_SG) - return; - - for (i = 0; i < ttm->num_pages; ++i) - ttm->pages[i]->mapping = bdev->dev_mapping; -} - int ttm_tt_populate(struct ttm_device *bdev, struct ttm_tt *ttm, struct ttm_operation_ctx *ctx) { @@ -336,7 +325,6 @@ int ttm_tt_populate(struct ttm_device *bdev, if (ret) goto error; - ttm_tt_add_mapping(bdev, ttm); ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED; if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) { ret = ttm_tt_swapin(ttm); @@ -359,24 +347,11 @@ int ttm_tt_populate(struct ttm_device *bdev, } EXPORT_SYMBOL(ttm_tt_populate); -static void ttm_tt_clear_mapping(struct ttm_tt *ttm) -{ - pgoff_t i; - struct page **page = ttm->pages; - - if (ttm->page_flags & TTM_PAGE_FLAG_SG) - return; - - for (i = 0; i < ttm->num_pages; ++i) - (*page)->mapping = NULL; -} - void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) { if (!ttm_tt_is_populated(ttm)) return; - ttm_tt_clear_mapping(ttm); if (bdev->funcs->ttm_tt_unpopulate) bdev->funcs->ttm_tt_unpopulate(bdev, ttm); else -- 2.26.3
[PATCH v3 01/12] drm/ttm: stop setting page->index for the ttm_tt
In commit: commit 58aa6622d32af7d2c08d45085f44c54554a16ed7 Author: Thomas Hellstrom Date: Fri Jan 3 11:47:23 2014 +0100 drm/ttm: Correctly set page mapping and -index members we started setting the page->mapping and page->index to point to the virtual address space, if the pages were faulted with TTM. Apparently this was needed for core-mm to able to reverse lookup the virtual address given the struct page, and potentially unmap it from the page tables. However as pointed out by Thomas, since we are now using PFN_MAP, instead of say PFN_MIXED, this should no longer be the case. There was also apparently some usecase in vmwgfx which needed this for dirty tracking, but that also doesn't appear to be the case anymore, as pointed out by Thomas. We still need keep the page->mapping for now, since that is still needed for different reasons, but we try to address that in the next patch. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Christian König --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 2 -- drivers/gpu/drm/ttm/ttm_tt.c| 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index f56be5bc0861..906ec8a1bf5a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -346,8 +346,6 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, } else if (unlikely(!page)) { break; } - page->index = drm_vma_node_start(&bo->base.vma_node) + - page_offset; pfn = page_to_pfn(page); } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index dae52433beeb..1cc04c224988 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -367,10 +367,8 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm) if (ttm->page_flags & TTM_PAGE_FLAG_SG) return; - for (i = 0; i < ttm->num_pages; ++i) { + for (i = 0; i < ttm->num_pages; ++i) (*page)->mapping = NULL; - (*page++)->index = 0; - } } void ttm_tt_unpopulate(struct ttm_device *bdev, struct 
ttm_tt *ttm) -- 2.26.3
Re: [PATCH v3 4/8] powerpc/pseries/svm: Add a powerpc version of cc_platform_has()
On Wed, Sep 15, 2021 at 07:18:34PM +0200, Christophe Leroy wrote: > Could you please provide more explicit explanation why inlining such an > helper is considered as bad practice and messy ? Tom already told you to look at the previous threads. Let's read them together. This one, for example: https://lore.kernel.org/lkml/ysscwvpxevxw%2f...@infradead.org/ | > To take it out of line, I'm leaning towards the latter, creating a new | > file that is built based on the ARCH_HAS_PROTECTED_GUEST setting. | | Yes. In general everytime architectures have to provide the prototype | and not just the implementation of something we end up with a giant mess | sooner or later. In a few cases that is still warranted due to | performance concerns, but i don't think that is the case here. So I think what Christoph means here is that you want to have the generic prototype defined in a header and arches get to implement it exactly to the letter so that there's no mess. As to what mess exactly, I'd let him explain that. > Because as demonstrated in my previous response some days ago, taking that > outline ends up with an unneccessary ugly generated code and we don't > benefit front GCC's capability to fold in and opt out unreachable code. And this is real fast path where a couple of instructions matter or what? set_memory_encrypted/_decrypted doesn't look like one to me. > I can't see your point here. Inlining the function wouldn't add any > ifdeffery as far as I can see. If the function is touching defines etc, they all need to be visible. If that function needs to call other functions - which is the case on x86, perhaps not so much on power - then you need to either ifdef around them or provide stubs with ifdeffery in the headers. And you need to make them global functions instead of keeping them static to the same compilation unit, etc, etc. With a separate compilation unit, you don't need any of that and it is all kept in that single file. -- Regards/Gruss, Boris. 
https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH 1/2] drm/sched: fix the bug of time out calculation(v4)
Pushed Andrey On 2021-09-15 7:45 a.m., Christian König wrote: Yes, I think so as well. Andrey can you push this? Christian. Am 15.09.21 um 00:59 schrieb Grodzovsky, Andrey: AFAIK this one is independent. Christian, can you confirm ? Andrey *From:* amd-gfx on behalf of Alex Deucher *Sent:* 14 September 2021 15:33 *To:* Christian König *Cc:* Liu, Monk ; amd-gfx list ; Maling list - DRI developers *Subject:* Re: [PATCH 1/2] drm/sched: fix the bug of time out calculation(v4) Was this fix independent of the other discussions? Should this be applied to drm-misc? Alex On Wed, Sep 1, 2021 at 4:42 PM Alex Deucher wrote: > > On Wed, Sep 1, 2021 at 2:50 AM Christian König > wrote: > > > > Am 01.09.21 um 02:46 schrieb Monk Liu: > > > issue: > > > in cleanup_job the cancle_delayed_work will cancel a TO timer > > > even the its corresponding job is still running. > > > > > > fix: > > > do not cancel the timer in cleanup_job, instead do the cancelling > > > only when the heading job is signaled, and if there is a "next" job > > > we start_timeout again. > > > > > > v2: > > > further cleanup the logic, and do the TDR timer cancelling if the signaled job > > > is the last one in its scheduler. > > > > > > v3: > > > change the issue description > > > remove the cancel_delayed_work in the begining of the cleanup_job > > > recover the implement of drm_sched_job_begin. > > > > > > v4: > > > remove the kthread_should_park() checking in cleanup_job routine, > > > we should cleanup the signaled job asap > > > > > > TODO: > > > 1)introduce pause/resume scheduler in job_timeout to serial the handling > > > of scheduler and job_timeout. > > > 2)drop the bad job's del and insert in scheduler due to above serialization > > > (no race issue anymore with the serialization) > > > > > > tested-by: jingwen > > > Signed-off-by: Monk Liu > > > > Reviewed-by: Christian König > > > > Are you planning to push this to drm-misc? 
> > Alex > > > > > --- > > > drivers/gpu/drm/scheduler/sched_main.c | 26 +- > > > 1 file changed, 9 insertions(+), 17 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c > > > index a2a9536..3e0bbc7 100644 > > > --- a/drivers/gpu/drm/scheduler/sched_main.c > > > +++ b/drivers/gpu/drm/scheduler/sched_main.c > > > @@ -676,15 +676,6 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) > > > { > > > struct drm_sched_job *job, *next; > > > > > > - /* > > > - * Don't destroy jobs while the timeout worker is running OR thread > > > - * is being parked and hence assumed to not touch pending_list > > > - */ > > > - if ((sched->timeout != MAX_SCHEDULE_TIMEOUT && > > > - !cancel_delayed_work(&sched->work_tdr)) || > > > - kthread_should_park()) > > > - return NULL; > > > - > > > spin_lock(&sched->job_list_lock); > > > > > > job = list_first_entry_or_null(&sched->pending_list, > > > @@ -693,17 +684,21 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) > > > if (job && dma_fence_is_signaled(&job->s_fence->finished)) { > > > /* remove job from pending_list */ > > > list_del_init(&job->list); > > > + > > > + /* cancel this job's TO timer */ > > > + cancel_delayed_work(&sched->work_tdr); > > > /* make the scheduled timestamp more accurate */ > > > next = list_first_entry_or_null(&sched->pending_list, > > > typeof(*next), list); > > > - if (next) > > > + > > > + if (next) { > > > next->s_fence->scheduled.timestamp = > > > job->s_fence->finished.timestamp; > > > - > > > + /* start TO timer for next job */ > > > + drm_sched_start_timeout(sched); > > > + } > > > } else { > > > job = NULL; > > > - /* queue timeout for next job */ > > > - drm_sched_start_timeout(sched); > > > } > > > > > > spin_unlock(&sched->job_list_lock); > > > @@ -791,11 +786,8 @@ static int drm_sched_main(void *param) > > > (entity = drm_sched_select_entity(sched))) || > > > kthread_should_stop()); > > > > > > - if (cleanup_job) { 
> > > + if (cleanup_job) > > > sched->ops->free_job(cleanup_job); > > > - /* queue timeout for next job */ > > > - drm_sched_start_timeout(sched); > > > - } > > > > > > if (!entity) > > > continue; > >
[PATCH v2] drm/sun4i: dw-hdmi: Fix HDMI PHY clock setup
Recent rework, which made HDMI PHY driver a platform device, inadvertely reversed clock setup order. HW is very touchy about it. Proper way is to handle controllers resets and clocks first and HDMI PHYs second. Currently, without this fix, first mode set completely fails (nothing on HDMI monitor) on H3 era PHYs. On H6, it still somehow work. Move HDMI PHY reset & clocks handling to sun8i_hdmi_phy_init() which will assure that code is executed after controllers reset & clocks are handled. Additionally, add sun8i_hdmi_phy_deinit() which will deinit them at controllers driver unload. Tested on A64, H3, H6 and R40. Fixes: 9bf3797796f5 ("drm/sun4i: dw-hdmi: Make HDMI PHY into a platform device") Signed-off-by: Jernej Skrabec --- Changes from v1: - if sun8i_hdmi_phy_init() fails, go to error hanling instead of returning immediately - rename err_deassert_rst_phy -> err_assert_rst_phy drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c | 7 +- drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h | 4 +- drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c | 97 ++ 3 files changed, 61 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index f75fb157f2ff..016b877051da 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -216,11 +216,13 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master, goto err_disable_clk_tmds; } + ret = sun8i_hdmi_phy_init(hdmi->phy); + if (ret) + goto err_disable_clk_tmds; + drm_encoder_helper_add(encoder, &sun8i_dw_hdmi_encoder_helper_funcs); drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS); - sun8i_hdmi_phy_init(hdmi->phy); - plat_data->mode_valid = hdmi->quirks->mode_valid; plat_data->use_drm_infoframe = hdmi->quirks->use_drm_infoframe; sun8i_hdmi_phy_set_ops(hdmi->phy, plat_data); @@ -262,6 +264,7 @@ static void sun8i_dw_hdmi_unbind(struct device *dev, struct device *master, struct sun8i_dw_hdmi *hdmi = dev_get_drvdata(dev); 
dw_hdmi_unbind(hdmi->hdmi); + sun8i_hdmi_phy_deinit(hdmi->phy); clk_disable_unprepare(hdmi->clk_tmds); reset_control_assert(hdmi->rst_ctrl); gpiod_set_value(hdmi->ddc_en, 0); diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h index 74f6ed0e2570..bffe1b9cd3dc 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h @@ -169,6 +169,7 @@ struct sun8i_hdmi_phy { struct clk *clk_phy; struct clk *clk_pll0; struct clk *clk_pll1; + struct device *dev; unsigned intrcal; struct regmap *regs; struct reset_control*rst_phy; @@ -205,7 +206,8 @@ encoder_to_sun8i_dw_hdmi(struct drm_encoder *encoder) int sun8i_hdmi_phy_get(struct sun8i_dw_hdmi *hdmi, struct device_node *node); -void sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy); +int sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy); +void sun8i_hdmi_phy_deinit(struct sun8i_hdmi_phy *phy); void sun8i_hdmi_phy_set_ops(struct sun8i_hdmi_phy *phy, struct dw_hdmi_plat_data *plat_data); diff --git a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c index c9239708d398..b64d93da651d 100644 --- a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c +++ b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c @@ -506,9 +506,60 @@ static void sun8i_hdmi_phy_init_h3(struct sun8i_hdmi_phy *phy) phy->rcal = (val & SUN8I_HDMI_PHY_ANA_STS_RCAL_MASK) >> 2; } -void sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy) +int sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy) { + int ret; + + ret = reset_control_deassert(phy->rst_phy); + if (ret) { + dev_err(phy->dev, "Cannot deassert phy reset control: %d\n", ret); + return ret; + } + + ret = clk_prepare_enable(phy->clk_bus); + if (ret) { + dev_err(phy->dev, "Cannot enable bus clock: %d\n", ret); + goto err_assert_rst_phy; + } + + ret = clk_prepare_enable(phy->clk_mod); + if (ret) { + dev_err(phy->dev, "Cannot enable mod clock: %d\n", ret); + goto err_disable_clk_bus; + } + + if (phy->variant->has_phy_clk) { + ret = 
sun8i_phy_clk_create(phy, phy->dev, + phy->variant->has_second_pll); + if (ret) { + dev_err(phy->dev, "Couldn't create the PHY clock\n"); + goto err_disable_clk_mod; + } + + clk_prepare_enable(phy->clk_phy); + } + phy->variant->phy_init(phy); + + return 0; + +err_disable_clk_mod: + clk_disab
Re: [PATCH v2] drm/v3d: fix wait for TMU write combiner flush
On 09/15, Iago Toral Quiroga wrote: > The hardware sets the TMUWCF bit back to 0 when the TMU write > combiner flush completes so we should be checking for that instead > of the L2TFLS bit. > > v2 (Melissa Wen): > - Add Signed-off-by and Fixes tags. > - Change the error message for the timeout to be more clear. > > Fixes spurious Vulkan CTS failures in: > dEQP-VK.binding_model.descriptorset_random.* > > Fixes: d223f98f02099 ("drm/v3d: Add support for compute shader dispatch") > Signed-off-by: Iago Toral Quiroga > Reviewed-by: Melissa Wen Applied to drm-misc-next. Thanks, Melissa > --- > drivers/gpu/drm/v3d/v3d_gem.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c > index a3529809d547..1953706bdaeb 100644 > --- a/drivers/gpu/drm/v3d/v3d_gem.c > +++ b/drivers/gpu/drm/v3d/v3d_gem.c > @@ -197,8 +197,8 @@ v3d_clean_caches(struct v3d_dev *v3d) > > V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF); > if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & > -V3D_L2TCACTL_L2TFLS), 100)) { > - DRM_ERROR("Timeout waiting for L1T write combiner flush\n"); > +V3D_L2TCACTL_TMUWCF), 100)) { > + DRM_ERROR("Timeout waiting for TMU write combiner flush\n"); > } > > mutex_lock(&v3d->cache_clean_lock); > -- > 2.25.1 >
[PATCH RESEND v2 3/3] lib, stackdepot: Add helper to print stack entries into buffer.
To print stack entries into a buffer, users of stackdepot, first get a list of stack entries using stack_depot_fetch and then print this list into a buffer using stack_trace_snprint. Provide a helper in stackdepot for this purpose. Also change above mentioned users to use this helper. Signed-off-by: Imran Khan Suggested-by: Vlastimil Babka Acked-by: Vlastimil Babka --- drivers/gpu/drm/drm_dp_mst_topology.c | 5 + drivers/gpu/drm/drm_mm.c| 5 + drivers/gpu/drm/i915/i915_vma.c | 5 + drivers/gpu/drm/i915/intel_runtime_pm.c | 20 +--- include/linux/stackdepot.h | 3 +++ lib/stackdepot.c| 24 mm/page_owner.c | 5 + 7 files changed, 36 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 86d13d6bc463..2d1adab9e360 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1668,13 +1668,10 @@ __dump_topology_ref_history(struct drm_dp_mst_topology_ref_history *history, for (i = 0; i < history->len; i++) { const struct drm_dp_mst_topology_ref_entry *entry = &history->entries[i]; - ulong *entries; - uint nr_entries; u64 ts_nsec = entry->ts_nsec; u32 rem_nsec = do_div(ts_nsec, 10); - nr_entries = stack_depot_fetch(entry->backtrace, &entries); - stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 4); + stack_depot_snprint(entry->backtrace, buf, PAGE_SIZE, 4); drm_printf(&p, " %d %ss (last at %5llu.%06u):\n%s", entry->count, diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 93d48a6f04ab..ca04d7f6f7b5 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -118,8 +118,6 @@ static noinline void save_stack(struct drm_mm_node *node) static void show_leaks(struct drm_mm *mm) { struct drm_mm_node *node; - unsigned long *entries; - unsigned int nr_entries; char *buf; buf = kmalloc(BUFSZ, GFP_KERNEL); @@ -133,8 +131,7 @@ static void show_leaks(struct drm_mm *mm) continue; } - nr_entries = stack_depot_fetch(node->stack, &entries); - 
stack_trace_snprint(buf, BUFSZ, entries, nr_entries, 0); + stack_depot_snprint(node->stack, buf, BUFSZ, 0); DRM_ERROR("node [%08llx + %08llx]: inserted at\n%s", node->start, node->size, buf); } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4b7fc4647e46..f2d9ed375109 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -56,8 +56,6 @@ void i915_vma_free(struct i915_vma *vma) static void vma_print_allocator(struct i915_vma *vma, const char *reason) { - unsigned long *entries; - unsigned int nr_entries; char buf[512]; if (!vma->node.stack) { @@ -66,8 +64,7 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason) return; } - nr_entries = stack_depot_fetch(vma->node.stack, &entries); - stack_trace_snprint(buf, sizeof(buf), entries, nr_entries, 0); + stack_depot_snprint(vma->node.stack, buf, sizeof(buf), 0); DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: inserted at %s\n", vma->node.start, vma->node.size, reason, buf); } diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index eaf7688f517d..cc312f0a05eb 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -65,16 +65,6 @@ static noinline depot_stack_handle_t __save_depot_stack(void) return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN); } -static void __print_depot_stack(depot_stack_handle_t stack, - char *buf, int sz, int indent) -{ - unsigned long *entries; - unsigned int nr_entries; - - nr_entries = stack_depot_fetch(stack, &entries); - stack_trace_snprint(buf, sz, entries, nr_entries, indent); -} - static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { spin_lock_init(&rpm->debug.lock); @@ -146,12 +136,12 @@ static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm, if (!buf) return; - __print_depot_stack(stack, buf, PAGE_SIZE, 2); + stack_depot_snprint(stack, buf, PAGE_SIZE, 2); 
DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf); stack = READ_ONCE(rpm->debug.last_release); if (stack) { - __print_depot_stack(stack, buf
[PATCH RESEND v2 2/3] lib, stackdepot: Add helper to print stack entries.
To print stack entries, users of stackdepot first use stack_depot_fetch to get a list of stack entries and then use stack_trace_print to print this list. Provide a helper in stackdepot to print stack entries based on a stackdepot handle. Also change the above-mentioned users to use this helper. Signed-off-by: Imran Khan Suggested-by: Vlastimil Babka Acked-by: Vlastimil Babka Reviewed-by: Alexander Potapenko --- include/linux/stackdepot.h | 2 ++ lib/stackdepot.c | 18 ++ mm/kasan/report.c | 15 +++ mm/page_owner.c| 13 - 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 6bb4bc1a5f54..d77a30543dd4 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -19,6 +19,8 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); +void stack_depot_print(depot_stack_handle_t stack); + unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries); #ifdef CONFIG_STACKDEPOT diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 67439c082490..354fe1b62017 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -214,6 +214,24 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, return NULL; } +/** + * stack_depot_print - print stack entries from a depot + * + * @stack: Stack depot handle which was returned from + * stack_depot_save(). 
+ * + */ +void stack_depot_print(depot_stack_handle_t stack) +{ + unsigned long *entries; + unsigned int nr_entries; + + nr_entries = stack_depot_fetch(stack, &entries); + if (nr_entries > 0) + stack_trace_print(entries, nr_entries, 0); +} +EXPORT_SYMBOL_GPL(stack_depot_print); + /** * stack_depot_fetch - Fetch stack entries from a depot * diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 884a950c7026..3239fd8f8747 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -132,20 +132,11 @@ static void end_report(unsigned long *flags, unsigned long addr) kasan_enable_current(); } -static void print_stack(depot_stack_handle_t stack) -{ - unsigned long *entries; - unsigned int nr_entries; - - nr_entries = stack_depot_fetch(stack, &entries); - stack_trace_print(entries, nr_entries, 0); -} - static void print_track(struct kasan_track *track, const char *prefix) { pr_err("%s by task %u:\n", prefix, track->pid); if (track->stack) { - print_stack(track->stack); + stack_depot_print(track->stack); } else { pr_err("(stack is not available)\n"); } @@ -214,12 +205,12 @@ static void describe_object_stacks(struct kmem_cache *cache, void *object, return; if (alloc_meta->aux_stack[0]) { pr_err("Last potentially related work creation:\n"); - print_stack(alloc_meta->aux_stack[0]); + stack_depot_print(alloc_meta->aux_stack[0]); pr_err("\n"); } if (alloc_meta->aux_stack[1]) { pr_err("Second to last potentially related work creation:\n"); - print_stack(alloc_meta->aux_stack[1]); + stack_depot_print(alloc_meta->aux_stack[1]); pr_err("\n"); } #endif diff --git a/mm/page_owner.c b/mm/page_owner.c index d24ed221357c..7918770c2b2b 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -394,8 +394,6 @@ void __dump_page_owner(const struct page *page) struct page_ext *page_ext = lookup_page_ext(page); struct page_owner *page_owner; depot_stack_handle_t handle; - unsigned long *entries; - unsigned int nr_entries; gfp_t gfp_mask; int mt; @@ -423,20 +421,17 @@ void __dump_page_owner(const 
struct page *page) page_owner->pid, page_owner->ts_nsec, page_owner->free_ts_nsec); handle = READ_ONCE(page_owner->handle); - if (!handle) { + if (!handle) pr_alert("page_owner allocation stack trace missing\n"); - } else { - nr_entries = stack_depot_fetch(handle, &entries); - stack_trace_print(entries, nr_entries, 0); - } + else + stack_depot_print(handle); handle = READ_ONCE(page_owner->free_handle); if (!handle) { pr_alert("page_owner free stack trace missing\n"); } else { - nr_entries = stack_depot_fetch(handle, &entries); pr_alert("page last free stack trace:\n"); - stack_trace_print(entries, nr_entries, 0); + stack_depot_print(handle); } if (page_owner->last_migrate_reason != -1) -- 2.30.2
[PATCH RESEND v2 1/3] lib, stackdepot: check stackdepot handle before accessing slabs.
stack_depot_save allocates slabs that will be used for storing objects in the future. If this slab allocation fails we may get to a situation where space allocation for a new stack_record fails, causing stack_depot_save to return 0 as the handle. If a user of this handle ends up invoking stack_depot_fetch with this handle value, the current implementation of stack_depot_fetch will end up using a slab from the wrong index. To avoid this, check the handle value at the beginning. Signed-off-by: Imran Khan Suggested-by: Vlastimil Babka Acked-by: Vlastimil Babka --- lib/stackdepot.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 0a2e417f83cb..67439c082490 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -232,6 +232,9 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, struct stack_record *stack; *entries = NULL; + if (!handle) + return 0; + if (parts.slabindex > depot_index) { WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n", parts.slabindex, depot_index, handle); -- 2.30.2
[PATCH RESEND v2 0/3] lib, stackdepot: check stackdepot handle before accessing slabs
Changes in v2: - Fixed compilation error [1] due to a typo in patch-3 (stack_depot_print used in place of stack_depot_snprint) This compilation error appears with CONFIG_DRM_I915_DEBUG_RUNTIME_PM=y and this was missed by my test config (x86_64_defconfig) [1] https://patchwork.freedesktop.org/series/94696/ Original cover letter -- This patch series consolidates the changes submitted and reviewed at [1] and [2]. The patches at [1] and [2] were submitted separately, but they have some interdependency (later patches were created on top of earlier ones). As both sets are still under review, I have put them in a single change set here, so that they can be reviewed/included together and also to avoid automation build failures where git am fails because of an absent parent. I have included Acked-by (from Vlastimil) and Reviewed-by (from Alexander) tags obtained so far for these changes and have also addressed the last review comment from Vlastimil [3]. To summarize, the changes in this set are as follows: PATCH-1: Checks validity of a stackdepot handle before proceeding to access stackdepot slab/objects. PATCH-2: Adds a helper in stackdepot, to allow users to print stack entries just by specifying the stackdepot handle. It also changes such users to use this new interface. PATCH-3: Adds a helper in stackdepot, to allow users to print stack entries into buffers just by specifying the stackdepot handle and destination buffer. It also changes such users to use this new interface. [1] https://lore.kernel.org/lkml/20210902000154.1096484-1-imran.f.k...@oracle.com/ [2] https://lore.kernel.org/lkml/20210910141001.1622130-1-imran.f.k...@oracle.com/ [3] https://lore.kernel.org/lkml/ef0aa660-0cb6-dc21-f2ce-368b34f8a...@suse.cz/ Imran Khan (3): lib, stackdepot: check stackdepot handle before accessing slabs. lib, stackdepot: Add helper to print stack entries. lib, stackdepot: Add helper to print stack entries into buffer. 
drivers/gpu/drm/drm_dp_mst_topology.c | 5 +-- drivers/gpu/drm/drm_mm.c| 5 +-- drivers/gpu/drm/i915/i915_vma.c | 5 +-- drivers/gpu/drm/i915/intel_runtime_pm.c | 20 +++ include/linux/stackdepot.h | 5 +++ lib/stackdepot.c| 45 + mm/kasan/report.c | 15 ++--- mm/page_owner.c | 18 +++--- 8 files changed, 66 insertions(+), 52 deletions(-) -- 2.30.2
Re: [PATCH v3 0/8] Implement generic cc_platform_has() helper function
On 9/15/21 9:46 AM, Borislav Petkov wrote: Sathya, if you want to prepare the Intel variant intel_cc_platform_has() ontop of those and send it to me, that would be good because then I can integrate it all in one branch which can be used to base future work ontop. I have a Intel variant patch (please check following patch). But it includes TDX changes as well. Shall I move TDX changes to different patch and just create a separate patch for adding intel_cc_platform_has()? commit fc5f98a0ed94629d903827c5b44ee9295f835831 Author: Kuppuswamy Sathyanarayanan Date: Wed May 12 11:35:13 2021 -0700 x86/tdx: Add confidential guest support for TDX guest TDX architecture provides a way for VM guests to be highly secure and isolated (from untrusted VMM). To achieve this requirement, any data coming from VMM cannot be completely trusted. TDX guest fixes this issue by hardening the IO drivers against the attack from the VMM. So, when adding hardening fixes to the generic drivers, to protect custom fixes use cc_platform_has() API. Also add TDX guest support to cc_platform_has() API to protect the TDX specific fixes. Signed-off-by: Kuppuswamy Sathyanarayanan diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a5b14de03458..2e78358923a1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -871,6 +871,7 @@ config INTEL_TDX_GUEST depends on SECURITY select X86_X2APIC select SECURITY_LOCKDOWN_LSM + select ARCH_HAS_CC_PLATFORM help Provide support for running in a trusted domain on Intel processors equipped with Trusted Domain eXtensions. 
TDX is a new Intel diff --git a/arch/x86/include/asm/intel_cc_platform.h b/arch/x86/include/asm/intel_cc_platform.h new file mode 100644 index ..472c3174beac --- /dev/null +++ b/arch/x86/include/asm/intel_cc_platform.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021 Intel Corporation */ +#ifndef _ASM_X86_INTEL_CC_PLATFORM_H +#define _ASM_X86_INTEL_CC_PLATFORM_H + +#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_ARCH_HAS_CC_PLATFORM) +bool intel_cc_platform_has(unsigned int flag); +#else +static inline bool intel_cc_platform_has(unsigned int flag) { return false; } +#endif + +#endif /* _ASM_X86_INTEL_CC_PLATFORM_H */ + diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/kernel/cc_platform.c index 3c9bacd3c3f3..e83bc2f48efe 100644 --- a/arch/x86/kernel/cc_platform.c +++ b/arch/x86/kernel/cc_platform.c @@ -10,11 +10,16 @@ #include #include #include +#include + +#include bool cc_platform_has(enum cc_attr attr) { if (sme_me_mask) return amd_cc_platform_has(attr); + else if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + return intel_cc_platform_has(attr); return false; } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 8321c43554a1..ab486a3b1eb0 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -60,6 +61,21 @@ static u64 msr_test_ctrl_cache __ro_after_init; */ static bool cpu_model_supports_sld __ro_after_init; +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM +bool intel_cc_platform_has(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_GUEST_TDX: + return cpu_feature_enabled(X86_FEATURE_TDX_GUEST); + default: + return false; + } + + return false; +} +EXPORT_SYMBOL_GPL(intel_cc_platform_has); +#endif + /* * Processors which have self-snooping capability can handle conflicting * memory type across CPUs by snooping its own cache. 
However, there exists diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index 253f3ea66cd8..e38430e6e396 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -61,6 +61,15 @@ enum cc_attr { * Examples include SEV-ES. */ CC_ATTR_GUEST_STATE_ENCRYPT, + + /** +* @CC_ATTR_GUEST_TDX: Trusted Domain Extension Support +* +* The platform/OS is running as a TDX guest/virtual machine. +* +* Examples include SEV-ES. +*/ + CC_ATTR_GUEST_TDX, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM -- Sathyanarayanan Kuppuswamy Linux Kernel Developer
Re: [PATCH v3 4/8] powerpc/pseries/svm: Add a powerpc version of cc_platform_has()
Le 15/09/2021 à 12:08, Borislav Petkov a écrit : On Wed, Sep 15, 2021 at 10:28:59AM +1000, Michael Ellerman wrote: I don't love it, a new C file and an out-of-line call to then call back to a static inline that for most configuration will return false ... but whatever :) Yeah, hch thinks it'll cause a big mess otherwise: https://lore.kernel.org/lkml/ysscwvpxevxw%2f...@infradead.org/ Could you please provide a more explicit explanation of why inlining such a helper is considered bad practice and messy? Because as demonstrated in my previous response some days ago, taking that out of line ends up with unnecessarily ugly generated code and we don't benefit from GCC's capability to fold in and optimize out unreachable code. As pointed out by Michael, in most cases the function will just return false, so beyond the performance concern, there is also the code size and code coverage topic to be taken into account. And even when the function doesn't return false, the only thing it does folds into a single powerpc instruction, so there is really no point in making a dedicated out-of-line function for that and suffering the cost and the size of a function call, or in justifying the addition of a dedicated C file. I guess less ifdeffery is nice too. I can't see your point here. Inlining the function wouldn't add any ifdeffery as far as I can see. So, would you mind reconsidering your approach and allowing architectures to provide an inline implementation by just not enforcing a generic prototype? Or otherwise provide more details and examples of why the cons outweigh the pros? Thanks Christophe
Re: [Intel-gfx] [PATCH 08/27] drm/i915: Add logical engine mapping
On Wed, Sep 15, 2021 at 09:24:15AM +0100, Tvrtko Ursulin wrote: > > On 14/09/2021 19:04, Matthew Brost wrote: > > On Tue, Sep 14, 2021 at 09:34:08AM +0100, Tvrtko Ursulin wrote: > > > > > 8< > > > > Today we have: > > > > > > for_each intel_engines: // intel_engines is a flat list of all engines > > > intel_engine_setup() > > > > > > You propose to change it to: > > > > > > for_each engine_class: > > > for 0..max_global_engine_instance: > > >for_each intel_engines: > > > skip engine not present > > > skip class not matching > > > > > > count logical instance > > > > > > for_each intel_engines: > > >skip engine not present > > >skip wrong class > > > > > >intel_engine_setup() > > > > > > > > > I propose: > > > > > > // Leave as is: > > > > > > for_each intel_engines: > > > intel_engine_setup() > > > > > > // Add: > > > > > > for_each engine_class: > > > logical = 0 > > > for_each gt->engine_class[class]: > > >skip engine not present > > > > > >engine->logical_instance = logical++ > > > > > > > > > When code which actually needs a perturbed "map" arrives you add that in > > > to > > > this second loop. > > > > > > > See above, why introduce an algorithm that doesn't work for future parts > > + future patches that are landing imminently? It makes zero sense whatsoever. > > With your proposal we would literally land code to just throw it away a > > couple of months from now + break patches we intend to land soon. This > > It sure works, it just walks the per class list instead of walking the flat > list skipping one class at the time. > > Just add the map based transformation to the second pass later, when it > becomes required. > I can flatten the algorithm if that helps alleviate your concerns but with that being said, I've played around with this locally and IMO it makes the code way more ugly. Sure it eliminates some iterations of the loop but who really cares about that in a one time setup function? 
> > algorithm works and has no reason whatsoever to be optimal as it is a one > > time setup call. I really don't understand why we are still talking > > about this paint color. > > I don't think bike shedding is an appropriate term when the complaint is how > the proposed algorithm is needlessly complicated. > Are you just ignoring the fact that the algorithm (map) is needed in pending patches? IMO it is more complicated to write throwaway code when the proper algorithm is already written. If the logical mapping was straightforward on all platforms as the ones currently upstream I would 100% agree with your suggestion, but it isn't on unembargoed platforms imminently going upstream. The algorithm I have works for the current platforms + the pending platforms. IMO it is 100% acceptable to merge something looking towards a known future. Matt > Regards, > > Tvrtko
Re: [PATCH v3 0/8] Implement generic cc_platform_has() helper function
On Wed, Sep 08, 2021 at 05:58:31PM -0500, Tom Lendacky wrote: > This patch series provides a generic helper function, cc_platform_has(), > to replace the sme_active(), sev_active(), sev_es_active() and > mem_encrypt_active() functions. > > It is expected that as new confidential computing technologies are > added to the kernel, they can all be covered by a single function call > instead of a collection of specific function calls all called from the > same locations. > > The powerpc and s390 patches have been compile tested only. Can the > folks copied on this series verify that nothing breaks for them. Also, > a new file, arch/powerpc/platforms/pseries/cc_platform.c, has been > created for powerpc to hold the out of line function. ... > > Tom Lendacky (8): > x86/ioremap: Selectively build arch override encryption functions > mm: Introduce a function to check for confidential computing features > x86/sev: Add an x86 version of cc_platform_has() > powerpc/pseries/svm: Add a powerpc version of cc_platform_has() > x86/sme: Replace occurrences of sme_active() with cc_platform_has() > x86/sev: Replace occurrences of sev_active() with cc_platform_has() > x86/sev: Replace occurrences of sev_es_active() with cc_platform_has() > treewide: Replace the use of mem_encrypt_active() with > cc_platform_has() Ok, modulo the minor things the plan is to take this through tip after -rc2 releases in order to pick up the powerpc build fix and have a clean base (-rc2) to base stuff on, at the same time. Pls holler if something's still amiss. Sathya, if you want to prepare the Intel variant intel_cc_platform_has() ontop of those and send it to me, that would be good because then I can integrate it all in one branch which can be used to base future work ontop. Thx. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH v3 2/3] drm/bridge: parade-ps8640: Use regmap APIs
Hi, On Tue, Sep 14, 2021 at 7:50 PM Stephen Boyd wrote: > > Quoting Doug Anderson (2021-09-14 19:17:03) > > Hi, > > > > On Tue, Sep 14, 2021 at 5:29 PM Stephen Boyd wrote: > > > > > > Quoting Philip Chen (2021-09-14 16:28:44) > > > > diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c > > > > b/drivers/gpu/drm/bridge/parade-ps8640.c > > > > index e340af381e05..8d3e7a147170 100644 > > > > --- a/drivers/gpu/drm/bridge/parade-ps8640.c > > > > +++ b/drivers/gpu/drm/bridge/parade-ps8640.c > > > > @@ -368,6 +396,12 @@ static int ps8640_probe(struct i2c_client *client) > > > > > > > > ps_bridge->page[PAGE0_DP_CNTL] = client; > > > > > > > > + ps_bridge->regmap[PAGE0_DP_CNTL] = devm_regmap_init_i2c(client, > > > > ps8640_regmap_config); > > > > + if (IS_ERR(ps_bridge->regmap[PAGE0_DP_CNTL])) { > > > > + return dev_err_probe(dev, > > > > PTR_ERR(ps_bridge->regmap[PAGE0_DP_CNTL]), > > > > +"Error initting page 0 regmap\n"); > > > > > > This one also doesn't return -EPROBE_DEFER? The dev_err_probe() should > > > really only be used on "get" style APIs that can defer. > > > > Any reason why you say that dev_err_probe() should only be used on > > "get" style APIs that can defer? Even if an API can't return > > -EPROBE_DEFER, using dev_err_probe() still (IMO) makes the code > > cleaner and should be used for any error cases like this during probe. > > Why? > > > > * It shows the error code in a standard way for you. > > * It returns the error code you passed it so you can make your error > > return "one line" instead of 2. > > I'd rather see any sort of error message in getter APIs be pushed into > the callee so that we reduce the text size of the kernel by having one > message instead of hundreds/thousands about "failure to get something". > As far as I can tell this API is designed to skip printing anything when > EPROBE_DEFER is returned, and only print something when it isn't that > particular error code. 
The other benefit of this API is it sets the > deferred reason in debugfs which is nice to know why some device failed > to probe. Of course now with fw_devlink that almost never triggers so > the feature is becoming useless. I guess we need to split this apart into two issues. One (1) is whether we should be printing errors like this in probe() and the other (2) is the use of dev_err_probe() for cases where err could never be -EPROBE_DEFER. So the argument about reducing the text size for thousands of slightly different errors is all about (1), right? In other words, you'd be equally opposed to a change that added a normal error print with dev_err(), right? IMO, this is a fair debate to have and it comes down to a choice that has pros and cons. Yes the error messages are not needed in the normal case and yes they bloat the kernel size, but when something inevitably goes wrong then you have a way to track it down instead of trying to guess or having to recompile the code to add prints everywhere. Often this can give you a quick clue about a missing Kconfig or a wrongly coded device tree file without tons of time adding prints and recompiling code. That seems like it's worth something... One could also make the argument that if you don't care about all these similar errors bloating the text segment that it would be pretty easy to create a new Kconfig: "CONFIG_I_THINK_PROBE_ERRORS_ARE_BLOAT". If that config is set then it could throw away the strings for every dev_err_probe() that you compile in. I'm not so convinced about the argument (2) that dev_err_probe() should only be used if the error code could be -EPROBE_DEFER. Compare these two: Old: ret = do_something_that_cant_defer(); if (ret < 0) { dev_err(dev, "The foo failed to bar (%pe)\n", ERR_PTR(ret)); return ret; } New: ret = do_something_that_cant_defer(); if (ret < 0) return dev_err_probe(dev, ret, "The foo failed to bar\n"); It seems clear to me that the "New" case is better. 
The error code is printed in a consistent fashion compared to all other error prints and the fact that it returns the error code makes it cleaner. It's fine that the error could never be -EPROBE_DEFER. Certainly we could add a new function called dev_err_with_code() that worked exactly like dev_err_probe() except that it didn't have special logic for -EPROBE_DEFER but why? Also note that the current function is dev_err_probe(), not dev_err_might_defer(). By the name, it should be useful / OK to use for any errors that come up in the probe path. > > Is there some bad thing about dev_err_probe() that makes it > > problematic to use? If not then the above advantages should be a net > > win, right? > > > > I view it as an anti-pattern. We should strive for driver probe to be > fairly simple so that it's basically getting resources and registering > with frameworks. The error messages in probe may help when you're trying > to get the driver to work and the resource APIs don't make any
[PATCH v8 16/16] nouveau: fold multiple DRM_DEBUG_DRIVERs together
With DRM_USE_DYNAMIC_DEBUG, each callsite record requires 56 bytes. We can combine 12 into one here and save ~620 bytes. Signed-off-by: Jim Cromie --- drivers/gpu/drm/nouveau/nouveau_drm.c | 36 +-- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index ba4cd5f83725..0f45399535bf 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1245,19 +1245,29 @@ nouveau_drm_pci_table[] = { static void nouveau_display_options(void) { - DRM_DEBUG_DRIVER("Loading Nouveau with parameters:\n"); - - DRM_DEBUG_DRIVER("... tv_disable : %d\n", nouveau_tv_disable); - DRM_DEBUG_DRIVER("... ignorelid: %d\n", nouveau_ignorelid); - DRM_DEBUG_DRIVER("... duallink : %d\n", nouveau_duallink); - DRM_DEBUG_DRIVER("... nofbaccel: %d\n", nouveau_nofbaccel); - DRM_DEBUG_DRIVER("... config : %s\n", nouveau_config); - DRM_DEBUG_DRIVER("... debug: %s\n", nouveau_debug); - DRM_DEBUG_DRIVER("... noaccel : %d\n", nouveau_noaccel); - DRM_DEBUG_DRIVER("... modeset : %d\n", nouveau_modeset); - DRM_DEBUG_DRIVER("... runpm: %d\n", nouveau_runtime_pm); - DRM_DEBUG_DRIVER("... vram_pushbuf : %d\n", nouveau_vram_pushbuf); - DRM_DEBUG_DRIVER("... hdmimhz : %d\n", nouveau_hdmimhz); + DRM_DEBUG_DRIVER("Loading Nouveau with parameters:\n" +"... tv_disable : %d\n" +"... ignorelid: %d\n" +"... duallink : %d\n" +"... nofbaccel: %d\n" +"... config : %s\n" +"... debug: %s\n" +"... noaccel : %d\n" +"... modeset : %d\n" +"... runpm: %d\n" +"... vram_pushbuf : %d\n" +"... hdmimhz : %d\n" +, nouveau_tv_disable +, nouveau_ignorelid +, nouveau_duallink +, nouveau_nofbaccel +, nouveau_config +, nouveau_debug +, nouveau_noaccel +, nouveau_modeset +, nouveau_runtime_pm +, nouveau_vram_pushbuf +, nouveau_hdmimhz); } static const struct dev_pm_ops nouveau_pm_ops = { -- 2.31.1
[PATCH v8 15/16] amdgpu_ucode: reduce number of pr_debug calls
There are blocks of DRM_DEBUG calls, consolidate their args into single calls. With dynamic-debug in use, each callsite consumes 56 bytes of callsite data, and this patch removes about 65 calls, so it saves ~3.5kb. no functional changes. RFC: this creates multi-line log messages, does that break any syslog conventions ? Signed-off-by: Jim Cromie --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 293 -- 1 file changed, 158 insertions(+), 135 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 2834981f8c08..14a9fef1f4c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -30,17 +30,26 @@ static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr) { - DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes)); - DRM_DEBUG("header_size_bytes: %u\n", le32_to_cpu(hdr->header_size_bytes)); - DRM_DEBUG("header_version_major: %u\n", le16_to_cpu(hdr->header_version_major)); - DRM_DEBUG("header_version_minor: %u\n", le16_to_cpu(hdr->header_version_minor)); - DRM_DEBUG("ip_version_major: %u\n", le16_to_cpu(hdr->ip_version_major)); - DRM_DEBUG("ip_version_minor: %u\n", le16_to_cpu(hdr->ip_version_minor)); - DRM_DEBUG("ucode_version: 0x%08x\n", le32_to_cpu(hdr->ucode_version)); - DRM_DEBUG("ucode_size_bytes: %u\n", le32_to_cpu(hdr->ucode_size_bytes)); - DRM_DEBUG("ucode_array_offset_bytes: %u\n", - le32_to_cpu(hdr->ucode_array_offset_bytes)); - DRM_DEBUG("crc32: 0x%08x\n", le32_to_cpu(hdr->crc32)); + DRM_DEBUG("size_bytes: %u\n" + "header_size_bytes: %u\n" + "header_version_major: %u\n" + "header_version_minor: %u\n" + "ip_version_major: %u\n" + "ip_version_minor: %u\n" + "ucode_version: 0x%08x\n" + "ucode_size_bytes: %u\n" + "ucode_array_offset_bytes: %u\n" + "crc32: 0x%08x\n", + le32_to_cpu(hdr->size_bytes), + le32_to_cpu(hdr->header_size_bytes), + le16_to_cpu(hdr->header_version_major), + le16_to_cpu(hdr->header_version_minor), + 
le16_to_cpu(hdr->ip_version_major), + le16_to_cpu(hdr->ip_version_minor), + le32_to_cpu(hdr->ucode_version), + le32_to_cpu(hdr->ucode_size_bytes), + le32_to_cpu(hdr->ucode_array_offset_bytes), + le32_to_cpu(hdr->crc32)); } void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr) @@ -55,9 +64,9 @@ void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr) const struct mc_firmware_header_v1_0 *mc_hdr = container_of(hdr, struct mc_firmware_header_v1_0, header); - DRM_DEBUG("io_debug_size_bytes: %u\n", - le32_to_cpu(mc_hdr->io_debug_size_bytes)); - DRM_DEBUG("io_debug_array_offset_bytes: %u\n", + DRM_DEBUG("io_debug_size_bytes: %u\n" + "io_debug_array_offset_bytes: %u\n", + le32_to_cpu(mc_hdr->io_debug_size_bytes), le32_to_cpu(mc_hdr->io_debug_array_offset_bytes)); } else { DRM_ERROR("Unknown MC ucode version: %u.%u\n", version_major, version_minor); @@ -82,13 +91,17 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr) switch (version_minor) { case 0: v2_0_hdr = container_of(hdr, struct smc_firmware_header_v2_0, v1_0.header); - DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_0_hdr->ppt_offset_bytes)); - DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_0_hdr->ppt_size_bytes)); + DRM_DEBUG("ppt_offset_bytes: %u\n" + "ppt_size_bytes: %u\n", + le32_to_cpu(v2_0_hdr->ppt_offset_bytes), + le32_to_cpu(v2_0_hdr->ppt_size_bytes)); break; case 1: v2_1_hdr = container_of(hdr, struct smc_firmware_header_v2_1, v1_0.header); - DRM_DEBUG("pptable_count: %u\n", le32_to_cpu(v2_1_hdr->pptable_count)); - DRM_DEBUG("pptable_entry_offset: %u\n", le32_to_cpu(v2_1_hdr->pptable_entry_offset)); + DRM_DEBUG("pptable_count: %u\n" + "pptable_entry_offset: %u\n", + le32_to_cpu(v2_1_hdr->pptable_count), + le32_to_cpu(v2_1_hdr->pptable_entry_offset)); break; default: break; @@ -111,10 +124,12 @@ void am
[PATCH v8 14/16] drm_print: instrument drm_debug_enabled
Duplicate drm_debug_enabled() code into both "basic" and "dyndbg" ifdef branches. Then add a pr_debug("todo: ...") into the "dyndbg" branch. Then convert the "dyndbg" branch's code to a macro, so that the pr_debug() gets its callsite info from the invoking function, instead of from drm_debug_enabled() itself. This gives us unique callsite info for the 8 remaining users of drm_debug_enabled(), and lets us enable them individually to see how much logging traffic they generate. The oft-visited callsites can then be reviewed for runtime cost and possible optimizations. Here's what we get: bash-5.1# modprobe drm dyndbg: 384 debug prints in module drm bash-5.1# grep todo: /proc/dynamic_debug/control drivers/gpu/drm/drm_edid.c:1843 [drm]connector_bad_edid =_ "todo: maybe avoid via dyndbg\012" drivers/gpu/drm/drm_print.c:309 [drm]___drm_dbg =p "todo: maybe avoid via dyndbg\012" drivers/gpu/drm/drm_print.c:286 [drm]__drm_dev_dbg =p "todo: maybe avoid via dyndbg\012" drivers/gpu/drm/drm_vblank.c:1491 [drm]drm_vblank_restore =_ "todo: maybe avoid via dyndbg\012" drivers/gpu/drm/drm_vblank.c:787 [drm]drm_crtc_vblank_helper_get_vblank_timestamp_internal =_ "todo: maybe avoid via dyndbg\012" drivers/gpu/drm/drm_vblank.c:410 [drm]drm_crtc_accurate_vblank_count =_ "todo: maybe avoid via dyndbg\012" drivers/gpu/drm/drm_atomic_uapi.c:1457 [drm]drm_mode_atomic_ioctl =_ "todo: maybe avoid via dyndbg\012" drivers/gpu/drm/drm_edid_load.c:178 [drm]edid_load =_ "todo: maybe avoid via dyndbg\012" At a quick glance, edid won't qualify, drm_print might, drm_vblank has the strongest chance, maybe atomic-ioctl too. 
Signed-off-by: Jim Cromie --- --- include/drm/drm_print.h | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 4a38591a424b..c9cabc8db672 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -381,6 +381,11 @@ enum drm_debug_category { #define DRM_DBG_CAT_DP DRM_UT_DP #define DRM_DBG_CAT_DRMRES DRM_UT_DRMRES +static inline bool drm_debug_enabled(enum drm_debug_category category) +{ + return unlikely(__drm_debug & category); +} + #else /* CONFIG_DRM_USE_DYNAMIC_DEBUG */ /* join prefix + format in cpp so dyndbg can see it */ @@ -414,12 +419,13 @@ enum drm_debug_category { #define DRM_DBG_CAT_DP "drm:dp:" #define DRM_DBG_CAT_DRMRES "drm:res:" -#endif /* CONFIG_DRM_USE_DYNAMIC_DEBUG */ +#define drm_debug_enabled(category)\ + ({ \ + pr_debug("todo: maybe avoid via dyndbg\n"); \ + unlikely(__drm_debug & (category)); \ + }) -static inline bool drm_debug_enabled(enum drm_debug_category category) -{ - return unlikely(__drm_debug & category); -} +#endif /* CONFIG_DRM_USE_DYNAMIC_DEBUG */ /* * struct device based logging @@ -569,7 +575,6 @@ void __drm_dev_dbg(const struct device *dev, enum drm_debug_category category, #define drm_dbg_drmres(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_DBG_CAT_DRMRES, fmt, ##__VA_ARGS__) - /* * printk based logging * -- 2.31.1
[PATCH v8 13/16] drm_print: add choice to use dynamic debug in drm-debug
drm's debug system writes 10 distinct categories of messages to syslog using a small API[1]: drm_dbg*(10 names), DRM_DEV_DEBUG*(3 names), DRM_DEBUG*(8 names). There are thousands of these callsites, each categorized in this systematized way. These callsites can be enabled at runtime by their category, each controlled by a bit in drm.debug (/sys/module/drm/parameters/debug). In the current "basic" implementation, drm_debug_enabled() tests these bits in __drm_debug each time an API[1] call is executed; while cheap individually, the costs accumulate with uptime. This patch uses dynamic-debug with (required) jump-label to patch enabled calls onto their respective NOOP slots, avoiding all runtime bit-checks of __drm_debug by drm_debug_enabled(). Dynamic debug has no concept of category, but we can emulate one by replacing enum categories with a set of prefix-strings; "drm:core:", "drm:kms:", "drm:driver:" etc, and prepending them (at compile time) to the given formats. Then we can use: `echo module drm format "^drm:core: " +p > control` to enable the whole category with one query. This conversion yields many new prdbg callsites: dyndbg: 207 debug prints in module drm_kms_helper dyndbg: 376 debug prints in module drm dyndbg: 1811 debug prints in module i915 dyndbg: 3917 debug prints in module amdgpu Each site costs 56 bytes of .data, which is a big increase for drm modules, so CONFIG_DRM_USE_DYNAMIC_DEBUG makes it optional. CONFIG_JUMP_LABEL is also required, to get the promised optimizations. The "basic" -> "dyndbg" switchover is layered into the macro scheme A. A "prefix" version of DRM_UT_ map, named DRM_DBG_CAT_ "basic": DRM_DBG_CAT_ <=== DRM_UT_. Identity map. 
"dyndbg": #define DRM_DBG_CAT_KMS"drm:kms: " #define DRM_DBG_CAT_PRIME "drm:prime: " #define DRM_DBG_CAT_ATOMIC "drm:atomic: " In v3, had older name, DRM_DBG_CLASS_ was countered, I had agreed, but this seems better still; CATEGORY is already DRM's term-of-art, and adding a near-synonym 'CLASS' only adds ambiguity. DRM_UT_* are preserved, since theyre used elsewhere. Since the callback maintains its state in __drm_debug, drm_debug_enabled() will stay synchronized, and continue to work. We can address them separately if they are called enough to be worth fixing. B. drm_dev_dbg() & drm_debug() are interposed with macros basic:forward to renamed fn, with args preserved enabled: redirect to pr_debug, dev_dbg, with CATEGORY format catenated This is where drm_debug_enabled() is avoided. The prefix is prepended at compile-time, no category at runtime. C. API[1] uses DRM_DBG_CAT_s These already use (B), now they use (A) too instead of DRM_UT_, to get the correct token type for "basic" and "dyndbg" configs. D. use DEFINE_DYNAMIC_DEBUG_CATEGORIES() This defines the map using DRM_CAT_s, and creates the /sysfs bitmap to control those categories. CONFIG_DRM_USE_DYNAMIC_DEBUG is also used to adjust amdgpu, i915 makefiles to add -DDYNAMIC_DEBUG_MODULE; it includes the current CONFIG_DYNAMIC_DEBUG_CORE and is enabled by the user. NOTES: Because the dyndbg callback is keeping state in __drm_debug, it synchronizes with drm_debug_enabled() and its remaining users; the switchover should be transparent. Code Review is expected to catch the lack of correspondence between bit=>prefix definitions (the selector) and the prefixes used in the API[1] layer above pr_debug() I've coded the categories using the _DD_cat_() macro, which adds the ^anchor and trailing space. This excludes any sub-categories added later. This convention protects any "drm:atomic:fail:" callsites from getting stomped on by `echo 0 > debug`. Other categories could differ, but we need some default. 
Dyndbg requires that the prefix be in the compiled-in format string; run-time prefixing evades callsite selection by category. pr_debug("%s: ...", __func__, ...) // not ideal Unfortunately __func__ is not a macro, and cannot be concatenated at preprocess/compile time. If you want that, you might consider +mfl flags instead. Signed-off-by: Jim Cromie --- v5: . use DEFINE_DYNAMIC_DEBUG_CATEGORIES in drm_print.c . s/DRM_DBG_CLASS_/DRM_DBG_CAT_/ - don't need another term . default=y in Kconfig entry - per @DanVet . move some commit-log prose to dyndbg commit . add prototypes to (param_get/set)_dyndbg . more wrinkles found by . relocate ratelimit chunk from elsewhere v6: . add kernel doc . fix cpp paste, drop '#' v7: . change __drm_debug to long, to fit with DEFINE_DYNAMIC_DEBUG_CATEGORIES . add -DDYNAMIC_DEBUG_MODULE to ccflags if DRM_USE_DYNAMIC_DEBUG v8: . adapt to altered ^ insertion . add mem cost numbers to kconfig . kdoc improvements (I hope) --- drivers/gpu/drm/Kconfig | 26 drivers/gpu/drm/Makefile| 3 + drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/drm_print.c | 53 ++--- drivers/gpu/drm/i915/Makefile | 2 +- include/drm/drm_print.h | 177 ++
[PATCH v8 12/16] amdgpu: use DEFINE_DYNAMIC_DEBUG_CATEGORIES on existing prdbgs
logger_types.h defines many DC_LOG_*() categorized debug wrappers. Most of these already use DRM debug API, so are controllable using drm.debug, but others use a bare pr_debug("$prefix: .."), with 1 of 13 different class-prefixes matching ~/^\[[_A-Z]+\]:/ Use DEFINE_DYNAMIC_DEBUG_CATEGORIES to create a sysfs location which maps from bits to these 13 sets of categorized pr_debugs to en/disable. Makefile adds -DDYNAMIC_DEBUG_MODULE for CONFIG_DYNAMIC_DEBUG_CORE, otherwise BUILD_BUG_ON triggers (obvious misuses are better than mysterious ones). Anyway heres a baseline, of existing prdbg use. Each callsite costs 56 bytes of kernel .data amdgpu has "extra" prdbgs due to macro expansion. (see repeating linenos in control) (it also has substantial use of drm.debug) - tedious fix but clear size payoff, as a separate patch, later. bash-5.1# for m in i915 amdgpu nouveau; do modprobe $m; done dyndbg: 1 debug prints in module drm dyndbg: 2 debug prints in module ttm dyndbg: 8 debug prints in module video dyndbg: 167 debug prints in module i915 dyndbg: 2339 debug prints in module amdgpu dyndbg: 3 debug prints in module wmi dyndbg: 3 debug prints in module nouveau Signed-off-by: Jim Cromie --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 + .../gpu/drm/amd/display/dc/core/dc_debug.c| 43 ++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index c56320e78c0e..1f084919294c 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -38,6 +38,8 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \ -I$(FULL_AMD_PATH)/amdkfd +ccflags-$(CONFIG_DYNAMIC_DEBUG_CORE) += -DYNAMIC_DEBUG_MODULE + amdgpu-y := amdgpu_drv.o # add KMS driver diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c index 21be2a684393..ae462e5d42c6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c +++ 
b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c @@ -36,8 +36,49 @@ #include "resource.h" -#define DC_LOGGER_INIT(logger) +#ifdef CONFIG_DRM_USE_DYNAMIC_DEBUG +/* define a drm.debug style dyndbg pr-debug control point */ +#include + +unsigned long __debug_dc; +EXPORT_SYMBOL(__debug_dc); + +#define help_(_N, _cat)"\t Bit-" #_N "\t" _cat "\n" + +#define DC_DYNDBG_BITMAP_DESC(name)\ + "Control pr_debugs via /sys/module/amdgpu/parameters/" #name\ + ", where each bit controls a debug category.\n" \ + help_(0, "[SURFACE]:") \ + help_(1, "[CURSOR]:") \ + help_(2, "[PFLIP]:")\ + help_(3, "[VBLANK]:") \ + help_(4, "[HW_LINK_TRAINING]:") \ + help_(5, "[HW_AUDIO]:") \ + help_(6, "[SCALER]:") \ + help_(7, "[BIOS]:") \ + help_(8, "[BANDWIDTH_CALCS]:") \ + help_(9, "[DML]:") \ + help_(10, "[IF_TRACE]:")\ + help_(11, "[GAMMA]:") \ + help_(12, "[SMU_MSG]:") + +DEFINE_DYNAMIC_DEBUG_CATEGORIES(debug_dc, __debug_dc, + DC_DYNDBG_BITMAP_DESC(debug_dc), + _DD_cat_(0, "[CURSOR]:"), + _DD_cat_(1, "[PFLIP]:"), + _DD_cat_(2, "[VBLANK]:"), + _DD_cat_(3, "[HW_LINK_TRAINING]:"), + _DD_cat_(4, "[HW_AUDIO]:"), + _DD_cat_(5, "[SCALER]:"), + _DD_cat_(6, "[BIOS]:"), + _DD_cat_(7, "[BANDWIDTH_CALCS]:"), + _DD_cat_(8, "[DML]:"), + _DD_cat_(9, "[IF_TRACE]:"), + _DD_cat_(10, "[GAMMA]:"), + _DD_cat_(11, "[SMU_MSG]:")); +#endif +#define DC_LOGGER_INIT(logger) #define SURFACE_TRACE(...) do {\ if (dc->debug.surface_trace) \ -- 2.31.1
[PATCH v8 11/16] i915/gvt: use DEFINE_DYNAMIC_DEBUG_CATEGORIES for existing prdbgs
The gvt component of this driver has ~120 pr_debugs, in 9 categories quite similar to those in DRM. Following the interface model of drm.debug, add a parameter to map bits to these categorizations. DEFINE_DYNAMIC_DEBUG_CATEGORIES(debug_gvt, __gvt_debug, "dyndbg bitmap desc", _DD_cat_(0, "gvt:cmd:"), _DD_cat_(1, "gvt:core:"), _DD_cat_(2, "gvt:dpy:"), _DD_cat_(3, "gvt:el:"), _DD_cat_(4, "gvt:irq:"), _DD_cat_(5, "gvt:mm:"), _DD_cat_(6, "gvt:mmio:"), _DD_cat_(7, "gvt:render:"), _DD_cat_(8, "gvt:sched:")); If CONFIG_DYNAMIC_DEBUG_CORE=y, then gvt/Makefile adds -DDYNAMIC_DEBUG_MODULE to cflags, which CONFIG_DYNAMIC_DEBUG=n (CORE-only) builds need. This is redone more comprehensively soon. Signed-off-by: Jim Cromie --- v5: . static decl of vector of bit->class descriptors - Emil.V . relocate gvt-makefile chunk from elsewhere v7: . move ccflags addition up to i915/Makefile from i915/gvt v8: . relocate DDD_CAT decl code into intel_gvt.c - Tvrtko cflags back to i915/Makefile . add -DDYNAMIC_DEBUG_MODULE to support DYNAMIC_DEBUG_CORE-only builds heres unchanged? footprint: bash-5.1# for m in i915 amdgpu nouveau; do modprobe $m; done dyndbg: 1 debug prints in module drm dyndbg: 2 debug prints in module ttm dyndbg: 8 debug prints in module video dyndbg: 167 debug prints in module i915 dyndbg: 2339 debug prints in module amdgpu [drm] amdgpu kernel modesetting enabled. 
amdgpu: CRAT table disabled by module option amdgpu: Virtual CRAT table created for CPU amdgpu: Topology: Add CPU node dyndbg: 3 debug prints in module wmi dyndbg: 3 debug prints in module nouveau --- drivers/gpu/drm/i915/Makefile| 2 ++ drivers/gpu/drm/i915/intel_gvt.c | 34 2 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4f22cac1c49b..038fd29c89d4 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -295,6 +295,8 @@ i915-y += intel_gvt.o include $(src)/gvt/Makefile endif +ccflags-$(CONFIG_DYNAMIC_DEBUG_CORE) += -DDYNAMIC_DEBUG_MODULE + obj-$(CONFIG_DRM_I915) += i915.o obj-$(CONFIG_DRM_I915_GVT_KVMGT) += gvt/kvmgt.o diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index 4e70c1a9ef2e..eb0da9173b23 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -162,3 +162,37 @@ void intel_gvt_resume(struct drm_i915_private *dev_priv) if (intel_gvt_active(dev_priv)) intel_gvt_pm_resume(dev_priv->gvt); } + +#if defined(CONFIG_DRM_USE_DYNAMIC_DEBUG) + +unsigned long __gvt_debug; +EXPORT_SYMBOL(__gvt_debug); + +#define help_(_N, _cat)"\t Bit-" #_N ":\t" _cat "\n" + +#define I915_GVT_CATEGORIES(name) \ + " Enable debug output via /sys/module/i915/parameters/" #name \ + ", where each bit enables a debug category.\n" \ + help_(0, "gvt:cmd:")\ + help_(1, "gvt:core:") \ + help_(2, "gvt:dpy:")\ + help_(3, "gvt:el:") \ + help_(4, "gvt:irq:")\ + help_(5, "gvt:mm:") \ + help_(6, "gvt:mmio:") \ + help_(7, "gvt:render:") \ + help_(8, "gvt:sched:") + +DEFINE_DYNAMIC_DEBUG_CATEGORIES(debug_gvt, __gvt_debug, + I915_GVT_CATEGORIES(debug_gvt), + _DD_cat_(0, "gvt:cmd:"), + _DD_cat_(1, "gvt:core:"), + _DD_cat_(2, "gvt:dpy:"), + _DD_cat_(3, "gvt:el:"), + _DD_cat_(4, "gvt:irq:"), + _DD_cat_(5, "gvt:mm:"), + _DD_cat_(6, "gvt:mmio:"), + _DD_cat_(7, "gvt:render:"), + _DD_cat_(8, "gvt:sched:")); + +#endif -- 2.31.1
[PATCH v8 09/16] drm: fix doc grammar error
no code changes, good for rc Signed-off-by: Jim Cromie --- include/drm/drm_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index b439ae1921b8..ebb22166ace1 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -522,7 +522,7 @@ void *__devm_drm_dev_alloc(struct device *parent, * @type: the type of the struct which contains struct &drm_device * @member: the name of the &drm_device within @type. * - * This allocates and initialize a new DRM device. No device registration is done. + * This allocates and initializes a new DRM device. No device registration is done. * Call drm_dev_register() to advertice the device to user space and register it * with other core subsystems. This should be done last in the device * initialization sequence to make sure userspace can't access an inconsistent -- 2.31.1
[PATCH v8 10/16] i915/gvt: remove spaces in pr_debug "gvt: core:" etc prefixes
Taking embedded spaces out of existing prefixes makes them better class-prefixes; simplifying the extra quoting needed otherwise: $> echo format "^gvt: core:" +p >control Dropping the internal spaces means any trailing space in a query will more clearly terminate the prefix being searched for. Consider a generic drm-debug example: # turn off ATOMIC reports echo format "^drm:atomic: " -p > control # turn off all ATOMIC:* reports, including any sub-categories echo format "^drm:atomic:" -p > control # turn on ATOMIC:FAIL: reports echo format "^drm:atomic:fail: " +p > control Removing embedded spaces in the class-prefixes simplifies the corresponding match-prefix. This means that "quoted" match-prefixes are only needed when the trailing space is desired, in order to exclude explicitly sub-categorized pr-debugs; in this example, "drm:atomic:fail:". RFC: maybe the prefix catenation should paste in the " " class-prefix terminator explicitly. A pr_debug_() flavor could exclude the " ", allowing ad-hoc sub-categorization by appending for example, "fail:" to "drm:atomic:" without the default " " insertion. Signed-off-by: Jim Cromie --- v8: . fix patchwork CI warning --- drivers/gpu/drm/i915/gvt/debug.h | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/debug.h b/drivers/gpu/drm/i915/gvt/debug.h index c6027125c1ec..bbecc279e077 100644 --- a/drivers/gpu/drm/i915/gvt/debug.h +++ b/drivers/gpu/drm/i915/gvt/debug.h @@ -36,30 +36,30 @@ do { \ } while (0) #define gvt_dbg_core(fmt, args...) \ - pr_debug("gvt: core: "fmt, ##args) + pr_debug("gvt:core: " fmt, ##args) #define gvt_dbg_irq(fmt, args...) \ - pr_debug("gvt: irq: "fmt, ##args) + pr_debug("gvt:irq: " fmt, ##args) #define gvt_dbg_mm(fmt, args...) \ - pr_debug("gvt: mm: "fmt, ##args) + pr_debug("gvt:mm: " fmt, ##args) #define gvt_dbg_mmio(fmt, args...) \ - pr_debug("gvt: mmio: "fmt, ##args) + pr_debug("gvt:mmio: " fmt, ##args) #define gvt_dbg_dpy(fmt, args...) 
\ - pr_debug("gvt: dpy: "fmt, ##args) + pr_debug("gvt:dpy: " fmt, ##args) #define gvt_dbg_el(fmt, args...) \ - pr_debug("gvt: el: "fmt, ##args) + pr_debug("gvt:el: " fmt, ##args) #define gvt_dbg_sched(fmt, args...) \ - pr_debug("gvt: sched: "fmt, ##args) + pr_debug("gvt:sched: " fmt, ##args) #define gvt_dbg_render(fmt, args...) \ - pr_debug("gvt: render: "fmt, ##args) + pr_debug("gvt:render: " fmt, ##args) #define gvt_dbg_cmd(fmt, args...) \ - pr_debug("gvt: cmd: "fmt, ##args) + pr_debug("gvt:cmd: " fmt, ##args) #endif -- 2.31.1