date:20210915

Re: [PATCH v3 06/12] drm/ttm: add TTM_PAGE_FLAG_EXTERNAL_MAPPABLE

2021-09-15 Thread Christian König





Am 15.09.21 um 20:59 schrieb Matthew Auld:

In commit:

commit 667a50db0477d47fdff01c666f5ee1ce26b5264c
Author: Thomas Hellstrom 
Date:   Fri Jan 3 11:17:18 2014 +0100

 drm/ttm: Refuse to fault (prime-) imported pages

we introduced the restriction that imported pages should not be directly
mappable through TTM (this also extends to userptr). In the next patch we
want to introduce a shmem_tt backend, which should follow all the
existing rules with TTM_PAGE_FLAG_EXTERNAL, since it will need to handle
swapping itself, but with the above mapping restriction lifted.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
  drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 --
  include/drm/ttm/ttm_tt.h| 7 +++
  2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 708390588c7c..fd6e18f12f50 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -163,8 +163,10 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
 * (if at all) by redirecting mmap to the exporter.
 */
if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL)) {
-   dma_resv_unlock(bo->base.resv);
-   return VM_FAULT_SIGBUS;
+   if (!(bo->ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL_MAPPABLE)) {
+   dma_resv_unlock(bo->base.resv);
+   return VM_FAULT_SIGBUS;
+   }
}
  
  	return 0;

diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 7f54a83c95ef..800c9edb3e10 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -66,11 +66,18 @@ struct ttm_tt {
 * Note that enum ttm_bo_type.ttm_bo_type_sg objects will always enable
 * this flag.
 *
+* TTM_PAGE_FLAG_EXTERNAL_MAPPABLE: Same behaviour as
+* TTM_PAGE_FLAG_EXTERNAL, but with the reduced restriction that it is
+* still valid to use TTM to map the pages directly. This is useful when
+* implementing a ttm_tt backend which still allocates driver owned
+* pages underneath(say with shmem).
+*
 * TTM_PAGE_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE.
 */
  #define TTM_PAGE_FLAG_SWAPPED (1 << 0)
  #define TTM_PAGE_FLAG_ZERO_ALLOC  (1 << 1)
  #define TTM_PAGE_FLAG_EXTERNAL(1 << 2)
+#define TTM_PAGE_FLAG_EXTERNAL_MAPPABLE(1 << 3 | 
TTM_PAGE_FLAG_EXTERNAL)


That's really bad practice because an "if (!(flags & 
TTM_PAGE_FLAG_EXTERNAL_MAPPABLE))" has different semantics than an "if 
(flags & TTM_PAGE_FLAG_EXTERNAL_MAPPABLE)".


Rather add a TTM_PAGE_FLAG_UNMAPPABLE and make sure that it is set as 
appropriate.


Regards,
Christian.

  
  #define TTM_PAGE_FLAG_PRIV_POPULATED	(1 << 31)

uint32_t page_flags;

Re: [PATCH v3 05/12] drm/ttm: add some kernel-doc for TTM_PAGE_FLAG_*

2021-09-15 Thread Christian König


Am 15.09.21 um 20:59 schrieb Matthew Auld:

Move it to inline kernel-doc, otherwise we can't add empty lines it
seems. Also drop the kernel-doc for pages_list, which doesn't seem to
exist, and get rid of all the strange holes.


As suggested on the other patch I would do the rename and renumbering in 
there and only the documentation change here.


Christian.



Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
  include/drm/ttm/ttm_tt.h | 57 ++--
  1 file changed, 38 insertions(+), 19 deletions(-)

diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index a6c284c21e72..7f54a83c95ef 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -38,35 +38,54 @@ struct ttm_resource;
  struct ttm_buffer_object;
  struct ttm_operation_ctx;
  
-#define TTM_PAGE_FLAG_SWAPPED		(1 << 4)

-#define TTM_PAGE_FLAG_ZERO_ALLOC   (1 << 6)
-#define TTM_PAGE_FLAG_EXTERNAL (1 << 8)
-
-#define TTM_PAGE_FLAG_PRIV_POPULATED  (1 << 31)
-
  /**
- * struct ttm_tt
- *
- * @pages: Array of pages backing the data.
- * @page_flags: see TTM_PAGE_FLAG_*
- * @num_pages: Number of pages in the page array.
- * @sg: for SG objects via dma-buf
- * @dma_address: The DMA (bus) addresses of the pages
- * @swap_storage: Pointer to shmem struct file for swap storage.
- * @pages_list: used by some page allocation backend
- * @caching: The current caching state of the pages, see enum ttm_caching.
- *
- * This is a structure holding the pages, caching- and aperture binding
- * status for a buffer object that isn't backed by fixed (VRAM / AGP)
+ * struct ttm_tt - This is a structure holding the pages, caching- and aperture
+ * binding status for a buffer object that isn't backed by fixed (VRAM / AGP)
   * memory.
   */
  struct ttm_tt {
+   /** @pages: Array of pages backing the data. */
struct page **pages;
+   /**
+* @page_flags: The page flags.
+*
+* Supported values:
+*
+* TTM_PAGE_FLAG_SWAPPED: Set if the pages have been swapped out.
+* Calling ttm_tt_populate() will swap the pages back in, and unset the
+* flag.
+*
+* TTM_PAGE_FLAG_ZERO_ALLOC: Set if the pages will be zeroed on
+* allocation.
+*
+* TTM_PAGE_FLAG_EXTERNAL: Set if the underlying pages were allocated
+* externally, like with dma-buf or userptr. This effectively disables
+* TTM swapping out such pages.  Also important is to prevent TTM from
+* ever directly mapping these pages.
+*
+* Note that enum ttm_bo_type.ttm_bo_type_sg objects will always enable
+* this flag.
+*
+* TTM_PAGE_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE.
+*/
+#define TTM_PAGE_FLAG_SWAPPED  (1 << 0)
+#define TTM_PAGE_FLAG_ZERO_ALLOC   (1 << 1)
+#define TTM_PAGE_FLAG_EXTERNAL (1 << 2)
+
+#define TTM_PAGE_FLAG_PRIV_POPULATED   (1 << 31)
uint32_t page_flags;
+   /** @num_pages: Number of pages in the page array. */
uint32_t num_pages;
+   /** @sg: for SG objects via dma-buf. */
struct sg_table *sg;
+   /** @dma_address: The DMA (bus) addresses of the pages. */
dma_addr_t *dma_address;
+   /** @swap_storage: Pointer to shmem struct file for swap storage. */
struct file *swap_storage;
+   /**
+* @caching: The current caching state of the pages, see enum
+* ttm_caching.
+*/
enum ttm_caching caching;
  };

Re: [PATCH v3 04/12] drm/ttm: s/FLAG_SG/FLAG_EXTERNAL/

2021-09-15 Thread Christian König





Am 15.09.21 um 20:59 schrieb Matthew Auld:

It covers more than just ttm_bo_type_sg usage, like with say dma-buf,
since one other user is userptr in amdgpu, and in the future we might
have some more. Hence EXTERNAL is likely a more suitable name.

Suggested-by: Christian König 
Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 +-
  drivers/gpu/drm/nouveau/nouveau_bo.c|  4 ++--
  drivers/gpu/drm/radeon/radeon_ttm.c |  8 
  drivers/gpu/drm/ttm/ttm_bo.c|  2 +-
  drivers/gpu/drm/ttm/ttm_bo_vm.c |  2 +-
  drivers/gpu/drm/ttm/ttm_tt.c| 10 +-
  include/drm/ttm/ttm_tt.h|  6 +++---
  7 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c5fa6e62f6ca..a6d606f91dfd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -894,7 +894,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
DRM_ERROR("failed to pin userptr\n");
return r;
}
-   } else if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
+   } else if (ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL) {
if (!ttm->sg) {
struct dma_buf_attachment *attach;
struct sg_table *sgt;
@@ -1147,7 +1147,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
return 0;
}
  
-	if (ttm->page_flags & TTM_PAGE_FLAG_SG)

+   if (ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL)
return 0;
  
  	ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);

@@ -1179,7 +1179,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device 
*bdev,
return;
}
  
-	if (ttm->page_flags & TTM_PAGE_FLAG_SG)

+   if (ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL)
return;
  
  	adev = amdgpu_ttm_adev(bdev);

@@ -1210,8 +1210,8 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object 
*bo,
return -ENOMEM;
}
  
-	/* Set TTM_PAGE_FLAG_SG before populate but after create. */

-   bo->ttm->page_flags |= TTM_PAGE_FLAG_SG;
+   /* Set TTM_PAGE_FLAG_EXTERNAL before populate but after create. */
+   bo->ttm->page_flags |= TTM_PAGE_FLAG_EXTERNAL;
  
  	gtt = (void *)bo->ttm;

gtt->userptr = addr;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 33dca2565cca..ba0fec252df7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1249,7 +1249,7 @@ nouveau_ttm_tt_populate(struct ttm_device *bdev,
struct ttm_tt *ttm_dma = (void *)ttm;
struct nouveau_drm *drm;
struct device *dev;
-   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL);
  
  	if (ttm_tt_is_populated(ttm))

return 0;
@@ -1272,7 +1272,7 @@ nouveau_ttm_tt_unpopulate(struct ttm_device *bdev,
  {
struct nouveau_drm *drm;
struct device *dev;
-   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL);
  
  	if (slave)

return;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 7793249bc549..d891491b6da8 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -545,14 +545,14 @@ static int radeon_ttm_tt_populate(struct ttm_device *bdev,
  {
struct radeon_device *rdev = radeon_get_rdev(bdev);
struct radeon_ttm_tt *gtt = radeon_ttm_tt_to_gtt(rdev, ttm);
-   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL);
  
  	if (gtt && gtt->userptr) {

ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
if (!ttm->sg)
return -ENOMEM;
  
-		ttm->page_flags |= TTM_PAGE_FLAG_SG;

+   ttm->page_flags |= TTM_PAGE_FLAG_EXTERNAL;
return 0;
}
  
@@ -569,13 +569,13 @@ static void radeon_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm

  {
struct radeon_device *rdev = radeon_get_rdev(bdev);
struct radeon_ttm_tt *gtt = radeon_ttm_tt_to_gtt(rdev, ttm);
-   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL);
  
  	radeon_ttm_tt_unbind(bdev, ttm);
  
  	if (gtt && gtt->userptr) {

kfree(ttm->sg);
-   ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
+   ttm->page_flags &= ~TTM_PAGE_FLAG_EXTERNAL;
return;
}
  
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c

index 3b22c0013dbf..642dc7ce3081 100644
--- a/

Re: [RFC PATCH v3 1/6] drm/doc: Color Management and HDR10 RFC

2021-09-15 Thread Jeremy Cline

On Wed, 2021-09-15 at 17:01 +0300, Pekka Paalanen wrote:
> On Fri, 30 Jul 2021 16:41:29 -0400
> Harry Wentland  wrote:
> 
> > Use the new DRM RFC doc section to capture the RFC previously only
> > described in the cover letter at
> > https://patchwork.freedesktop.org/series/89506/
> > 
> > v3:
> >  * Add sections on single-plane and multi-plane HDR
> >  * Describe approach to define HW details vs approach to define SW
> > intentions
> >  * Link Jeremy Cline's excellent HDR summaries
> >  * Outline intention behind overly verbose doc
> >  * Describe FP16 use-case
> >  * Clean up links
> > 
> > v2: create this doc
> > 
> > v1: n/a
> > 
> > Signed-off-by: Harry Wentland 
> 
> Hi Harry,
> 
> I finally managed to go through this, comments below. Excellent to
> have
> pictures included. I wrote this reply over several days, sorry if
> it's
> not quite coherent.
> 
> 
> 
> 



> > +
> > +
> > +Overview and background
> > +===
> > +
> > +I highly recommend you read `Jeremy Cline's HDR primer`_
> > +
> > +Jeremy Cline did a much better job describing this. I highly
> > recommend
> > +you read it at [1]:
> > +
> > +.. _Jeremy Cline's HDR primer:
> > https://www.jcline.org/blog/fedora/graphics/hdr/2021/05/07/hdr-in-linux-p1.html
> 
> That's a nice write-up I didn't know about, thanks.
> 
> I just wish such write-ups would be somehow peer-reviewed for
> correctness and curated for proper referencing. Perhaps like we
> develop
> code: at least some initial peer review and then fixes when anyone
> notices something to improve. Like... what you are doing here! :-)
> 
> The post is perhaps a bit too narrow with OETF/EOTF terms,
> accidentally
> implying that OETF = EOTF^-1 which is not generally true, but that
> all
> depends on which O-to-E or E-to-O functions one is talking about.
> Particularly there is a difference between functions used for signal
> compression which needs an exact matching inverse function, and
> functions containing tone-mapping and artistic effects that when
> concatenated result in the (non-identity) OOTF.
> 
> Nothing in the post seems to disagree with my current understanding
> FWIW.

I'm more than happy to update things that are incorrect or misleading
since the last thing I want to do is muddy the waters. Personally, I
would much prefer that any useful content from it be peer-reviewed and
included directly in the documentation since, well, it's being hosted
out of my laundry room and the cats have a habit of turning off the
UPS...

Do let me know if I can be of any assistance there; I'm no longer
employed to do anything HDR-related, but I do like clear documentation
so I could dedicate a bit of free time to it.

- Jeremy

Re: [PATCH v3 01/12] drm/ttm: stop setting page->index for the ttm_tt

2021-09-15 Thread Christian König


Am 15.09.21 um 20:59 schrieb Matthew Auld:

In commit:

commit 58aa6622d32af7d2c08d45085f44c54554a16ed7
Author: Thomas Hellstrom 
Date:   Fri Jan 3 11:47:23 2014 +0100

 drm/ttm: Correctly set page mapping and -index members

we started setting the page->mapping and page->index to point to the
virtual address space, if the pages were faulted with TTM. Apparently
this was needed for core-mm to be able to reverse lookup the virtual
address given the struct page, and potentially unmap it from the page
tables. However as pointed out by Thomas, since we are now using
PFN_MAP, instead of say PFN_MIXED, this should no longer be the case.

There was also apparently some usecase in vmwgfx which needed this for
dirty tracking, but that also doesn't appear to be the case anymore, as
pointed out by Thomas.

We still need to keep the page->mapping for now, since that is still needed
for different reasons, but we try to address that in the next patch.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 


Reviewed-by: Christian König 

Fingers crossed that this really works as documented.


---
  drivers/gpu/drm/ttm/ttm_bo_vm.c | 2 --
  drivers/gpu/drm/ttm/ttm_tt.c| 4 +---
  2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index f56be5bc0861..906ec8a1bf5a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -346,8 +346,6 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
} else if (unlikely(!page)) {
break;
}
-   page->index = drm_vma_node_start(&bo->base.vma_node) +
-   page_offset;
pfn = page_to_pfn(page);
}
  
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c

index dae52433beeb..1cc04c224988 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -367,10 +367,8 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm)
if (ttm->page_flags & TTM_PAGE_FLAG_SG)
return;
  
-	for (i = 0; i < ttm->num_pages; ++i) {

+   for (i = 0; i < ttm->num_pages; ++i)
(*page)->mapping = NULL;
-   (*page++)->index = 0;
-   }
  }
  
  void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)

Re: [PATCH v3 03/12] drm/ttm: remove TTM_PAGE_FLAG_NO_RETRY

2021-09-15 Thread Christian König


Am 15.09.21 um 20:59 schrieb Matthew Auld:

No longer used it seems.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 


Reviewed-by: Christian König 


---
  include/drm/ttm/ttm_tt.h | 1 -
  1 file changed, 1 deletion(-)

diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 89b15d673b22..842ce756213c 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -41,7 +41,6 @@ struct ttm_operation_ctx;
  #define TTM_PAGE_FLAG_SWAPPED (1 << 4)
  #define TTM_PAGE_FLAG_ZERO_ALLOC  (1 << 6)
  #define TTM_PAGE_FLAG_SG  (1 << 8)
-#define TTM_PAGE_FLAG_NO_RETRY   (1 << 9)
  
  #define TTM_PAGE_FLAG_PRIV_POPULATED  (1 << 31)

Re: [PATCH v3 02/12] drm/ttm: move ttm_tt_{add,clear}_mapping into amdgpu

2021-09-15 Thread Christian König





Am 15.09.21 um 20:59 schrieb Matthew Auld:

Now that setting page->index shouldn't be needed anymore, we are just
left with setting page->mapping, and here it looks like amdgpu is the
only user, where pointing the page->mapping at the dev_mapping is used
to verify that the pages do indeed belong to the device, if userspace
later tries to touch them.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 27 -
  drivers/gpu/drm/ttm/ttm_tt.c| 25 ---
  2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 1129e17e9f09..c5fa6e62f6ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1107,6 +1107,24 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct 
ttm_buffer_object *bo,
return &gtt->ttm;
  }
  
+static void amdgpu_ttm_tt_add_mapping(struct ttm_device *bdev,

+ struct ttm_tt *ttm)
+{
+   pgoff_t i;
+
+   for (i = 0; i < ttm->num_pages; ++i)
+   ttm->pages[i]->mapping = bdev->dev_mapping;
+}
+
+static void amdgpu_ttm_tt_clear_mapping(struct ttm_tt *ttm)
+{
+   struct page **page = ttm->pages;
+   pgoff_t i;
+
+   for (i = 0; i < ttm->num_pages; ++i)
+   (*page)->mapping = NULL;
+}
+
  /*
   * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
   *
@@ -1119,6 +1137,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
  {
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
+   int ret;
  
  	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */

if (gtt->userptr) {
@@ -1131,7 +1150,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_device 
*bdev,
if (ttm->page_flags & TTM_PAGE_FLAG_SG)
return 0;
  
-	return ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);

+   ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
+   if (ret)
+   return ret;
+
+   amdgpu_ttm_tt_add_mapping(bdev, ttm);


I don't really see why this needs to be a separate function. Just inline 
the loop here.



+   return 0;
  }
  
  /*

@@ -1159,6 +1183,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device 
*bdev,
return;
  
  	adev = amdgpu_ttm_adev(bdev);

+   amdgpu_ttm_tt_clear_mapping(ttm);


Same here of course, apart from that looks good to me.

Christian.


return ttm_pool_free(&adev->mman.bdev.pool, ttm);
  }
  
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c

index 1cc04c224988..980ecb079b2c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -289,17 +289,6 @@ int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt 
*ttm,
return ret;
  }
  
-static void ttm_tt_add_mapping(struct ttm_device *bdev, struct ttm_tt *ttm)

-{
-   pgoff_t i;
-
-   if (ttm->page_flags & TTM_PAGE_FLAG_SG)
-   return;
-
-   for (i = 0; i < ttm->num_pages; ++i)
-   ttm->pages[i]->mapping = bdev->dev_mapping;
-}
-
  int ttm_tt_populate(struct ttm_device *bdev,
struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
  {
@@ -336,7 +325,6 @@ int ttm_tt_populate(struct ttm_device *bdev,
if (ret)
goto error;
  
-	ttm_tt_add_mapping(bdev, ttm);

ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED;
if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
ret = ttm_tt_swapin(ttm);
@@ -359,24 +347,11 @@ int ttm_tt_populate(struct ttm_device *bdev,
  }
  EXPORT_SYMBOL(ttm_tt_populate);
  
-static void ttm_tt_clear_mapping(struct ttm_tt *ttm)

-{
-   pgoff_t i;
-   struct page **page = ttm->pages;
-
-   if (ttm->page_flags & TTM_PAGE_FLAG_SG)
-   return;
-
-   for (i = 0; i < ttm->num_pages; ++i)
-   (*page)->mapping = NULL;
-}
-
  void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
  {
if (!ttm_tt_is_populated(ttm))
return;
  
-	ttm_tt_clear_mapping(ttm);

if (bdev->funcs->ttm_tt_unpopulate)
bdev->funcs->ttm_tt_unpopulate(bdev, ttm);
else

Re: [virtio-dev] Re: [PATCH v1 08/12] drm/virtio: implement context init: stop using drv->context when creating fence

2021-09-15 Thread Gerd Hoffmann

  Hi,

> > I guess you need to also update virtio_gpu_fence_event_process()
> > then?  It currently has the strict ordering logic baked in ...
> 
> The update to virtio_gpu_fence_event_process was done as a preparation a
> few months back:
> 
> https://cgit.freedesktop.org/drm/drm-misc/commit/drivers/gpu/drm/virtio/virtgpu_fence.c?id=36549848ed27c22bb2ffd5d1468efc6505b05f97

Ah, ok, missed the detail that the context check is already there.

thanks,
  Gerd

Re: [PATCH] drm/exynos: Make use of the helper function devm_platform_ioremap_resource()

2021-09-15 Thread Inki Dae




21. 8. 31. 오후 4:49에 Cai Huoqing 이(가) 쓴 글:
> Use the devm_platform_ioremap_resource() helper instead of
> calling platform_get_resource() and devm_ioremap_resource()
> separately
> 

Picked it up.

Thanks,
Inki Dae

> Signed-off-by: Cai Huoqing 
> ---
>  drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 +---
>  drivers/gpu/drm/exynos/exynos_drm_dsi.c   | 4 +---
>  drivers/gpu/drm/exynos/exynos_drm_fimc.c  | 5 +
>  drivers/gpu/drm/exynos/exynos_drm_fimd.c  | 4 +---
>  drivers/gpu/drm/exynos/exynos_drm_g2d.c   | 5 +
>  drivers/gpu/drm/exynos/exynos_drm_gsc.c   | 6 +-
>  drivers/gpu/drm/exynos/exynos_drm_rotator.c   | 4 +---
>  drivers/gpu/drm/exynos/exynos_drm_scaler.c| 4 +---
>  drivers/gpu/drm/exynos/exynos_hdmi.c  | 4 +---
>  9 files changed, 9 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c 
> b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
> index 9870c4e6af36..b5001db7a95c 100644
> --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
> +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
> @@ -793,7 +793,6 @@ static int exynos5433_decon_probe(struct platform_device 
> *pdev)
>  {
>   struct device *dev = &pdev->dev;
>   struct decon_context *ctx;
> - struct resource *res;
>   int ret;
>   int i;
>  
> @@ -818,8 +817,7 @@ static int exynos5433_decon_probe(struct platform_device 
> *pdev)
>   ctx->clks[i] = clk;
>   }
>  
> - res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> - ctx->addr = devm_ioremap_resource(dev, res);
> + ctx->addr = devm_platform_ioremap_resource(pdev, 0);
>   if (IS_ERR(ctx->addr))
>   return PTR_ERR(ctx->addr);
>  
> diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c 
> b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
> index e39fac889edc..8d137857818c 100644
> --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
> +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
> @@ -1738,7 +1738,6 @@ static const struct component_ops 
> exynos_dsi_component_ops = {
>  static int exynos_dsi_probe(struct platform_device *pdev)
>  {
>   struct device *dev = &pdev->dev;
> - struct resource *res;
>   struct exynos_dsi *dsi;
>   int ret, i;
>  
> @@ -1789,8 +1788,7 @@ static int exynos_dsi_probe(struct platform_device 
> *pdev)
>   }
>   }
>  
> - res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> - dsi->reg_base = devm_ioremap_resource(dev, res);
> + dsi->reg_base = devm_platform_ioremap_resource(pdev, 0);
>   if (IS_ERR(dsi->reg_base))
>   return PTR_ERR(dsi->reg_base);
>  
> diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c 
> b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
> index a3c718148c45..ecfd82d0afb7 100644
> --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
> +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
> @@ -85,7 +85,6 @@ struct fimc_scaler {
>  /*
>   * A structure of fimc context.
>   *
> - * @regs_res: register resources.
>   * @regs: memory mapped io registers.
>   * @lock: locking of operations.
>   * @clocks: fimc clocks.
> @@ -103,7 +102,6 @@ struct fimc_context {
>   struct exynos_drm_ipp_formats   *formats;
>   unsigned intnum_formats;
>  
> - struct resource *regs_res;
>   void __iomem*regs;
>   spinlock_t  lock;
>   struct clk  *clocks[FIMC_CLKS_MAX];
> @@ -1327,8 +1325,7 @@ static int fimc_probe(struct platform_device *pdev)
>   ctx->num_formats = num_formats;
>  
>   /* resource memory */
> - ctx->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> - ctx->regs = devm_ioremap_resource(dev, ctx->regs_res);
> + ctx->regs = devm_platform_ioremap_resource(pdev, 0);
>   if (IS_ERR(ctx->regs))
>   return PTR_ERR(ctx->regs);
>  
> diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c 
> b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
> index 700ca4fa6665..c735e53939d8 100644
> --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
> +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
> @@ -1202,9 +1202,7 @@ static int fimd_probe(struct platform_device *pdev)
>   return PTR_ERR(ctx->lcd_clk);
>   }
>  
> - res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> -
> - ctx->regs = devm_ioremap_resource(dev, res);
> + ctx->regs = devm_platform_ioremap_resource(pdev, 0);
>   if (IS_ERR(ctx->regs))
>   return PTR_ERR(ctx->regs);
>  
> diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c 
> b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
> index b00230626c6a..471fd6c8135f 100644
> --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
> +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
> @@ -1449,7 +1449,6 @@ static const struct component_ops g2d_component_ops = {
>  static int g2d_probe(struct platform_device *pdev)
>  {
>   struct device *dev = &pdev->dev;
> - struct resource *res;
>   struct g2d_data *g2d;
>   int ret;
>  
> @@ -1491,9 +1490,7 @@ static int g2d

Re: [PATCH 8/8] usb: typec: altmodes/displayport: Notify drm subsys of hotplug events

2021-09-15 Thread Stephen Boyd

Quoting Hans de Goede (2021-08-17 14:52:01)
> diff --git a/drivers/usb/typec/altmodes/displayport.c 
> b/drivers/usb/typec/altmodes/displayport.c
> index aa669b9cf70e..c1d8c23baa39 100644
> --- a/drivers/usb/typec/altmodes/displayport.c
> +++ b/drivers/usb/typec/altmodes/displayport.c
> @@ -125,6 +129,7 @@ static int dp_altmode_configure(struct dp_altmode *dp, u8 
> con)
>  static int dp_altmode_status_update(struct dp_altmode *dp)
>  {
> bool configured = !!DP_CONF_GET_PIN_ASSIGN(dp->data.conf);
> +   bool hpd = !!(dp->data.status & DP_STATUS_HPD_STATE);
> u8 con = DP_STATUS_CONNECTION(dp->data.status);
> int ret = 0;
>
> @@ -137,6 +142,11 @@ static int dp_altmode_status_update(struct dp_altmode 
> *dp)
> ret = dp_altmode_configure(dp, con);
> if (!ret)
> dp->state = DP_STATE_CONFIGURE;
> +   } else {
> +   if (dp->hpd != hpd) {
> +   drm_connector_oob_hotplug_event(dp->connector_fwnode);
> +   dp->hpd = hpd;
> +   }
> }
>
> return ret;
> @@ -512,6 +522,7 @@ static const struct attribute_group dp_altmode_group = {
>  int dp_altmode_probe(struct typec_altmode *alt)
>  {
> const struct typec_altmode *port = typec_altmode_get_partner(alt);
> +   struct fwnode_handle *fwnode;
> struct dp_altmode *dp;
> int ret;
>
> @@ -540,6 +551,11 @@ int dp_altmode_probe(struct typec_altmode *alt)
> alt->desc = "DisplayPort";
> alt->ops = &dp_altmode_ops;
>
> +   fwnode = dev_fwnode(alt->dev.parent->parent); /* typec_port fwnode */
> +   dp->connector_fwnode = fwnode_find_reference(fwnode, "displayport", 
> 0);

I'm trying to figure out how to translate this over to DT bindings. Is
there a binding document for this fwnode reference? If not, can you
please update
Documentation/devicetree/bindings/connector/usb-connector.yaml with this
property?

I think this means that the type-c node would have a 'displayport =
<&some_phandle>' property in it that points to the display port hardware
device that's pumping out the DisplayPort data?

> +   if (IS_ERR(dp->connector_fwnode))
> +   dp->connector_fwnode = NULL;
> +
> typec_altmode_set_drvdata(alt, dp);
>
> dp->state = DP_STATE_ENTER;
> @@ -555,6 +571,13 @@ void dp_altmode_remove(struct typec_altmode *alt)
>
> sysfs_remove_group(&alt->dev.kobj, &dp_altmode_group);
> cancel_work_sync(&dp->work);
> +
> +   if (dp->connector_fwnode) {
> +   if (dp->hpd)
> +   drm_connector_oob_hotplug_event(dp->connector_fwnode);

I was hoping that we could make a type-c connector into a drm_bridge.
I'm thinking that it would be a DP-to-panel bridge. Then a panel could
be created as well on the end of the type-c connector and the bridge
would report hpd whenever the type-c logic figures out the cable has
been connected and hpd is asserted. The actual DisplayPort hardware
that's encoding data would then find the bridge through the graph
binding connected to the output node.

I'm not sure how MST is handled though. In that scenario maybe there's
more than one panel?

If you're interested the dts file that I'm trying to make this work for
is sc7180-trogdor.dtsi and I need to hook up mdss_dp's output port to
the two type-c connectors, usb_c0 and usb_c1, somehow. The two ports are
actually muxed by the EC (parent node) so only one type-c port can be
connected to the DP hardware at a time.

> +
> +   fwnode_handle_put(dp->connector_fwnode);
> +   }

Re: [PATCH v5 11/16] drm/mediatek: add display MDP RDMA support for MT8195

2021-09-15 Thread Nancy . Lin

Hi Chun-Kuang,

Thanks for the review.

On Thu, 2021-09-09 at 07:54 +0800, Chun-Kuang Hu wrote:
> Hi, Nancy:
> 
> Nancy.Lin  於 2021年9月6日 週一 下午3:15寫道：
> > 
> > Add MDP_RDMA driver for MT8195. MDP_RDMA is the DMA engine of
> > the ovl_adaptor component.
> > 
> > Signed-off-by: Nancy.Lin 
> > ---
> >  drivers/gpu/drm/mediatek/Makefile   |   3 +-
> >  drivers/gpu/drm/mediatek/mtk_disp_drv.h |   7 +
> >  drivers/gpu/drm/mediatek/mtk_mdp_rdma.c | 301
> > 
> >  drivers/gpu/drm/mediatek/mtk_mdp_rdma.h |  37 +++
> >  4 files changed, 347 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
> >  create mode 100644 drivers/gpu/drm/mediatek/mtk_mdp_rdma.h
> > 
> > diff --git a/drivers/gpu/drm/mediatek/Makefile
> > b/drivers/gpu/drm/mediatek/Makefile
> > index a38e88e82d12..6e604a933ed0 100644
> > --- a/drivers/gpu/drm/mediatek/Makefile
> > +++ b/drivers/gpu/drm/mediatek/Makefile
> > @@ -13,7 +13,8 @@ mediatek-drm-y := mtk_disp_aal.o \
> >   mtk_drm_gem.o \
> >   mtk_drm_plane.o \
> >   mtk_dsi.o \
> > - mtk_dpi.o
> > + mtk_dpi.o \
> > + mtk_mdp_rdma.o
> > 
> >  obj-$(CONFIG_DRM_MEDIATEK) += mediatek-drm.o
> > 
> > diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h
> > b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
> > index a33b13fe2b6e..b3a372cab0bd 100644
> > --- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h
> > +++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
> > @@ -8,6 +8,7 @@
> > 
> >  #include 
> >  #include "mtk_drm_plane.h"
> > +#include "mtk_mdp_rdma.h"
> > 
> >  int mtk_aal_clk_enable(struct device *dev);
> >  void mtk_aal_clk_disable(struct device *dev);
> > @@ -106,4 +107,10 @@ void mtk_rdma_enable_vblank(struct device
> > *dev,
> > void *vblank_cb_data);
> >  void mtk_rdma_disable_vblank(struct device *dev);
> > 
> > +int mtk_mdp_rdma_clk_enable(struct device *dev);
> > +void mtk_mdp_rdma_clk_disable(struct device *dev);
> > +void mtk_mdp_rdma_start(struct device *dev, struct cmdq_pkt
> > *cmdq_pkt);
> > +void mtk_mdp_rdma_stop(struct device *dev, struct cmdq_pkt
> > *cmdq_pkt);
> > +void mtk_mdp_rdma_config(struct device *dev, struct
> > mtk_mdp_rdma_cfg *cfg,
> > +struct cmdq_pkt *cmdq_pkt);
> >  #endif
> > diff --git a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
> > b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
> > new file mode 100644
> > index ..052434d960b9
> > --- /dev/null
> > +++ b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c
> > @@ -0,0 +1,301 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (c) 2021 MediaTek Inc.
> > + */
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#include "mtk_drm_drv.h"
> > +#include "mtk_disp_drv.h"
> > +#include "mtk_mdp_rdma.h"
> > +
> > +#define
> > MDP_RDMA_EN0x000
> > +   #define FLD_ROT_ENABLEBIT(0)
> 
> Maybe my description is not good, I like the style of rdma driver
> [1].
> 
> [1] 
> https://urldefense.com/v3/__https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/mediatek/mtk_disp_rdma.c?h=v5.14__;!!CTRNKA9wMg0ARbw!0WCmLLqZ2IclvjA-NLthJ-PGuByyzj_ImXoeNh2mvZ7Is9NFLTb37Pzr3jr4fK4j$
>  
> 
OK, I will change the alignment.

> > +
> > +#define
> > MDP_RDMA_RESET 0x008
> > +
> > +#define
> > MDP_RDMA_CON   0x020
> > +   #define FLD_OUTPUT_10BBIT(5)
> > +   #define FLD_SIMPLE_MODE   BIT(4)
> > +
> > +#define
> > MDP_RDMA_GMCIF_CON 0x028
> > +   #define FLD_COMMAND_DIV   BIT(0)
> > +   #define FLD_EXT_PREULTRA_EN   BIT(3)
> > +   #define
> > FLD_RD_REQ_TYPE   GENMASK(7, 4)
> > +   #define VAL_RD_REQ_TYPE_BURST_8_ACCESS7
> > +   #define
> > FLD_ULTRA_EN  GENMASK(13, 12)
> > +   #define VAL_ULTRA_EN_ENABLE   1
> > +   #define
> > FLD_PRE_ULTRA_EN  GENMASK(17, 16)
> > +   #define VAL_PRE_ULTRA_EN_ENABLE   1
> > +   #define FLD_EXT_ULTRA_EN  BIT(18)
> > +
> > +#define
> > MDP_RDMA_SRC_CON   0x030
> > +   #define FLD_OUTPUT_ARGB   BIT(25)
> > +   #define
> > FLD_BIT_NUMBERGENMASK(19, 18)
> > +   #define FLD_UNIFORM_CONFIGBIT(17)
> > +   #define FLD_SWAP  BIT(14)
> > +   #define
> > FLD_SRC_FORMATGENMASK(3, 0)
> > +
> > +#define
> > MDP_RDMA_COMP_CON

Re: [PATCH v5 08/16] soc: mediatek: add cmdq support of mtk-mmsys config API for mt8195 vdosys1

2021-09-15 Thread Nancy . Lin

Hi Chun-Kuang,

Thanks for the review.

On Wed, 2021-09-08 at 00:29 +0800, Chun-Kuang Hu wrote:
> Hi, Nancy:
> 
> Nancy.Lin  於 2021年9月6日 週一 下午3:15寫道：
> > 
> > Add cmdq support for mtk-mmsys config API.
> > The mmsys config register settings need to take effect with the
> > other
> > HW settings(like OVL_ADAPTOR...) at the same vblanking time.
> > 
> > If we use CPU to write the mmsys reg, we can't guarantee all the
> > settings can be written in the same vblanking time.
> > Cmdq is used for this purpose. We prepare all the related HW
> > settings
> > in one cmdq packet. The first command in the packet is "wait stream
> > done", followed by all the HW settings. After the cmdq packet is
> > flushed to the GCE HW, the GCE waits for the "stream done event" to
> > arrive and then starts flushing all the HW settings. This guarantees
> > that all the settings are flushed in the same vblanking.
> > 
> > Signed-off-by: Nancy.Lin 
> > ---
> >  drivers/soc/mediatek/mtk-mmsys.c   | 28 +-
> > 
> >  include/linux/soc/mediatek/mtk-mmsys.h |  6 +-
> >  2 files changed, 28 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/soc/mediatek/mtk-mmsys.c
> > b/drivers/soc/mediatek/mtk-mmsys.c
> > index 3a38b8269c71..060065501b8a 100644
> > --- a/drivers/soc/mediatek/mtk-mmsys.c
> > +++ b/drivers/soc/mediatek/mtk-mmsys.c
> > @@ -81,6 +81,7 @@ struct mtk_mmsys {
> > const struct mtk_mmsys_driver_data *data;
> > spinlock_t lock; /* protects mmsys_sw_rst_b reg */
> > struct reset_controller_dev rcdev;
> > +   struct cmdq_client_reg cmdq_base;
> >  };
> > 
> >  void mtk_mmsys_ddp_connect(struct device *dev,
> > @@ -174,7 +175,7 @@ static const struct reset_control_ops
> > mtk_mmsys_reset_ops = {
> >  };
> > 
> >  void mtk_mmsys_ddp_config(struct device *dev, enum
> > mtk_mmsys_config_type config,
> > - u32 id, u32 val)
> > + u32 id, u32 val, struct cmdq_pkt
> > *cmdq_pkt)
> >  {
> > struct mtk_mmsys *mmsys = dev_get_drvdata(dev);
> > const struct mtk_mmsys_config *mmsys_config = mmsys->data-
> > >config;
> > @@ -197,10 +198,20 @@ void mtk_mmsys_ddp_config(struct device *dev,
> > enum mtk_mmsys_config_type config,
> > mask = mmsys_config[i].mask;
> > reg_val = val << mmsys_config[i].shift;
> > 
> > -   u32 tmp = readl(mmsys->regs + offset);
> > -
> > -   tmp = (tmp & ~mask) | reg_val;
> > -   writel(tmp, mmsys->regs + offset);
> > +#if IS_REACHABLE(CONFIG_MTK_CMDQ)
> > +   if (cmdq_pkt && mmsys->cmdq_base.size) {
> > +   cmdq_pkt_write_mask(cmdq_pkt, mmsys-
> > >cmdq_base.subsys,
> > +   mmsys->cmdq_base.offset +
> > offset, reg_val,
> > +   mask);
> > +   } else {
> > +#endif
> > +   u32 tmp = readl(mmsys->regs + offset);
> > +
> > +   tmp = (tmp & ~mask) | reg_val;
> > +   writel(tmp, mmsys->regs + offset);
> > +#if IS_REACHABLE(CONFIG_MTK_CMDQ)
> > +   }
> > +#endif
> >  }
> >  EXPORT_SYMBOL_GPL(mtk_mmsys_ddp_config);
> > 
> > @@ -236,6 +247,13 @@ static int mtk_mmsys_probe(struct
> > platform_device *pdev)
> > }
> > 
> > mmsys->data = of_device_get_match_data(&pdev->dev);
> > +
> > +#if IS_REACHABLE(CONFIG_MTK_CMDQ)
> > +   ret = cmdq_dev_get_client_reg(dev, &mmsys->cmdq_base, 0);
> 
> Define mediatek,gce-client-reg in binding document first.
> 
> Regards,
> Chun-Kuang.
> 
OK, I will add binding document in the next revision.

Regards,
Nancy Lin

> > +   if (ret)
> > +   dev_dbg(dev, "No mediatek,gce-client-reg!\n");
> > +#endif
> > +
> > platform_set_drvdata(pdev, mmsys);
> > 
> > clks = platform_device_register_data(&pdev->dev, mmsys-
> > >data->clk_driver,
> > diff --git a/include/linux/soc/mediatek/mtk-mmsys.h
> > b/include/linux/soc/mediatek/mtk-mmsys.h
> > index ef2a6d9a834b..9705d242849a 100644
> > --- a/include/linux/soc/mediatek/mtk-mmsys.h
> > +++ b/include/linux/soc/mediatek/mtk-mmsys.h
> > @@ -6,6 +6,10 @@
> >  #ifndef __MTK_MMSYS_H
> >  #define __MTK_MMSYS_H
> > 
> > +#include 
> > +#include 
> > +#include 
> > +
> >  enum mtk_ddp_comp_id;
> >  struct device;
> > 
> > @@ -75,6 +79,6 @@ void mtk_mmsys_ddp_disconnect(struct device *dev,
> >   enum mtk_ddp_comp_id next);
> > 
> >  void mtk_mmsys_ddp_config(struct device *dev, enum
> > mtk_mmsys_config_type config,
> > - u32 id, u32 val);
> > + u32 id, u32 val, struct cmdq_pkt
> > *cmdq_pkt);
> > 
> >  #endif /* __MTK_MMSYS_H */
> > --
> > 2.18.0
> >

Re: [PATCH v5 04/16] dt-bindings: reset: mt8195: add vdosys1 reset control bit

2021-09-15 Thread Nancy . Lin

Hi Chun-Kuang,

Thanks for the review.

On Wed, 2021-09-08 at 00:06 +0800, Chun-Kuang Hu wrote:
> Hi, Nancy:
> 
> Nancy.Lin  於 2021年9月6日 週一 下午3:15寫道：
> > 
> > Add vdosys1 reset control bit for MT8195 platform.
> > 
> > Signed-off-by: Nancy.Lin 
> > ---
> >  include/dt-bindings/reset/mt8195-resets.h | 12 
> >  1 file changed, 12 insertions(+)
> > 
> > diff --git a/include/dt-bindings/reset/mt8195-resets.h
> > b/include/dt-bindings/reset/mt8195-resets.h
> > index a26bccc8b957..eaaa882c09bd 100644
> > --- a/include/dt-bindings/reset/mt8195-resets.h
> > +++ b/include/dt-bindings/reset/mt8195-resets.h
> > @@ -26,4 +26,16 @@
> > 
> >  #define MT8195_TOPRGU_SW_RST_NUM   16
> > 
> > +/* VDOSYS1 */
> > +#define MT8195_VDOSYS1_SW0_RST_B_MERGE0_DL_ASYNC 25
> > +#define MT8195_VDOSYS1_SW0_RST_B_MERGE1_DL_ASYNC 26
> > +#define MT8195_VDOSYS1_SW0_RST_B_MERGE2_DL_ASYNC 27
> > +#define MT8195_VDOSYS1_SW0_RST_B_MERGE3_DL_ASYNC 28
> > +#define MT8195_VDOSYS1_SW0_RST_B_MERGE4_DL_ASYNC 29
> > +#define MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_FE0_DL_ASYNC 51
> > +#define MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_FE1_DL_ASYNC 52
> > +#define MT8195_VDOSYS1_SW1_RST_B_HDR_GFX_FE0_DL_ASYNC 53
> > +#define MT8195_VDOSYS1_SW1_RST_B_HDR_GFX_FE1_DL_ASYNC 54
> > +#define MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_BE_DL_ASYNC 55
> 
> Maybe you should align the indent style with TOPRGU.
> 
> Regards,
> Chun-Kuang.
> 
OK, I will modify it in the next revision.

Regards,
Nancy Lin

> > +
> >  #endif  /* _DT_BINDINGS_RESET_CONTROLLER_MT8195 */
> > --
> > 2.18.0
> >

Re: [PATCH v5 01/16] dt-bindings: mediatek: add vdosys1 RDMA definition for mt8195

2021-09-15 Thread Nancy . Lin

Hi Chun-Kuang,

Thanks for the review.

On Tue, 2021-09-07 at 07:42 +0800, Chun-Kuang Hu wrote:
> Hi, Nancy:
> 
> Nancy.Lin  於 2021年9月6日 週一 下午3:15寫道：
> > 
> > Add vdosys1 RDMA definition.
> > 
> > Signed-off-by: Nancy.Lin 
> > ---
> >  .../display/mediatek/mediatek,mdp-rdma.yaml   | 77
> > +++
> >  1 file changed, 77 insertions(+)
> >  create mode 100644
> > Documentation/devicetree/bindings/display/mediatek/mediatek,mdp-
> > rdma.yaml
> > 
> > diff --git
> > a/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp-
> > rdma.yaml
> > b/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp-
> > rdma.yaml
> > new file mode 100644
> > index ..3610093848e1
> > --- /dev/null
> > +++
> > b/Documentation/devicetree/bindings/display/mediatek/mediatek,mdp-
> > rdma.yaml
> 
> I've compared the rdma driver in mdp [1] with the rdma driver in
> display [2]; both are similar. The differences are like merge0 versus
> merge5. So I would like both binding documents to be placed together. In
> the display folder? In the media folder? In the SoC folder? I've no idea
> which one is better, but at least put them together.
> 
> [1] 
> https://urldefense.com/v3/__https://patchwork.kernel.org/project/linux-mediatek/patch/20210824100027.25989-6-moudy...@mediatek.com/__;!!CTRNKA9wMg0ARbw!1MjfK1sAMDvP9fU1GX6QvfLEfapYEcLmsYP2AhkAOZ6LVaLTLi6vAnJMMqH3vrJ3$
>  
> [2] 
> https://urldefense.com/v3/__https://patchwork.kernel.org/project/linux-mediatek/patch/20210906071539.12953-12-nancy@mediatek.com/__;!!CTRNKA9wMg0ARbw!1MjfK1sAMDvP9fU1GX6QvfLEfapYEcLmsYP2AhkAOZ6LVaLTLi6vAnJMMuM29V9T$
>  
> 
> Regards,
> Chun-Kuang.
> 
OK, I will discuss this with Moudy.

Regards,
Nancy Lin
> > @@ -0,0 +1,77 @@
> > +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> > +%YAML 1.2
> > +---
> > +$id: 
> > https://urldefense.com/v3/__http://devicetree.org/schemas/display/mediatek/mediatek,mdp-rdma.yaml*__;Iw!!CTRNKA9wMg0ARbw!1MjfK1sAMDvP9fU1GX6QvfLEfapYEcLmsYP2AhkAOZ6LVaLTLi6vAnJMMheRB2bL$
> >  
> > +$schema: 
> > https://urldefense.com/v3/__http://devicetree.org/meta-schemas/core.yaml*__;Iw!!CTRNKA9wMg0ARbw!1MjfK1sAMDvP9fU1GX6QvfLEfapYEcLmsYP2AhkAOZ6LVaLTLi6vAnJMMkoF4_Zs$
> >  
> > +
> > +title: mediatek display MDP RDMA
> > +
> > +maintainers:
> > +  - CK Hu 
> > +
> > +description: |
> > +  The mediatek display MDP RDMA stands for Read Direct Memory
> > Access.
> > +  It provides real time data to the back-end panel driver, such as
> > DSI,
> > +  DPI and DP_INTF.
> > +  It contains one line buffer to store the sufficient pixel data.
> > +  RDMA device node must be siblings to the central MMSYS_CONFIG
> > node.
> > +  For a description of the MMSYS_CONFIG binding, see
> > +  Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.ya
> > ml for details.
> > +
> > +properties:
> > +  compatible:
> > +oneOf:
> > +  - items:
> > +  - const: mediatek,mt8195-vdo1-rdma
> > +
> > +  reg:
> > +maxItems: 1
> > +
> > +  interrupts:
> > +maxItems: 1
> > +
> > +  power-domains:
> > +description: A phandle and PM domain specifier as defined by
> > bindings of
> > +  the power controller specified by phandle. See
> > +  Documentation/devicetree/bindings/power/power-domain.yaml
> > for details.
> > +
> > +  clocks:
> > +items:
> > +  - description: RDMA Clock
> > +
> > +  iommus:
> > +description:
> > +  This property should point to the respective IOMMU block
> > with master port as argument,
> > +  see
> > Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for
> > details.
> > +
> > +  mediatek,gce-client-reg:
> > +description:
> > +  The register of display function block to be set by gce.
> > There are 4 arguments,
> > +  such as gce node, subsys id, offset and register size. The
> > subsys id that is
> > +  mapping to the register of display function blocks is
> > defined in the gce header
> > +  include/dt-bindings/gce/<chip>-gce.h of each chips.
> > +$ref: /schemas/types.yaml#/definitions/phandle-array
> > +maxItems: 1
> > +
> > +required:
> > +  - compatible
> > +  - reg
> > +  - power-domains
> > +  - clocks
> > +  - iommus
> > +
> > +additionalProperties: false
> > +
> > +examples:
> > +  - |
> > +
> > +vdo1_rdma0: vdo1_rdma@1c104000 {
> > +compatible = "mediatek,mt8195-vdo1-rdma";
> > +reg = <0 0x1c104000 0 0x1000>;
> > +interrupts = ;
> > +clocks = <&vdosys1 CLK_VDO1_MDP_RDMA0>;
> > +power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
> > +iommus = <&iommu_vdo M4U_PORT_L2_MDP_RDMA0>;
> > +mediatek,gce-client-reg = <&gce1 SUBSYS_1c10 0x4000
> > 0x1000>;
> > +};
> > +
> > --
> > 2.18.0
> >

Re: [PATCH v5 09/16] soc: mediatek: mmsys: modify reset controller for MT8195 vdosys1

2021-09-15 Thread Nancy . Lin

Dear Philipp,

Thanks for the review.

On Mon, 2021-09-06 at 09:29 +0200, Philipp Zabel wrote:
> Hi Nancy,
> 
> On Mon, 2021-09-06 at 15:15 +0800, Nancy.Lin wrote:
> > MT8195 vdosys1 has more than 32 reset bits and a different reset
> > base than other chips. Modify mmsys to support 64 reset bits and a
> > different reset base.
> > 
> > Signed-off-by: Nancy.Lin 
> > ---
> >  drivers/soc/mediatek/mt8195-mmsys.h |  1 +
> >  drivers/soc/mediatek/mtk-mmsys.c| 15 ---
> >  drivers/soc/mediatek/mtk-mmsys.h|  1 +
> >  3 files changed, 14 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/soc/mediatek/mt8195-mmsys.h
> > b/drivers/soc/mediatek/mt8195-mmsys.h
> > index 648baaec112b..f67801c42fd9 100644
> > --- a/drivers/soc/mediatek/mt8195-mmsys.h
> > +++ b/drivers/soc/mediatek/mt8195-mmsys.h
> > @@ -123,6 +123,7 @@
> >  #define MT8195_VDO1_MIXER_SOUT_SEL_IN  
> > 0xf68
> >  #define MT8195_MIXER_SOUT_SEL_IN_FROM_DISP_MIXER   (0 <<
> > 0)
> >  
> > +#define MT8195_VDO1_SW0_RST_B   0x1d0
> >  #define MT8195_VDO1_MERGE0_ASYNC_CFG_WD0xe30
> >  #define MT8195_VDO1_MERGE1_ASYNC_CFG_WD0xe40
> >  #define MT8195_VDO1_MERGE2_ASYNC_CFG_WD0xe50
> > diff --git a/drivers/soc/mediatek/mtk-mmsys.c
> > b/drivers/soc/mediatek/mtk-mmsys.c
> > index 060065501b8a..97cb26339ef6 100644
> > --- a/drivers/soc/mediatek/mtk-mmsys.c
> > +++ b/drivers/soc/mediatek/mtk-mmsys.c
> > @@ -18,6 +18,8 @@
> >  #include "mt8365-mmsys.h"
> >  #include "mt8195-mmsys.h"
> >  
> > +#define MMSYS_SW_RESET_PER_REG 32
> > +
> >  static const struct mtk_mmsys_driver_data mt2701_mmsys_driver_data
> > = {
> > .clk_driver = "clk-mt2701-mm",
> > .routes = mmsys_default_routing_table,
> > @@ -48,12 +50,14 @@ static const struct mtk_mmsys_driver_data
> > mt8173_mmsys_driver_data = {
> > .clk_driver = "clk-mt8173-mm",
> > .routes = mmsys_default_routing_table,
> > .num_routes = ARRAY_SIZE(mmsys_default_routing_table),
> > +   .sw_reset_start = MMSYS_SW0_RST_B,
> >  };
> >  
> >  static const struct mtk_mmsys_driver_data mt8183_mmsys_driver_data
> > = {
> > .clk_driver = "clk-mt8183-mm",
> > .routes = mmsys_mt8183_routing_table,
> > .num_routes = ARRAY_SIZE(mmsys_mt8183_routing_table),
> > +   .sw_reset_start = MMSYS_SW0_RST_B,
> >  };
> >  
> >  static const struct mtk_mmsys_driver_data mt8365_mmsys_driver_data
> > = {
> > @@ -74,6 +78,7 @@ static const struct mtk_mmsys_driver_data
> > mt8195_vdosys1_driver_data = {
> > .num_routes = ARRAY_SIZE(mmsys_mt8195_routing_table),
> > .config = mmsys_mt8195_config_table,
> > .num_configs = ARRAY_SIZE(mmsys_mt8195_config_table),
> > +   .sw_reset_start = MT8195_VDO1_SW0_RST_B,
> >  };
> >  
> >  struct mtk_mmsys {
> > @@ -126,19 +131,23 @@ static int mtk_mmsys_reset_update(struct
> > reset_controller_dev *rcdev, unsigned l
> >  {
> > struct mtk_mmsys *mmsys = container_of(rcdev, struct mtk_mmsys,
> > rcdev);
> > unsigned long flags;
> > +   u32 offset;
> > u32 reg;
> > int i;
> >  
> > +   offset = (id / MMSYS_SW_RESET_PER_REG) * sizeof(u32);
> > +   id = id % MMSYS_SW_RESET_PER_REG;
> > +
> > spin_lock_irqsave(&mmsys->lock, flags);
> >  
> > -   reg = readl_relaxed(mmsys->regs + MMSYS_SW0_RST_B);
> > +   reg = readl_relaxed(mmsys->regs + mmsys->data->sw_reset_start +
> > offset);
> >  
> > if (assert)
> > reg &= ~BIT(id);
> > else
> > reg |= BIT(id);
> >  
> > -   writel_relaxed(reg, mmsys->regs + MMSYS_SW0_RST_B);
> > +   writel_relaxed(reg, mmsys->regs + mmsys->data->sw_reset_start +
> > offset);
> >  
> > spin_unlock_irqrestore(&mmsys->lock, flags);
> >  
> > @@ -237,7 +246,7 @@ static int mtk_mmsys_probe(struct
> > platform_device *pdev)
> > spin_lock_init(&mmsys->lock);
> >  
> > mmsys->rcdev.owner = THIS_MODULE;
> > -   mmsys->rcdev.nr_resets = 32;
> > +   mmsys->rcdev.nr_resets = 64;
> 
> If only MT8195 vdosys1 has more than 32 reset bits, this should be
> kept
> at 32 for the others.
> 
> regards

OK, I will modify it in the next revision.
> Philipp

RE: [RFC v1 4/6] drm/virtio: Probe and implement VIRTIO_GPU_F_RELEASE_FENCE feature

2021-09-15 Thread Kasireddy, Vivek

Hi Gerd,

>   Hi,
> 
> > --- a/include/uapi/linux/virtio_gpu.h
> > +++ b/include/uapi/linux/virtio_gpu.h
> > @@ -60,6 +60,8 @@
> >   */
> >  #define VIRTIO_GPU_F_RESOURCE_BLOB   3
> >
> > +#define VIRTIO_GPU_F_RELEASE_FENCE  4
> > +
> >  enum virtio_gpu_ctrl_type {
> > VIRTIO_GPU_UNDEFINED = 0,
> 
> Where is the virtio-spec update for that?
[Kasireddy, Vivek] I was going to do that if there'd be a consensus over 
DRM_CAP_RELEASE_FENCE.
Otherwise, I don't think VIRTIO_GPU_F_RELEASE_FENCE is needed.

Thanks,
Vivek

> 
> thanks,
>   Gerd

Re: [Freedreno] [PATCH] drm/msm: Do not run snapshot on non-DPU devices

2021-09-15 Thread abhinavk


Hi Fabio

On 2021-09-14 10:48, Fabio Estevam wrote:

Since commit 98659487b845 ("drm/msm: add support to take dpu snapshot")
the following NULL pointer dereference is seen on i.MX53:

[ 3.275493] msm msm: bound 3000.gpu (ops a3xx_ops)
[ 3.287174] [drm] Initialized msm 1.8.0 20130625 for msm on minor 0
[ 3.293915] 8<--- cut here ---
[ 3.297012] Unable to handle kernel NULL pointer dereference at
virtual address 0028
[ 3.305244] pgd = (ptrval)
[ 3.307989] [0028] *pgd=
[ 3.311624] Internal error: Oops: 805 [#1] SMP ARM
[ 3.316430] Modules linked in:
[ 3.319503] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 
5.14.0+g682d702b426b #1

[ 3.326652] Hardware name: Freescale i.MX53 (Device Tree Support)
[ 3.332754] PC is at __mutex_init+0x14/0x54
[ 3.336969] LR is at msm_disp_snapshot_init+0x24/0xa0

i.MX53 does not use the DPU controller.

Fix the problem by only calling msm_disp_snapshot_init() on platforms 
that

use the DPU controller.

Cc: sta...@vger.kernel.org
Fixes: 98659487b845 ("drm/msm: add support to take dpu snapshot")
Signed-off-by: Fabio Estevam 
---
 drivers/gpu/drm/msm/msm_drv.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c 
b/drivers/gpu/drm/msm/msm_drv.c

index 2e6fc185e54d..2aa2266454b7 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -630,10 +630,11 @@ static int msm_drm_init(struct device *dev,
const struct drm_driver *drv)
if (ret)
goto err_msm_uninit;

-   ret = msm_disp_snapshot_init(ddev);
-   if (ret)
-   DRM_DEV_ERROR(dev, "msm_disp_snapshot_init failed ret = %d\n", 
ret);
-
+   if (kms) {
+   ret = msm_disp_snapshot_init(ddev);
+   if (ret)
+			DRM_DEV_ERROR(dev, "msm_disp_snapshot_init failed ret = %d\n", 
ret);

+   }
Are you not using DPU or are you not using mdp4/mdp5 as well? Even if 
you are using any of mdps, kms should

not be NULL. Hence wanted to check the test case.


drm_mode_config_reset(ddev);

 #ifdef CONFIG_DRM_FBDEV_EMULATION

Re: [virtio-dev] [PATCH v1 09/12] drm/virtio: implement context init: allocate an array of fence contexts

2021-09-15 Thread Chia-I Wu

 i

On Tue, Sep 14, 2021 at 6:26 PM Gurchetan Singh
 wrote:
>
>
>
> On Tue, Sep 14, 2021 at 10:53 AM Chia-I Wu  wrote:
>>
>> ,On Mon, Sep 13, 2021 at 6:57 PM Gurchetan Singh
>>  wrote:
>> >
>> >
>> >
>> >
>> > On Mon, Sep 13, 2021 at 11:52 AM Chia-I Wu  wrote:
>> >>
>> >> .
>> >>
>> >> On Mon, Sep 13, 2021 at 10:48 AM Gurchetan Singh
>> >>  wrote:
>> >> >
>> >> >
>> >> >
>> >> > On Fri, Sep 10, 2021 at 12:33 PM Chia-I Wu  wrote:
>> >> >>
>> >> >> On Wed, Sep 8, 2021 at 6:37 PM Gurchetan Singh
>> >> >>  wrote:
>> >> >> >
>> >> >> > We don't want fences from different 3D contexts (virgl, gfxstream,
>> >> >> > venus) to be on the same timeline.  With explicit context creation,
>> >> >> > we can specify the number of rings each context wants.
>> >> >> >
>> >> >> > Execbuffer can specify which ring to use.
>> >> >> >
>> >> >> > Signed-off-by: Gurchetan Singh 
>> >> >> > Acked-by: Lingfeng Yang 
>> >> >> > ---
>> >> >> >  drivers/gpu/drm/virtio/virtgpu_drv.h   |  3 +++
>> >> >> >  drivers/gpu/drm/virtio/virtgpu_ioctl.c | 34 
>> >> >> > --
>> >> >> >  2 files changed, 35 insertions(+), 2 deletions(-)
>> >> >> >
>> >> >> > diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h 
>> >> >> > b/drivers/gpu/drm/virtio/virtgpu_drv.h
>> >> >> > index a5142d60c2fa..cca9ab505deb 100644
>> >> >> > --- a/drivers/gpu/drm/virtio/virtgpu_drv.h
>> >> >> > +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
>> >> >> > @@ -56,6 +56,7 @@
>> >> >> >  #define STATE_ERR 2
>> >> >> >
>> >> >> >  #define MAX_CAPSET_ID 63
>> >> >> > +#define MAX_RINGS 64
>> >> >> >
>> >> >> >  struct virtio_gpu_object_params {
>> >> >> > unsigned long size;
>> >> >> > @@ -263,6 +264,8 @@ struct virtio_gpu_fpriv {
>> >> >> > uint32_t ctx_id;
>> >> >> > uint32_t context_init;
>> >> >> > bool context_created;
>> >> >> > +   uint32_t num_rings;
>> >> >> > +   uint64_t base_fence_ctx;
>> >> >> > struct mutex context_lock;
>> >> >> >  };
>> >> >> >
>> >> >> > diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c 
>> >> >> > b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
>> >> >> > index f51f3393a194..262f79210283 100644
>> >> >> > --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
>> >> >> > +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
>> >> >> > @@ -99,6 +99,11 @@ static int virtio_gpu_execbuffer_ioctl(struct 
>> >> >> > drm_device *dev, void *data,
>> >> >> > int in_fence_fd = exbuf->fence_fd;
>> >> >> > int out_fence_fd = -1;
>> >> >> > void *buf;
>> >> >> > +   uint64_t fence_ctx;
>> >> >> > +   uint32_t ring_idx;
>> >> >> > +
>> >> >> > +   fence_ctx = vgdev->fence_drv.context;
>> >> >> > +   ring_idx = 0;
>> >> >> >
>> >> >> > if (vgdev->has_virgl_3d == false)
>> >> >> > return -ENOSYS;
>> >> >> > @@ -106,6 +111,17 @@ static int virtio_gpu_execbuffer_ioctl(struct 
>> >> >> > drm_device *dev, void *data,
>> >> >> > if ((exbuf->flags & ~VIRTGPU_EXECBUF_FLAGS))
>> >> >> > return -EINVAL;
>> >> >> >
>> >> >> > +   if ((exbuf->flags & VIRTGPU_EXECBUF_RING_IDX)) {
>> >> >> > +   if (exbuf->ring_idx >= vfpriv->num_rings)
>> >> >> > +   return -EINVAL;
>> >> >> > +
>> >> >> > +   if (!vfpriv->base_fence_ctx)
>> >> >> > +   return -EINVAL;
>> >> >> > +
>> >> >> > +   fence_ctx = vfpriv->base_fence_ctx;
>> >> >> > +   ring_idx = exbuf->ring_idx;
>> >> >> > +   }
>> >> >> > +
>> >> >> > exbuf->fence_fd = -1;
>> >> >> >
>> >> >> > virtio_gpu_create_context(dev, file);
>> >> >> > @@ -173,7 +189,7 @@ static int virtio_gpu_execbuffer_ioctl(struct 
>> >> >> > drm_device *dev, void *data,
>> >> >> > goto out_memdup;
>> >> >> > }
>> >> >> >
>> >> >> > -   out_fence = virtio_gpu_fence_alloc(vgdev, 
>> >> >> > vgdev->fence_drv.context, 0);
>> >> >> > +   out_fence = virtio_gpu_fence_alloc(vgdev, fence_ctx, 
>> >> >> > ring_idx);
>> >> >> > if(!out_fence) {
>> >> >> > ret = -ENOMEM;
>> >> >> > goto out_unresv;
>> >> >> > @@ -691,7 +707,7 @@ static int virtio_gpu_context_init_ioctl(struct 
>> >> >> > drm_device *dev,
>> >> >> > return -EINVAL;
>> >> >> >
>> >> >> > /* Number of unique parameters supported at this time. */
>> >> >> > -   if (num_params > 1)
>> >> >> > +   if (num_params > 2)
>> >> >> > return -EINVAL;
>> >> >> >
>> >> >> > ctx_set_params = 
>> >> >> > memdup_user(u64_to_user_ptr(args->ctx_set_params),
>> >> >> > @@ -731,6 +747,20 @@ static int virtio_gpu_context_init_ioctl(struct 
>> >> >> > drm_device *dev,
>> >> >> >
>> >> >> > vfpriv->context_init |= value;
>> >> >> > break;
>> >> >> > +   case VIRTGPU_CONTEXT_PARAM_NUM_RINGS:
>> >> >> > +   if (vfpriv->base_fence_ctx) {
>> >> >> > +

[PATCH 2/2] drm/i915/uapi: Add query for hwconfig table

2021-09-15 Thread John . C . Harrison

From: Rodrigo Vivi 

GuC contains a consolidated table with a bunch of information about the
current device.

Previously, this information was spread and hardcoded to all the components
including GuC, i915 and various UMDs. The goal here is to consolidate
the data into GuC in a way that all interested components can grab the
very latest and synchronized information using a simple query.

As with most of the other queries, this one can be called twice:
once with item.length=0 to determine the exact buffer size, then
allocate the user memory and call it again to retrieve the
table data. For example:
  struct drm_i915_query_item item = {
    .query_id = DRM_I915_QUERY_HWCONFIG_TABLE,
  };
  query.items_ptr = (int64_t) &item;
  query.num_items = 1;

  ioctl(fd, DRM_IOCTL_I915_QUERY, query, sizeof(query));

  if (item.length <= 0)
return -ENOENT;

  data = malloc(item.length);
  item.data_ptr = (int64_t) &data;
  ioctl(fd, DRM_IOCTL_I915_QUERY, query, sizeof(query));

  // Parse the data as appropriate...

The returned array is a simple and flexible KLV (Key/Length/Value)
formatted table. For example, it could be just:
  enum device_attr {
 ATTR_SOME_VALUE = 0,
 ATTR_SOME_MASK  = 1,
  };

  static const u32 hwconfig[] = {
  ATTR_SOME_VALUE,
  1, // Value Length in DWords
  8, // Value

  ATTR_SOME_MASK,
  3,
  0x00FFFFFF, 0xFFFFFFFF, 0xFF000000,
  };

The attribute ids are defined in a hardware spec.

Cc: Tvrtko Ursulin 
Cc: Kenneth Graunke 
Cc: Michal Wajdeczko 
Cc: Slawomir Milczarek 
Signed-off-by: Rodrigo Vivi 
Signed-off-by: John Harrison 
Reviewed-by: Matthew Brost 
---
 drivers/gpu/drm/i915/i915_query.c | 23 +++
 include/uapi/drm/i915_drm.h   |  1 +
 2 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_query.c 
b/drivers/gpu/drm/i915/i915_query.c
index 5e2b909827f4..96989a37453c 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -477,12 +477,35 @@ static int query_memregion_info(struct drm_i915_private 
*i915,
return total_length;
 }
 
+static int query_hwconfig_table(struct drm_i915_private *i915,
+   struct drm_i915_query_item *query_item)
+{
+   struct intel_gt *gt = &i915->gt;
+   struct intel_guc_hwconfig *hwconfig = &gt->uc.guc.hwconfig;
+
+   if (!hwconfig->size || !hwconfig->ptr)
+   return -ENODEV;
+
+   if (query_item->length == 0)
+   return hwconfig->size;
+
+   if (query_item->length < hwconfig->size)
+   return -EINVAL;
+
+   if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
+hwconfig->ptr, hwconfig->size))
+   return -EFAULT;
+
+   return hwconfig->size;
+}
+
 static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
struct drm_i915_query_item *query_item) 
= {
query_topology_info,
query_engine_info,
query_perf_config,
query_memregion_info,
+   query_hwconfig_table,
 };
 
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index bde5860b3686..a1281f35b190 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -2499,6 +2499,7 @@ struct drm_i915_query_item {
 #define DRM_I915_QUERY_ENGINE_INFO 2
 #define DRM_I915_QUERY_PERF_CONFIG  3
 #define DRM_I915_QUERY_MEMORY_REGIONS   4
+#define DRM_I915_QUERY_HWCONFIG_TABLE   5
 /* Must be kept compact -- no holes and well documented */
 
/**
-- 
2.25.1

[PATCH 0/2] Add support for querying hw info that UMDs need

2021-09-15 Thread John . C . Harrison

From: John Harrison 

Various UMDs require hardware configuration information about the
current platform. A bunch of static information is available in a
fixed table that can be retrieved from the GuC.

Test-with: 20210915215558.2473428-2-john.c.harri...@intel.com
UMD: https://github.com/intel/compute-runtime/pull/432/files
UMD: https://github.com/intel/media-driver/pull/1239/files

CC: Katarzyna Cencelewska 
CC: Tony Ye 
CC: Jason Ekstrand 
Signed-off-by: John Harrison 
Reviewed-by: Matthew Brost 


John Harrison (1):
  drm/i915/guc: Add fetch of hwconfig table

Rodrigo Vivi (1):
  drm/i915/uapi: Add query for hwconfig table

 drivers/gpu/drm/i915/Makefile |   1 +
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
 .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h   |   4 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|   3 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   2 +
 .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.c   | 156 ++
 .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.h   |  19 +++
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |   6 +
 drivers/gpu/drm/i915/i915_query.c |  23 +++
 include/uapi/drm/i915_drm.h   |   1 +
 10 files changed, 215 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
 create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h

-- 
2.25.1

[PATCH 1/2] drm/i915/guc: Add fetch of hwconfig table

2021-09-15 Thread John . C . Harrison

From: John Harrison 

Implement support for fetching the hardware description table from the
GuC. The call is made twice - once without a destination buffer to
query the size and then a second time to fill in the buffer.

Note that the table is only available on ADL-P and later platforms.

Cc: Michal Wajdeczko 
Signed-off-by: Rodrigo Vivi 
Signed-off-by: John Harrison 
Reviewed-by: Matthew Brost 
---
 drivers/gpu/drm/i915/Makefile |   1 +
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
 .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h   |   4 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|   3 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   2 +
 .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.c   | 156 ++
 .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.h   |  19 +++
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |   6 +
 8 files changed, 191 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
 create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c1e9f7369fb5..3789f03a1021 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -190,6 +190,7 @@ i915-y += gt/uc/intel_uc.o \
  gt/uc/intel_guc_rc.o \
  gt/uc/intel_guc_slpc.o \
  gt/uc/intel_guc_submission.o \
+ gt/uc/intel_guc_hwconfig.o \
  gt/uc/intel_huc.o \
  gt/uc/intel_huc_debugfs.o \
  gt/uc/intel_huc_fw.o
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 8ff58aff..72fd492b726a 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -137,6 +137,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+   INTEL_GUC_ACTION_GET_HWCONFIG = 0x4100,
INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
index 488b6061ee89..f9e2a6aaef4a 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
@@ -8,6 +8,10 @@
 
 enum intel_guc_response_status {
INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
+   INTEL_GUC_RESPONSE_NOT_SUPPORTED = 0x20,
+   INTEL_GUC_RESPONSE_NO_ATTRIBUTE_TABLE = 0x201,
+   INTEL_GUC_RESPONSE_NO_DECRYPTION_KEY = 0x202,
+   INTEL_GUC_RESPONSE_DECRYPTION_FAILED = 0x204,
INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
 };
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index fbfcae727d7f..82c0ce0090c6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -422,13 +422,14 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 
*request, u32 len,
/*
 * No GuC command should ever take longer than 10ms.
 * Fast commands should still complete in 10us.
+* Except for the hwconfig table query, which takes ~50ms.
 */
ret = __intel_wait_for_register_fw(uncore,
   guc_send_reg(guc, 0),
   GUC_HXG_MSG_0_ORIGIN,
   FIELD_PREP(GUC_HXG_MSG_0_ORIGIN,
  GUC_HXG_ORIGIN_GUC),
-  10, 10, &header);
+  10, 100, &header);
if (unlikely(ret)) {
 timeout:
drm_err(&i915->drm, "mmio request %#x: no reply %x\n",
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 5dd174babf7a..ec38a69ca3fe 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -13,6 +13,7 @@
 #include "intel_guc_fw.h"
 #include "intel_guc_fwif.h"
 #include "intel_guc_ct.h"
+#include "intel_guc_hwconfig.h"
 #include "intel_guc_log.h"
 #include "intel_guc_reg.h"
 #include "intel_guc_slpc_types.h"
@@ -37,6 +38,7 @@ struct intel_guc {
struct intel_guc_ct ct;
/** @slpc: sub-structure containing SLPC related data and objects */
struct intel_guc_slpc slpc;
+   struct intel_guc_hwconfig hwconfig;
 
/** @sched_engine: Global engine used to submit requests to GuC */
struct i915_sched_engine *sched_engine;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
new file mode 100644
index ..af4fc9fdbaaf
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@

Re: [virtio-dev] Re: [PATCH v1 08/12] drm/virtio: implement context init: stop using drv->context when creating fence

2021-09-15 Thread Gurchetan Singh

On Tue, Sep 14, 2021 at 10:53 PM Gerd Hoffmann  wrote:

> On Wed, Sep 08, 2021 at 06:37:13PM -0700, Gurchetan Singh wrote:
> > The plumbing is all here to do this.  Since we always use the
> > default fence context when allocating a fence, this makes no
> > functional difference.
> >
> > We can't process just the largest fence id anymore, since it's
> > associated with different timelines.  It's fine for fence_id
> > 260 to signal before 259.  As such, process each fence_id
> > individually.
>
> I guess you need to also update virtio_gpu_fence_event_process()
> then?  It currently has the strict ordering logic baked in ...
>

The update to virtio_gpu_fence_event_process was done as a preparation a
few months back:

https://cgit.freedesktop.org/drm/drm-misc/commit/drivers/gpu/drm/virtio/virtgpu_fence.c?id=36549848ed27c22bb2ffd5d1468efc6505b05f97



>
> take care,
>   Gerd
>
>
> -
> To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org
>
>

Re: [PATCH v2] drm/panfrost: Calculate lock region size correctly

2021-09-15 Thread Alyssa Rosenzweig

Took me a careful read, but this is

Reviewed-by: Alyssa Rosenzweig 

Thanks for hunting this down!

Re: [PATCH V6 2/2] drm/vkms: Add support for virtual hardware mode

2021-09-15 Thread Melissa Wen

On 09/01, Sumera Priyadarsini wrote:
> Add a virtual hardware or vblank-less mode as a module
> to enable VKMS to emulate virtual hardware drivers. This means
> no vertical blanking events occur and pageflips are completed
> arbitrarily and when required for updating the frame.
> 
> Add a new drm_crtc_funcs struct, vkms_vblankless_crtc_funcs and a
> drm_crtc_helper_funcs struct, vkms_vblankless_crtc_helper_funcs()
> which hold the atomic helpers for virtual hardware mode.
> The existing vkms_crtc_funcs struct and vkms_crtc_helper_funcs
> struct hold atomic helpers for the default vblank mode.
> This makes the code flow clearer and testing
> virtual hardware mode easier.
> 
> Add a function vkms_crtc_composer() which calls the helper function,
> vkms_composer_common() for plane composition in vblank-less mode.
> vkms_crtc_composer() is directly called in the atomic hook in
> vkms_crtc_atomic_begin().
> 
> However, some crc captures still use vblanks which causes the crc-based
> igt tests to crash. So, no CRC functions are called in vblankless mode
> for now and will be implemented in a later patch.

Hi Sumera,

this approach lgtm.

Something in the writeback engine needs to be adjusted, as I noticed a leak
when unloading the driver (after a kms_writeback testcase).

One fix is pointed out below. As for the other issue, as far as I checked, a
wb job is prepared and enqueued, but never cleaned up. I'm not sure whether wb
ops have any dependency on vblanks (in which case we should also skip them) or
whether something goes wrong when getting the crtc state during the composer
work (the wb_pending condition is not working as expected in vhw mode).

The error log is here: https://paste.debian.net/hidden/54bf7945/

Daniel, do you have any idea from the top of your head?

> 
> This patchset has been tested with the igt tests- kms_writeback, kms_atomic
> , kms_lease, kms_flip, kms_pipe_get_crc and preserves results except for
> subtests related to crc reads and vertical blanking, in which case,
> tests are skipped.
> 
> The patch is based on Rodrigo Siqueira's
> patch(https://patchwork.freedesktop.org/patch/316851/?series=48469&rev=3)
> and the ensuing review.
> 
> Signed-off-by: Sumera Priyadarsini 
> ---
> Changes in V6:
> - Skip CRC functions in vblankless mode
> - Refactor helper function names(Melissa)
> Changes in V5:
> - Move vkms_crtc_composer() to this patch(Melissa)
> - Add more clarification for "vblank-less" mode(Pekka)
> - Replace kzalloc() with kvmalloc() in compose_active_planes()
> to fix memory allocation error for output frame
> - Fix checkpatch warnings (Melissa)
> Changes in V3:
> - Refactor patchset(Melissa)
> Changes in V2:
> - Add atomic helper functions in a separate struct for virtual hardware
> mode (Daniel)
> - Remove spinlock across 'vkms_output->lock' in vkms_crtc.c(Daniel)
> - Add vkms_composer_common() (Daniel)
> ---
>  drivers/gpu/drm/vkms/vkms_composer.c  | 21 +++--
>  drivers/gpu/drm/vkms/vkms_crtc.c  | 43 +--
>  drivers/gpu/drm/vkms/vkms_drv.c   | 16 +++---
>  drivers/gpu/drm/vkms/vkms_drv.h   |  2 ++
>  drivers/gpu/drm/vkms/vkms_writeback.c |  3 +-
>  5 files changed, 74 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
> b/drivers/gpu/drm/vkms/vkms_composer.c
> index bca746fb5b53..a009589b2c3a 100644
> --- a/drivers/gpu/drm/vkms/vkms_composer.c
> +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> @@ -176,11 +176,12 @@ static int compose_active_planes(void **vaddr_out,
>  {
>   struct drm_framebuffer *fb = &primary_composer->fb;
>   struct drm_gem_object *gem_obj = drm_gem_fb_get_obj(fb, 0);
> +
>   const void *vaddr;
>   int i;
>  
>   if (!*vaddr_out) {
> - *vaddr_out = kzalloc(gem_obj->size, GFP_KERNEL);
> + *vaddr_out = kvmalloc(gem_obj->size, GFP_KERNEL);
>   if (!*vaddr_out) {
>   DRM_ERROR("Cannot allocate memory for output frame.");
>   return -ENOMEM;
> @@ -229,7 +230,7 @@ int vkms_composer_common(struct vkms_crtc_state 
> *crtc_state,
>  
>   if (ret) {
>   if ((ret == -EINVAL || ret == -ENOMEM) && !wb_pending)
> - kfree(vaddr_out);
> + kvfree(vaddr_out);
>   return ret;
>   }
>  
> @@ -241,7 +242,7 @@ int vkms_composer_common(struct vkms_crtc_state 
> *crtc_state,
>   crtc_state->wb_pending = false;
>   spin_unlock_irq(&out->composer_lock);
>   } else {
> - kfree(vaddr_out);
> + kvfree(vaddr_out);
>   }
>  
>   return 0;
> @@ -296,6 +297,20 @@ void vkms_composer_worker(struct work_struct *work)
>   drm_crtc_add_crc_entry(crtc, true, frame_start++, &crc32);
>  }
>  
> +void vkms_crtc_composer(struct vkms_crtc_state *crtc_state)
> +{
> + struct drm_crtc *crtc = crtc_state->base.crtc;
> + struct vkms_output *out = drm_crtc_to_vkms_output(crtc);
> + u32 crc32 = 0;
> + int ret;
> +
> +

[PATCH v7 2/3] drm/mediatek: implment the dsi hs packets aligned

2021-09-15 Thread Jitao Shi

Some dsi devices require the packets on lanes aligned at the end,
or the screen will shift or scroll.

Signed-off-by: Jitao Shi 
---
 drivers/gpu/drm/mediatek/mtk_dsi.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c 
b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 93b40c245f00..9d72e6dce0bf 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -194,6 +194,8 @@ struct mtk_dsi {
struct clk *hs_clk;
 
u32 data_rate;
+   /* force dsi line end without dsi_null data */
+   bool hs_packet_end_aligned;
 
unsigned long mode_flags;
enum mipi_dsi_pixel_format format;
@@ -499,6 +501,13 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi)
DRM_WARN("HFP + HBP less than d-phy, FPS will under 60Hz\n");
}
 
+   if (dsi->hs_packet_end_aligned) {
+   horizontal_sync_active_byte = 
roundup(horizontal_sync_active_byte, dsi->lanes) - 2;
+   horizontal_frontporch_byte = 
roundup(horizontal_frontporch_byte, dsi->lanes) - 2;
+   horizontal_backporch_byte = roundup(horizontal_backporch_byte, 
dsi->lanes) - 2;
+   horizontal_backporch_byte -= (vm->hactive * dsi_tmp_buf_bpp + 
2) % dsi->lanes;
+   }
+
writel(horizontal_sync_active_byte, dsi->regs + DSI_HSA_WC);
writel(horizontal_backporch_byte, dsi->regs + DSI_HBP_WC);
writel(horizontal_frontporch_byte, dsi->regs + DSI_HFP_WC);
@@ -793,6 +802,7 @@ static int mtk_dsi_host_attach(struct mipi_dsi_host *host,
dsi->lanes = device->lanes;
dsi->format = device->format;
dsi->mode_flags = device->mode_flags;
+   dsi->hs_packet_end_aligned = device->hs_packet_end_aligned;
 
return 0;
 }
-- 
2.25.1

[PATCH v7 3/3] drm/bridge: anx7625: config hs packets end aligned to avoid screen shift

2021-09-15 Thread Jitao Shi

This device requires the packets on lanes aligned at the end to fix
screen shift or scroll.

Signed-off-by: Jitao Shi 
---
 drivers/gpu/drm/bridge/analogix/anx7625.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c 
b/drivers/gpu/drm/bridge/analogix/anx7625.c
index 14d73fb1dd15..d76fb63fa9f7 100644
--- a/drivers/gpu/drm/bridge/analogix/anx7625.c
+++ b/drivers/gpu/drm/bridge/analogix/anx7625.c
@@ -1327,6 +1327,7 @@ static int anx7625_attach_dsi(struct anx7625_data *ctx)
MIPI_DSI_MODE_VIDEO_SYNC_PULSE  |
MIPI_DSI_MODE_NO_EOT_PACKET |
MIPI_DSI_MODE_VIDEO_HSE;
+   dsi->hs_packet_end_aligned = true;
 
if (mipi_dsi_attach(dsi) < 0) {
DRM_DEV_ERROR(dev, "fail to attach dsi to host.\n");
-- 
2.25.1

[PATCH v7 1/3] drm/dsi: transer dsi hs packet aligned

2021-09-15 Thread Jitao Shi

Some DSI devices require the hs packets to start and end
at the same time on all dsi lanes. So add a flag for those devices.

Signed-off-by: Jitao Shi 
---
 include/drm/drm_mipi_dsi.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index af7ba8071eb0..8e8563792682 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -177,6 +177,7 @@ struct mipi_dsi_device_info {
  * @lp_rate: maximum lane frequency for low power mode in hertz, this should
  * be set to the real limits of the hardware, zero is only accepted for
  * legacy drivers
+ * @hs_packet_end_aligned: transfer dsi hs packet ending aligned
  */
 struct mipi_dsi_device {
struct mipi_dsi_host *host;
@@ -189,6 +190,7 @@ struct mipi_dsi_device {
unsigned long mode_flags;
unsigned long hs_rate;
unsigned long lp_rate;
+   bool hs_packet_end_aligned;
 };
 
 #define MIPI_DSI_MODULE_PREFIX "mipi-dsi:"
-- 
2.25.1

[PATCH v7 0/3] force hsa hbp hfp packets multiple of lanenum to avoid screen shift

2021-09-15 Thread Jitao Shi

Changes since v6:
 - Add "bool hs_packet_end_aligned" in "struct mipi_dsi_device" to control the 
dsi aligned.
 - Configure the "hs_packet_end_aligned" in the ANX7625 .attach().

Changes since v5:
 - Search the anx7625 compatible as flag to control dsi output aligned.

Changes since v4:
 - Move "dt-bindings: drm/bridge: anx7625: add force_dsi_end_without_null" 
before
   "drm/mediatek: force hsa hbp hfp packets multiple of lanenum to avoid".

 - Retitle "dt-bindings: drm/bridge: anx7625: add force_dsi_end_without_null".

Jitao Shi (3):
  drm/dsi: transer dsi hs packet aligned
  drm/mediatek: implment the dsi hs packets aligned
  drm/bridge: anx7625: config hs packets end aligned to avoid screen
shift

 drivers/gpu/drm/bridge/analogix/anx7625.c |  1 +
 drivers/gpu/drm/mediatek/mtk_dsi.c| 10 ++
 include/drm/drm_mipi_dsi.h|  2 ++
 3 files changed, 13 insertions(+)

-- 
2.25.1

Re: [RFC PATCH v2 2/2] drm/bridge: parade-ps8640: Add support for AUX channel

2021-09-15 Thread Doug Anderson

Hi,

On Tue, Sep 14, 2021 at 5:28 PM Philip Chen  wrote:
>
> > > Changes in v2:
> > > - Handle the case where an AUX transaction has no payload
> > > - Add a reg polling for p0.0x83 to confirm AUX cmd is issued and
> > >   read data is returned
> > > - Replace regmap_noinc_read/write with looped regmap_read/write,
> > >   as regmap_noinc_read/write doesn't read one byte at a time unless
> > >   max_raw_read/write is set to 1.
> >
> > What about if you set val_bytes? I think you just need to set that to
> > "1" and it'll work?
> I think val_bytes is already set to 1 as we set val_bits to 8. See:
> map->format.val_bytes = DIV_ROUND_UP(config->val_bits, 8);

To me that feels like a bug in the regmap API, then. I can't see how
it would make any sense for this function not to take val_bytes into
account...

I wonder if other users are somehow getting lucky today. Maybe users
that are using this for MMIO get lucky because max_raw_read is set
properly. ...and maybe other i2c users get lucky because some
peripherals are OK w/ this bug? AKA, maybe this actually works in most
cases for FIFOs:

write address of bridge chip on i2c bus
write R/W bit on i2c bus
write FIFO register address on i2c bus
read byte
read byte
read byte
...
read byte
read byte
end transaction

Normally for i2c you assume that the other side will read from
subsequent register addresses for each "read byte", but I suppose it's
possible that some i2c devices are setup to realize that if the
register address was the address of a FIFO that it shouldn't read from
the next register address but should just read the next byte in the
FIFO?

In any case, it's fine to do it with a loop like you're doing but it
still seems weird that you'd need to.

-Doug

Re: [PATCH v3 3/3] drm/bridge: parade-ps8640: Add support for AUX channel

2021-09-15 Thread Doug Anderson

Hi,

On Tue, Sep 14, 2021 at 5:57 PM Stephen Boyd  wrote:
>
> Quoting Philip Chen (2021-09-14 16:28:45)
> > diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c 
> > b/drivers/gpu/drm/bridge/parade-ps8640.c
> > index 8d3e7a147170..dc349d729f5a 100644
> > --- a/drivers/gpu/drm/bridge/parade-ps8640.c
> > +++ b/drivers/gpu/drm/bridge/parade-ps8640.c
> > @@ -117,6 +144,129 @@ static inline struct ps8640 *bridge_to_ps8640(struct 
> > drm_bridge *e)
> [...]
> > +   case DP_AUX_I2C_WRITE:
> > +   case DP_AUX_I2C_READ:
> > +   break;
> > +   default:
> > +   return -EINVAL;
> > +   }
> > +
> > +   ret = regmap_write(map, PAGE0_AUXCH_CFG3, AUXCH_CFG3_RESET);
> > +   if (ret) {
> > +   dev_err(dev, "failed to write PAGE0_AUXCH_CFG3: %d\n", ret);
>
> Can we use DRM_DEV_ERROR()?

I've never gotten clear guidance here. For instance, in some other
review I suggested using the DRM wrapper and got told "no" [1]. ;-)
The driver landed without the DRM_ERROR versions. I don't really care
lots so it's fine with me to use DRM_DEV_ERROR, I just wish I
understood the rules...

[1] https://lore.kernel.org/all/49db7ef3-fa53-a274-7c69-c2d840b13...@denx.de/


> > +   return ret;
> > +   }
> > +
> > +   /* Assume it's good */
> > +   msg->reply = 0;
> > +
> > +   addr_len[0] = msg->address & 0xff;
> > +   addr_len[1] = (msg->address >> 8) & 0xff;
> > +   addr_len[2] = ((msg->request << 4) & SWAUX_CMD_MASK) |
> > +   ((msg->address >> 16) & SWAUX_ADDR_19_16_MASK);
>
> It really feels like this ought to be possible with some sort of
> cpu_to_le32() API. We're shoving msg->address into 3 bytes and then
> adding in the request and some length. So we could do something like:
>
> u32 addr_len;
>
> addr_len = FIELD_PREP(SWAUX_ADDR_MASK, msg->address);
> addr_len |= FIELD_PREP(SWAUX_CMD_MASK, msg->request);
> if (len)
> addr_len |= FIELD_PREP(LEN_MASK, len - 1);
> else
> addr_len |= FIELD_PREP(LEN_MASK, SWAUX_NO_PAYLOAD );
>
> cpu_to_le32s(&addr_len);
>
> regmap_bulk_write(map, PAGE0_SWAUX_ADDR_7_0, &addr_len, 
> sizeof(addr_len));

You're arguing that your version of the code is more efficient? Easier
to understand? Something else? To me, Philip's initial version is
crystal clear and easy to map to the bridge datasheet but I need to
think more to confirm that your version is right. Thinking is hard and
I like to avoid it when possible.

In any case, it's definitely bikeshedding and I'll yield if everyone
likes the other version better. ;-)


> > +   return ret;
> > +   }
> > +
> > +   switch (data & SWAUX_STATUS_MASK) {
> > +   /* Ignore the DEFER cases as they are already handled in hardware */
> > +   case SWAUX_STATUS_NACK:
> > +   case SWAUX_STATUS_I2C_NACK:
> > +   /*
> > +* The programming guide is not clear about whether a I2C 
> > NACK
> > +* would trigger SWAUX_STATUS_NACK or 
> > SWAUX_STATUS_I2C_NACK. So
> > +* we handle both cases together.
> > +*/
> > +   if (is_native_aux)
> > +   msg->reply |= DP_AUX_NATIVE_REPLY_NACK;
> > +   else
> > +   msg->reply |= DP_AUX_I2C_REPLY_NACK;
> > +
> > +   len = data & SWAUX_M_MASK;
> > +   return len;
>
> Why not 'return data & SWAUX_M_MASK;' and skip the assignment?

Actually, I think it's the "return" that's a bug, isn't it? If we're
doing a "read" and we're returning a positive number of bytes then we
need to actually _read_ them. Reading happens below, doesn't it?


-Doug

Re: [PATCH v3 3/3] drm/bridge: parade-ps8640: Add support for AUX channel

2021-09-15 Thread Philip Chen

Hi Fabio

On Wed, Sep 15, 2021 at 2:00 PM Fabio Estevam  wrote:
>
> On Wed, Sep 15, 2021 at 5:41 PM Philip Chen  wrote:
>
> > As regmap_read() should always read 1 byte at a time, should I just do:
> > regmap_read(map, PAGE0_SWAUX_RDATA, (unsigned int*)(buf + i))
>
> There is also regmap_bulk_read() if you need to read more data.

Thanks for the review.
PAGE0_SWAUX_RDATA is a single-byte FIFO buffer.
So I'll need to read one byte at a time cyclically.

Re: [PATCH 0/9] drm: Add privacy-screen class and connector properties

2021-09-15 Thread Lyude Paul

OK! Looked over all of these patches. Patches 2 and 4 have some comments that
should be addressed, but otherwise this series is:

Reviewed-by: Lyude Paul 

Let me know when/if you need help pushing this upstream

On Mon, 2021-09-06 at 09:35 +0200, Hans de Goede wrote:
> Hi all,
> 
> Here is the privacy-screen related code which I last posted in April 2021.
> To the best of my knowledge there is consensus about / everyone is in
> agreement with the new userspace API (2 connector properties) this
> patch-set adds (patch 1 of the series).
> 
> This is unchanged (except for a rebase on drm-tip), what has changed is
> that the first userspace consumer of the new properties is now fully ready
> for merging (it is just waiting for the kernel bits to land first):
> 
>  -
> https://gitlab.gnome.org/GNOME/gsettings-desktop-schemas/-/merge_requests/49
>  - https://gitlab.gnome.org/GNOME/mutter/-/merge_requests/1952
>  - https://gitlab.gnome.org/GNOME/gnome-control-center/-/merge_requests/1032
> 
> Having a userspace-consumer of the API fully ready for merging, clears the
> last blocker for this series. It has already been reviewed before
> by Emil Velikov, but it could really do with another review.
> 
> The new API works as designed and adds the following features to GNOME:
> 
> 1. Showing an OSD notification when the privacy-screen is toggled on/off
>    through hotkeys handled by the embedded-controller
> 2. Allowing control of the privacy-screen from the GNOME control-panel,
>    including the on/off slider shown there updating to match the hw-setting
>    when the setting is changed with the control-panel open.
> 3. Restoring the last user-setting at login
> 
> This series consists of a number of different parts:
> 
> 1. A new version of Rajat's privacy-screen connector properties patch,
> this adds new userspace API in the form of new properties
> 
> 2. Since on most devices the privacy screen is actually controlled by
> some vendor specific ACPI/WMI interface which has a driver under
> drivers/platform/x86, we need some "glue" code to make this functionality
> available to KMS drivers. Patches 2-4 add a new privacy-screen class for
> this, which allows non KMS drivers (and possibly KMS drivers too) to
> register a privacy-screen device and also adds an interface for KMS drivers
> to get access to the privacy-screen associated with a specific connector.
> This is modelled similar to how we deal with e.g. PWMs and GPIOs in the
> kernel, including separate includes for consumers and providers(drivers).
> 
> 3. Some drm_connector helper functions to keep the actual changes needed
> for this in individual KMS drivers as small as possible (patch 5).
> 
> 4. Make the thinkpad_acpi code register a privacy-screen device on
> ThinkPads with a privacy-screen (patches 6-8)
> 
> 5. Make the i915 driver export the privacy-screen functionality through
> the connector properties on the eDP connector.
> 
> I believe that it would be best to merge the entire series, including
> the thinkpad_acpi changes through drm-misc in one go. As the pdx86
> subsys maintainer I hereby give my ack for merging the thinkpad_acpi
> changes through drm-misc.
> 
> There is one small caveat with this series, which it is good to be
> aware of. The i915 driver will now return -EPROBE_DEFER on Thinkpads
> with an eprivacy screen, until the thinkpad_acpi driver is loaded.
> This means that initrd generation tools will need to be updated to
> include thinkpad_acpi when the i915 driver is added to the initrd.
> Without this the loading of the i915 driver will be delayed to after
> the switch to real rootfs.
> 
> Regards,
> 
> Hans
> 
> 
> Hans de Goede (8):
>   drm: Add privacy-screen class (v3)
>   drm/privacy-screen: Add X86 specific arch init code
>   drm/privacy-screen: Add notifier support
>   drm/connector: Add a drm_connector privacy-screen helper functions
>   platform/x86: thinkpad_acpi: Add hotkey_notify_extended_hotkey()
>     helper
>   platform/x86: thinkpad_acpi: Get privacy-screen / lcdshadow ACPI
>     handles only once
>   platform/x86: thinkpad_acpi: Register a privacy-screen device
>   drm/i915: Add privacy-screen support
> 
> Rajat Jain (1):
>   drm/connector: Add support for privacy-screen properties (v4)
> 
>  Documentation/gpu/drm-kms-helpers.rst    |  15 +
>  Documentation/gpu/drm-kms.rst    |   2 +
>  MAINTAINERS  |   8 +
>  drivers/gpu/drm/Kconfig  |   4 +
>  drivers/gpu/drm/Makefile |   1 +
>  drivers/gpu/drm/drm_atomic_uapi.c    |   4 +
>  drivers/gpu/drm/drm_connector.c  | 214 +
>  drivers/gpu/drm/drm_drv.c    |   4 +
>  drivers/gpu/drm/drm_privacy_screen.c | 468 +++
>  drivers/gpu/drm/drm_privacy_screen_x86.c |  86 
>  drivers/gpu/drm/i915/display/intel_display.c |   5 +
>  drivers/gpu/drm/i915/display/intel_dp.c  |  10 +
>  drivers/gpu/drm/i915/i915_pci.c

Re: [PATCH 9/9] drm/i915: Add privacy-screen support

2021-09-15 Thread Lyude Paul

On Mon, 2021-09-06 at 09:35 +0200, Hans de Goede wrote:
> Add support for eDP panels with a built-in privacy screen using the
> new drm_privacy_screen class.
> 
> One thing which stands out here is the addition of these 2 lines to
> intel_atomic_commit_tail:
> 
> for_each_new_connector_in_state(&state->base, connector, ...
> drm_connector_update_privacy_screen(connector, state);
> 
> It may seem more logical to instead take care of updating the
> privacy-screen state by marking the crtc as needing a modeset and then
> do this in both the encoder update_pipe (for fast-sets) and enable
> (for full modesets) callbacks. But ATM these callbacks only get passed
> the new connector_state and these callbacks are all called after
> drm_atomic_helper_swap_state() at which point there is no way to get
> the old state from the new state.

I was going to suggest that you workaround this simply by adding a variable
that corresponds to the most recently committed privacy screen state somewhere
in a driver private structure. But, then I realized that's basically the same
as what you're doing now except that your current solution stores said state
in a shared struct. So, I think you probably do have the right idea here as
long as we don't get any non-ACPI providers in the future. This also seems
like something that wouldn't be difficult to fixup down the line if that ends
up changing.

> 
> Without access to the old state, we do not know if the sw_state of
> the privacy-screen has changed, so we would need to call
> drm_privacy_screen_set_sw_state() unconditionally. This is undesirable
> since all current known privacy-screen providers use ACPI calls which
> are somewhat expensive to make.
> 
> Also, as all providers use ACPI calls, rather than poking GPU registers,
> there is no need to order this together with other encoder operations.
> Since no GPU poking is involved having this as a separate step of the
> commit process actually is the logical thing to do.
> 
> Reviewed-by: Emil Velikov 
> Signed-off-by: Hans de Goede 
> ---
>  drivers/gpu/drm/i915/display/intel_display.c |  5 +
>  drivers/gpu/drm/i915/display/intel_dp.c  | 10 ++
>  drivers/gpu/drm/i915/i915_pci.c  | 12 
>  3 files changed, 27 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c
> b/drivers/gpu/drm/i915/display/intel_display.c
> index 5560d2f4c352..7285873d329a 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -10140,6 +10140,8 @@ static void intel_atomic_commit_tail(struct
> intel_atomic_state *state)
> struct drm_device *dev = state->base.dev;
> struct drm_i915_private *dev_priv = to_i915(dev);
> struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> +   struct drm_connector_state *new_connector_state;
> +   struct drm_connector *connector;
> struct intel_crtc *crtc;
> u64 put_domains[I915_MAX_PIPES] = {};
> intel_wakeref_t wakeref = 0;
> @@ -10237,6 +10239,9 @@ static void intel_atomic_commit_tail(struct
> intel_atomic_state *state)
> intel_color_load_luts(new_crtc_state);
> }
>  
> +   for_each_new_connector_in_state(&state->base, connector,
> new_connector_state, i)
> +   drm_connector_update_privacy_screen(connector, &state-
> >base);
> +
> /*
>  * Now that the vblank has passed, we can go ahead and program the
>  * optimal watermarks on platforms that need two-step watermark
> diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
> b/drivers/gpu/drm/i915/display/intel_dp.c
> index 7f8e8865048f..3aa2072cccf6 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> @@ -37,6 +37,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include "g4x_dp.h"
> @@ -5217,6 +5218,7 @@ static bool intel_edp_init_connector(struct intel_dp
> *intel_dp,
> struct drm_connector *connector = &intel_connector->base;
> struct drm_display_mode *fixed_mode = NULL;
> struct drm_display_mode *downclock_mode = NULL;
> +   struct drm_privacy_screen *privacy_screen;
> bool has_dpcd;
> enum pipe pipe = INVALID_PIPE;
> struct edid *edid;
> @@ -5308,6 +5310,14 @@ static bool intel_edp_init_connector(struct intel_dp
> *intel_dp,
> fixed_mode->hdisplay, fixed_mode->vdisplay);
> }
>  
> +   privacy_screen = drm_privacy_screen_get(dev->dev, NULL);
> +   if (!IS_ERR(privacy_screen)) {
> +   drm_connector_attach_privacy_screen_provider(connector,
> +   
> privacy_screen);
> +   } else if (PTR_ERR(privacy_screen) != -ENODEV) {
> +   drm_warn(&dev_priv->drm, "Error getting privacy-screen\n");
> +   }
> +
> return true;
>  
>  out_vdd_off:
>

[Bug 214425] New: [drm][amdgpu][TTM] Page pool memory never gets freed

2021-09-15 Thread bugzilla-daemon

https://bugzilla.kernel.org/show_bug.cgi?id=214425

Bug ID: 214425
   Summary: [drm][amdgpu][TTM] Page pool memory never gets freed
   Product: Drivers
   Version: 2.5
Kernel Version: 5.14.3
  Hardware: x86-64
OS: Linux
  Tree: Mainline
Status: NEW
  Severity: normal
  Priority: P1
 Component: Video(DRI - non Intel)
  Assignee: drivers_video-...@kernel-bugs.osdl.org
  Reporter: dou...@swarmtech.cz
Regression: No

Hello,
while playing certain webGL games, I've noticed what appears to be a memory
leak in the kernel. Further investigation revealed that after about an hour of
gameplay, over 3GB of memory (half of all available RAM on my machine) will be
taken by the TTM page pool.

While the excessive allocation may be caused by a resource leak in the game
itself (I need to investigate that further), the larger problem is that TTM
never releases the memory even after I quit the game. Closing the game only
moves the allocated memory from active buffer objects to idle memory pool where
it'll get stuck until I reboot the system. Shutting down X server doesn't
release the memory either.

System specs:
HP Probook 455 G7
AMD Ryzen 5 4500U CPU
AMD Renoir GPU (Mesa 21.2.1, LLVM 12.0)
Gentoo Linux

TTM statistics before quitting the game:
/sys/kernel/debug/ttm/buffer_objects:
3116

/sys/kernel/debug/ttm/page_pool:
  --- 0--- --- 1--- --- 2--- --- 3--- --- 4--- --- 5--- --- 6--- ---
7--- --- 8--- --- 9--- ---10---
wc  :2211820   
1212
uc  :0000000   
0000
wc 32   :0000000   
0000
uc 32   :0000000   
0000

total   : 3410 of   939433

/sys/kernel/debug/ttm/page_pool_shrink:
2898/512


===

TTM statistics after quitting the game (until reboot):
/sys/kernel/debug/ttm/buffer_objects:
403

/sys/kernel/debug/ttm/page_pool:
  --- 0--- --- 1--- --- 2--- --- 3--- --- 4--- --- 5--- --- 6--- ---
7--- --- 8--- --- 9--- ---10---
wc  :  151  134   205  255  241  790 
193  416 1121   83
uc  :0000000   
0000
wc 32   :0000000   
0000
uc 32   :0000000   
0000

total   :   853035 of   939433

/sys/kernel/debug/ttm/page_pool_shrink:
853034/1

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

Re: [Intel-gfx] [PATCH v9 04/17] drm/i915/pxp: allocate a vcs context for pxp usage

2021-09-15 Thread Rodrigo Vivi

On Wed, Sep 15, 2021 at 04:53:35PM +0300, Jani Nikula wrote:
> On Fri, 10 Sep 2021, Daniele Ceraolo Spurio  
> wrote:
> > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h 
> > b/drivers/gpu/drm/i915/pxp/intel_pxp.h
> > new file mode 100644
> > index ..e87550fb9821
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h
> > @@ -0,0 +1,35 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright(c) 2020, Intel Corporation. All rights reserved.
> > + */
> > +
> > +#ifndef __INTEL_PXP_H__
> > +#define __INTEL_PXP_H__
> > +
> > +#include "gt/intel_gt_types.h"
> 
> I've been trying to promote the idea that we don't include headers from
> headers, unless really necessary. It helps with build times by reducing
> rebuilds due to changes, but more importantly, it helps with coming up
> with abstractions that don't need to look at the guts of other
> components.
> 
> The above include line pulls in 67 other includes. And it has to look at
> the same files a *lot* more times to know not to include them again.
> 
> Maybe we need to start being more aggressive about hiding the
> abstractions behind the interfaces and headers. Static inlines are
> nothing but micro-optimizations that leak abstractions. Do we need
> these?

Yeap, we have a few cases where this is already happening...

Should we start using the container_of more directly and avoid the a_to_b()
helpers?

Should we create the a_to_b() helpers only inside .c files like we have
in a few other cases?

In this pxp case here it looks like using container_of directly
everywhere is better... is this your recommendation?

> 
> > +#include "intel_pxp_types.h"
> > +
> > +static inline struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp)
> > +{
> > +   return container_of(pxp, struct intel_gt, pxp);
> > +}
> 
> I think it's questionable to claim the parameter is const, when you can
> do:
> 
>   const struct intel_pxp *const_pxp = something;
>   struct intel_pxp *pxp = &pxp_to_gt(const_pxp)->pxp;
> 
> BR,
> Jani.
> 
> > +
> > +static inline bool intel_pxp_is_enabled(const struct intel_pxp *pxp)
> > +{
> > +   return pxp->ce;
> > +}
> > +
> > +#ifdef CONFIG_DRM_I915_PXP
> > +void intel_pxp_init(struct intel_pxp *pxp);
> > +void intel_pxp_fini(struct intel_pxp *pxp);
> > +#else
> > +static inline void intel_pxp_init(struct intel_pxp *pxp)
> > +{
> > +}
> > +
> > +static inline void intel_pxp_fini(struct intel_pxp *pxp)
> > +{
> > +}
> > +#endif
> > +
> > +#endif /* __INTEL_PXP_H__ */
> > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h 
> > b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> > new file mode 100644
> > index ..bd12c520e60a
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> > @@ -0,0 +1,15 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright(c) 2020, Intel Corporation. All rights reserved.
> > + */
> > +
> > +#ifndef __INTEL_PXP_TYPES_H__
> > +#define __INTEL_PXP_TYPES_H__
> > +
> > +struct intel_context;
> > +
> > +struct intel_pxp {
> > +   struct intel_context *ce;
> > +};
> > +
> > +#endif /* __INTEL_PXP_TYPES_H__ */
> 
> -- 
> Jani Nikula, Intel Open Source Graphics Center

Re: [PATCH v3 3/3] drm/bridge: parade-ps8640: Add support for AUX channel

2021-09-15 Thread Fabio Estevam

On Wed, Sep 15, 2021 at 5:41 PM Philip Chen  wrote:

> As regmap_read() should always read 1 byte at a time, should I just do:
> regmap_read(map, PAGE0_SWAUX_RDATA, (unsigned int*)(buf + i))

There is also regmap_bulk_read() if you need to read more data.

Re: [Intel-gfx] [PATCH 14/27] drm/i915/guc: Assign contexts in parent-child relationship consecutive guc_ids

2021-09-15 Thread Matthew Brost

On Wed, Sep 15, 2021 at 01:04:45PM -0700, John Harrison wrote:
> On 8/20/2021 15:44, Matthew Brost wrote:
> > Assign contexts in parent-child relationship consecutive guc_ids. This
> > is accomplished by partitioning guc_id space between ones that need to
> > be consecutive (1/16 available guc_ids) and ones that do not (15/16 of
> > available guc_ids). The consecutive search is implemented via the bitmap
> > API.
> > 
> > This is a precursor to the full GuC multi-lrc implementation but aligns
> > to how the GuC multi-lrc interface is defined - guc_ids must be consecutive
> > when using the GuC multi-lrc interface.
> > 
> > v2:
> >   (Daniel Vetter)
> >- Explicitly state why we assign consecutive guc_ids
> > 
> > Signed-off-by: Matthew Brost 
> > ---
> >   drivers/gpu/drm/i915/gt/uc/intel_guc.h|   6 +-
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 107 +-
> >   2 files changed, 86 insertions(+), 27 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > index 023953e77553..3f95b1b4f15c 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > @@ -61,9 +61,13 @@ struct intel_guc {
> >  */
> > spinlock_t lock;
> > /**
> > -* @guc_ids: used to allocate new guc_ids
> > +* @guc_ids: used to allocate new guc_ids, single-lrc
> >  */
> > struct ida guc_ids;
> > +   /**
> > +* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
> > +*/
> > +   unsigned long *guc_ids_bitmap;
> > /** @num_guc_ids: number of guc_ids that can be used */
> > u32 num_guc_ids;
> > /** @max_guc_ids: max number of guc_ids that can be used */
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 00d54bb00bfb..e9dfd43d29a0 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -125,6 +125,18 @@ guc_create_virtual(struct intel_engine_cs **siblings, 
> > unsigned int count);
> >   #define GUC_REQUEST_SIZE 64 /* bytes */
> > +/*
> > + * We reserve 1/16 of the guc_ids for multi-lrc as these need to be 
> > contiguous
> > + * per the GuC submission interface. A different allocation algorithm is 
> > used
> > + * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
> The 'hence' clause seems to be attached to the wrong reason. The id space is
> partitioned because of the contiguous vs random requirements of multi vs
> single LRC, not because a different allocator is used in one partition vs the
> other.
> 

Kinda? The reason I partitioned it is because the two algorithms are different;
we could use a unified space with a single algorithm, right? It was just
easier to split the space and use 2 already existing data structures rather
than cook up an algorithm in a unified space. There isn't a requirement from
the GuC that the space is partitioned, the only requirement is multi-lrc
IDs are contiguous. All this being said, I think the comment is correct.

> > + * partition the guc_id space. We believe the number of multi-lrc contexts 
> > in
> > + * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids 
> > for
> > + * multi-lrc.
> > + */
> > +#define NUMBER_MULTI_LRC_GUC_ID(guc) \
> > +   ((guc)->submission_state.num_guc_ids / 16 > 32 ? \
> > +(guc)->submission_state.num_guc_ids / 16 : 32)
> > +
> >   /*
> >* Below is a set of functions which control the GuC scheduling state 
> > which
> >* require a lock.
> > @@ -1176,6 +1188,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
> > INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
> > intel_gt_pm_unpark_work_init(&guc->submission_state.destroyed_worker,
> >  destroyed_worker_func);
> > +   guc->submission_state.guc_ids_bitmap =
> > +   bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> > +   if (!guc->submission_state.guc_ids_bitmap)
> > +   return -ENOMEM;
> > return 0;
> >   }
> > @@ -1188,6 +1204,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
> > guc_lrc_desc_pool_destroy(guc);
> > guc_flush_destroyed_contexts(guc);
> > i915_sched_engine_put(guc->sched_engine);
> > +   bitmap_free(guc->submission_state.guc_ids_bitmap);
> >   }
> >   static void queue_request(struct i915_sched_engine *sched_engine,
> > @@ -1239,18 +1256,43 @@ static void guc_submit_request(struct i915_request 
> > *rq)
> > spin_unlock_irqrestore(&sched_engine->lock, flags);
> >   }
> > -static int new_guc_id(struct intel_guc *guc)
> > +static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
> >   {
> > -   return ida_simple_get(&guc->submission_state.guc_ids, 0,
> > - guc->submission_state.num_guc_ids, G

Re: [PATCH 8/9] platform/x86: thinkpad_acpi: Register a privacy-screen device

2021-09-15 Thread Lyude Paul

On Mon, 2021-09-06 at 09:35 +0200, Hans de Goede wrote:
> Register a privacy-screen device on laptops with a privacy-screen,
> this exports the PrivacyGuard features to user-space using a
> standardized vendor-agnostic sysfs interface. Note the sysfs interface
> is read-only.
> 
> Registering a privacy-screen device with the new privacy-screen class
> code will also allow the GPU driver to get a handle to it and export
> the privacy-screen setting as a property on the DRM connector object
> for the LCD panel. This DRM connector property is news standardized

Looks like a typo here --^

> interface which all user-space code should use to query and control
> the privacy-screen.
> 
> Reviewed-by: Emil Velikov 
> Signed-off-by: Hans de Goede 
> ---
> Changes in v2:
> - Make the new lcdshadow_set_sw_state, lcdshadow_get_hw_state and
>   lcdshadow_ops symbols static
> - Update state and call drm_privacy_screen_call_notifier_chain()
>   when the state is changed by pressing the Fn + D hotkey combo
> ---
>  drivers/platform/x86/Kconfig |  2 +
>  drivers/platform/x86/thinkpad_acpi.c | 91 
>  2 files changed, 68 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
> index d12db6c316ea..ae00a27f9f95 100644
> --- a/drivers/platform/x86/Kconfig
> +++ b/drivers/platform/x86/Kconfig
> @@ -509,7 +509,9 @@ config THINKPAD_ACPI
> depends on ACPI_VIDEO || ACPI_VIDEO = n
> depends on BACKLIGHT_CLASS_DEVICE
> depends on I2C
> +   depends on DRM
> select ACPI_PLATFORM_PROFILE
> +   select DRM_PRIVACY_SCREEN
> select HWMON
> select NVRAM
> select NEW_LEDS
> diff --git a/drivers/platform/x86/thinkpad_acpi.c
> b/drivers/platform/x86/thinkpad_acpi.c
> index b8f2556c4797..044b238730ba 100644
> --- a/drivers/platform/x86/thinkpad_acpi.c
> +++ b/drivers/platform/x86/thinkpad_acpi.c
> @@ -73,6 +73,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include "dual_accel_detect.h"
>  
>  /* ThinkPad CMOS commands */
> @@ -157,6 +158,7 @@ enum tpacpi_hkey_event_t {
> TP_HKEY_EV_VOL_UP   = 0x1015, /* Volume up or unmute */
> TP_HKEY_EV_VOL_DOWN = 0x1016, /* Volume down or unmute
> */
> TP_HKEY_EV_VOL_MUTE = 0x1017, /* Mixer output mute */
> +   TP_HKEY_EV_PRIVACYGUARD_TOGGLE  = 0x130f, /* Toggle priv.guard
> on/off */
>  
> /* Reasons for waking up from S3/S4 */
> TP_HKEY_EV_WKUP_S3_UNDOCK   = 0x2304, /* undock requested, S3 */
> @@ -3889,6 +3891,12 @@ static bool hotkey_notify_extended_hotkey(const u32
> hkey)
>  {
> unsigned int scancode;
>  
> +   switch (hkey) {
> +   case TP_HKEY_EV_PRIVACYGUARD_TOGGLE:
> +   tpacpi_driver_event(hkey);
> +   return true;
> +   }
> +
> /* Extended keycodes start at 0x300 and our offset into the map
>  * TP_ACPI_HOTKEYSCAN_EXTENDED_START. The calculated scancode
>  * will be positive, but might not be in the correct range.
> @@ -9819,30 +9827,40 @@ static struct ibm_struct battery_driver_data = {
>   * LCD Shadow subdriver, for the Lenovo PrivacyGuard feature
>   */
>  
> +static struct drm_privacy_screen *lcdshadow_dev;
>  static acpi_handle lcdshadow_get_handle;
>  static acpi_handle lcdshadow_set_handle;
> -static int lcdshadow_state;
>  
> -static int lcdshadow_on_off(bool state)
> +static int lcdshadow_set_sw_state(struct drm_privacy_screen *priv,
> + enum drm_privacy_screen_status state)
>  {
> int output;
>  
> +   if (WARN_ON(!mutex_is_locked(&priv->lock)))
> +   return -EIO;
> +
> if (!acpi_evalf(lcdshadow_set_handle, &output, NULL, "dd",
> (int)state))
> return -EIO;
>  
> -   lcdshadow_state = state;
> +   priv->hw_state = priv->sw_state = state;
> return 0;
>  }
>  
> -static int lcdshadow_set(bool on)
> +static void lcdshadow_get_hw_state(struct drm_privacy_screen *priv)
>  {
> -   if (lcdshadow_state < 0)
> -   return lcdshadow_state;
> -   if (lcdshadow_state == on)
> -   return 0;
> -   return lcdshadow_on_off(on);
> +   int output;
> +
> +   if (!acpi_evalf(lcdshadow_get_handle, &output, NULL, "dd", 0))
> +   return;
> +
> +   priv->hw_state = priv->sw_state = output & 0x1;
>  }
>  
> +static const struct drm_privacy_screen_ops lcdshadow_ops = {
> +   .set_sw_state = lcdshadow_set_sw_state,
> +   .get_hw_state = lcdshadow_get_hw_state,
> +};
> +
>  static int tpacpi_lcdshadow_init(struct ibm_init_struct *iibm)
>  {
> acpi_status status1, status2;
> @@ -9850,36 +9868,44 @@ static int tpacpi_lcdshadow_init(struct
> ibm_init_struct *iibm)
>  
> status1 = acpi_get_handle(hkey_handle, "GSSS",
> &lcdshadow_get_handle);
> status2 = acpi_get_handle(hkey_handle, "",
> &

[PATCH] drm/rockchip: remove unused psr_list{,_lock}

2021-09-15 Thread Brian Norris

Some leftover cleanup from commit 6c836d965bad ("drm/rockchip: Use the
helpers for PSR").

Cc: Heiko Stuebner 
Cc: Sean Paul 
Signed-off-by: Brian Norris 
---

 drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 3 ---
 drivers/gpu/drm/rockchip/rockchip_drm_drv.h | 2 --
 2 files changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c 
b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index bfba9793d238..6fa686e6c7e9 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -138,9 +138,6 @@ static int rockchip_drm_bind(struct device *dev)
 
drm_dev->dev_private = private;
 
-   INIT_LIST_HEAD(&private->psr_list);
-   mutex_init(&private->psr_list_lock);
-
ret = rockchip_drm_init_iommu(drm_dev);
if (ret)
goto err_free;
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h 
b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index e33c2dcd0d4b..aa0909e8edf9 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
@@ -48,8 +48,6 @@ struct rockchip_drm_private {
struct iommu_domain *domain;
struct mutex mm_lock;
struct drm_mm mm;
-   struct list_head psr_list;
-   struct mutex psr_list_lock;
 };
 
 int rockchip_drm_dma_attach_device(struct drm_device *drm_dev,
-- 
2.33.0.309.g3052b89438-goog

Re: [PATCH v3 3/3] drm/bridge: parade-ps8640: Add support for AUX channel

2021-09-15 Thread Philip Chen

Hi

On Tue, Sep 14, 2021 at 5:57 PM Stephen Boyd  wrote:
>
> Quoting Philip Chen (2021-09-14 16:28:45)
> > diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c 
> > b/drivers/gpu/drm/bridge/parade-ps8640.c
> > index 8d3e7a147170..dc349d729f5a 100644
> > --- a/drivers/gpu/drm/bridge/parade-ps8640.c
> > +++ b/drivers/gpu/drm/bridge/parade-ps8640.c
> > @@ -117,6 +144,129 @@ static inline struct ps8640 *bridge_to_ps8640(struct 
> > drm_bridge *e)
> [...]
> > +   case DP_AUX_I2C_WRITE:
> > +   case DP_AUX_I2C_READ:
> > +   break;
> > +   default:
> > +   return -EINVAL;
> > +   }
> > +
> > +   ret = regmap_write(map, PAGE0_AUXCH_CFG3, AUXCH_CFG3_RESET);
> > +   if (ret) {
> > +   dev_err(dev, "failed to write PAGE0_AUXCH_CFG3: %d\n", ret);
>
> Can we use DRM_DEV_ERROR()?
Sure.
>
> > +   return ret;
> > +   }
> > +
> > +   /* Assume it's good */
> > +   msg->reply = 0;
> > +
> > +   addr_len[0] = msg->address & 0xff;
> > +   addr_len[1] = (msg->address >> 8) & 0xff;
> > +   addr_len[2] = ((msg->request << 4) & SWAUX_CMD_MASK) |
> > +   ((msg->address >> 16) & SWAUX_ADDR_19_16_MASK);
>
> It really feels like this ought to be possible with some sort of
> cpu_to_le32() API. We're shoving msg->address into 3 bytes and then
> adding in the request and some length. So we could do something like:
>
> u32 addr_len;
>
> addr_len = FIELD_PREP(SWAUX_ADDR_MASK, msg->address);
> addr_len |= FIELD_PREP(SWAUX_CMD_MASK, msg->request);
> if (len)
> addr_len |= FIELD_PREP(LEN_MASK, len - 1);
> else
> addr_len |= FIELD_PREP(LEN_MASK, SWAUX_NO_PAYLOAD );
>
> cpu_to_le32s(&addr_len);
>
> regmap_bulk_write(map, PAGE0_SWAUX_ADDR_7_0, &addr_len, 
> sizeof(addr_len));
>
Yes, thanks for the advice.
Will add this change to v4.

> > +   addr_len[3] = (len == 0) ? SWAUX_NO_PAYLOAD :
> > +   ((len - 1) & SWAUX_LENGTH_MASK);
> > +
> > +   regmap_bulk_write(map, PAGE0_SWAUX_ADDR_7_0, addr_len,
> > + ARRAY_SIZE(addr_len));
> > +
> > +   if (len && (request == DP_AUX_NATIVE_WRITE ||
> > +   request == DP_AUX_I2C_WRITE)) {
> > +   /* Write to the internal FIFO buffer */
> > +   for (i = 0; i < len; i++) {
> > +   ret = regmap_write(map, PAGE0_SWAUX_WDATA, buf[i]);
> > +   if (ret) {
> > +   dev_err(dev, "failed to write WDATA: %d\n",
>
> DRM_DEV_ERROR?
Sure.
>
> > +   ret);
> > +   return ret;
> > +   }
> > +   }
> > +   }
> > +
> > +   regmap_write(map, PAGE0_SWAUX_CTRL, SWAUX_SEND);
> > +
> > +   /* Zero delay loop because i2c transactions are slow already */
> > +   regmap_read_poll_timeout(map, PAGE0_SWAUX_CTRL, data,
> > +!(data & SWAUX_SEND), 0, 50 * 1000);
> > +
> > +   regmap_read(map, PAGE0_SWAUX_STATUS, &data);
> > +   if (ret) {
> > +   dev_err(dev, "failed to read PAGE0_SWAUX_STATUS: %d\n", 
> > ret);
>
> DRM_DEV_ERROR?
Sure.
>
> > +   return ret;
> > +   }
> > +
> > +   switch (data & SWAUX_STATUS_MASK) {
> > +   /* Ignore the DEFER cases as they are already handled in hardware */
> > +   case SWAUX_STATUS_NACK:
> > +   case SWAUX_STATUS_I2C_NACK:
> > +   /*
> > +* The programming guide is not clear about whether a I2C 
> > NACK
> > +* would trigger SWAUX_STATUS_NACK or 
> > SWAUX_STATUS_I2C_NACK. So
> > +* we handle both cases together.
> > +*/
> > +   if (is_native_aux)
> > +   msg->reply |= DP_AUX_NATIVE_REPLY_NACK;
> > +   else
> > +   msg->reply |= DP_AUX_I2C_REPLY_NACK;
> > +
> > +   len = data & SWAUX_M_MASK;
> > +   return len;
>
> Why not 'return data & SWAUX_M_MASK;' and skip the assignment?
I want to make it clear that we are returning the number of bytes that
we have read/written instead of some error code.
If you think it's not super helpful, I can just return data & SWAUX_M_MASK.

>
> > +   case SWAUX_STATUS_ACKM:
>
> Move this up and add fallthrough?
Thanks.
Will add this change to v4.
>
> > +   len = data & SWAUX_M_MASK;
> > +   return len;
> > +   case SWAUX_STATUS_INVALID:
> > +   return -EOPNOTSUPP;
> > +   case SWAUX_STATUS_TIMEOUT:
> > +   return -ETIMEDOUT;
> > +   }
> > +
> > +   if (len && (request == DP_AUX_NATIVE_READ ||
> > +   request == DP_AUX_I2C_READ)) {
> > +   /* Read from the internal FIFO buffer */
> > +   for (i = 0; i < len; i++) {
> > +   ret = regmap_read(map, PAGE0_S

[PATCH v2 13/13] drm/msm: Implement HDCP 1.x using the new drm HDCP helpers

2021-09-15 Thread Sean Paul

From: Sean Paul 

This patch adds HDCP 1.x support to msm DP connectors using the new HDCP
helpers.

Cc: Stephen Boyd 
Signed-off-by: Sean Paul 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-15-s...@poorly.run
 #v1

Changes in v2:
-Squash [1] into this patch with the following changes (Stephen)
  -Update the sc7180 dtsi file
  -Remove resource names and just use index (Stephen)

[1] 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-14-s...@poorly.run
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi |   4 +-
 drivers/gpu/drm/msm/Makefile |   1 +
 drivers/gpu/drm/msm/dp/dp_debug.c|  49 ++-
 drivers/gpu/drm/msm/dp/dp_debug.h|   6 +-
 drivers/gpu/drm/msm/dp/dp_display.c  |  45 ++-
 drivers/gpu/drm/msm/dp/dp_display.h  |   5 +
 drivers/gpu/drm/msm/dp/dp_drm.c  |  68 -
 drivers/gpu/drm/msm/dp/dp_drm.h  |   5 +
 drivers/gpu/drm/msm/dp/dp_hdcp.c | 433 +++
 drivers/gpu/drm/msm/dp/dp_hdcp.h |  27 ++
 drivers/gpu/drm/msm/dp/dp_parser.c   |  22 +-
 drivers/gpu/drm/msm/dp/dp_parser.h   |   4 +
 drivers/gpu/drm/msm/dp/dp_reg.h  |  44 ++-
 drivers/gpu/drm/msm/msm_atomic.c |  15 +
 14 files changed, 709 insertions(+), 19 deletions(-)
 create mode 100644 drivers/gpu/drm/msm/dp/dp_hdcp.c
 create mode 100644 drivers/gpu/drm/msm/dp/dp_hdcp.h

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index c8921e2d6480..3ae6fc7a2c01 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -3088,7 +3088,9 @@ mdss_dp: displayport-controller@ae9 {
compatible = "qcom,sc7180-dp";
status = "disabled";
 
-   reg = <0 0x0ae9 0 0x1400>;
+   reg = <0 0x0ae9 0 0x1400>,
+ <0 0x0aed1000 0 0x174>,
+ <0 0x0aee1000 0 0x2c>;
 
interrupt-parent = <&mdss>;
interrupts = <12>;
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 904535eda0c4..98731fd262d6 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -109,6 +109,7 @@ msm-$(CONFIG_DRM_MSM_DP)+= dp/dp_aux.o \
dp/dp_ctrl.o \
dp/dp_display.o \
dp/dp_drm.o \
+   dp/dp_hdcp.o \
dp/dp_hpd.o \
dp/dp_link.o \
dp/dp_panel.o \
diff --git a/drivers/gpu/drm/msm/dp/dp_debug.c 
b/drivers/gpu/drm/msm/dp/dp_debug.c
index 2f6247e80e9d..de16fca8782a 100644
--- a/drivers/gpu/drm/msm/dp/dp_debug.c
+++ b/drivers/gpu/drm/msm/dp/dp_debug.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "dp_parser.h"
 #include "dp_catalog.h"
@@ -15,6 +16,7 @@
 #include "dp_ctrl.h"
 #include "dp_debug.h"
 #include "dp_display.h"
+#include "dp_hdcp.h"
 
 #define DEBUG_NAME "msm_dp"
 
@@ -24,6 +26,7 @@ struct dp_debug_private {
struct dp_usbpd *usbpd;
struct dp_link *link;
struct dp_panel *panel;
+   struct dp_hdcp *hdcp;
struct drm_connector **connector;
struct device *dev;
struct drm_device *drm_dev;
@@ -349,6 +352,38 @@ static int dp_test_active_open(struct inode *inode,
inode->i_private);
 }
 
+static ssize_t dp_hdcp_key_write(struct file *file, const char __user *ubuf,
+size_t len, loff_t *offp)
+{
+   char *input_buffer;
+   int ret = 0;
+   struct dp_debug_private *debug = file->private_data;
+   struct drm_device *dev;
+
+   dev = debug->drm_dev;
+
+   if (len != (DRM_HDCP_KSV_LEN + DP_HDCP_NUM_KEYS * DP_HDCP_KEY_LEN))
+   return -EINVAL;
+
+   if (!debug->hdcp)
+   return -ENOENT;
+
+   input_buffer = memdup_user_nul(ubuf, len);
+   if (IS_ERR(input_buffer))
+   return PTR_ERR(input_buffer);
+
+   ret = dp_hdcp_ingest_key(debug->hdcp, input_buffer, len);
+
+   kfree(input_buffer);
+   if (ret < 0) {
+   DRM_ERROR("Could not ingest HDCP key, ret=%d\n", ret);
+   return ret;
+   }
+
+   *offp += len;
+   return len;
+}
+
 static const struct file_operations dp_debug_fops = {
.open = simple_open,
.read = dp_debug_read_info,
@@ -363,6 +398,12 @@ static const struct file_operations test_active_fops = {
.write = dp_test_active_write
 };
 
+static const struct file_operations dp_hdcp_key_fops = {
+   .owner = THIS_MODULE,
+   .open = simple_open,
+   .write = dp_hdcp_key_write,
+};
+
 static int dp_debug_init(struct dp_debug *dp_debug, struct drm_minor *minor)
 {
int rc = 0;
@@ -384,6 +425,10 @@ static int dp_debug_init(struct dp_debug *dp_debug, struct 
drm_minor *minor)
minor->debugfs_root,
debug, &dp_test_type_fops);
 
+   debugfs_create_file("msm

[PATCH v2 12/13] dt-bindings: msm/dp: Add bindings for HDCP registers

2021-09-15 Thread Sean Paul

From: Sean Paul 

This patch adds the bindings for the MSM DisplayPort HDCP registers
which are required to write the HDCP key into the display controller as
well as the registers to enable HDCP authentication/key
exchange/encryption.

Cc: Rob Herring 
Cc: Stephen Boyd 
Signed-off-by: Sean Paul 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-13-s...@poorly.run
 #v1

Changes in v2:
-Drop register range names (Stephen)
-Fix yaml errors (Rob)
---
 .../devicetree/bindings/display/msm/dp-controller.yaml | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml 
b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
index 64d8d9e5e47a..80a55e9ff532 100644
--- a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
@@ -19,7 +19,7 @@ properties:
   - qcom,sc7180-dp
 
   reg:
-maxItems: 1
+maxItems: 3
 
   interrupts:
 maxItems: 1
@@ -99,8 +99,9 @@ examples:
 #include 
 
 displayport-controller@ae9 {
-compatible = "qcom,sc7180-dp";
-reg = <0xae9 0x1400>;
+reg = <0 0x0ae9 0 0x1400>,
+  <0 0x0aed1000 0 0x174>,
+  <0 0x0aee1000 0 0x2c>;
 interrupt-parent = <&mdss>;
 interrupts = <12>;
 clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
-- 
Sean Paul, Software Engineer, Google / Chromium OS

[PATCH v2 11/13] drm/msm/dp: Re-order dp_audio_put in deinit_sub_modules

2021-09-15 Thread Sean Paul

From: Sean Paul 

Audio is initialized last, it should be de-initialized first to match
the order in dp_init_sub_modules().

Signed-off-by: Sean Paul 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-12-s...@poorly.run
 #v1

Changes in v2:
-None
---
 drivers/gpu/drm/msm/dp/dp_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c 
b/drivers/gpu/drm/msm/dp/dp_display.c
index fbe4c2cd52a3..19946024e235 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -714,9 +714,9 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, 
u32 data)
 static void dp_display_deinit_sub_modules(struct dp_display_private *dp)
 {
dp_debug_put(dp->debug);
+   dp_audio_put(dp->audio);
dp_panel_put(dp->panel);
dp_aux_put(dp->aux);
-   dp_audio_put(dp->audio);
 }
 
 static int dp_init_sub_modules(struct dp_display_private *dp)
-- 
Sean Paul, Software Engineer, Google / Chromium OS

[PATCH v2 10/13] drm/msm/dpu: Remove encoder->enable() hack

2021-09-15 Thread Sean Paul

From: Sean Paul 

encoder->commit() was being misused because there were some global
resources which needed to be tweaked in encoder->enable() which were not
accessible in dpu_encoder.c. That is no longer true and the redirect
serves no purpose any longer. So remove the indirection.

Signed-off-by: Sean Paul 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-11-s...@poorly.run
 #v1

Changes in v2:
-None
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c |  5 +
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 22 -
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h |  2 --
 drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h   |  4 
 4 files changed, 1 insertion(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 984f8a59cb73..ddc542a0d41f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -2122,11 +2122,8 @@ static void dpu_encoder_frame_done_timeout(struct 
timer_list *t)
 static const struct drm_encoder_helper_funcs dpu_encoder_helper_funcs = {
.mode_set = dpu_encoder_virt_mode_set,
.disable = dpu_encoder_virt_disable,
-   .enable = dpu_kms_encoder_enable,
+   .enable = dpu_encoder_virt_enable,
.atomic_check = dpu_encoder_virt_atomic_check,
-
-   /* This is called by dpu_kms_encoder_enable */
-   .commit = dpu_encoder_virt_enable,
 };
 
 static const struct drm_encoder_funcs dpu_encoder_funcs = {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index fb0d9f781c66..4a0b55d145ad 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -381,28 +381,6 @@ static void dpu_kms_flush_commit(struct msm_kms *kms, 
unsigned crtc_mask)
}
 }
 
-/*
- * Override the encoder enable since we need to setup the inline rotator and do
- * some crtc magic before enabling any bridge that might be present.
- */
-void dpu_kms_encoder_enable(struct drm_encoder *encoder)
-{
-   const struct drm_encoder_helper_funcs *funcs = encoder->helper_private;
-   struct drm_device *dev = encoder->dev;
-   struct drm_crtc *crtc;
-
-   /* Forward this enable call to the commit hook */
-   if (funcs && funcs->commit)
-   funcs->commit(encoder);
-
-   drm_for_each_crtc(crtc, dev) {
-   if (!(crtc->state->encoder_mask & drm_encoder_mask(encoder)))
-   continue;
-
-   trace_dpu_kms_enc_enable(DRMID(crtc));
-   }
-}
-
 static void dpu_kms_complete_commit(struct msm_kms *kms, unsigned crtc_mask)
 {
struct dpu_kms *dpu_kms = to_dpu_kms(kms);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
index 323a6bce9e64..f1ebb60dacab 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
@@ -248,8 +248,6 @@ void *dpu_debugfs_get_root(struct dpu_kms *dpu_kms);
 int dpu_enable_vblank(struct msm_kms *kms, struct drm_crtc *crtc);
 void dpu_disable_vblank(struct msm_kms *kms, struct drm_crtc *crtc);
 
-void dpu_kms_encoder_enable(struct drm_encoder *encoder);
-
 /**
  * dpu_kms_get_clk_rate() - get the clock rate
  * @dpu_kms:  pointer to dpu_kms structure
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h
index 37bba57675a8..54d74341e690 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h
@@ -266,10 +266,6 @@ DEFINE_EVENT(dpu_drm_obj_template, 
dpu_crtc_complete_commit,
TP_PROTO(uint32_t drm_id),
TP_ARGS(drm_id)
 );
-DEFINE_EVENT(dpu_drm_obj_template, dpu_kms_enc_enable,
-   TP_PROTO(uint32_t drm_id),
-   TP_ARGS(drm_id)
-);
 DEFINE_EVENT(dpu_drm_obj_template, dpu_kms_commit,
TP_PROTO(uint32_t drm_id),
TP_ARGS(drm_id)
-- 
Sean Paul, Software Engineer, Google / Chromium OS

[PATCH v2 09/13] drm/msm/dpu: Remove useless checks in dpu_encoder

2021-09-15 Thread Sean Paul

From: Sean Paul 

A couple more useless checks to remove in dpu_encoder.

Signed-off-by: Sean Paul 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-10-s...@poorly.run
 #v1

Changes in v2:
-None
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 0e9d3fa1544b..984f8a59cb73 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -1153,10 +1153,6 @@ static void dpu_encoder_virt_enable(struct drm_encoder 
*drm_enc)
struct msm_drm_private *priv;
struct drm_display_mode *cur_mode = NULL;
 
-   if (!drm_enc) {
-   DPU_ERROR("invalid encoder\n");
-   return;
-   }
dpu_enc = to_dpu_encoder_virt(drm_enc);
 
mutex_lock(&dpu_enc->enc_lock);
@@ -1203,14 +1199,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder 
*drm_enc)
struct msm_drm_private *priv;
int i = 0;
 
-   if (!drm_enc) {
-   DPU_ERROR("invalid encoder\n");
-   return;
-   } else if (!drm_enc->dev) {
-   DPU_ERROR("invalid dev\n");
-   return;
-   }
-
dpu_enc = to_dpu_encoder_virt(drm_enc);
DPU_DEBUG_ENC(dpu_enc, "\n");
 
-- 
Sean Paul, Software Engineer, Google / Chromium OS

[PATCH v2 08/13] drm/msm/dpu_kms: Re-order dpu includes

2021-09-15 Thread Sean Paul

From: Sean Paul 

Make includes alphabetical in dpu_kms.c

Signed-off-by: Sean Paul 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-9-s...@poorly.run
 #v1

Changes in v2:
-None
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index ae48f41821cf..fb0d9f781c66 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -21,14 +21,14 @@
 #include "msm_gem.h"
 #include "disp/msm_disp_snapshot.h"
 
-#include "dpu_kms.h"
 #include "dpu_core_irq.h"
+#include "dpu_crtc.h"
+#include "dpu_encoder.h"
 #include "dpu_formats.h"
 #include "dpu_hw_vbif.h"
-#include "dpu_vbif.h"
-#include "dpu_encoder.h"
+#include "dpu_kms.h"
 #include "dpu_plane.h"
-#include "dpu_crtc.h"
+#include "dpu_vbif.h"
 
 #define CREATE_TRACE_POINTS
 #include "dpu_trace.h"
-- 
Sean Paul, Software Engineer, Google / Chromium OS

[PATCH v2 07/13] drm/i915/hdcp: Use HDCP helpers for i915

2021-09-15 Thread Sean Paul

From: Sean Paul 

Now that all of the HDCP 1.x logic has been migrated to the central HDCP
helpers, use it in the i915 driver.

The majority of the driver code for HDCP 1.x will live in intel_hdcp.c,
however there are a few helper hooks which are connector-specific and
need to be partially or fully implemented in the intel_dp_hdcp.c or
intel_hdmi.c.

We'll leave most of the HDCP 2.x code alone since we don't have another
implementation of HDCP 2.x to use as reference for what should and
should not live in the drm helpers. The helper will call the overly
general enable/disable/is_capable HDCP 2.x callbacks and leave the
interesting stuff for the driver. Once we have another HDCP 2.x
implementation, we should do a similar migration.

Signed-off-by: Sean Paul 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-8-s...@poorly.run
 #v1

Changes in v2:
-Fix mst helper function pointer reported by 0-day
---
 drivers/gpu/drm/i915/display/intel_ddi.c  |  29 +-
 .../drm/i915/display/intel_display_debugfs.c  |   6 +-
 .../drm/i915/display/intel_display_types.h|  58 +-
 drivers/gpu/drm/i915/display/intel_dp_hdcp.c  | 345 +++
 drivers/gpu/drm/i915/display/intel_dp_mst.c   |  17 +-
 drivers/gpu/drm/i915/display/intel_hdcp.c | 935 +++---
 drivers/gpu/drm/i915/display/intel_hdcp.h |  30 +-
 drivers/gpu/drm/i915/display/intel_hdmi.c | 256 ++---
 8 files changed, 417 insertions(+), 1259 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c 
b/drivers/gpu/drm/i915/display/intel_ddi.c
index 23ef291f7b30..8bdf41593174 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -26,6 +26,7 @@
  */
 
 #include 
+#include 
 
 #include "i915_drv.h"
 #include "intel_audio.h"
@@ -3131,6 +3132,9 @@ static void intel_enable_ddi(struct intel_atomic_state 
*state,
 const struct intel_crtc_state *crtc_state,
 const struct drm_connector_state *conn_state)
 {
+   struct intel_connector *connector = 
to_intel_connector(conn_state->connector);
+   struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
+
drm_WARN_ON(state->base.dev, crtc_state->has_pch_encoder);
 
if (!crtc_state->bigjoiner_slave)
@@ -3147,12 +3151,10 @@ static void intel_enable_ddi(struct intel_atomic_state 
*state,
else
intel_enable_ddi_dp(state, encoder, crtc_state, conn_state);
 
-   /* Enable hdcp if it's desired */
-   if (conn_state->content_protection ==
-   DRM_MODE_CONTENT_PROTECTION_DESIRED)
-   intel_hdcp_enable(to_intel_connector(conn_state->connector),
- crtc_state,
- (u8)conn_state->hdcp_content_type);
+   if (connector->hdcp_helper_data)
+   drm_hdcp_helper_atomic_commit(connector->hdcp_helper_data,
+   &state->base,
+   &dig_port->hdcp_mutex);
 }
 
 static void intel_disable_ddi_dp(struct intel_atomic_state *state,
@@ -3212,7 +3214,13 @@ static void intel_disable_ddi(struct intel_atomic_state 
*state,
  const struct intel_crtc_state *old_crtc_state,
  const struct drm_connector_state *old_conn_state)
 {
-   intel_hdcp_disable(to_intel_connector(old_conn_state->connector));
+   struct intel_connector *connector = 
to_intel_connector(old_conn_state->connector);
+   struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
+
+   if (connector->hdcp_helper_data)
+   drm_hdcp_helper_atomic_commit(connector->hdcp_helper_data,
+   &state->base,
+   &dig_port->hdcp_mutex);
 
if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI))
intel_disable_ddi_hdmi(state, encoder, old_crtc_state,
@@ -3243,13 +3251,18 @@ void intel_ddi_update_pipe(struct intel_atomic_state 
*state,
   const struct intel_crtc_state *crtc_state,
   const struct drm_connector_state *conn_state)
 {
+   struct intel_connector *connector = 
to_intel_connector(conn_state->connector);
+   struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 
if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) &&
!intel_encoder_is_mst(encoder))
intel_ddi_update_pipe_dp(state, encoder, crtc_state,
 conn_state);
 
-   intel_hdcp_update_pipe(state, encoder, crtc_state, conn_state);
+   if (connector->hdcp_helper_data)
+   drm_hdcp_helper_atomic_commit(connector->hdcp_helper_data,
+ &state->base,
+ &dig_port->hdcp_mutex);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/display/intel_d

[PATCH v2 06/13] drm/i915/hdcp: Retain hdcp_capable return codes

2021-09-15 Thread Sean Paul

From: Sean Paul 

The shim functions return error codes, but they are discarded in
intel_hdcp.c. This patch plumbs the return codes through so they are
properly handled.

Signed-off-by: Sean Paul 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-7-s...@poorly.run
 #v1

Changes in v2:
-None
---
 .../drm/i915/display/intel_display_debugfs.c  |  9 +++-
 drivers/gpu/drm/i915/display/intel_hdcp.c | 51 ++-
 drivers/gpu/drm/i915/display/intel_hdcp.h |  4 +-
 3 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c 
b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
index 68f4ba8c46e7..5ffd31e9908f 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
@@ -644,6 +644,7 @@ static void intel_panel_info(struct seq_file *m, struct 
intel_panel *panel)
 static void intel_hdcp_info(struct seq_file *m,
struct intel_connector *intel_connector)
 {
+   int ret;
bool hdcp_cap, hdcp2_cap;
 
if (!intel_connector->hdcp.shim) {
@@ -651,8 +652,12 @@ static void intel_hdcp_info(struct seq_file *m,
goto out;
}
 
-   hdcp_cap = intel_hdcp_capable(intel_connector);
-   hdcp2_cap = intel_hdcp2_capable(intel_connector);
+   ret = intel_hdcp_capable(intel_connector, &hdcp_cap);
+   if (ret)
+   hdcp_cap = false;
+   ret = intel_hdcp2_capable(intel_connector, &hdcp2_cap);
+   if (ret)
+   hdcp2_cap = false;
 
if (hdcp_cap)
seq_puts(m, "HDCP1.4 ");
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c 
b/drivers/gpu/drm/i915/display/intel_hdcp.c
index af166baf8c71..59275919e7b9 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -153,50 +153,49 @@ int intel_hdcp_read_valid_bksv(struct intel_digital_port 
*dig_port,
 }
 
 /* Is HDCP1.4 capable on Platform and Sink */
-bool intel_hdcp_capable(struct intel_connector *connector)
+int intel_hdcp_capable(struct intel_connector *connector, bool *capable)
 {
struct intel_digital_port *dig_port = 
intel_attached_dig_port(connector);
const struct intel_hdcp_shim *shim = connector->hdcp.shim;
-   bool capable = false;
u8 bksv[5];
 
+   *capable = false;
+
if (!shim)
-   return capable;
+   return 0;
 
-   if (shim->hdcp_capable) {
-   shim->hdcp_capable(dig_port, &capable);
-   } else {
-   if (!intel_hdcp_read_valid_bksv(dig_port, shim, bksv))
-   capable = true;
-   }
+   if (shim->hdcp_capable)
+   return shim->hdcp_capable(dig_port, capable);
+
+   if (!intel_hdcp_read_valid_bksv(dig_port, shim, bksv))
+   *capable = true;
 
-   return capable;
+   return 0;
 }
 
 /* Is HDCP2.2 capable on Platform and Sink */
-bool intel_hdcp2_capable(struct intel_connector *connector)
+int intel_hdcp2_capable(struct intel_connector *connector, bool *capable)
 {
struct intel_digital_port *dig_port = 
intel_attached_dig_port(connector);
struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
struct intel_hdcp *hdcp = &connector->hdcp;
-   bool capable = false;
+
+   *capable = false;
 
/* I915 support for HDCP2.2 */
if (!hdcp->hdcp2_supported)
-   return false;
+   return 0;
 
/* MEI interface is solid */
mutex_lock(&dev_priv->hdcp_comp_mutex);
if (!dev_priv->hdcp_comp_added ||  !dev_priv->hdcp_master) {
mutex_unlock(&dev_priv->hdcp_comp_mutex);
-   return false;
+   return 0;
}
mutex_unlock(&dev_priv->hdcp_comp_mutex);
 
/* Sink's capability for HDCP2.2 */
-   hdcp->shim->hdcp_2_2_capable(dig_port, &capable);
-
-   return capable;
+   return hdcp->shim->hdcp_2_2_capable(dig_port, capable);
 }
 
 static bool intel_hdcp_in_use(struct drm_i915_private *dev_priv,
@@ -2332,6 +2331,7 @@ int intel_hdcp_enable(struct intel_connector *connector,
struct intel_digital_port *dig_port = 
intel_attached_dig_port(connector);
struct intel_hdcp *hdcp = &connector->hdcp;
unsigned long check_link_interval = DRM_HDCP_CHECK_PERIOD_MS;
+   bool capable;
int ret = -EINVAL;
 
if (!hdcp->shim)
@@ -2350,21 +2350,27 @@ int intel_hdcp_enable(struct intel_connector *connector,
 * Considering that HDCP2.2 is more secure than HDCP1.4, If the setup
 * is capable of HDCP2.2, it is preferred to use HDCP2.2.
 */
-   if (intel_hdcp2_capable(connector)) {
+   ret = intel_hdcp2_capable(connector, &capable);
+   if (capable) {
ret = _intel_hdcp2_enable(connector);
-   if (!ret)
+   if (!ret) {
chec

[PATCH v2 05/13] drm/i915/hdcp: Consolidate HDCP setup/state cache

2021-09-15 Thread Sean Paul

From: Sean Paul 

Stick all of the setup for HDCP into a dedicated function. No functional
change, but this will facilitate moving HDCP logic into helpers.

Signed-off-by: Sean Paul 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-6-s...@poorly.run
 #v1

Changes in v2:
-None
---
 drivers/gpu/drm/i915/display/intel_hdcp.c | 52 +++
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c 
b/drivers/gpu/drm/i915/display/intel_hdcp.c
index feebafead046..af166baf8c71 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -2167,6 +2167,37 @@ static enum mei_fw_tc intel_get_mei_fw_tc(enum 
transcoder cpu_transcoder)
}
 }
 
+static int
+_intel_hdcp_setup(struct intel_connector *connector,
+ const struct intel_crtc_state *pipe_config, u8 content_type)
+{
+   struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+   struct intel_digital_port *dig_port = 
intel_attached_dig_port(connector);
+   struct intel_hdcp *hdcp = &connector->hdcp;
+   int ret = 0;
+
+   if (!connector->encoder) {
+   drm_err(&dev_priv->drm, "[%s:%d] encoder is not initialized\n",
+   connector->base.name, connector->base.base.id);
+   return -ENODEV;
+   }
+
+   hdcp->content_type = content_type;
+
+   if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)) {
+   hdcp->cpu_transcoder = pipe_config->mst_master_transcoder;
+   hdcp->stream_transcoder = pipe_config->cpu_transcoder;
+   } else {
+   hdcp->cpu_transcoder = pipe_config->cpu_transcoder;
+   hdcp->stream_transcoder = INVALID_TRANSCODER;
+   }
+
+   if (DISPLAY_VER(dev_priv) >= 12)
+   dig_port->hdcp_port_data.fw_tc = 
intel_get_mei_fw_tc(hdcp->cpu_transcoder);
+
+   return ret;
+}
+
 static int initialize_hdcp_port_data(struct intel_connector *connector,
 struct intel_digital_port *dig_port,
 const struct intel_hdcp_shim *shim)
@@ -2306,28 +2337,14 @@ int intel_hdcp_enable(struct intel_connector *connector,
if (!hdcp->shim)
return -ENOENT;
 
-   if (!connector->encoder) {
-   drm_err(&dev_priv->drm, "[%s:%d] encoder is not initialized\n",
-   connector->base.name, connector->base.base.id);
-   return -ENODEV;
-   }
-
mutex_lock(&hdcp->mutex);
mutex_lock(&dig_port->hdcp_mutex);
drm_WARN_ON(&dev_priv->drm,
hdcp->value == DRM_MODE_CONTENT_PROTECTION_ENABLED);
-   hdcp->content_type = content_type;
-
-   if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)) {
-   hdcp->cpu_transcoder = pipe_config->mst_master_transcoder;
-   hdcp->stream_transcoder = pipe_config->cpu_transcoder;
-   } else {
-   hdcp->cpu_transcoder = pipe_config->cpu_transcoder;
-   hdcp->stream_transcoder = INVALID_TRANSCODER;
-   }
 
-   if (DISPLAY_VER(dev_priv) >= 12)
-   dig_port->hdcp_port_data.fw_tc = 
intel_get_mei_fw_tc(hdcp->cpu_transcoder);
+   ret = _intel_hdcp_setup(connector, pipe_config, content_type);
+   if (ret)
+   goto out;
 
/*
 * Considering that HDCP2.2 is more secure than HDCP1.4, If the setup
@@ -2355,6 +2372,7 @@ int intel_hdcp_enable(struct intel_connector *connector,
true);
}
 
+out:
mutex_unlock(&dig_port->hdcp_mutex);
mutex_unlock(&hdcp->mutex);
return ret;
-- 
Sean Paul, Software Engineer, Google / Chromium OS

[PATCH v2 04/13] drm/hdcp: Expand HDCP helper library for enable/disable/check

2021-09-15 Thread Sean Paul

From: Sean Paul 

This patch expands upon the HDCP helper library to manage HDCP
enable, disable, and check.

Prior to this patch, the majority of the state management and sink
interaction was tucked inside the Intel driver with the understanding
that once a new platform supported HDCP we could make good decisions
about what should be centralized. With the addition of HDCP support
for Qualcomm, it's time to migrate the protocol-specific bits of HDCP
authentication, key exchange, and link checks to the HDCP helper.

In terms of functionality, this migration is 1:1 with the Intel driver,
however things are laid out a bit differently than with intel_hdcp.c,
which is why this is a separate patch from the i915 transition to the
helper. On i915, the "shim" vtable is used to account for HDMI vs. DP
vs. DP-MST differences whereas the helper library uses a LUT to
account for the register offsets and a remote read function to route
the messages. On i915, storing the sink information in the source is
done inline whereas now we use the new drm_hdcp_helper_funcs vtable
to store and fetch information to/from source hw. Finally, instead of
calling enable/disable directly from the driver, we'll leave that
decision to the helper by calling drm_hdcp_helper_atomic_commit()
from the driver. All told, this will centralize the protocol and state
handling in the helper, ensuring we collect all of our bugs^Wlogic
in one place.

Signed-off-by: Sean Paul 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-5-s...@poorly.run
 #v1

Changes in v2:
-Fixed set-but-unused variable identified by 0-day
---
 drivers/gpu/drm/drm_hdcp.c | 1103 
 include/drm/drm_hdcp.h |  191 +++
 2 files changed, 1294 insertions(+)

diff --git a/drivers/gpu/drm/drm_hdcp.c b/drivers/gpu/drm/drm_hdcp.c
index 742313ce8f6f..47c6e6923a76 100644
--- a/drivers/gpu/drm/drm_hdcp.c
+++ b/drivers/gpu/drm/drm_hdcp.c
@@ -6,15 +6,20 @@
  * Ramalingam C 
  */
 
+#include 
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -513,3 +518,1101 @@ bool drm_hdcp_atomic_check(struct drm_connector 
*connector,
return old_hdcp != new_hdcp;
 }
 EXPORT_SYMBOL(drm_hdcp_atomic_check);
+
+struct drm_hdcp_helper_data {
+   struct mutex mutex;
+   struct mutex *driver_mutex;
+
+   struct drm_connector *connector;
+   const struct drm_hdcp_helper_funcs *funcs;
+
+   u64 value;
+   unsigned int enabled_type;
+
+   struct delayed_work check_work;
+   struct work_struct prop_work;
+
+   struct drm_dp_aux *aux;
+   const struct drm_hdcp_hdcp1_receiver_reg_lut *hdcp1_lut;
+};
+
+struct drm_hdcp_hdcp1_receiver_reg_lut {
+   unsigned int bksv;
+   unsigned int ri;
+   unsigned int aksv;
+   unsigned int an;
+   unsigned int ainfo;
+   unsigned int v[5];
+   unsigned int bcaps;
+   unsigned int bcaps_mask_repeater_present;
+   unsigned int bstatus;
+};
+
+static const struct drm_hdcp_hdcp1_receiver_reg_lut drm_hdcp_hdcp1_ddc_lut = {
+   .bksv = DRM_HDCP_DDC_BKSV,
+   .ri = DRM_HDCP_DDC_RI_PRIME,
+   .aksv = DRM_HDCP_DDC_AKSV,
+   .an = DRM_HDCP_DDC_AN,
+   .ainfo = DRM_HDCP_DDC_AINFO,
+   .v = { DRM_HDCP_DDC_V_PRIME(0), DRM_HDCP_DDC_V_PRIME(1),
+  DRM_HDCP_DDC_V_PRIME(2), DRM_HDCP_DDC_V_PRIME(3),
+  DRM_HDCP_DDC_V_PRIME(4) },
+   .bcaps = DRM_HDCP_DDC_BCAPS,
+   .bcaps_mask_repeater_present = DRM_HDCP_DDC_BCAPS_REPEATER_PRESENT,
+   .bstatus = DRM_HDCP_DDC_BSTATUS,
+};
+
+static const struct drm_hdcp_hdcp1_receiver_reg_lut drm_hdcp_hdcp1_dpcd_lut = {
+   .bksv = DP_AUX_HDCP_BKSV,
+   .ri = DP_AUX_HDCP_RI_PRIME,
+   .aksv = DP_AUX_HDCP_AKSV,
+   .an = DP_AUX_HDCP_AN,
+   .ainfo = DP_AUX_HDCP_AINFO,
+   .v = { DP_AUX_HDCP_V_PRIME(0), DP_AUX_HDCP_V_PRIME(1),
+  DP_AUX_HDCP_V_PRIME(2), DP_AUX_HDCP_V_PRIME(3),
+  DP_AUX_HDCP_V_PRIME(4) },
+   .bcaps = DP_AUX_HDCP_BCAPS,
+   .bcaps_mask_repeater_present = DP_BCAPS_REPEATER_PRESENT,
+
+   /*
+* For some reason the HDMI and DP HDCP specs call this register
+* definition by different names. In the HDMI spec, it's called BSTATUS,
+* but in DP it's called BINFO.
+*/
+   .bstatus = DP_AUX_HDCP_BINFO,
+};
+
+static int drm_hdcp_remote_ddc_read(struct i2c_adapter *i2c,
+   unsigned int offset, u8 *value, size_t len)
+{
+   int ret;
+   u8 start = offset & 0xff;
+   struct i2c_msg msgs[] = {
+   {
+   .addr = DRM_HDCP_DDC_ADDR,
+   .flags = 0,
+   .len = 1,
+   .buf = &start,
+   },
+   {
+   .addr = DRM_HDCP_DDC_ADDR,
+   .flags = I2C_M_RD,
+

[PATCH v2 03/13] drm/hdcp: Update property value on content type and user changes

2021-09-15 Thread Sean Paul

From: Sean Paul 

This patch updates the connector's property value in 2 cases which were
previously missed:

1- Content type changes. The value should revert back to DESIRED from
   ENABLED in case the driver must re-authenticate the link due to the
   new content type.

2- Userspace sets value to DESIRED while ENABLED. In this case, the
   value should be reset immediately to ENABLED since the link is
   actively being encrypted.

To accommodate these changes, I've split up the conditionals to make
things a bit more clear (as much as one can with this mess of state).

Signed-off-by: Sean Paul 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-4-s...@poorly.run
 #v1

Changes in v2:
-None
---
 drivers/gpu/drm/drm_hdcp.c | 26 +-
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/drm_hdcp.c b/drivers/gpu/drm/drm_hdcp.c
index dd8fa91c51d6..742313ce8f6f 100644
--- a/drivers/gpu/drm/drm_hdcp.c
+++ b/drivers/gpu/drm/drm_hdcp.c
@@ -487,21 +487,29 @@ bool drm_hdcp_atomic_check(struct drm_connector 
*connector,
return true;
 
/*
-* Nothing to do if content type is unchanged and one of:
-*  - state didn't change
+* Content type changes require an HDCP disable/enable cycle.
+*/
+   if (new_conn_state->hdcp_content_type != 
old_conn_state->hdcp_content_type) {
+   new_conn_state->content_protection =
+   DRM_MODE_CONTENT_PROTECTION_DESIRED;
+   return true;
+   }
+
+   /*
+* Ignore meaningless state changes:
 *  - HDCP was activated since the last commit
-*  - attempting to set to desired while already enabled
+*  - Attempting to set to desired while already enabled
 */
-   if (old_hdcp == new_hdcp ||
-   (old_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED &&
+   if ((old_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED &&
 new_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED) ||
(old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED &&
 new_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED)) {
-   if (old_conn_state->hdcp_content_type ==
-   new_conn_state->hdcp_content_type)
-   return false;
+   new_conn_state->content_protection =
+   DRM_MODE_CONTENT_PROTECTION_ENABLED;
+return false;
}
 
-   return true;
+   /* Finally, if state changes, we need action */
+   return old_hdcp != new_hdcp;
 }
 EXPORT_SYMBOL(drm_hdcp_atomic_check);
-- 
Sean Paul, Software Engineer, Google / Chromium OS

[PATCH v2 02/13] drm/hdcp: Avoid changing crtc state in hdcp atomic check

2021-09-15 Thread Sean Paul

From: Sean Paul 

Instead of forcing a modeset in the hdcp atomic check, simply return
true if the content protection value is changing and let the driver
decide whether a modeset is required or not.

Signed-off-by: Sean Paul 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-3-s...@poorly.run
 #v1

Changes in v2:
-None
---
 drivers/gpu/drm/drm_hdcp.c  | 33 +++--
 drivers/gpu/drm/i915/display/intel_atomic.c |  5 ++--
 include/drm/drm_hdcp.h  |  2 +-
 3 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/drm_hdcp.c b/drivers/gpu/drm/drm_hdcp.c
index 522326b03e66..dd8fa91c51d6 100644
--- a/drivers/gpu/drm/drm_hdcp.c
+++ b/drivers/gpu/drm/drm_hdcp.c
@@ -430,11 +430,14 @@ EXPORT_SYMBOL(drm_hdcp_update_content_protection);
  * @connector: drm_connector on which content protection state needs an update
  *
  * This function can be used by display drivers to perform an atomic check on 
the
- * hdcp state elements. If hdcp state has changed, this function will set
- * mode_changed on the crtc driving the connector so it can update its hardware
- * to match the hdcp state.
+ * hdcp state elements. If hdcp state has changed in a manner which requires 
the
+ * driver to enable or disable content protection, this function will return
+ * true.
+ *
+ * Returns:
+ * true if the driver must enable/disable hdcp, false otherwise
  */
-void drm_hdcp_atomic_check(struct drm_connector *connector,
+bool drm_hdcp_atomic_check(struct drm_connector *connector,
   struct drm_atomic_state *state)
 {
struct drm_connector_state *new_conn_state, *old_conn_state;
@@ -452,10 +455,12 @@ void drm_hdcp_atomic_check(struct drm_connector 
*connector,
 * If the connector is being disabled with CP enabled, mark it
 * desired so it's re-enabled when the connector is brought back
 */
-   if (old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED)
+   if (old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED) {
new_conn_state->content_protection =
DRM_MODE_CONTENT_PROTECTION_DESIRED;
-   return;
+   return true;
+   }
+   return false;
}
 
new_crtc_state = drm_atomic_get_new_crtc_state(state,
@@ -467,9 +472,19 @@ void drm_hdcp_atomic_check(struct drm_connector *connector,
*/
if (drm_atomic_crtc_needs_modeset(new_crtc_state) &&
(old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED &&
-new_hdcp != DRM_MODE_CONTENT_PROTECTION_UNDESIRED))
+new_hdcp != DRM_MODE_CONTENT_PROTECTION_UNDESIRED)) {
new_conn_state->content_protection =
DRM_MODE_CONTENT_PROTECTION_DESIRED;
+   return true;
+   }
+
+   /*
+* Coming back from disable or changing CRTC with DESIRED state requires
+* that the driver try CP enable.
+*/
+   if (new_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED &&
+   new_conn_state->crtc != old_conn_state->crtc)
+   return true;
 
/*
 * Nothing to do if content type is unchanged and one of:
@@ -484,9 +499,9 @@ void drm_hdcp_atomic_check(struct drm_connector *connector,
 new_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED)) {
if (old_conn_state->hdcp_content_type ==
new_conn_state->hdcp_content_type)
-   return;
+   return false;
}
 
-   new_crtc_state->mode_changed = true;
+   return true;
 }
 EXPORT_SYMBOL(drm_hdcp_atomic_check);
diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c 
b/drivers/gpu/drm/i915/display/intel_atomic.c
index 1e306e8427ec..c7b5470c40aa 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -122,8 +122,6 @@ int intel_digital_connector_atomic_check(struct 
drm_connector *conn,
to_intel_digital_connector_state(old_state);
struct drm_crtc_state *crtc_state;
 
-   drm_hdcp_atomic_check(conn, state);
-
if (!new_state->crtc)
return 0;
 
@@ -139,7 +137,8 @@ int intel_digital_connector_atomic_check(struct 
drm_connector *conn,
new_conn_state->base.picture_aspect_ratio != 
old_conn_state->base.picture_aspect_ratio ||
new_conn_state->base.content_type != 
old_conn_state->base.content_type ||
new_conn_state->base.scaling_mode != 
old_conn_state->base.scaling_mode ||
-   !drm_connector_atomic_hdr_metadata_equal(old_state, new_state))
+   !drm_connector_atomic_hdr_metadata_equal(old_state, new_state) ||
+   drm_hdcp_atomic_check(conn, state))
crtc_state->mode_changed = true;
 
return 0;
diff --git a/include/drm/drm_hdcp.h b/include/drm/drm_hdcp.h
index d

[PATCH v2 01/13] drm/hdcp: Add drm_hdcp_atomic_check()

2021-09-15 Thread Sean Paul

From: Sean Paul 

This patch moves the hdcp atomic check from i915 to drm_hdcp so other
drivers can use it. No functional changes, just cleaned up some of the
code when moving it over.

Signed-off-by: Sean Paul 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20210913175747.47456-2-s...@poorly.run
 #v1

Changes in v2:
-None
---
 drivers/gpu/drm/drm_hdcp.c  | 71 -
 drivers/gpu/drm/i915/display/intel_atomic.c |  4 +-
 drivers/gpu/drm/i915/display/intel_hdcp.c   | 47 --
 drivers/gpu/drm/i915/display/intel_hdcp.h   |  3 -
 include/drm/drm_hdcp.h  |  3 +
 5 files changed, 75 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/drm_hdcp.c b/drivers/gpu/drm/drm_hdcp.c
index ca9b8f697202..522326b03e66 100644
--- a/drivers/gpu/drm/drm_hdcp.c
+++ b/drivers/gpu/drm/drm_hdcp.c
@@ -13,13 +13,14 @@
 #include 
 #include 
 
+#include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
-#include 
 
 #include "drm_internal.h"
 
@@ -421,3 +422,71 @@ void drm_hdcp_update_content_protection(struct 
drm_connector *connector,
 dev->mode_config.content_protection_property);
 }
 EXPORT_SYMBOL(drm_hdcp_update_content_protection);
+
+/**
+ * drm_hdcp_atomic_check - Helper for drivers to call during 
connector->atomic_check
+ *
+ * @state: pointer to the atomic state being checked
+ * @connector: drm_connector on which content protection state needs an update
+ *
+ * This function can be used by display drivers to perform an atomic check on 
the
+ * hdcp state elements. If hdcp state has changed, this function will set
+ * mode_changed on the crtc driving the connector so it can update its hardware
+ * to match the hdcp state.
+ */
+void drm_hdcp_atomic_check(struct drm_connector *connector,
+  struct drm_atomic_state *state)
+{
+   struct drm_connector_state *new_conn_state, *old_conn_state;
+   struct drm_crtc_state *new_crtc_state;
+   u64 old_hdcp, new_hdcp;
+
+   old_conn_state = drm_atomic_get_old_connector_state(state, connector);
+   old_hdcp = old_conn_state->content_protection;
+
+   new_conn_state = drm_atomic_get_new_connector_state(state, connector);
+   new_hdcp = new_conn_state->content_protection;
+
+   if (!new_conn_state->crtc) {
+   /*
+* If the connector is being disabled with CP enabled, mark it
+* desired so it's re-enabled when the connector is brought back
+*/
+   if (old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED)
+   new_conn_state->content_protection =
+   DRM_MODE_CONTENT_PROTECTION_DESIRED;
+   return;
+   }
+
+   new_crtc_state = drm_atomic_get_new_crtc_state(state,
+  new_conn_state->crtc);
+   /*
+   * Fix the HDCP uapi content protection state in case of modeset.
+   * FIXME: As per HDCP content protection property uapi doc, an uevent()
+   * need to be sent if there is transition from ENABLED->DESIRED.
+   */
+   if (drm_atomic_crtc_needs_modeset(new_crtc_state) &&
+   (old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED &&
+new_hdcp != DRM_MODE_CONTENT_PROTECTION_UNDESIRED))
+   new_conn_state->content_protection =
+   DRM_MODE_CONTENT_PROTECTION_DESIRED;
+
+   /*
+* Nothing to do if content type is unchanged and one of:
+*  - state didn't change
+*  - HDCP was activated since the last commit
+*  - attempting to set to desired while already enabled
+*/
+   if (old_hdcp == new_hdcp ||
+   (old_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED &&
+new_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED) ||
+   (old_hdcp == DRM_MODE_CONTENT_PROTECTION_ENABLED &&
+new_hdcp == DRM_MODE_CONTENT_PROTECTION_DESIRED)) {
+   if (old_conn_state->hdcp_content_type ==
+   new_conn_state->hdcp_content_type)
+   return;
+   }
+
+   new_crtc_state->mode_changed = true;
+}
+EXPORT_SYMBOL(drm_hdcp_atomic_check);
diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c 
b/drivers/gpu/drm/i915/display/intel_atomic.c
index b4e7ac51aa31..1e306e8427ec 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -32,13 +32,13 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "intel_atomic.h"
 #include "intel_cdclk.h"
 #include "intel_display_types.h"
 #include "intel_global_state.h"
-#include "intel_hdcp.h"
 #include "intel_psr.h"
 #include "skl_universal_plane.h"
 
@@ -122,7 +122,7 @@ int intel_digital_connector_atomic_check(struct 
drm_connector *conn,
to_intel_digital_connector_state(old_state);
struct drm_crtc_state *crtc_state;
 
-

Re: [Intel-gfx] [PATCH 12/27] drm/i915/guc: Add multi-lrc context registration

2021-09-15 Thread Matthew Brost

On Wed, Sep 15, 2021 at 01:23:19PM -0700, John Harrison wrote:
> On 9/15/2021 12:31, Matthew Brost wrote:
> > On Wed, Sep 15, 2021 at 12:21:35PM -0700, John Harrison wrote:
> > > On 8/20/2021 15:44, Matthew Brost wrote:
> > > > Add multi-lrc context registration H2G. In addition a workqueue and
> > > > process descriptor are setup during multi-lrc context registration as
> > > > these data structures are needed for multi-lrc submission.
> > > > 
> > > > Signed-off-by: Matthew Brost 
> > > > ---
> > > >drivers/gpu/drm/i915/gt/intel_context_types.h |  12 ++
> > > >drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +
> > > >drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 +-
> > > >.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 109 
> > > > +-
> > > >4 files changed, 126 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
> > > > b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > > > index 0fafc178cf2c..6f567ebeb039 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > > > @@ -232,8 +232,20 @@ struct intel_context {
> > > > /** @parent: pointer to parent if child */
> > > > struct intel_context *parent;
> > > > +
> > > > +   /** @guc_wqi_head: head pointer in work queue */
> > > > +   u16 guc_wqi_head;
> > > > +   /** @guc_wqi_tail: tail pointer in work queue */
> > > > +   u16 guc_wqi_tail;
> > > > +
> > > These should be in the 'guc_state' sub-struct? Would be good to keep all 
> > > GuC
> > > specific content in one self-contained struct. Especially given the other
> > > > child/parent fields are not going to be guc_ prefixed any more.
> > > 
> > Right now I have everything in guc_state protected by guc_state.lock,
> > these fields are not protected by this lock. IMO it is better to use a
> > different sub-structure for the parallel fields (even if anonymous).
> Hmm, I still think it is bad to be scattering back-end specific fields
> amongst regular fields. The GuC patches include a whole bunch of complaints
> about execlist back-end specific stuff leaking through to the higher levels,
> we really shouldn't be guilty of doing the same with GuC if at all possible.
> At the very least, the GuC specific fields should be grouped together at the
> end of the struct rather than inter-mingled.
> 

How about 2 different sub-structures - parallel (shared) & guc_parallel (guc 
specific)?

> > 
> > > > /** @guc_number_children: number of children if parent 
> > > > */
> > > > u8 guc_number_children;
> > > > +
> > > > +   /**
> > > > +* @parent_page: page in context used by parent for 
> > > > work queue,
> > > Maybe 'page in context record'? Otherwise, exactly what 'context' is meant
> > > > here? It isn't the 'struct intel_context'. The context record is saved as
> > > 'ce->state' / 'ce->lrc_reg_state', yes? Is it possible to link to either 
> > > of
> > It is the page in ce->state / page minus LRC reg offset in
> > > ce->lrc_reg_state. Will update the commit to make that clear.
> > 
> > > > those fields? Probably not given that they don't appear to have any 
> > > kerneldoc
> > > description :(. Maybe add that in too :).
> > > 
> > > > +* work queue descriptor
> > > Later on, it is described as 'process descriptor and work queue'. It would
> > > be good to be consistent.
> > > 
> > Yep. Will fix.
> > 
> > > > +*/
> > > > +   u8 parent_page;
> > > > };
> > > >#ifdef CONFIG_DRM_I915_SELFTEST
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> > > > b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > > > index bb4af4977920..0ddbad4e062a 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > > > @@ -861,6 +861,11 @@ __lrc_alloc_state(struct intel_context *ce, struct 
> > > > intel_engine_cs *engine)
> > > > context_size += PAGE_SIZE;
> > > > }
> > > > +   if (intel_context_is_parent(ce)) {
> > > > +   ce->parent_page = context_size / PAGE_SIZE;
> > > > +   context_size += PAGE_SIZE;
> > > > +   }
> > > > +
> > > > obj = i915_gem_object_create_lmem(engine->i915, context_size, 
> > > > 0);
> > > > if (IS_ERR(obj))
> > > > obj = i915_gem_object_create_shmem(engine->i915, 
> > > > context_size);
> > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
> > > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > > > index fa4be13c8854..0e600a3b8f1e 100644
> > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > > > @@ -52,7 +52,7 @@
> > > >#define GUC_DOORBELL_INVALID 256
> > > > -#define GUC_WQ_SIZE(PAGE_SIZE * 2)
> > > > +#define GUC_WQ

[PATCH v2 00/13] drm/hdcp: Pull HDCP auth/exchange/check into helpers

2021-09-15 Thread Sean Paul

From: Sean Paul 

Hello again,
This is the second version of the HDCP helper patchset. See version 1
here: https://patchwork.freedesktop.org/series/94623/

In this second version, I've fixed up the oopsies exposed by 0-day and
yamllint and incorporated early review feedback from the dt/dts reviews.

Please take a look,

Sean

Sean Paul (13):
  drm/hdcp: Add drm_hdcp_atomic_check()
  drm/hdcp: Avoid changing crtc state in hdcp atomic check
  drm/hdcp: Update property value on content type and user changes
  drm/hdcp: Expand HDCP helper library for enable/disable/check
  drm/i915/hdcp: Consolidate HDCP setup/state cache
  drm/i915/hdcp: Retain hdcp_capable return codes
  drm/i915/hdcp: Use HDCP helpers for i915
  drm/msm/dpu_kms: Re-order dpu includes
  drm/msm/dpu: Remove useless checks in dpu_encoder
  drm/msm/dpu: Remove encoder->enable() hack
  drm/msm/dp: Re-order dp_audio_put in deinit_sub_modules
  dt-bindings: msm/dp: Add bindings for HDCP registers
  drm/msm: Implement HDCP 1.x using the new drm HDCP helpers

 .../bindings/display/msm/dp-controller.yaml   |7 +-
 arch/arm64/boot/dts/qcom/sc7180.dtsi  |4 +-
 drivers/gpu/drm/drm_hdcp.c| 1197 -
 drivers/gpu/drm/i915/display/intel_atomic.c   |7 +-
 drivers/gpu/drm/i915/display/intel_ddi.c  |   29 +-
 .../drm/i915/display/intel_display_debugfs.c  |   11 +-
 .../drm/i915/display/intel_display_types.h|   58 +-
 drivers/gpu/drm/i915/display/intel_dp_hdcp.c  |  345 ++---
 drivers/gpu/drm/i915/display/intel_dp_mst.c   |   17 +-
 drivers/gpu/drm/i915/display/intel_hdcp.c | 1011 +++---
 drivers/gpu/drm/i915/display/intel_hdcp.h |   35 +-
 drivers/gpu/drm/i915/display/intel_hdmi.c |  256 ++--
 drivers/gpu/drm/msm/Makefile  |1 +
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c   |   17 +-
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c   |   30 +-
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h   |2 -
 drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h |4 -
 drivers/gpu/drm/msm/dp/dp_debug.c |   49 +-
 drivers/gpu/drm/msm/dp/dp_debug.h |6 +-
 drivers/gpu/drm/msm/dp/dp_display.c   |   47 +-
 drivers/gpu/drm/msm/dp/dp_display.h   |5 +
 drivers/gpu/drm/msm/dp/dp_drm.c   |   68 +-
 drivers/gpu/drm/msm/dp/dp_drm.h   |5 +
 drivers/gpu/drm/msm/dp/dp_hdcp.c  |  433 ++
 drivers/gpu/drm/msm/dp/dp_hdcp.h  |   27 +
 drivers/gpu/drm/msm/dp/dp_parser.c|   22 +-
 drivers/gpu/drm/msm/dp/dp_parser.h|4 +
 drivers/gpu/drm/msm/dp/dp_reg.h   |   44 +-
 drivers/gpu/drm/msm/msm_atomic.c  |   15 +
 include/drm/drm_hdcp.h|  194 +++
 30 files changed, 2561 insertions(+), 1389 deletions(-)
 create mode 100644 drivers/gpu/drm/msm/dp/dp_hdcp.c
 create mode 100644 drivers/gpu/drm/msm/dp/dp_hdcp.h

-- 
Sean Paul, Software Engineer, Google / Chromium OS

Re: [PATCH 4/9] drm/privacy-screen: Add notifier support

2021-09-15 Thread Lyude Paul

On Mon, 2021-09-06 at 09:35 +0200, Hans de Goede wrote:
> Add support for privacy-screen consumers to register a notifier to
> be notified of external (e.g. done by the hw itself on a hotkey press)
> state changes.
> 
> Reviewed-by: Emil Velikov 
> Signed-off-by: Hans de Goede 
> ---
>  drivers/gpu/drm/drm_privacy_screen.c  | 67 +++
>  include/drm/drm_privacy_screen_consumer.h | 15 +
>  include/drm/drm_privacy_screen_driver.h   |  4 ++
>  3 files changed, 86 insertions(+)
> 
> diff --git a/drivers/gpu/drm/drm_privacy_screen.c
> b/drivers/gpu/drm/drm_privacy_screen.c
> index 294a09194bfb..7a5f878c3171 100644
> --- a/drivers/gpu/drm/drm_privacy_screen.c
> +++ b/drivers/gpu/drm/drm_privacy_screen.c
> @@ -255,6 +255,49 @@ void drm_privacy_screen_get_state(struct
> drm_privacy_screen *priv,
>  }
>  EXPORT_SYMBOL(drm_privacy_screen_get_state);
>  
> +/**
> + * drm_privacy_screen_register_notifier - register a notifier
> + * @priv: Privacy screen to register the notifier with
> + * @nb: Notifier-block for the notifier to register
> + *
> + * Register a notifier with the privacy-screen to be notified of changes
> made
> + * to the privacy-screen state from outside of the privacy-screen class.
> + * E.g. the state may be changed by the hardware itself in response to a
> + * hotkey press.
> + *
> + * The notifier is called with no locks held. The new hw_state and sw_state
> + * can be retrieved using the drm_privacy_screen_get_state() function.
> + * A pointer to the drm_privacy_screen's struct is passed as the void *data
> + * argument of the notifier_block's notifier_call.
> + *
> + * The notifier will NOT be called when changes are made through
> + * drm_privacy_screen_set_sw_state(). It is only called for external
> changes.
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int drm_privacy_screen_register_notifier(struct drm_privacy_screen *priv,
> +    struct notifier_block *nb)
> +{
> +   return blocking_notifier_chain_register(&priv->notifier_head, nb);
> +}
> +EXPORT_SYMBOL(drm_privacy_screen_register_notifier);
> +
> +/**
> + * drm_privacy_screen_unregister_notifier - unregister a notifier
> + * @priv: Privacy screen to unregister the notifier from
> + * @nb: Notifier-block for the notifier to unregister
> + *
> + * Unregister a notifier registered with
> drm_privacy_screen_register_notifier().
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int drm_privacy_screen_unregister_notifier(struct drm_privacy_screen *priv,
> +  struct notifier_block *nb)
> +{
> +   return blocking_notifier_chain_unregister(&priv->notifier_head, nb);
> +}
> +EXPORT_SYMBOL(drm_privacy_screen_unregister_notifier);
> +
>  /*** drm_privacy_screen_driver.h functions ***/
>  
>  static ssize_t sw_state_show(struct device *dev,
> @@ -352,6 +395,7 @@ struct drm_privacy_screen *drm_privacy_screen_register(
> return ERR_PTR(-ENOMEM);
>  
> mutex_init(&priv->lock);
> +   BLOCKING_INIT_NOTIFIER_HEAD(&priv->notifier_head);
>  
> priv->dev.class = drm_class;
> priv->dev.type = &drm_privacy_screen_type;
> @@ -399,3 +443,26 @@ void drm_privacy_screen_unregister(struct
> drm_privacy_screen *priv)
> device_unregister(&priv->dev);
>  }
>  EXPORT_SYMBOL(drm_privacy_screen_unregister);
> +
> +/**
> + * drm_privacy_screen_call_notifier_chain - notify consumers of state
> change
> + * @priv: Privacy screen to register the notifier with
> + *
> + * A privacy-screen provider driver can call this function upon external
> + * changes to the privacy-screen state. E.g. the state may be changed by
> the
> + * hardware itself in response to a hotkey press.
> + * This function must be called without holding the privacy-screen lock.
> + * the driver must update sw_state and hw_state to reflect the new state
> before
> + * calling this function.
> + * The expected behavior from the driver upon receiving an external state
> + * change event is: 1. Take the lock; 2. Update sw_state and hw_state;
> + * 3. Release the lock. 4. Call drm_privacy_screen_call_notifier_chain().
> + */
> +void drm_privacy_screen_call_notifier_chain(struct drm_privacy_screen
> *priv)
> +{
> +   if (WARN_ON(mutex_is_locked(&priv->lock)))
> +   return;

Are we sure about this check? mutex_is_locked() checks whether a mutex is
locked by anyone, not just us. So this seems like it would cause us to
WARN_ON() and abort if anyone else (not just ourselves) is holding the lock to
read the privacy screen state.

> +
> +   blocking_notifier_call_chain(&priv->notifier_head, 0, priv);
> +}
> +EXPORT_SYMBOL(drm_privacy_screen_call_notifier_chain);
> diff --git a/include/drm/drm_privacy_screen_consumer.h
> b/include/drm/drm_privacy_screen_consumer.h
> index 0cbd23b0453d..7f66a90d15b7 100644
> --- a/include/drm/drm_privacy_screen_consumer.h
> +++ b/include/drm/drm_privacy_scree

Re: [Intel-gfx] [PATCH 12/27] drm/i915/guc: Add multi-lrc context registration

2021-09-15 Thread John Harrison


On 9/15/2021 12:31, Matthew Brost wrote:

On Wed, Sep 15, 2021 at 12:21:35PM -0700, John Harrison wrote:

On 8/20/2021 15:44, Matthew Brost wrote:

Add multi-lrc context registration H2G. In addition a workqueue and
process descriptor are setup during multi-lrc context registration as
these data structures are needed for multi-lrc submission.

Signed-off-by: Matthew Brost 
---
   drivers/gpu/drm/i915/gt/intel_context_types.h |  12 ++
   drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +
   drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 +-
   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 109 +-
   4 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 0fafc178cf2c..6f567ebeb039 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -232,8 +232,20 @@ struct intel_context {
/** @parent: pointer to parent if child */
struct intel_context *parent;
+
+   /** @guc_wqi_head: head pointer in work queue */
+   u16 guc_wqi_head;
+   /** @guc_wqi_tail: tail pointer in work queue */
+   u16 guc_wqi_tail;
+

These should be in the 'guc_state' sub-struct? Would be good to keep all GuC
specific content in one self-contained struct. Especially given the other
child/parent fields are not going to be guc_ prefixed any more.


Right now I have everything in guc_state protected by guc_state.lock,
these fields are not protected by this lock. IMO it is better to use a
different sub-structure for the parallel fields (even if anonymous).
Hmm, I still think it is bad to be scattering back-end specific fields 
amongst regular fields. The GuC patches include a whole bunch of 
complaints about execlist back-end specific stuff leaking through to the 
higher levels, we really shouldn't be guilty of doing the same with GuC 
if at all possible. At the very least, the GuC specific fields should be 
grouped together at the end of the struct rather than inter-mingled.





/** @guc_number_children: number of children if parent */
u8 guc_number_children;
+
+   /**
+* @parent_page: page in context used by parent for work queue,

Maybe 'page in context record'? Otherwise, exactly what 'context' is meant
here? It isn't the 'struct intel_context'. The context record is saved as
'ce->state' / 'ce->lrc_reg_state', yes? Is it possible to link to either of

It is the page in ce->state / page minus LRC reg offset in
ce->lrc_reg_state. Will update the commit to make that clear.


those fields? Probably not given that they don't appear to have any kerneldoc
description :(. Maybe add that in too :).


+* work queue descriptor

Later on, it is described as 'process descriptor and work queue'. It would
be good to be consistent.


Yep. Will fix.


+*/
+   u8 parent_page;
};
   #ifdef CONFIG_DRM_I915_SELFTEST
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index bb4af4977920..0ddbad4e062a 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -861,6 +861,11 @@ __lrc_alloc_state(struct intel_context *ce, struct 
intel_engine_cs *engine)
context_size += PAGE_SIZE;
}
+   if (intel_context_is_parent(ce)) {
+   ce->parent_page = context_size / PAGE_SIZE;
+   context_size += PAGE_SIZE;
+   }
+
obj = i915_gem_object_create_lmem(engine->i915, context_size, 0);
if (IS_ERR(obj))
obj = i915_gem_object_create_shmem(engine->i915, context_size);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index fa4be13c8854..0e600a3b8f1e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -52,7 +52,7 @@
   #define GUC_DOORBELL_INVALID 256
-#define GUC_WQ_SIZE(PAGE_SIZE * 2)
+#define GUC_WQ_SIZE(PAGE_SIZE / 2)

Is this size actually dictated by the GuC API? Or is it just a driver level
decision? If the latter, shouldn't this be below instead?


Driver level decision. What exactly do you mean by below?
The next chunk of the patch - where WQ_OFFSET is defined and the whole 
caboodle is described.


  

   /* Work queue item header definitions */
   #define WQ_STATUS_ACTIVE 1
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 14b24298cdd7..dbcb9ab28a9a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -340,6 +340,39 @@ static struct i915_priolist *to_priolist(struct rb_node 
*rb)
return rb_entry(rb, struct i915_priolist, node)

Re: [Intel-gfx] [PATCH 14/27] drm/i915/guc: Assign contexts in parent-child relationship consecutive guc_ids

2021-09-15 Thread John Harrison


On 8/20/2021 15:44, Matthew Brost wrote:

Assign contexts in parent-child relationship consecutive guc_ids. This
is accomplished by partitioning guc_id space between ones that need to
be consecutive (1/16 available guc_ids) and ones that do not (15/16 of
available guc_ids). The consecutive search is implemented via the bitmap
API.

This is a precursor to the full GuC multi-lrc implementation but aligns
to how GuC multi-lrc interface is defined - guc_ids must be consecutive
when using the GuC multi-lrc interface.

v2:
  (Daniel Vetter)
   - Explicitly state why we assign consecutive guc_ids

Signed-off-by: Matthew Brost 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc.h|   6 +-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 107 +-
  2 files changed, 86 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 023953e77553..3f95b1b4f15c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -61,9 +61,13 @@ struct intel_guc {
 */
spinlock_t lock;
/**
-* @guc_ids: used to allocate new guc_ids
+* @guc_ids: used to allocate new guc_ids, single-lrc
 */
struct ida guc_ids;
+   /**
+* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
+*/
+   unsigned long *guc_ids_bitmap;
/** @num_guc_ids: number of guc_ids that can be used */
u32 num_guc_ids;
/** @max_guc_ids: max number of guc_ids that can be used */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 00d54bb00bfb..e9dfd43d29a0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -125,6 +125,18 @@ guc_create_virtual(struct intel_engine_cs **siblings, 
unsigned int count);
  
  #define GUC_REQUEST_SIZE 64 /* bytes */
  
+/*

+ * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
+ * per the GuC submission interface. A different allocation algorithm is used
+ * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
The 'hence' clause seems to be attached to the wrong reason. The id 
space is partitioned because of the contiguous vs random requirements of 
multi vs single LRC, not because a different allocator is used in one 
partition vs the other.



+ * partition the guc_id space. We believe the number of multi-lrc contexts in
+ * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
+ * multi-lrc.
+ */
+#define NUMBER_MULTI_LRC_GUC_ID(guc) \
+   ((guc)->submission_state.num_guc_ids / 16 > 32 ? \
+(guc)->submission_state.num_guc_ids / 16 : 32)
+
  /*
   * Below is a set of functions which control the GuC scheduling state which
   * require a lock.
@@ -1176,6 +1188,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
intel_gt_pm_unpark_work_init(&guc->submission_state.destroyed_worker,
 destroyed_worker_func);
+   guc->submission_state.guc_ids_bitmap =
+   bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
+   if (!guc->submission_state.guc_ids_bitmap)
+   return -ENOMEM;
  
  	return 0;

  }
@@ -1188,6 +1204,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
guc_lrc_desc_pool_destroy(guc);
guc_flush_destroyed_contexts(guc);
i915_sched_engine_put(guc->sched_engine);
+   bitmap_free(guc->submission_state.guc_ids_bitmap);
  }
  
  static void queue_request(struct i915_sched_engine *sched_engine,

@@ -1239,18 +1256,43 @@ static void guc_submit_request(struct i915_request *rq)
spin_unlock_irqrestore(&sched_engine->lock, flags);
  }
  
-static int new_guc_id(struct intel_guc *guc)

+static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
  {
-   return ida_simple_get(&guc->submission_state.guc_ids, 0,
- guc->submission_state.num_guc_ids, GFP_KERNEL |
- __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+   int ret;
+
+   GEM_BUG_ON(intel_context_is_child(ce));
+
+   if (intel_context_is_parent(ce))
+   ret = 
bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
+ NUMBER_MULTI_LRC_GUC_ID(guc),
+ 
order_base_2(ce->guc_number_children
+  + 1));
+   else
+   ret = ida_simple_get(&guc->submission_state.guc_ids,
+NUMBER_MULTI_LRC_GUC_ID(guc),
+guc->submission_state.num_guc_ids,
+

Re: [PATCH 2/9] drm: Add privacy-screen class (v3)

2021-09-15 Thread Lyude Paul

On Mon, 2021-09-06 at 09:35 +0200, Hans de Goede wrote:
> On some new laptops the LCD panel has a builtin electronic privacy-screen.
> We want to export this functionality as a property on the drm connector
> object. But often this functionality is not exposed on the GPU but on some
> other (ACPI) device.
> 
> This commit adds a privacy-screen class allowing the driver for these
> other devices to register themselves as a privacy-screen provider; and
> allowing the drm/kms code to get a privacy-screen provider associated
> with a specific GPU/connector combo.
> 
> Changes in v2:
> - Make CONFIG_DRM_PRIVACY_SCREEN a bool which controls if the drm_privacy
>   code gets built as part of the main drm module rather than making it
>   a tristate which builds its own module.
> - Add a #if IS_ENABLED(CONFIG_DRM_PRIVACY_SCREEN) check to
>   drm_privacy_screen_consumer.h and define stubs when the check fails.
>   Together these 2 changes fix several dependency issues.
> - Remove module related code now that this is part of the main drm.ko
> - Use drm_class as class for the privacy-screen devices instead of
>   adding a separate class for this
> 
> Changes in v3:
> - Make the static inline drm_privacy_screen_get_state() stub set sw_state
>   and hw_state to PRIVACY_SCREEN_DISABLED to squelch an uninitialized
>   variable warning when CONFIG_DRM_PRIVACY_SCREEN is not set
> 
> Reviewed-by: Emil Velikov 
> Signed-off-by: Hans de Goede 
> ---
>  Documentation/gpu/drm-kms-helpers.rst |  15 +
>  MAINTAINERS   |   8 +
>  drivers/gpu/drm/Kconfig   |   4 +
>  drivers/gpu/drm/Makefile  |   1 +
>  drivers/gpu/drm/drm_drv.c |   4 +
>  drivers/gpu/drm/drm_privacy_screen.c  | 401 ++
>  include/drm/drm_privacy_screen_consumer.h |  50 +++
>  include/drm/drm_privacy_screen_driver.h   |  80 +
>  include/drm/drm_privacy_screen_machine.h  |  41 +++
>  9 files changed, 604 insertions(+)
>  create mode 100644 drivers/gpu/drm/drm_privacy_screen.c
>  create mode 100644 include/drm/drm_privacy_screen_consumer.h
>  create mode 100644 include/drm/drm_privacy_screen_driver.h
>  create mode 100644 include/drm/drm_privacy_screen_machine.h
> 
> diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-
> kms-helpers.rst
> index 389892f36185..5d8715d2f998 100644
> --- a/Documentation/gpu/drm-kms-helpers.rst
> +++ b/Documentation/gpu/drm-kms-helpers.rst
> @@ -423,3 +423,18 @@ Legacy CRTC/Modeset Helper Functions Reference
>  
>  .. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c
>     :export:
> +
> +Privacy-screen class
> +
> +
> +.. kernel-doc:: drivers/gpu/drm/drm_privacy_screen.c
> +   :doc: overview
> +
> +.. kernel-doc:: include/drm/drm_privacy_screen_driver.h
> +   :internal:
> +
> +.. kernel-doc:: include/drm/drm_privacy_screen_machine.h
> +   :internal:
> +
> +.. kernel-doc:: drivers/gpu/drm/drm_privacy_screen.c
> +   :export:
> diff --git a/MAINTAINERS b/MAINTAINERS
> index ede4a37a53b3..a272ca600f98 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -6376,6 +6376,14 @@ F:   drivers/gpu/drm/drm_panel.c
>  F: drivers/gpu/drm/panel/
>  F: include/drm/drm_panel.h
>  
> +DRM PRIVACY-SCREEN CLASS
> +M: Hans de Goede 
> +L: dri-devel@lists.freedesktop.org
> +S: Maintained
> +T: git git://anongit.freedesktop.org/drm/drm-misc
> +F: drivers/gpu/drm/drm_privacy_screen*
> +F: include/drm/drm_privacy_screen*
> +
>  DRM TTM SUBSYSTEM
>  M: Christian Koenig 
>  M: Huang Rui 
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index b17e231ca6f7..7249b010ab90 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -481,3 +481,7 @@ config DRM_PANEL_ORIENTATION_QUIRKS
>  config DRM_LIB_RANDOM
> bool
> default n
> +
> +config DRM_PRIVACY_SCREEN
> +   bool
> +   default n

This is probably worth documenting for folks configuring their kernels to
explain what this actually does (something simple like "Controls programmable
privacy screens found on some devices, if unsure select Y" would probably be
fine)

> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> index 0dff40bb863c..788fc37096f6 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -32,6 +32,7 @@ drm-$(CONFIG_OF) += drm_of.o
>  drm-$(CONFIG_PCI) += drm_pci.o
>  drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o
>  drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
> +drm-$(CONFIG_DRM_PRIVACY_SCREEN) += drm_privacy_screen.o
>  
>  obj-$(CONFIG_DRM_DP_AUX_BUS) += drm_dp_aux_bus.o
>  
> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> index 7a5097467ba5..dc293b771c3f 100644
> --- a/drivers/gpu/drm/drm_drv.c
> +++ b/drivers/gpu/drm/drm_drv.c
> @@ -43,6 +43,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "drm_crtc_internal.h"
>  #include "drm_internal.h"
> @@ -102

Re: [PATCH 1/9] drm/connector: Add support for privacy-screen properties (v4)

2021-09-15 Thread Lyude Paul

On Mon, 2021-09-06 at 09:35 +0200, Hans de Goede wrote:
> From: Rajat Jain 
> 
> Add support for generic electronic privacy screen properties, that
> can be added by systems that have an integrated EPS.
> 
> Changes in v2 (Hans de Goede)
> - Create 2 properties, "privacy-screen sw-state" and
>   "privacy-screen hw-state", to deal with devices where the OS might be
>   locked out of making state changes
> - Write kerneldoc explaining how the 2 properties work together, what
>   happens when changes to the state are made outside of the DRM code's
>   control, etc.
> 
> Changes in v3 (Hans de Goede)
> - Some small tweaks to the kerneldoc describing the 2 properties
> 
> Changes in v4 (Hans de Goede)
> - Change the "Enabled, locked" and "Disabled, locked" hw-state enum value
>   names to "Enabled-locked" and "Disabled-locked". The xrandr command shows
>   all possible enum values separated by commas in its output, so having a
>   comma in an enum name is not a good idea.
> - Do not add a privacy_screen_hw_state member to drm_connector_state
>   since this property is immutable its value must be directly stored in the
>   obj->properties->values array
> 
> Signed-off-by: Rajat Jain 
> Co-authored-by: Hans de Goede 
> Acked-by: Pekka Paalanen 
> Reviewed-by: Mario Limonciello 
> Reviewed-by: Emil Velikov 
> Signed-off-by: Hans de Goede 
> ---
>  Documentation/gpu/drm-kms.rst |   2 +
>  drivers/gpu/drm/drm_atomic_uapi.c |   4 ++
>  drivers/gpu/drm/drm_connector.c   | 101 ++
>  include/drm/drm_connector.h   |  44 +
>  4 files changed, 151 insertions(+)
> 
> diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst
> index 1ef7951ded5e..d14bf1c35d7e 100644
> --- a/Documentation/gpu/drm-kms.rst
> +++ b/Documentation/gpu/drm-kms.rst
> @@ -506,6 +506,8 @@ Property Types and Blob Property Support
>  .. kernel-doc:: drivers/gpu/drm/drm_property.c
>     :export:
>  
> +.. _standard_connector_properties:
> +
>  Standard Connector Properties
>  -
>  
> diff --git a/drivers/gpu/drm/drm_atomic_uapi.c
> b/drivers/gpu/drm/drm_atomic_uapi.c
> index 909f31833181..cdd31fc78bfc 100644
> --- a/drivers/gpu/drm/drm_atomic_uapi.c
> +++ b/drivers/gpu/drm/drm_atomic_uapi.c
> @@ -797,6 +797,8 @@ static int drm_atomic_connector_set_property(struct
> drm_connector *connector,
>    fence_ptr);
> } else if (property == connector->max_bpc_property) {
> state->max_requested_bpc = val;
> +   } else if (property == connector->privacy_screen_sw_state_property)
> {
> +   state->privacy_screen_sw_state = val;
> } else if (connector->funcs->atomic_set_property) {
> return connector->funcs->atomic_set_property(connector,
> state, property, val);
> @@ -874,6 +876,8 @@ drm_atomic_connector_get_property(struct drm_connector
> *connector,
> *val = 0;
> } else if (property == connector->max_bpc_property) {
> *val = state->max_requested_bpc;
> +   } else if (property == connector->privacy_screen_sw_state_property)
> {
> +   *val = state->privacy_screen_sw_state;
> } else if (connector->funcs->atomic_get_property) {
> return connector->funcs->atomic_get_property(connector,
> state, property, val);
> diff --git a/drivers/gpu/drm/drm_connector.c
> b/drivers/gpu/drm/drm_connector.c
> index e0a30e0ee86a..dd1ca68881ba 100644
> --- a/drivers/gpu/drm/drm_connector.c
> +++ b/drivers/gpu/drm/drm_connector.c
> @@ -1264,6 +1264,46 @@ static const struct drm_prop_enum_list
> dp_colorspaces[] = {
>   * For DVI-I and TVout there is also a matching property "select
> subconnector"
>   * allowing to switch between signal types.
>   * DP subconnector corresponds to a downstream port.
> + *
> + * privacy-screen sw-state, privacy-screen hw-state:
> + * These 2 optional properties can be used to query the state of the
> + * electronic privacy screen that is available on some displays; and in
> + * some cases also control the state. If a driver implements these
> + * properties then both properties must be present.
> + *
> + * "privacy-screen hw-state" is read-only and reflects the actual state
> + * of the privacy-screen, possible values: "Enabled", "Disabled",
> + * "Enabled-locked", "Disabled-locked". The locked states indicate
> + * that the state cannot be changed through the DRM API. E.g. there
> + * might be devices where the firmware-setup options, or a hardware
> + * slider-switch, offer always on / off modes.
> + *
> + * "privacy-screen sw-state" can be set to change the privacy-screen
> state
> + * when not locked. In this case the driver must update the hw-state
> + * property to reflect the new state on completion of the commit of the
> + * sw-state propert

Re: [Intel-gfx] [PATCH 13/27] drm/i915/guc: Ensure GuC schedule operations do not operate on child contexts

2021-09-15 Thread Matthew Brost

On Wed, Sep 15, 2021 at 12:24:41PM -0700, John Harrison wrote:
> On 8/20/2021 15:44, Matthew Brost wrote:
> > In GuC parent-child contexts the parent context controls the scheduling,
> > ensure only the parent does the scheduling operations.
> > 
> > Signed-off-by: Matthew Brost 
> > ---
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 24 ++-
> >   1 file changed, 18 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index dbcb9ab28a9a..00d54bb00bfb 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -320,6 +320,12 @@ static void decr_context_committed_requests(struct 
> > intel_context *ce)
> > GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
> >   }
> > +static struct intel_context *
> > +request_to_scheduling_context(struct i915_request *rq)
> > +{
> > +   return intel_context_to_parent(rq->context);
> > +}
> > +
> >   static bool context_guc_id_invalid(struct intel_context *ce)
> >   {
> > return ce->guc_id.id == GUC_INVALID_LRC_ID;
> > @@ -1684,6 +1690,7 @@ static void __guc_context_sched_disable(struct 
> > intel_guc *guc,
> > GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID);
> > +   GEM_BUG_ON(intel_context_is_child(ce));
> > trace_intel_context_sched_disable(ce);
> > guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
> > @@ -1898,6 +1905,8 @@ static void guc_context_sched_disable(struct 
> > intel_context *ce)
> > u16 guc_id;
> > bool enabled;
> > +   GEM_BUG_ON(intel_context_is_child(ce));
> > +
> > if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
> > !lrc_desc_registered(guc, ce->guc_id.id)) {
> > spin_lock_irqsave(&ce->guc_state.lock, flags);
> > @@ -2286,6 +2295,8 @@ static void guc_signal_context_fence(struct 
> > intel_context *ce)
> >   {
> > unsigned long flags;
> > +   GEM_BUG_ON(intel_context_is_child(ce));
> > +
> > spin_lock_irqsave(&ce->guc_state.lock, flags);
> > clr_context_wait_for_deregister_to_register(ce);
> > __guc_signal_context_fence(ce);
> > @@ -2315,7 +2326,7 @@ static void guc_context_init(struct intel_context *ce)
> >   static int guc_request_alloc(struct i915_request *rq)
> >   {
> > -   struct intel_context *ce = rq->context;
> > +   struct intel_context *ce = request_to_scheduling_context(rq);
> > struct intel_guc *guc = ce_to_guc(ce);
> > unsigned long flags;
> > int ret;
> > @@ -2358,11 +2369,12 @@ static int guc_request_alloc(struct i915_request 
> > *rq)
> >  * exhausted and return -EAGAIN to the user indicating that they can try
> >  * again in the future.
> >  *
> > -* There is no need for a lock here as the timeline mutex ensures at
> > -* most one context can be executing this code path at once. The
> > -* guc_id_ref is incremented once for every request in flight and
> > -* decremented on each retire. When it is zero, a lock around the
> > -* increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
> > +* There is no need for a lock here as the timeline mutex (or
> > +* parallel_submit mutex in the case of multi-lrc) ensures at most one
> > +* context can be executing this code path at once. The guc_id_ref is
> Isn't that now two? One uni-LRC holding the timeline mutex and one multi-LRC
> holding the parallel submit mutex?
> 

This is actually a stale comment and I need to scrub it. The
parallel_submit mutex is gone, now we grab the ce->timeline locks
starting at the parent and then all children in a loop. I think the
original comment is sufficient.

Matt

> John.
> 
> > +* incremented once for every request in flight and decremented on each
> > +* retire. When it is zero, a lock around the increment (in pin_guc_id)
> > +* is needed to seal a race with unpin_guc_id.
> >  */
> > if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
> > goto out;
>

Re: [Intel-gfx] [PATCH 12/27] drm/i915/guc: Add multi-lrc context registration

2021-09-15 Thread Matthew Brost

On Wed, Sep 15, 2021 at 12:21:35PM -0700, John Harrison wrote:
> On 8/20/2021 15:44, Matthew Brost wrote:
> > Add multi-lrc context registration H2G. In addition a workqueue and
> > process descriptor are setup during multi-lrc context registration as
> > these data structures are needed for multi-lrc submission.
> > 
> > Signed-off-by: Matthew Brost 
> > ---
> >   drivers/gpu/drm/i915/gt/intel_context_types.h |  12 ++
> >   drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 +-
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 109 +-
> >   4 files changed, 126 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
> > b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > index 0fafc178cf2c..6f567ebeb039 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > @@ -232,8 +232,20 @@ struct intel_context {
> > /** @parent: pointer to parent if child */
> > struct intel_context *parent;
> > +
> > +   /** @guc_wqi_head: head pointer in work queue */
> > +   u16 guc_wqi_head;
> > +   /** @guc_wqi_tail: tail pointer in work queue */
> > +   u16 guc_wqi_tail;
> > +
> These should be in the 'guc_state' sub-struct? Would be good to keep all GuC
> specific content in one self-contained struct. Especially given the other
> child/parent fields are no going to be guc_ prefixed any more.
> 

Right now I have everything in guc_state protected by guc_state.lock,
these fields are not protected by this lock. IMO it is better to use a
different sub-structure for the parallel fields (even if anonymous).

> 
> > /** @guc_number_children: number of children if parent */
> > u8 guc_number_children;
> > +
> > +   /**
> > +* @parent_page: page in context used by parent for work queue,
> Maybe 'page in context record'? Otherwise, exactly what 'context' is meant
> here? It isn't the 'struct intel_context'. The contetx record is saved as
> 'ce->state' / 'ce->lrc_reg_state', yes? Is it possible to link to either of

It is the page in ce->state / page minus LRC reg offset in
ce->lrc_reg_state. Will update the commit to make that clear.

> those field? Probably not given that they don't appear to have any kerneldoc
> description :(. Maybe add that in too :).
> 
> > +* work queue descriptor
> Later on, it is described as 'process descriptor and work queue'. It would
> be good to be consistent.
>

Yep. Will fix.

> > +*/
> > +   u8 parent_page;
> > };
> >   #ifdef CONFIG_DRM_I915_SELFTEST
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> > b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index bb4af4977920..0ddbad4e062a 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -861,6 +861,11 @@ __lrc_alloc_state(struct intel_context *ce, struct 
> > intel_engine_cs *engine)
> > context_size += PAGE_SIZE;
> > }
> > +   if (intel_context_is_parent(ce)) {
> > +   ce->parent_page = context_size / PAGE_SIZE;
> > +   context_size += PAGE_SIZE;
> > +   }
> > +
> > obj = i915_gem_object_create_lmem(engine->i915, context_size, 0);
> > if (IS_ERR(obj))
> > obj = i915_gem_object_create_shmem(engine->i915, context_size);
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > index fa4be13c8854..0e600a3b8f1e 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > @@ -52,7 +52,7 @@
> >   #define GUC_DOORBELL_INVALID  256
> > -#define GUC_WQ_SIZE(PAGE_SIZE * 2)
> > +#define GUC_WQ_SIZE(PAGE_SIZE / 2)
> Is this size actually dictated by the GuC API? Or is it just a driver level
> decision? If the latter, shouldn't this be below instead?
>

Driver level decision. What exactly do you mean by below?
 
> >   /* Work queue item header definitions */
> >   #define WQ_STATUS_ACTIVE  1
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 14b24298cdd7..dbcb9ab28a9a 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -340,6 +340,39 @@ static struct i915_priolist *to_priolist(struct 
> > rb_node *rb)
> > return rb_entry(rb, struct i915_priolist, node);
> >   }
> > +/*
> > + * When using multi-lrc submission an extra page in the context state is
> > + * reserved for the process descriptor and work queue.
> > + *
> > + * The layout of this page is below:
> > + * 0   guc_process_desc
> > + * ... unused
> > + * PAGE_

[PATCH] drm/i915: zero fill vma name buffer

2021-09-15 Thread Tim Gardner

In capture_vma() Coverity complains of a possible buffer overrun. Even
though this is a static function where all call sites can be checked,
limiting the copy length could save some future grief.

CID 93300 (#1 of 1): Copy into fixed size buffer (STRING_OVERFLOW)
4. fixed_size_dest: You might overrun the 16-character fixed-size string c->name
   by copying name without checking the length.
5. parameter_as_source: Note: This defect has an elevated risk because the
   source argument is a parameter of the current function.
1326strcpy(c->name, name);

Fix any possible overflows by using strncpy(). Zero fill the name buffer to
guarantee ASCII string NUL termination.

Cc: Jani Nikula 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: intel-...@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Tim Gardner 
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9cf6ac575de1..154df174e2d7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1297,10 +1297,11 @@ static bool record_context(struct 
i915_gem_context_coredump *e,
return simulated;
 }
 
+#define VMA_NAME_LEN 16
 struct intel_engine_capture_vma {
struct intel_engine_capture_vma *next;
struct i915_vma *vma;
-   char name[16];
+   char name[VMA_NAME_LEN];
 };
 
 static struct intel_engine_capture_vma *
@@ -1314,7 +1315,7 @@ capture_vma(struct intel_engine_capture_vma *next,
if (!vma)
return next;
 
-   c = kmalloc(sizeof(*c), gfp);
+   c = kzalloc(sizeof(*c), gfp);
if (!c)
return next;
 
@@ -1323,7 +1324,7 @@ capture_vma(struct intel_engine_capture_vma *next,
return next;
}
 
-   strcpy(c->name, name);
+   strncpy(c->name, name, VMA_NAME_LEN-1);
c->vma = vma; /* reference held while active */
 
c->next = next;
-- 
2.33.0

Re: [PATCH] drm/i915/guc/slpc: remove unneeded clflush calls

2021-09-15 Thread John Harrison


On 9/15/2021 12:24, Belgaumkar, Vinay wrote:

On 9/14/2021 12:51 PM, Lucas De Marchi wrote:

The clflush calls here aren't doing anything since we are not writing
something and flushing the cache lines to be visible to GuC. Here the
intention seems to be to make sure whatever GuC has written is visible
to the CPU before we read them. However a clflush from the CPU side is
the wrong instruction to use.
Is there a right instruction to use? Either we need to verify that no 
flush/invalidate is required or we need to add in a replacement that 
does the correct thing?


John.



 From code inspection on the other clflush() calls in i915/gt/uc/ these
are the only ones with this behavior. The others are apparently making
sure what we write is visible to GuC.

Signed-off-by: Lucas De Marchi 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 3 ---
  1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c

index 65a3e7fdb2b2..2e996b77df80 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -108,7 +108,6 @@ static u32 slpc_get_state(struct intel_guc_slpc 
*slpc)

    GEM_BUG_ON(!slpc->vma);
  -    drm_clflush_virt_range(slpc->vaddr, sizeof(u32));
  data = slpc->vaddr;
    return data->header.global_state;
@@ -172,8 +171,6 @@ static int slpc_query_task_state(struct 
intel_guc_slpc *slpc)

  drm_err(&i915->drm, "Failed to query task state (%pe)\n",
  ERR_PTR(ret));
  -    drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES);
-


LGTM.
Reviewed-by: Vinay Belgaumkar 


  return ret;
  }

Re: [Intel-gfx] [PATCH 13/27] drm/i915/guc: Ensure GuC schedule operations do not operate on child contexts

2021-09-15 Thread John Harrison


On 8/20/2021 15:44, Matthew Brost wrote:

In GuC parent-child contexts the parent context controls the scheduling,
ensure only the parent does the scheduling operations.

Signed-off-by: Matthew Brost 
---
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 24 ++-
  1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index dbcb9ab28a9a..00d54bb00bfb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -320,6 +320,12 @@ static void decr_context_committed_requests(struct 
intel_context *ce)
GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
  }
  
+static struct intel_context *

+request_to_scheduling_context(struct i915_request *rq)
+{
+   return intel_context_to_parent(rq->context);
+}
+
  static bool context_guc_id_invalid(struct intel_context *ce)
  {
return ce->guc_id.id == GUC_INVALID_LRC_ID;
@@ -1684,6 +1690,7 @@ static void __guc_context_sched_disable(struct intel_guc 
*guc,
  
  	GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID);
  
+	GEM_BUG_ON(intel_context_is_child(ce));

trace_intel_context_sched_disable(ce);
  
  	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),

@@ -1898,6 +1905,8 @@ static void guc_context_sched_disable(struct 
intel_context *ce)
u16 guc_id;
bool enabled;
  
+	GEM_BUG_ON(intel_context_is_child(ce));

+
if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
!lrc_desc_registered(guc, ce->guc_id.id)) {
spin_lock_irqsave(&ce->guc_state.lock, flags);
@@ -2286,6 +2295,8 @@ static void guc_signal_context_fence(struct intel_context 
*ce)
  {
unsigned long flags;
  
+	GEM_BUG_ON(intel_context_is_child(ce));

+
spin_lock_irqsave(&ce->guc_state.lock, flags);
clr_context_wait_for_deregister_to_register(ce);
__guc_signal_context_fence(ce);
@@ -2315,7 +2326,7 @@ static void guc_context_init(struct intel_context *ce)
  
  static int guc_request_alloc(struct i915_request *rq)

  {
-   struct intel_context *ce = rq->context;
+   struct intel_context *ce = request_to_scheduling_context(rq);
struct intel_guc *guc = ce_to_guc(ce);
unsigned long flags;
int ret;
@@ -2358,11 +2369,12 @@ static int guc_request_alloc(struct i915_request *rq)
 * exhausted and return -EAGAIN to the user indicating that they can try
 * again in the future.
 *
-* There is no need for a lock here as the timeline mutex ensures at
-* most one context can be executing this code path at once. The
-* guc_id_ref is incremented once for every request in flight and
-* decremented on each retire. When it is zero, a lock around the
-* increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
+* There is no need for a lock here as the timeline mutex (or
+* parallel_submit mutex in the case of multi-lrc) ensures at most one
+* context can be executing this code path at once. The guc_id_ref is
Isn't that now two? One uni-LRC holding the timeline mutex and one 
multi-LRC holding the parallel submit mutex?


John.


+* incremented once for every request in flight and decremented on each
+* retire. When it is zero, a lock around the increment (in pin_guc_id)
+* is needed to seal a race with unpin_guc_id.
 */
if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
goto out;

Re: [PATCH] drm/i915/guc/slpc: remove unneeded clflush calls

2021-09-15 Thread Belgaumkar, Vinay





On 9/14/2021 12:51 PM, Lucas De Marchi wrote:

The clflush calls here aren't doing anything since we are not writing
something and flushing the cache lines to be visible to GuC. Here the
intention seems to be to make sure whatever GuC has written is visible
to the CPU before we read them. However a clflush from the CPU side is
the wrong instruction to use.

 From code inspection on the other clflush() calls in i915/gt/uc/ these
are the only ones with this behavior. The others are apparently making
sure what we write is visible to GuC.

Signed-off-by: Lucas De Marchi 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 3 ---
  1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 65a3e7fdb2b2..2e996b77df80 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -108,7 +108,6 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
  
  	GEM_BUG_ON(!slpc->vma);
  
-	drm_clflush_virt_range(slpc->vaddr, sizeof(u32));

data = slpc->vaddr;
  
  	return data->header.global_state;

@@ -172,8 +171,6 @@ static int slpc_query_task_state(struct intel_guc_slpc 
*slpc)
drm_err(&i915->drm, "Failed to query task state (%pe)\n",
ERR_PTR(ret));
  
-	drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES);

-


LGTM.
Reviewed-by: Vinay Belgaumkar 


return ret;
  }

Re: [Intel-gfx] [PATCH 12/27] drm/i915/guc: Add multi-lrc context registration

2021-09-15 Thread John Harrison


On 8/20/2021 15:44, Matthew Brost wrote:

Add multi-lrc context registration H2G. In addition a workqueue and
process descriptor are setup during multi-lrc context registration as
these data structures are needed for multi-lrc submission.

Signed-off-by: Matthew Brost 
---
  drivers/gpu/drm/i915/gt/intel_context_types.h |  12 ++
  drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 +-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 109 +-
  4 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 0fafc178cf2c..6f567ebeb039 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -232,8 +232,20 @@ struct intel_context {
/** @parent: pointer to parent if child */
struct intel_context *parent;
  
+

+   /** @guc_wqi_head: head pointer in work queue */
+   u16 guc_wqi_head;
+   /** @guc_wqi_tail: tail pointer in work queue */
+   u16 guc_wqi_tail;
+
These should be in the 'guc_state' sub-struct? Would be good to keep all 
GuC specific content in one self-contained struct. Especially given the 
other child/parent fields are not going to be guc_ prefixed any more.




/** @guc_number_children: number of children if parent */
u8 guc_number_children;
+
+   /**
+* @parent_page: page in context used by parent for work queue,
Maybe 'page in context record'? Otherwise, exactly what 'context' is 
meant here? It isn't the 'struct intel_context'. The context record is 
saved as 'ce->state' / 'ce->lrc_reg_state', yes? Is it possible to link 
to either of those field? Probably not given that they don't appear to 
have any kerneldoc description :(. Maybe add that in too :).



+* work queue descriptor
Later on, it is described as 'process descriptor and work queue'. It 
would be good to be consistent.



+*/
+   u8 parent_page;
};
  
  #ifdef CONFIG_DRM_I915_SELFTEST

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index bb4af4977920..0ddbad4e062a 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -861,6 +861,11 @@ __lrc_alloc_state(struct intel_context *ce, struct 
intel_engine_cs *engine)
context_size += PAGE_SIZE;
}
  
+	if (intel_context_is_parent(ce)) {

+   ce->parent_page = context_size / PAGE_SIZE;
+   context_size += PAGE_SIZE;
+   }
+
obj = i915_gem_object_create_lmem(engine->i915, context_size, 0);
if (IS_ERR(obj))
obj = i915_gem_object_create_shmem(engine->i915, context_size);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index fa4be13c8854..0e600a3b8f1e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -52,7 +52,7 @@
  
  #define GUC_DOORBELL_INVALID		256
  
-#define GUC_WQ_SIZE			(PAGE_SIZE * 2)

+#define GUC_WQ_SIZE(PAGE_SIZE / 2)
Is this size actually dictated by the GuC API? Or is it just a driver 
level decision? If the latter, shouldn't this be below instead?


  
  /* Work queue item header definitions */

  #define WQ_STATUS_ACTIVE  1
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 14b24298cdd7..dbcb9ab28a9a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -340,6 +340,39 @@ static struct i915_priolist *to_priolist(struct rb_node 
*rb)
return rb_entry(rb, struct i915_priolist, node);
  }
  
+/*

+ * When using multi-lrc submission an extra page in the context state is
+ * reserved for the process descriptor and work queue.
+ *
+ * The layout of this page is below:
+ * 0   guc_process_desc
+ * ... unused
+ * PAGE_SIZE / 2   work queue start
+ * ... work queue
+ * PAGE_SIZE - 1   work queue end
+ */
+#define WQ_OFFSET  (PAGE_SIZE / 2)
Can this not be derived from GUC_WQ_SIZE given that the two are 
fundamentally linked? E.g. '#define WQ_OFFSET (PAGE_SIZE - 
GUC_WQ_SIZE)'? And maybe have a '#define WQ_TOTAL_SIZE PAGE_SIZE' and 
use that in all of WQ_OFFSET, GUC_WQ_SIZE and the allocation itself in 
intel_lrc.c?


Also, the process descriptor is actually an array of descriptors sized 
by the number of children? Or am I misunderstanding the code below? If 
so, shouldn't there be a 'COMPILE_BUG_ON((MAX_ENGINE_INSTANCE * 
sizeof(descriptor)) < (WQ_

[PATCH v3 12/12] drm/i915/ttm: enable shmem tt backend

2021-09-15 Thread Matthew Auld

Enable shmem tt backend, and enable shrinking.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index e758de336b96..6199e8c067ff 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1050,6 +1050,7 @@ static u64 i915_ttm_mmap_offset(struct 
drm_i915_gem_object *obj)
 
 static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
.name = "i915_gem_object_ttm",
+   .flags = I915_GEM_OBJECT_IS_SHRINKABLE,
 
.get_pages = i915_ttm_get_pages,
.put_pages = i915_ttm_put_pages,
-- 
2.26.3

[PATCH v3 11/12] drm/i915/ttm: make evicted shmem pages visible to the shrinker

2021-09-15 Thread Matthew Auld

We currently just evict lmem objects to system memory when under memory
pressure. For this case we lack the usual object mm.pages, which
effectively hides the pages from the i915-gem shrinker, until we
actually "attach" the TT to the object, or in the case of lmem-only
objects it just gets migrated back to lmem when touched again. For such
cases we can make the object visible as soon as we populate the TT with
shmem pages, and then hide it again when doing the unpopulate.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.h   |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 29 +++-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  | 11 
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 561d6bd0a5c9..28b831c78c47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -540,6 +540,7 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
 
 void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj);
 void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
 void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
 
 static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 6b38e4414c5a..02175e8ad069 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -482,13 +482,12 @@ void i915_gem_object_make_unshrinkable(struct 
drm_i915_gem_object *obj)
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 }
 
-static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
- struct list_head *head)
+static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
+  struct list_head *head)
 {
struct drm_i915_private *i915 = obj_to_i915(obj);
unsigned long flags;
 
-   GEM_BUG_ON(!i915_gem_object_has_pages(obj));
if (!i915_gem_object_is_shrinkable(obj))
return;
 
@@ -507,6 +506,21 @@ static void __i915_gem_object_make_shrinkable(struct 
drm_i915_gem_object *obj,
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 }
 
+/**
+ * __i915_gem_object_make_shrinkable - Move the object to the tail of the
+ * shrinkable list. Objects on this list might be swapped out. Used with
+ * WILLNEED objects.
+ * @obj: The GEM object.
+ *
+ * DO NOT USE. This is intended to be called on very special objects that don't
+ * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
+ * underneath.
+ */
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
+{
+   ___i915_gem_object_make_shrinkable(obj,
+  &obj_to_i915(obj)->mm.shrink_list);
+}
 
 /**
  * i915_gem_object_make_shrinkable - Move the object to the tail of the
@@ -518,8 +532,8 @@ static void __i915_gem_object_make_shrinkable(struct 
drm_i915_gem_object *obj,
  */
 void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
 {
-   __i915_gem_object_make_shrinkable(obj,
- &obj_to_i915(obj)->mm.shrink_list);
+   GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+   __i915_gem_object_make_shrinkable(obj);
 }
 
 /**
@@ -533,6 +547,7 @@ void i915_gem_object_make_shrinkable(struct 
drm_i915_gem_object *obj)
  */
 void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
 {
-   __i915_gem_object_make_shrinkable(obj,
- &obj_to_i915(obj)->mm.purge_list);
+   GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+   ___i915_gem_object_make_shrinkable(obj,
+  &obj_to_i915(obj)->mm.purge_list);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 079a7a655ede..e758de336b96 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -234,6 +234,15 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device 
*bdev,
if (ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
ttm->page_flags &= ~TTM_PAGE_FLAG_SWAPPED;
 
+   /*
+* Even if we lack mm.pages for this object(which will be the case when
+* something is evicted to system memory by TTM), we still want to make
+* this object visible to the shrinker, since the underlying ttm_tt
+* still has the real shmem pages. When unpopulating the tt(possibly due
+* to shrinking) we hide it again from the shrinker.
+*/
+   __i915_gem_object_make_shrinkabl

[PATCH v3 08/12] drm/i915/ttm: add tt shmem backend

2021-09-15 Thread Matthew Auld

For cached objects we can allocate our pages directly in shmem. This
should make it possible(in a later patch) to utilise the existing
i915-gem shrinker code for such objects. For now this is still disabled.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.h |   8 +
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  |  14 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c| 217 ++---
 3 files changed, 209 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 48112b9d76df..561d6bd0a5c9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -618,6 +618,14 @@ int i915_gem_object_wait_migration(struct 
drm_i915_gem_object *obj,
 bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
enum intel_memory_type type);
 
+struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
+   size_t size, struct intel_memory_region *mr,
+   struct address_space *mapping,
+   unsigned int max_segment);
+void shmem_free_st(struct sg_table *st, struct address_space *mapping,
+  bool dirty, bool backup);
+void __shmem_writeback(size_t size, struct address_space *mapping);
+
 #ifdef CONFIG_MMU_NOTIFIER
 static inline bool
 i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 36b711ae9e28..19e55cc29a15 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -25,8 +25,8 @@ static void check_release_pagevec(struct pagevec *pvec)
cond_resched();
 }
 
-static void shmem_free_st(struct sg_table *st, struct address_space *mapping,
- bool dirty, bool backup)
+void shmem_free_st(struct sg_table *st, struct address_space *mapping,
+  bool dirty, bool backup)
 {
struct sgt_iter sgt_iter;
struct pagevec pvec;
@@ -52,10 +52,10 @@ static void shmem_free_st(struct sg_table *st, struct 
address_space *mapping,
kfree(st);
 }
 
-static struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
-  size_t size, struct intel_memory_region 
*mr,
-  struct address_space *mapping,
-  unsigned int max_segment)
+struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
+   size_t size, struct intel_memory_region *mr,
+   struct address_space *mapping,
+   unsigned int max_segment)
 {
const unsigned long page_count = size / PAGE_SIZE;
unsigned long i;
@@ -300,7 +300,7 @@ shmem_truncate(struct drm_i915_gem_object *obj)
obj->mm.pages = ERR_PTR(-EFAULT);
 }
 
-static void __shmem_writeback(size_t size, struct address_space *mapping)
+void __shmem_writeback(size_t size, struct address_space *mapping)
 {
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index aefaf9293005..a93e3a9ef698 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -32,11 +32,17 @@
  */
 #define I915_TTM_MAX_PLACEMENTS INTEL_REGION_UNKNOWN
 
+/* In our ttm backend external objects translate to the shmem_tt backend */
+#define I915_TTM_TT_SHMEM TTM_PAGE_FLAG_EXTERNAL_MAPPABLE
+
 /**
  * struct i915_ttm_tt - TTM page vector with additional private information
  * @ttm: The base TTM page vector.
  * @dev: The struct device used for dma mapping and unmapping.
  * @cached_st: The cached scatter-gather table.
+ * @obj: The GEM object. Should be valid while we have a valid bo->ttm.
+ * @filp: The shmem file, if using shmem backend.
+ * @backup: Swap out the pages when unpopulating, if using shmem backend.
  *
  * Note that DMA may be going on right up to the point where the page-
  * vector is unpopulated in delayed destroy. Hence keep the
@@ -48,6 +54,9 @@ struct i915_ttm_tt {
struct ttm_tt ttm;
struct device *dev;
struct sg_table *cached_st;
+   struct drm_i915_gem_object *obj;
+   struct file *filp;
+   bool backup;
 };
 
 static const struct ttm_place sys_placement_flags = {
@@ -167,12 +176,105 @@ i915_ttm_placement_from_obj(const struct 
drm_i915_gem_object *obj,
placement->busy_placement = busy;
 }
 
+static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
+{
+   struct drm_i915_private *i915 = container_of(bdev, typ

[PATCH v3 10/12] drm/i915: try to simplify make_{un}shrinkable

2021-09-15 Thread Matthew Auld

Drop the atomic shrink_pin stuff, and just have make_{un}shrinkable
update the shrinker visible lists immediately. This at least simplifies
the next patch, and does make the behaviour more obvious. The potential
downside is that make_unshrinkable now grabs a global lock even when the
object itself is no longer shrinkable(transitioning from purgeable <->
shrinkable doesn't seem to be a thing), for example in the ppGTT
insertion paths we should now be careful not to needlessly call
make_unshrinkable multiple times. Outside of that there is some fallout
in intel_context which relies on nesting calls to shrink_pin.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_pages.c | 16 +-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  | 52 +--
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  |  1 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  1 -
 drivers/gpu/drm/i915/gt/intel_context.c   |  9 +---
 7 files changed, 41 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 6fb9afb65034..e8265a432fcb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -305,15 +305,6 @@ static void i915_gem_free_object(struct drm_gem_object 
*gem_obj)
 */
atomic_inc(&i915->mm.free_count);
 
-   /*
-* This serializes freeing with the shrinker. Since the free
-* is delayed, first by RCU then by the workqueue, we want the
-* shrinker to be able to free pages of unreferenced objects,
-* or else we may oom whilst there are plenty of deferred
-* freed objects.
-*/
-   i915_gem_object_make_unshrinkable(obj);
-
/*
 * Since we require blocking on struct_mutex to unbind the freed
 * object from the GPU before releasing resources back to the
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 2471f36aaff3..a035ac26a090 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -453,7 +453,6 @@ struct drm_i915_gem_object {
 * instead go through the pin/unpin interfaces.
 */
atomic_t pages_pin_count;
-   atomic_t shrink_pin;
 
/**
 * Priority list of potential placements for this object.
@@ -514,7 +513,7 @@ struct drm_i915_gem_object {
struct i915_gem_object_page_iter get_dma_page;
 
/**
-* Element within i915->mm.unbound_list or i915->mm.bound_list,
+* Element within i915->mm.shrink_list or i915->mm.purge_list,
 * locked by i915->mm.obj_lock.
 */
struct list_head link;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 8eb1c3a6fc9c..f0df1394d7f6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -64,28 +64,16 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object 
*obj,
GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj));
i915_gem_object_set_tiling_quirk(obj);
GEM_BUG_ON(!list_empty(&obj->mm.link));
-   atomic_inc(&obj->mm.shrink_pin);
shrinkable = false;
}
 
if (shrinkable) {
-   struct list_head *list;
-   unsigned long flags;
-
assert_object_held(obj);
-   spin_lock_irqsave(&i915->mm.obj_lock, flags);
-
-   i915->mm.shrink_count++;
-   i915->mm.shrink_memory += obj->base.size;
 
if (obj->mm.madv != I915_MADV_WILLNEED)
-   list = &i915->mm.purge_list;
+   i915_gem_object_make_purgeable(obj);
else
-   list = &i915->mm.shrink_list;
-   list_add_tail(&obj->mm.link, list);
-
-   atomic_set(&obj->mm.shrink_pin, 0);
-   spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+   i915_gem_object_make_shrinkable(obj);
}
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index e382b7f2353b..6b38e4414c5a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -455,23 +455,26 @@ void i915_gem_shrinker_taints_mutex(struct 
drm_i915_private *i915,
 
 #define obj_to_i915(obj__) to_i915((obj__)->base.dev)
 
+/**
+ * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
+ * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
+ * make the

[PATCH v3 06/12] drm/ttm: add TTM_PAGE_FLAG_EXTERNAL_MAPPABLE

2021-09-15 Thread Matthew Auld

In commit:

commit 667a50db0477d47fdff01c666f5ee1ce26b5264c
Author: Thomas Hellstrom 
Date:   Fri Jan 3 11:17:18 2014 +0100

drm/ttm: Refuse to fault (prime-) imported pages

we introduced the restriction that imported pages should not be directly
mappable through TTM(this also extends to userptr). In the next patch we
want to introduce a shmem_tt backend, which should follow all the
existing rules with TTM_PAGE_FLAG_EXTERNAL, since it will need to handle
swapping itself, but with the above mapping restriction lifted.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 --
 include/drm/ttm/ttm_tt.h| 7 +++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 708390588c7c..fd6e18f12f50 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -163,8 +163,10 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
 * (if at all) by redirecting mmap to the exporter.
 */
if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL)) {
-   dma_resv_unlock(bo->base.resv);
-   return VM_FAULT_SIGBUS;
+   if (!(bo->ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL_MAPPABLE)) {
+   dma_resv_unlock(bo->base.resv);
+   return VM_FAULT_SIGBUS;
+   }
}
 
return 0;
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 7f54a83c95ef..800c9edb3e10 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -66,11 +66,18 @@ struct ttm_tt {
 * Note that enum ttm_bo_type.ttm_bo_type_sg objects will always enable
 * this flag.
 *
+* TTM_PAGE_FLAG_EXTERNAL_MAPPABLE: Same behaviour as
+* TTM_PAGE_FLAG_EXTERNAL, but with the reduced restriction that it is
+* still valid to use TTM to map the pages directly. This is useful when
+* implementing a ttm_tt backend which still allocates driver owned
+* pages underneath(say with shmem).
+*
 * TTM_PAGE_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE.
 */
 #define TTM_PAGE_FLAG_SWAPPED  (1 << 0)
 #define TTM_PAGE_FLAG_ZERO_ALLOC   (1 << 1)
 #define TTM_PAGE_FLAG_EXTERNAL (1 << 2)
+#define TTM_PAGE_FLAG_EXTERNAL_MAPPABLE(1 << 3 | 
TTM_PAGE_FLAG_EXTERNAL)
 
 #define TTM_PAGE_FLAG_PRIV_POPULATED   (1 << 31)
uint32_t page_flags;
-- 
2.26.3

[PATCH v3 09/12] drm/i915/ttm: use cached system pages when evicting lmem

2021-09-15 Thread Matthew Auld

This should let us do an accelerated copy directly to the shmem pages
when temporarily moving lmem-only objects, where the i915-gem shrinker
can later kick in to swap out the pages, if needed.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index a93e3a9ef698..079a7a655ede 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -126,11 +126,11 @@ static enum ttm_caching
 i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj)
 {
/*
-* Objects only allowed in system get cached cpu-mappings.
-* Other objects get WC mapping for now. Even if in system.
+* Objects only allowed in system get cached cpu-mappings, or when
+* evicting lmem-only buffers to system for swapping. Other objects get
+* WC mapping for now. Even if in system.
 */
-   if (obj->mm.region->type == INTEL_MEMORY_SYSTEM &&
-   obj->mm.n_placements <= 1)
+   if (obj->mm.n_placements <= 1)
return ttm_cached;
 
return ttm_write_combined;
-- 
2.26.3

[PATCH v3 07/12] drm/i915/gem: Break out some shmem backend utils

2021-09-15 Thread Matthew Auld

From: Thomas Hellström 

Break out some shmem backend utils for future reuse by the TTM backend:
shmem_alloc_st(), shmem_free_st() and __shmem_writeback() which we can
use to provide a shmem-backed TTM page pool for cached-only TTM
buffer objects.

Main functional change here is that we now compute the page sizes using
the dma segments rather than using the physical page address segments.

v2(Reported-by: kernel test robot )
- Make sure we initialise the mapping on the error path in
  shmem_get_pages()

Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Auld 
Signed-off-by: Matthew Auld 
---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 181 +-
 1 file changed, 106 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 11f072193f3b..36b711ae9e28 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -25,46 +25,61 @@ static void check_release_pagevec(struct pagevec *pvec)
cond_resched();
 }
 
-static int shmem_get_pages(struct drm_i915_gem_object *obj)
+static void shmem_free_st(struct sg_table *st, struct address_space *mapping,
+ bool dirty, bool backup)
 {
-   struct drm_i915_private *i915 = to_i915(obj->base.dev);
-   struct intel_memory_region *mem = obj->mm.region;
-   const unsigned long page_count = obj->base.size / PAGE_SIZE;
+   struct sgt_iter sgt_iter;
+   struct pagevec pvec;
+   struct page *page;
+
+   mapping_clear_unevictable(mapping);
+
+   pagevec_init(&pvec);
+   for_each_sgt_page(page, sgt_iter, st) {
+   if (dirty)
+   set_page_dirty(page);
+
+   if (backup)
+   mark_page_accessed(page);
+
+   if (!pagevec_add(&pvec, page))
+   check_release_pagevec(&pvec);
+   }
+   if (pagevec_count(&pvec))
+   check_release_pagevec(&pvec);
+
+   sg_free_table(st);
+   kfree(st);
+}
+
+static struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
+  size_t size, struct intel_memory_region 
*mr,
+  struct address_space *mapping,
+  unsigned int max_segment)
+{
+   const unsigned long page_count = size / PAGE_SIZE;
unsigned long i;
-   struct address_space *mapping;
struct sg_table *st;
struct scatterlist *sg;
-   struct sgt_iter sgt_iter;
struct page *page;
unsigned long last_pfn = 0; /* suppress gcc warning */
-   unsigned int max_segment = i915_sg_segment_size();
-   unsigned int sg_page_sizes;
gfp_t noreclaim;
int ret;
 
-   /*
-* Assert that the object is not currently in any GPU domain. As it
-* wasn't in the GTT, there shouldn't be any way it could have been in
-* a GPU cache
-*/
-   GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
-   GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
-
/*
 * If there's no chance of allocating enough pages for the whole
 * object, bail early.
 */
-   if (obj->base.size > resource_size(&mem->region))
-   return -ENOMEM;
+   if (size > resource_size(&mr->region))
+   return ERR_PTR(-ENOMEM);
 
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (!st)
-   return -ENOMEM;
+   return ERR_PTR(-ENOMEM);
 
-rebuild_st:
if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
kfree(st);
-   return -ENOMEM;
+   return ERR_PTR(-ENOMEM);
}
 
/*
@@ -73,14 +88,12 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 *
 * Fail silently without starting the shrinker
 */
-   mapping = obj->base.filp->f_mapping;
mapping_set_unevictable(mapping);
noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
 
sg = st->sgl;
st->nents = 0;
-   sg_page_sizes = 0;
for (i = 0; i < page_count; i++) {
const unsigned int shrink[] = {
I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
@@ -135,10 +148,9 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
if (!i ||
sg->length >= max_segment ||
page_to_pfn(page) != last_pfn + 1) {
-   if (i) {
-   sg_page_sizes |= sg->length;
+   if (i)
sg = sg_next(sg);
-   }
+
st->nents++;
sg_set_page(sg, page, PAGE_SIZE, 0);
} else {
@@ -149,14 +161,65 @@ static int shmem_get_pages(struct drm_i915_gem_object 
*

[PATCH v3 05/12] drm/ttm: add some kernel-doc for TTM_PAGE_FLAG_*

2021-09-15 Thread Matthew Auld

Move it to inline kernel-doc, otherwise we can't add empty lines it
seems. Also drop the kernel-doc for pages_list, which doesn't seem to
exist, and get rid of all the strange holes.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
 include/drm/ttm/ttm_tt.h | 57 ++--
 1 file changed, 38 insertions(+), 19 deletions(-)

diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index a6c284c21e72..7f54a83c95ef 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -38,35 +38,54 @@ struct ttm_resource;
 struct ttm_buffer_object;
 struct ttm_operation_ctx;
 
-#define TTM_PAGE_FLAG_SWAPPED  (1 << 4)
-#define TTM_PAGE_FLAG_ZERO_ALLOC   (1 << 6)
-#define TTM_PAGE_FLAG_EXTERNAL (1 << 8)
-
-#define TTM_PAGE_FLAG_PRIV_POPULATED  (1 << 31)
-
 /**
- * struct ttm_tt
- *
- * @pages: Array of pages backing the data.
- * @page_flags: see TTM_PAGE_FLAG_*
- * @num_pages: Number of pages in the page array.
- * @sg: for SG objects via dma-buf
- * @dma_address: The DMA (bus) addresses of the pages
- * @swap_storage: Pointer to shmem struct file for swap storage.
- * @pages_list: used by some page allocation backend
- * @caching: The current caching state of the pages, see enum ttm_caching.
- *
- * This is a structure holding the pages, caching- and aperture binding
- * status for a buffer object that isn't backed by fixed (VRAM / AGP)
+ * struct ttm_tt - This is a structure holding the pages, caching- and aperture
+ * binding status for a buffer object that isn't backed by fixed (VRAM / AGP)
  * memory.
  */
 struct ttm_tt {
+   /** @pages: Array of pages backing the data. */
struct page **pages;
+   /**
+* @page_flags: The page flags.
+*
+* Supported values:
+*
+* TTM_PAGE_FLAG_SWAPPED: Set if the pages have been swapped out.
+* Calling ttm_tt_populate() will swap the pages back in, and unset the
+* flag.
+*
+* TTM_PAGE_FLAG_ZERO_ALLOC: Set if the pages will be zeroed on
+* allocation.
+*
+* TTM_PAGE_FLAG_EXTERNAL: Set if the underlying pages were allocated
+* externally, like with dma-buf or userptr. This effectively disables
+* TTM swapping out such pages.  Also important is to prevent TTM from
+* ever directly mapping these pages.
+*
+* Note that enum ttm_bo_type.ttm_bo_type_sg objects will always enable
+* this flag.
+*
+* TTM_PAGE_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE.
+*/
+#define TTM_PAGE_FLAG_SWAPPED  (1 << 0)
+#define TTM_PAGE_FLAG_ZERO_ALLOC   (1 << 1)
+#define TTM_PAGE_FLAG_EXTERNAL (1 << 2)
+
+#define TTM_PAGE_FLAG_PRIV_POPULATED   (1 << 31)
uint32_t page_flags;
+   /** @num_pages: Number of pages in the page array. */
uint32_t num_pages;
+   /** @sg: for SG objects via dma-buf. */
struct sg_table *sg;
+   /** @dma_address: The DMA (bus) addresses of the pages. */
dma_addr_t *dma_address;
+   /** @swap_storage: Pointer to shmem struct file for swap storage. */
struct file *swap_storage;
+   /**
+* @caching: The current caching state of the pages, see enum
+* ttm_caching.
+*/
enum ttm_caching caching;
 };
 
-- 
2.26.3

[PATCH v3 04/12] drm/ttm: s/FLAG_SG/FLAG_EXTERNAL/

2021-09-15 Thread Matthew Auld

It covers more than just ttm_bo_type_sg usage, like with say dma-buf,
since one other user is userptr in amdgpu, and in the future we might
have some more. Hence EXTERNAL is likely a more suitable name.

Suggested-by: Christian König 
Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 +-
 drivers/gpu/drm/nouveau/nouveau_bo.c|  4 ++--
 drivers/gpu/drm/radeon/radeon_ttm.c |  8 
 drivers/gpu/drm/ttm/ttm_bo.c|  2 +-
 drivers/gpu/drm/ttm/ttm_bo_vm.c |  2 +-
 drivers/gpu/drm/ttm/ttm_tt.c| 10 +-
 include/drm/ttm/ttm_tt.h|  6 +++---
 7 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c5fa6e62f6ca..a6d606f91dfd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -894,7 +894,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
DRM_ERROR("failed to pin userptr\n");
return r;
}
-   } else if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
+   } else if (ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL) {
if (!ttm->sg) {
struct dma_buf_attachment *attach;
struct sg_table *sgt;
@@ -1147,7 +1147,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
return 0;
}
 
-   if (ttm->page_flags & TTM_PAGE_FLAG_SG)
+   if (ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL)
return 0;
 
ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
@@ -1179,7 +1179,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device 
*bdev,
return;
}
 
-   if (ttm->page_flags & TTM_PAGE_FLAG_SG)
+   if (ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL)
return;
 
adev = amdgpu_ttm_adev(bdev);
@@ -1210,8 +1210,8 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object 
*bo,
return -ENOMEM;
}
 
-   /* Set TTM_PAGE_FLAG_SG before populate but after create. */
-   bo->ttm->page_flags |= TTM_PAGE_FLAG_SG;
+   /* Set TTM_PAGE_FLAG_EXTERNAL before populate but after create. */
+   bo->ttm->page_flags |= TTM_PAGE_FLAG_EXTERNAL;
 
gtt = (void *)bo->ttm;
gtt->userptr = addr;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 33dca2565cca..ba0fec252df7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1249,7 +1249,7 @@ nouveau_ttm_tt_populate(struct ttm_device *bdev,
struct ttm_tt *ttm_dma = (void *)ttm;
struct nouveau_drm *drm;
struct device *dev;
-   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL);
 
if (ttm_tt_is_populated(ttm))
return 0;
@@ -1272,7 +1272,7 @@ nouveau_ttm_tt_unpopulate(struct ttm_device *bdev,
 {
struct nouveau_drm *drm;
struct device *dev;
-   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL);
 
if (slave)
return;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 7793249bc549..d891491b6da8 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -545,14 +545,14 @@ static int radeon_ttm_tt_populate(struct ttm_device *bdev,
 {
struct radeon_device *rdev = radeon_get_rdev(bdev);
struct radeon_ttm_tt *gtt = radeon_ttm_tt_to_gtt(rdev, ttm);
-   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL);
 
if (gtt && gtt->userptr) {
ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
if (!ttm->sg)
return -ENOMEM;
 
-   ttm->page_flags |= TTM_PAGE_FLAG_SG;
+   ttm->page_flags |= TTM_PAGE_FLAG_EXTERNAL;
return 0;
}
 
@@ -569,13 +569,13 @@ static void radeon_ttm_tt_unpopulate(struct ttm_device 
*bdev, struct ttm_tt *ttm
 {
struct radeon_device *rdev = radeon_get_rdev(bdev);
struct radeon_ttm_tt *gtt = radeon_ttm_tt_to_gtt(rdev, ttm);
-   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+   bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_EXTERNAL);
 
radeon_ttm_tt_unbind(bdev, ttm);
 
if (gtt && gtt->userptr) {
kfree(ttm->sg);
-   ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
+   ttm->page_flags &= ~TTM_PAGE_FLAG_EXTERNAL;
return;
}
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3b22c0013dbf..642dc7ce3081 100644
--- a/drivers/gpu/dr

[PATCH v3 03/12] drm/ttm: remove TTM_PAGE_FLAG_NO_RETRY

2021-09-15 Thread Matthew Auld

No longer used it seems.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
 include/drm/ttm/ttm_tt.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 89b15d673b22..842ce756213c 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -41,7 +41,6 @@ struct ttm_operation_ctx;
 #define TTM_PAGE_FLAG_SWAPPED (1 << 4)
 #define TTM_PAGE_FLAG_ZERO_ALLOC  (1 << 6)
 #define TTM_PAGE_FLAG_SG  (1 << 8)
-#define TTM_PAGE_FLAG_NO_RETRY   (1 << 9)
 
 #define TTM_PAGE_FLAG_PRIV_POPULATED  (1 << 31)
 
-- 
2.26.3

[PATCH v3 02/12] drm/ttm: move ttm_tt_{add, clear}_mapping into amdgpu

2021-09-15 Thread Matthew Auld

Now that setting page->index shouldn't be needed anymore, we are just
left with setting page->mapping, and here it looks like amdgpu is the
only user, where pointing the page->mapping at the dev_mapping is used
to verify that the pages do indeed belong to the device, if userspace
later tries to touch them.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 27 -
 drivers/gpu/drm/ttm/ttm_tt.c| 25 ---
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 1129e17e9f09..c5fa6e62f6ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1107,6 +1107,24 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct 
ttm_buffer_object *bo,
return &gtt->ttm;
 }
 
+static void amdgpu_ttm_tt_add_mapping(struct ttm_device *bdev,
+ struct ttm_tt *ttm)
+{
+   pgoff_t i;
+
+   for (i = 0; i < ttm->num_pages; ++i)
+   ttm->pages[i]->mapping = bdev->dev_mapping;
+}
+
+static void amdgpu_ttm_tt_clear_mapping(struct ttm_tt *ttm)
+{
+   struct page **page = ttm->pages;
+   pgoff_t i;
+
+   for (i = 0; i < ttm->num_pages; ++i)
+   (*page)->mapping = NULL;
+}
+
 /*
  * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
  *
@@ -1119,6 +1137,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
 {
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
+   int ret;
 
/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
if (gtt->userptr) {
@@ -1131,7 +1150,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_device 
*bdev,
if (ttm->page_flags & TTM_PAGE_FLAG_SG)
return 0;
 
-   return ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
+   ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
+   if (ret)
+   return ret;
+
+   amdgpu_ttm_tt_add_mapping(bdev, ttm);
+   return 0;
 }
 
 /*
@@ -1159,6 +1183,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device 
*bdev,
return;
 
adev = amdgpu_ttm_adev(bdev);
+   amdgpu_ttm_tt_clear_mapping(ttm);
return ttm_pool_free(&adev->mman.bdev.pool, ttm);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 1cc04c224988..980ecb079b2c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -289,17 +289,6 @@ int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt 
*ttm,
return ret;
 }
 
-static void ttm_tt_add_mapping(struct ttm_device *bdev, struct ttm_tt *ttm)
-{
-   pgoff_t i;
-
-   if (ttm->page_flags & TTM_PAGE_FLAG_SG)
-   return;
-
-   for (i = 0; i < ttm->num_pages; ++i)
-   ttm->pages[i]->mapping = bdev->dev_mapping;
-}
-
 int ttm_tt_populate(struct ttm_device *bdev,
struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
 {
@@ -336,7 +325,6 @@ int ttm_tt_populate(struct ttm_device *bdev,
if (ret)
goto error;
 
-   ttm_tt_add_mapping(bdev, ttm);
ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED;
if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
ret = ttm_tt_swapin(ttm);
@@ -359,24 +347,11 @@ int ttm_tt_populate(struct ttm_device *bdev,
 }
 EXPORT_SYMBOL(ttm_tt_populate);
 
-static void ttm_tt_clear_mapping(struct ttm_tt *ttm)
-{
-   pgoff_t i;
-   struct page **page = ttm->pages;
-
-   if (ttm->page_flags & TTM_PAGE_FLAG_SG)
-   return;
-
-   for (i = 0; i < ttm->num_pages; ++i)
-   (*page)->mapping = NULL;
-}
-
 void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
 {
if (!ttm_tt_is_populated(ttm))
return;
 
-   ttm_tt_clear_mapping(ttm);
if (bdev->funcs->ttm_tt_unpopulate)
bdev->funcs->ttm_tt_unpopulate(bdev, ttm);
else
-- 
2.26.3

[PATCH v3 01/12] drm/ttm: stop setting page->index for the ttm_tt

2021-09-15 Thread Matthew Auld

In commit:

commit 58aa6622d32af7d2c08d45085f44c54554a16ed7
Author: Thomas Hellstrom 
Date:   Fri Jan 3 11:47:23 2014 +0100

drm/ttm: Correctly set page mapping and -index members

we started setting the page->mapping and page->index to point to the
virtual address space, if the pages were faulted with TTM. Apparently
this was needed for core-mm to be able to reverse lookup the virtual
address given the struct page, and potentially unmap it from the page
tables. However as pointed out by Thomas, since we are now using
PFN_MAP, instead of say PFN_MIXED, this should no longer be the case.

There was also apparently some usecase in vmwgfx which needed this for
dirty tracking, but that also doesn't appear to be the case anymore, as
pointed out by Thomas.

We still need to keep the page->mapping for now, since that is still needed
for different reasons, but we try to address that in the next patch.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 2 --
 drivers/gpu/drm/ttm/ttm_tt.c| 4 +---
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index f56be5bc0861..906ec8a1bf5a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -346,8 +346,6 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
} else if (unlikely(!page)) {
break;
}
-   page->index = drm_vma_node_start(&bo->base.vma_node) +
-   page_offset;
pfn = page_to_pfn(page);
}
 
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index dae52433beeb..1cc04c224988 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -367,10 +367,8 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm)
if (ttm->page_flags & TTM_PAGE_FLAG_SG)
return;
 
-   for (i = 0; i < ttm->num_pages; ++i) {
+   for (i = 0; i < ttm->num_pages; ++i)
(*page)->mapping = NULL;
-   (*page++)->index = 0;
-   }
 }
 
 void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
-- 
2.26.3

Re: [PATCH v3 4/8] powerpc/pseries/svm: Add a powerpc version of cc_platform_has()

2021-09-15 Thread Borislav Petkov

On Wed, Sep 15, 2021 at 07:18:34PM +0200, Christophe Leroy wrote:
> Could you please provide more explicit explanation why inlining such an
> helper is considered as bad practice and messy ?

Tom already told you to look at the previous threads. Let's read them
together. This one, for example:

https://lore.kernel.org/lkml/ysscwvpxevxw%2f...@infradead.org/

| > To take it out of line, I'm leaning towards the latter, creating a new
| > file that is built based on the ARCH_HAS_PROTECTED_GUEST setting.
| 
| Yes.  In general everytime architectures have to provide the prototype
| and not just the implementation of something we end up with a giant mess
| sooner or later.  In a few cases that is still warranted due to
| performance concerns, but i don't think that is the case here.

So I think what Christoph means here is that you want to have the
generic prototype defined in a header and arches get to implement it
exactly to the letter so that there's no mess.

As to what mess exactly, I'd let him explain that.

> Because as demonstrated in my previous response some days ago, taking that
> outline ends up with unnecessarily ugly generated code and we don't
> benefit from GCC's capability to fold in and opt out unreachable code.

And this is real fast path where a couple of instructions matter or what?

set_memory_encrypted/_decrypted doesn't look like one to me.

> I can't see your point here. Inlining the function wouldn't add any
> ifdeffery as far as I can see.

If the function is touching defines etc, they all need to be visible.
If that function needs to call other functions - which is the case on
x86, perhaps not so much on power - then you need to either ifdef around
them or provide stubs with ifdeffery in the headers. And you need to
make them global functions instead of keeping them static to the same
compilation unit, etc, etc.

With a separate compilation unit, you don't need any of that and it is
all kept in that single file.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

Re: [PATCH 1/2] drm/sched: fix the bug of time out calculation(v4)

2021-09-15 Thread Andrey Grodzovsky


Pushed

Andrey

On 2021-09-15 7:45 a.m., Christian König wrote:

Yes, I think so as well. Andrey can you push this?

Christian.

Am 15.09.21 um 00:59 schrieb Grodzovsky, Andrey:

AFAIK this one is independent.

Christian, can you confirm ?

Andrey

*From:* amd-gfx  on behalf of 
Alex Deucher 

*Sent:* 14 September 2021 15:33
*To:* Christian König 
*Cc:* Liu, Monk ; amd-gfx list 
; Mailing list - DRI developers 

*Subject:* Re: [PATCH 1/2] drm/sched: fix the bug of time out 
calculation(v4)

Was this fix independent of the other discussions?  Should this be
applied to drm-misc?

Alex

On Wed, Sep 1, 2021 at 4:42 PM Alex Deucher  
wrote:

>
> On Wed, Sep 1, 2021 at 2:50 AM Christian König
>  wrote:
> >
> > Am 01.09.21 um 02:46 schrieb Monk Liu:
> > > issue:
> > > in cleanup_job the cancel_delayed_work will cancel a TO timer
> > > even if its corresponding job is still running.
> > >
> > > fix:
> > > do not cancel the timer in cleanup_job, instead do the cancelling
> > > only when the heading job is signaled, and if there is a "next" job
> > > we start_timeout again.
> > >
> > > v2:
> > > further cleanup the logic, and do the TDR timer cancelling if 
the signaled job

> > > is the last one in its scheduler.
> > >
> > > v3:
> > > change the issue description
> > > remove the cancel_delayed_work in the beginning of the cleanup_job
> > > recover the implement of drm_sched_job_begin.
> > >
> > > v4:
> > > remove the kthread_should_park() checking in cleanup_job routine,
> > > we should cleanup the signaled job asap
> > >
> > > TODO:
> > > 1)introduce pause/resume scheduler in job_timeout to serial the 
handling

> > > of scheduler and job_timeout.
> > > 2)drop the bad job's del and insert in scheduler due to above 
serialization

> > > (no race issue anymore with the serialization)
> > >
> > > tested-by: jingwen 
> > > Signed-off-by: Monk Liu 
> >
> > Reviewed-by: Christian König 
> >
>
> Are you planning to push this to drm-misc?
>
> Alex
>
>
> > > ---
> > >   drivers/gpu/drm/scheduler/sched_main.c | 26 
+-

> > >   1 file changed, 9 insertions(+), 17 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c

> > > index a2a9536..3e0bbc7 100644
> > > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > > @@ -676,15 +676,6 @@ drm_sched_get_cleanup_job(struct 
drm_gpu_scheduler *sched)

> > >   {
> > >   struct drm_sched_job *job, *next;
> > >
> > > - /*
> > > -  * Don't destroy jobs while the timeout worker is 
running  OR thread
> > > -  * is being parked and hence assumed to not touch 
pending_list

> > > -  */
> > > - if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
> > > - !cancel_delayed_work(&sched->work_tdr)) ||
> > > - kthread_should_park())
> > > - return NULL;
> > > -
> > > spin_lock(&sched->job_list_lock);
> > >
> > >   job = list_first_entry_or_null(&sched->pending_list,
> > > @@ -693,17 +684,21 @@ drm_sched_get_cleanup_job(struct 
drm_gpu_scheduler *sched)

> > >   if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
> > >   /* remove job from pending_list */
> > > list_del_init(&job->list);
> > > +
> > > + /* cancel this job's TO timer */
> > > + cancel_delayed_work(&sched->work_tdr);
> > >   /* make the scheduled timestamp more accurate */
> > >   next = list_first_entry_or_null(&sched->pending_list,
> > > typeof(*next), list);
> > > - if (next)
> > > +
> > > + if (next) {
> > > next->s_fence->scheduled.timestamp =
> > > job->s_fence->finished.timestamp;
> > > -
> > > + /* start TO timer for next job */
> > > + drm_sched_start_timeout(sched);
> > > + }
> > >   } else {
> > >   job = NULL;
> > > - /* queue timeout for next job */
> > > - drm_sched_start_timeout(sched);
> > >   }
> > >
> > > spin_unlock(&sched->job_list_lock);
> > > @@ -791,11 +786,8 @@ static int drm_sched_main(void *param)
> > > (entity = drm_sched_select_entity(sched))) ||
> > > kthread_should_stop());
> > >
> > > - if (cleanup_job) {
> > > + if (cleanup_job)
> > > sched->ops->free_job(cleanup_job);
> > > - /* queue timeout for next job */
> > > - drm_sched_start_timeout(sched);
> > > - }
> > >
> > >   if (!entity)
> > >   continue;
> >

[PATCH v2] drm/sun4i: dw-hdmi: Fix HDMI PHY clock setup

2021-09-15 Thread Jernej Skrabec

Recent rework, which made HDMI PHY driver a platform device, inadvertently
reversed clock setup order. HW is very touchy about it. Proper way is to
handle controllers resets and clocks first and HDMI PHYs second.

Currently, without this fix, first mode set completely fails (nothing on
HDMI monitor) on H3 era PHYs. On H6, it still somehow works.

Move HDMI PHY reset & clocks handling to sun8i_hdmi_phy_init() which
will assure that code is executed after controllers reset & clocks are
handled. Additionally, add sun8i_hdmi_phy_deinit() which will deinit
them at controllers driver unload.

Tested on A64, H3, H6 and R40.

Fixes: 9bf3797796f5 ("drm/sun4i: dw-hdmi: Make HDMI PHY into a platform device")
Signed-off-by: Jernej Skrabec 
---

Changes from v1:
- if sun8i_hdmi_phy_init() fails, go to error handling instead of returning
  immediately
- rename err_deassert_rst_phy -> err_assert_rst_phy

 drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c  |  7 +-
 drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h  |  4 +-
 drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c | 97 ++
 3 files changed, 61 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c 
b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c
index f75fb157f2ff..016b877051da 100644
--- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c
+++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c
@@ -216,11 +216,13 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct 
device *master,
goto err_disable_clk_tmds;
}
 
+   ret = sun8i_hdmi_phy_init(hdmi->phy);
+   if (ret)
+   goto err_disable_clk_tmds;
+
drm_encoder_helper_add(encoder, &sun8i_dw_hdmi_encoder_helper_funcs);
drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS);
 
-   sun8i_hdmi_phy_init(hdmi->phy);
-
plat_data->mode_valid = hdmi->quirks->mode_valid;
plat_data->use_drm_infoframe = hdmi->quirks->use_drm_infoframe;
sun8i_hdmi_phy_set_ops(hdmi->phy, plat_data);
@@ -262,6 +264,7 @@ static void sun8i_dw_hdmi_unbind(struct device *dev, struct 
device *master,
struct sun8i_dw_hdmi *hdmi = dev_get_drvdata(dev);
 
dw_hdmi_unbind(hdmi->hdmi);
+   sun8i_hdmi_phy_deinit(hdmi->phy);
clk_disable_unprepare(hdmi->clk_tmds);
reset_control_assert(hdmi->rst_ctrl);
gpiod_set_value(hdmi->ddc_en, 0);
diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h 
b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h
index 74f6ed0e2570..bffe1b9cd3dc 100644
--- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h
+++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h
@@ -169,6 +169,7 @@ struct sun8i_hdmi_phy {
struct clk  *clk_phy;
struct clk  *clk_pll0;
struct clk  *clk_pll1;
+   struct device   *dev;
unsigned intrcal;
struct regmap   *regs;
struct reset_control*rst_phy;
@@ -205,7 +206,8 @@ encoder_to_sun8i_dw_hdmi(struct drm_encoder *encoder)
 
 int sun8i_hdmi_phy_get(struct sun8i_dw_hdmi *hdmi, struct device_node *node);
 
-void sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy);
+int sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy);
+void sun8i_hdmi_phy_deinit(struct sun8i_hdmi_phy *phy);
 void sun8i_hdmi_phy_set_ops(struct sun8i_hdmi_phy *phy,
struct dw_hdmi_plat_data *plat_data);
 
diff --git a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c 
b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c
index c9239708d398..b64d93da651d 100644
--- a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c
+++ b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c
@@ -506,9 +506,60 @@ static void sun8i_hdmi_phy_init_h3(struct sun8i_hdmi_phy 
*phy)
phy->rcal = (val & SUN8I_HDMI_PHY_ANA_STS_RCAL_MASK) >> 2;
 }
 
-void sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy)
+int sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy)
 {
+   int ret;
+
+   ret = reset_control_deassert(phy->rst_phy);
+   if (ret) {
+   dev_err(phy->dev, "Cannot deassert phy reset control: %d\n", 
ret);
+   return ret;
+   }
+
+   ret = clk_prepare_enable(phy->clk_bus);
+   if (ret) {
+   dev_err(phy->dev, "Cannot enable bus clock: %d\n", ret);
+   goto err_assert_rst_phy;
+   }
+
+   ret = clk_prepare_enable(phy->clk_mod);
+   if (ret) {
+   dev_err(phy->dev, "Cannot enable mod clock: %d\n", ret);
+   goto err_disable_clk_bus;
+   }
+
+   if (phy->variant->has_phy_clk) {
+   ret = sun8i_phy_clk_create(phy, phy->dev,
+  phy->variant->has_second_pll);
+   if (ret) {
+   dev_err(phy->dev, "Couldn't create the PHY clock\n");
+   goto err_disable_clk_mod;
+   }
+
+   clk_prepare_enable(phy->clk_phy);
+   }
+
phy->variant->phy_init(phy);
+
+   return 0;
+
+err_disable_clk_mod:
+   clk_disab

Re: [PATCH v2] drm/v3d: fix wait for TMU write combiner flush

2021-09-15 Thread Melissa Wen

On 09/15, Iago Toral Quiroga wrote:
> The hardware sets the TMUWCF bit back to 0 when the TMU write
> combiner flush completes so we should be checking for that instead
> of the L2TFLS bit.
> 
> v2 (Melissa Wen):
>   - Add Signed-off-by and Fixes tags.
>   - Change the error message for the timeout to be more clear.
> 
> Fixes spurious Vulkan CTS failures in:
> dEQP-VK.binding_model.descriptorset_random.*
> 
> Fixes: d223f98f02099 ("drm/v3d: Add support for compute shader dispatch")
> Signed-off-by: Iago Toral Quiroga 
> Reviewed-by: Melissa Wen 

Applied to drm-misc-next.

Thanks,

Melissa

> ---
>  drivers/gpu/drm/v3d/v3d_gem.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index a3529809d547..1953706bdaeb 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -197,8 +197,8 @@ v3d_clean_caches(struct v3d_dev *v3d)
>  
>   V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
>   if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
> -V3D_L2TCACTL_L2TFLS), 100)) {
> - DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
> +V3D_L2TCACTL_TMUWCF), 100)) {
> + DRM_ERROR("Timeout waiting for TMU write combiner flush\n");
>   }
>  
>   mutex_lock(&v3d->cache_clean_lock);
> -- 
> 2.25.1
>

[PATCH RESEND v2 3/3] lib, stackdepot: Add helper to print stack entries into buffer.

2021-09-15 Thread Imran Khan

To print stack entries into a buffer, users of stackdepot,
first get a list of stack entries using stack_depot_fetch
and then print this list into a buffer using stack_trace_snprint.
Provide a helper in stackdepot for this purpose.
Also change above mentioned users to use this helper.

Signed-off-by: Imran Khan 
Suggested-by: Vlastimil Babka 

Acked-by: Vlastimil Babka 
---
 drivers/gpu/drm/drm_dp_mst_topology.c   |  5 +
 drivers/gpu/drm/drm_mm.c|  5 +
 drivers/gpu/drm/i915/i915_vma.c |  5 +
 drivers/gpu/drm/i915/intel_runtime_pm.c | 20 +---
 include/linux/stackdepot.h  |  3 +++
 lib/stackdepot.c| 24 
 mm/page_owner.c |  5 +
 7 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
b/drivers/gpu/drm/drm_dp_mst_topology.c
index 86d13d6bc463..2d1adab9e360 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1668,13 +1668,10 @@ __dump_topology_ref_history(struct 
drm_dp_mst_topology_ref_history *history,
for (i = 0; i < history->len; i++) {
const struct drm_dp_mst_topology_ref_entry *entry =
&history->entries[i];
-   ulong *entries;
-   uint nr_entries;
u64 ts_nsec = entry->ts_nsec;
u32 rem_nsec = do_div(ts_nsec, 10);
 
-   nr_entries = stack_depot_fetch(entry->backtrace, &entries);
-   stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 4);
+   stack_depot_snprint(entry->backtrace, buf, PAGE_SIZE, 4);
 
drm_printf(&p, "  %d %ss (last at %5llu.%06u):\n%s",
   entry->count,
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 93d48a6f04ab..ca04d7f6f7b5 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -118,8 +118,6 @@ static noinline void save_stack(struct drm_mm_node *node)
 static void show_leaks(struct drm_mm *mm)
 {
struct drm_mm_node *node;
-   unsigned long *entries;
-   unsigned int nr_entries;
char *buf;
 
buf = kmalloc(BUFSZ, GFP_KERNEL);
@@ -133,8 +131,7 @@ static void show_leaks(struct drm_mm *mm)
continue;
}
 
-   nr_entries = stack_depot_fetch(node->stack, &entries);
-   stack_trace_snprint(buf, BUFSZ, entries, nr_entries, 0);
+   stack_depot_snprint(node->stack, buf, BUFSZ, 0);
DRM_ERROR("node [%08llx + %08llx]: inserted at\n%s",
  node->start, node->size, buf);
}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4b7fc4647e46..f2d9ed375109 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -56,8 +56,6 @@ void i915_vma_free(struct i915_vma *vma)
 
 static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 {
-   unsigned long *entries;
-   unsigned int nr_entries;
char buf[512];
 
if (!vma->node.stack) {
@@ -66,8 +64,7 @@ static void vma_print_allocator(struct i915_vma *vma, const 
char *reason)
return;
}
 
-   nr_entries = stack_depot_fetch(vma->node.stack, &entries);
-   stack_trace_snprint(buf, sizeof(buf), entries, nr_entries, 0);
+   stack_depot_snprint(vma->node.stack, buf, sizeof(buf), 0);
DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: inserted at %s\n",
 vma->node.start, vma->node.size, reason, buf);
 }
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c 
b/drivers/gpu/drm/i915/intel_runtime_pm.c
index eaf7688f517d..cc312f0a05eb 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -65,16 +65,6 @@ static noinline depot_stack_handle_t __save_depot_stack(void)
return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
 }
 
-static void __print_depot_stack(depot_stack_handle_t stack,
-   char *buf, int sz, int indent)
-{
-   unsigned long *entries;
-   unsigned int nr_entries;
-
-   nr_entries = stack_depot_fetch(stack, &entries);
-   stack_trace_snprint(buf, sz, entries, nr_entries, indent);
-}
-
 static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
spin_lock_init(&rpm->debug.lock);
@@ -146,12 +136,12 @@ static void untrack_intel_runtime_pm_wakeref(struct 
intel_runtime_pm *rpm,
if (!buf)
return;
 
-   __print_depot_stack(stack, buf, PAGE_SIZE, 2);
+   stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
 
stack = READ_ONCE(rpm->debug.last_release);
if (stack) {
-   __print_depot_stack(stack, buf

[PATCH RESEND v2 2/3] lib, stackdepot: Add helper to print stack entries.

2021-09-15 Thread Imran Khan

To print stack entries, users of stackdepot, first
use stack_depot_fetch to get a list of stack entries
and then use stack_trace_print to print this list.
Provide a helper in stackdepot to print stack entries
based on stackdepot handle.
Also change above mentioned users to use this helper.

Signed-off-by: Imran Khan 
Suggested-by: Vlastimil Babka 

Acked-by: Vlastimil Babka 
Reviewed-by: Alexander Potapenko 
---
 include/linux/stackdepot.h |  2 ++
 lib/stackdepot.c   | 18 ++
 mm/kasan/report.c  | 15 +++
 mm/page_owner.c| 13 -
 4 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 6bb4bc1a5f54..d77a30543dd4 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -19,6 +19,8 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
 unsigned int stack_depot_fetch(depot_stack_handle_t handle,
   unsigned long **entries);
 
+void stack_depot_print(depot_stack_handle_t stack);
+
 unsigned int filter_irq_stacks(unsigned long *entries, unsigned int 
nr_entries);
 
 #ifdef CONFIG_STACKDEPOT
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 67439c082490..354fe1b62017 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -214,6 +214,24 @@ static inline struct stack_record *find_stack(struct 
stack_record *bucket,
return NULL;
 }
 
+/**
+ * stack_depot_print - print stack entries from a depot
+ *
+ * @stack: Stack depot handle which was returned from
+ * stack_depot_save().
+ *
+ */
+void stack_depot_print(depot_stack_handle_t stack)
+{
+   unsigned long *entries;
+   unsigned int nr_entries;
+
+   nr_entries = stack_depot_fetch(stack, &entries);
+   if (nr_entries > 0)
+   stack_trace_print(entries, nr_entries, 0);
+}
+EXPORT_SYMBOL_GPL(stack_depot_print);
+
 /**
  * stack_depot_fetch - Fetch stack entries from a depot
  *
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 884a950c7026..3239fd8f8747 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -132,20 +132,11 @@ static void end_report(unsigned long *flags, unsigned 
long addr)
kasan_enable_current();
 }
 
-static void print_stack(depot_stack_handle_t stack)
-{
-   unsigned long *entries;
-   unsigned int nr_entries;
-
-   nr_entries = stack_depot_fetch(stack, &entries);
-   stack_trace_print(entries, nr_entries, 0);
-}
-
 static void print_track(struct kasan_track *track, const char *prefix)
 {
pr_err("%s by task %u:\n", prefix, track->pid);
if (track->stack) {
-   print_stack(track->stack);
+   stack_depot_print(track->stack);
} else {
pr_err("(stack is not available)\n");
}
@@ -214,12 +205,12 @@ static void describe_object_stacks(struct kmem_cache 
*cache, void *object,
return;
if (alloc_meta->aux_stack[0]) {
pr_err("Last potentially related work creation:\n");
-   print_stack(alloc_meta->aux_stack[0]);
+   stack_depot_print(alloc_meta->aux_stack[0]);
pr_err("\n");
}
if (alloc_meta->aux_stack[1]) {
pr_err("Second to last potentially related work creation:\n");
-   print_stack(alloc_meta->aux_stack[1]);
+   stack_depot_print(alloc_meta->aux_stack[1]);
pr_err("\n");
}
 #endif
diff --git a/mm/page_owner.c b/mm/page_owner.c
index d24ed221357c..7918770c2b2b 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -394,8 +394,6 @@ void __dump_page_owner(const struct page *page)
struct page_ext *page_ext = lookup_page_ext(page);
struct page_owner *page_owner;
depot_stack_handle_t handle;
-   unsigned long *entries;
-   unsigned int nr_entries;
gfp_t gfp_mask;
int mt;
 
@@ -423,20 +421,17 @@ void __dump_page_owner(const struct page *page)
 page_owner->pid, page_owner->ts_nsec, 
page_owner->free_ts_nsec);
 
handle = READ_ONCE(page_owner->handle);
-   if (!handle) {
+   if (!handle)
pr_alert("page_owner allocation stack trace missing\n");
-   } else {
-   nr_entries = stack_depot_fetch(handle, &entries);
-   stack_trace_print(entries, nr_entries, 0);
-   }
+   else
+   stack_depot_print(handle);
 
handle = READ_ONCE(page_owner->free_handle);
if (!handle) {
pr_alert("page_owner free stack trace missing\n");
} else {
-   nr_entries = stack_depot_fetch(handle, &entries);
pr_alert("page last free stack trace:\n");
-   stack_trace_print(entries, nr_entries, 0);
+   stack_depot_print(handle);
}
 
if (page_owner->last_migrate_reason != -1)
-- 
2.30.2

[PATCH RESEND v2 1/3] lib, stackdepot: check stackdepot handle before accessing slabs.

2021-09-15 Thread Imran Khan

stack_depot_save allocates slabs that will be used for storing
objects in future. If this slab allocation fails we may get to
a situation where space allocation for a new stack_record fails,
causing stack_depot_save to return 0 as handle.
If user of this handle ends up invoking stack_depot_fetch with
this handle value, current implementation of stack_depot_fetch
will end up using slab from wrong index.
To avoid this check handle value at the beginning.

Signed-off-by: Imran Khan 
Suggested-by: Vlastimil Babka 

Acked-by: Vlastimil Babka 
---
 lib/stackdepot.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 0a2e417f83cb..67439c082490 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -232,6 +232,9 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
struct stack_record *stack;
 
*entries = NULL;
+   if (!handle)
+   return 0;
+
if (parts.slabindex > depot_index) {
WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
parts.slabindex, depot_index, handle);
-- 
2.30.2

[PATCH RESEND v2 0/3] lib, stackdepot: check stackdepot handle before accessing slabs

2021-09-15 Thread Imran Khan

Changes in v2:
 - Fixed compilation error [1] due to typo in patch-3 (stack_depot_print
   used in place of stack_depot_snprint)
   This compilation error appears with CONFIG_DRM_I915_DEBUG_RUNTIME_PM=y
   and this was missed by my test config (x86_64_defconfig)

[1] https://patchwork.freedesktop.org/series/94696/

Original cover letter
--
This patch series consolidates the changes submitted and reviewed at [1]
and [2].
The patches at [1] and [2] were submitted separately, but they have some
interdependency (later patches were created on top of earlier ones).
As both sets are still under review, I have put them in a single
change set here, so that it can be reviewed/included together and also
to avoid automation build failures where git am fails because of absent
parent.

I have included Acked-by (from Vlastimil) and Reviewed-by (from Alexander)
tags obtained so far for these changes and have also addressed last review
comment from Vlastimil [3].

To summarize, the changes in this set are as follows:

PATCH-1: Checks validity of a stackdepot handle before proceeding
to access stackdepot slab/objects.

PATCH-2: Adds a helper in stackdepot, to allow users to print
stack entries just by specifying the stackdepot handle. It also
changes such users to use this new interface.   

PATCH-3: Adds a helper in stackdepot, to allow users to print
stack entries into buffers just by specifying the stackdepot handle and
destination buffer. It also changes such users to use this new interface.

[1] 
https://lore.kernel.org/lkml/20210902000154.1096484-1-imran.f.k...@oracle.com/
[2] 
https://lore.kernel.org/lkml/20210910141001.1622130-1-imran.f.k...@oracle.com/
[3] https://lore.kernel.org/lkml/ef0aa660-0cb6-dc21-f2ce-368b34f8a...@suse.cz/

Imran Khan (3):
  lib, stackdepot: check stackdepot handle before accessing slabs.
  lib, stackdepot: Add helper to print stack entries.
  lib, stackdepot: Add helper to print stack entries into buffer.

 drivers/gpu/drm/drm_dp_mst_topology.c   |  5 +--
 drivers/gpu/drm/drm_mm.c|  5 +--
 drivers/gpu/drm/i915/i915_vma.c |  5 +--
 drivers/gpu/drm/i915/intel_runtime_pm.c | 20 +++
 include/linux/stackdepot.h  |  5 +++
 lib/stackdepot.c| 45 +
 mm/kasan/report.c   | 15 ++---
 mm/page_owner.c | 18 +++---
 8 files changed, 66 insertions(+), 52 deletions(-)

-- 
2.30.2

Re: [PATCH v3 0/8] Implement generic cc_platform_has() helper function

2021-09-15 Thread Kuppuswamy, Sathyanarayanan





On 9/15/21 9:46 AM, Borislav Petkov wrote:

Sathya,

if you want to prepare the Intel variant intel_cc_platform_has() ontop
of those and send it to me, that would be good because then I can
integrate it all in one branch which can be used to base future work
ontop.


I have an Intel variant patch (please check the following patch). But it includes
TDX changes as well. Shall I move TDX changes to different patch and just
create a separate patch for adding intel_cc_platform_has()?


commit fc5f98a0ed94629d903827c5b44ee9295f835831
Author: Kuppuswamy Sathyanarayanan 
Date:   Wed May 12 11:35:13 2021 -0700

x86/tdx: Add confidential guest support for TDX guest

TDX architecture provides a way for VM guests to be highly secure and
isolated (from untrusted VMM). To achieve this requirement, any data
coming from VMM cannot be completely trusted. TDX guest fixes this
issue by hardening the IO drivers against the attack from the VMM.
So, when adding hardening fixes to the generic drivers, to protect
custom fixes use cc_platform_has() API.

Also add TDX guest support to cc_platform_has() API to protect the
TDX specific fixes.

Signed-off-by: Kuppuswamy Sathyanarayanan 


diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a5b14de03458..2e78358923a1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -871,6 +871,7 @@ config INTEL_TDX_GUEST
depends on SECURITY
select X86_X2APIC
select SECURITY_LOCKDOWN_LSM
+   select ARCH_HAS_CC_PLATFORM
help
  Provide support for running in a trusted domain on Intel processors
  equipped with Trusted Domain eXtensions. TDX is a new Intel
diff --git a/arch/x86/include/asm/intel_cc_platform.h 
b/arch/x86/include/asm/intel_cc_platform.h
new file mode 100644
index ..472c3174beac
--- /dev/null
+++ b/arch/x86/include/asm/intel_cc_platform.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021 Intel Corporation */
+#ifndef _ASM_X86_INTEL_CC_PLATFORM_H
+#define _ASM_X86_INTEL_CC_PLATFORM_H
+
+#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_ARCH_HAS_CC_PLATFORM)
+bool intel_cc_platform_has(unsigned int flag);
+#else
+static inline bool intel_cc_platform_has(unsigned int flag) { return false; }
+#endif
+
+#endif /* _ASM_X86_INTEL_CC_PLATFORM_H */
+
diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/kernel/cc_platform.c
index 3c9bacd3c3f3..e83bc2f48efe 100644
--- a/arch/x86/kernel/cc_platform.c
+++ b/arch/x86/kernel/cc_platform.c
@@ -10,11 +10,16 @@
 #include 
 #include 
 #include 
+#include 
+
+#include 

 bool cc_platform_has(enum cc_attr attr)
 {
if (sme_me_mask)
return amd_cc_platform_has(attr);
+   else if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+   return intel_cc_platform_has(attr);

return false;
 }
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 8321c43554a1..ab486a3b1eb0 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 

 #include 
 #include 
@@ -60,6 +61,21 @@ static u64 msr_test_ctrl_cache __ro_after_init;
  */
 static bool cpu_model_supports_sld __ro_after_init;

+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+bool intel_cc_platform_has(enum cc_attr attr)
+{
+   switch (attr) {
+   case CC_ATTR_GUEST_TDX:
+   return cpu_feature_enabled(X86_FEATURE_TDX_GUEST);
+   default:
+   return false;
+   }
+
+   return false;
+}
+EXPORT_SYMBOL_GPL(intel_cc_platform_has);
+#endif
+
 /*
  * Processors which have self-snooping capability can handle conflicting
  * memory type across CPUs by snooping its own cache. However, there exists
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index 253f3ea66cd8..e38430e6e396 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -61,6 +61,15 @@ enum cc_attr {
 * Examples include SEV-ES.
 */
CC_ATTR_GUEST_STATE_ENCRYPT,
+
+   /**
+* @CC_ATTR_GUEST_TDX: Trusted Domain Extension Support
+*
+* The platform/OS is running as a TDX guest/virtual machine.
+*
+* Examples include Intel TDX guests.
+*/
+   CC_ATTR_GUEST_TDX,
 };

 #ifdef CONFIG_ARCH_HAS_CC_PLATFORM


--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

Re: [PATCH v3 4/8] powerpc/pseries/svm: Add a powerpc version of cc_platform_has()

2021-09-15 Thread Christophe Leroy





Le 15/09/2021 à 12:08, Borislav Petkov a écrit :

On Wed, Sep 15, 2021 at 10:28:59AM +1000, Michael Ellerman wrote:

I don't love it, a new C file and an out-of-line call to then call back
to a static inline that for most configuration will return false ... but
whatever :)


Yeah, hch thinks it'll cause a big mess otherwise:

https://lore.kernel.org/lkml/ysscwvpxevxw%2f...@infradead.org/


Could you please provide a more explicit explanation of why inlining such a 
helper is considered bad practice and messy?


Because, as demonstrated in my previous response some days ago, taking 
that out of line ends up with unnecessarily ugly generated code and we 
don't benefit from GCC's capability to fold in and optimize out unreachable 
code.


As pointed out by Michael, in most cases the function will just return false, 
so beyond the performance concern, there is also the code size and code 
coverage topic that is to be taken into account. And even when the 
function doesn't return false, the only thing it does folds into a 
single powerpc instruction, so there is really no point in making a 
dedicated out-of-line function for that and suffering the cost and the size 
of a function call, and nothing to justify the addition of a dedicated C file.





I guess less ifdeffery is nice too.


I can't see your point here. Inlining the function wouldn't add any 
ifdeffery as far as I can see.


So, would you mind reconsidering your approach and allowing architectures 
to provide an inline implementation by just not enforcing a generic 
prototype? Or otherwise provide more details and examples of why the 
cons outweigh the pros?


Thanks
Christophe

Re: [Intel-gfx] [PATCH 08/27] drm/i915: Add logical engine mapping

2021-09-15 Thread Matthew Brost

On Wed, Sep 15, 2021 at 09:24:15AM +0100, Tvrtko Ursulin wrote:
> 
> On 14/09/2021 19:04, Matthew Brost wrote:
> > On Tue, Sep 14, 2021 at 09:34:08AM +0100, Tvrtko Ursulin wrote:
> > > 
> 
> 8<
> 
> > > Today we have:
> > > 
> > > for_each intel_engines: // intel_engines is a flat list of all engines
> > >   intel_engine_setup()
> > > 
> > > You propose to change it to:
> > > 
> > > for_each engine_class:
> > > for 0..max_global_engine_instance:
> > >for_each intel_engines:
> > >   skip engine not present
> > >   skip class not matching
> > > 
> > >   count logical instance
> > > 
> > > for_each intel_engines:
> > >skip engine not present
> > >skip wrong class
> > > 
> > >intel_engine_setup()
> > > 
> > > 
> > > I propose:
> > > 
> > > // Leave as is:
> > > 
> > > for_each intel_engines:
> > > intel_engine_setup()
> > > 
> > > // Add:
> > > 
> > > for_each engine_class:
> > > logical = 0
> > > for_each gt->engine_class[class]:
> > >skip engine not present
> > > 
> > >engine->logical_instance = logical++
> > > 
> > > 
> > > When code which actually needs a perturbed "map" arrives you add that in 
> > > to
> > > this second loop.
> > > 
> > 
> > See above, why introduce an algorithm that doesn't work for future parts
> > + future patches that are landing imminently? It makes zero sense whatsoever.
> > With your proposal we would literally land code to just throw it away a
> > couple of months from now + break patches we intend to land soon. This
> 
> It sure works, it just walks the per class list instead of walking the flat
> list skipping one class at the time.
> 
> Just add the map based transformation to the second pass later, when it
> becomes required.
> 

I can flatten the algorithm if that helps alleviate your concerns but
with that being said, I've played around with this locally and IMO it makes the
code way more ugly. Sure it eliminates some iterations of the loop but
who really cares about that in a one time setup function?

> > algorithm works and has no reason whatsoever to be optimal as it is a one
> > time setup call. I really don't understand why we are still talking
> > about this paint color.
> 
> I don't think bike shedding is an appropriate term when the complaint is how
> the proposed algorithm is needlessly complicated.
>

Are you just ignoring the fact that the algorithm (map) is needed in
pending patches? IMO it is more complicated to write throw away code
when the proper algorithm is already written. If the logical mapping was
straight forward on all platforms as the ones currently upstream I would
100% agree with your suggestion, but it isn't on unembargoed platforms
imminently going upstream. The algorithm I have works for the current
platforms + the pending platforms. IMO it is 100% acceptable to merge
something looking towards a known future.

Matt

> Regards,
> 
> Tvrtko

Re: [PATCH v3 0/8] Implement generic cc_platform_has() helper function

2021-09-15 Thread Borislav Petkov

On Wed, Sep 08, 2021 at 05:58:31PM -0500, Tom Lendacky wrote:
> This patch series provides a generic helper function, cc_platform_has(),
> to replace the sme_active(), sev_active(), sev_es_active() and
> mem_encrypt_active() functions.
> 
> It is expected that as new confidential computing technologies are
> added to the kernel, they can all be covered by a single function call
> instead of a collection of specific function calls all called from the
> same locations.
> 
> The powerpc and s390 patches have been compile tested only. Can the
> folks copied on this series verify that nothing breaks for them. Also,
> a new file, arch/powerpc/platforms/pseries/cc_platform.c, has been
> created for powerpc to hold the out of line function.

...

> 
> Tom Lendacky (8):
>   x86/ioremap: Selectively build arch override encryption functions
>   mm: Introduce a function to check for confidential computing features
>   x86/sev: Add an x86 version of cc_platform_has()
>   powerpc/pseries/svm: Add a powerpc version of cc_platform_has()
>   x86/sme: Replace occurrences of sme_active() with cc_platform_has()
>   x86/sev: Replace occurrences of sev_active() with cc_platform_has()
>   x86/sev: Replace occurrences of sev_es_active() with cc_platform_has()
>   treewide: Replace the use of mem_encrypt_active() with
> cc_platform_has()

Ok, modulo the minor things the plan is to take this through tip after
-rc2 releases in order to pick up the powerpc build fix and have a clean
base (-rc2) to base stuff on, at the same time.

Pls holler if something's still amiss.

Sathya,

if you want to prepare the Intel variant intel_cc_platform_has() ontop
of those and send it to me, that would be good because then I can
integrate it all in one branch which can be used to base future work
ontop.

Thx.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

Re: [PATCH v3 2/3] drm/bridge: parade-ps8640: Use regmap APIs

2021-09-15 Thread Doug Anderson

Hi,

On Tue, Sep 14, 2021 at 7:50 PM Stephen Boyd  wrote:
>
> Quoting Doug Anderson (2021-09-14 19:17:03)
> > Hi,
> >
> > On Tue, Sep 14, 2021 at 5:29 PM Stephen Boyd  wrote:
> > >
> > > Quoting Philip Chen (2021-09-14 16:28:44)
> > > > diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c 
> > > > b/drivers/gpu/drm/bridge/parade-ps8640.c
> > > > index e340af381e05..8d3e7a147170 100644
> > > > --- a/drivers/gpu/drm/bridge/parade-ps8640.c
> > > > +++ b/drivers/gpu/drm/bridge/parade-ps8640.c
> > > > @@ -368,6 +396,12 @@ static int ps8640_probe(struct i2c_client *client)
> > > >
> > > > ps_bridge->page[PAGE0_DP_CNTL] = client;
> > > >
> > > > +   ps_bridge->regmap[PAGE0_DP_CNTL] = devm_regmap_init_i2c(client, 
> > > > ps8640_regmap_config);
> > > > +   if (IS_ERR(ps_bridge->regmap[PAGE0_DP_CNTL])) {
> > > > +   return dev_err_probe(dev, 
> > > > PTR_ERR(ps_bridge->regmap[PAGE0_DP_CNTL]),
> > > > +"Error initting page 0 regmap\n");
> > >
> > > This one also doesn't return -EPROBE_DEFER? The dev_err_probe() should
> > > really only be used on "get" style APIs that can defer.
> >
> > Any reason why you say that dev_err_probe() should only be used on
> > "get" style APIs that can defer? Even if an API can't return
> > -EPROBE_DEFER, using dev_err_probe() still (IMO) makes the code
> > cleaner and should be used for any error cases like this during probe.
> > Why?
> >
> > * It shows the error code in a standard way for you.
> > * It returns the error code you passed it so you can make your error
> > return "one line" instead of 2.
>
> I'd rather see any sort of error message in getter APIs be pushed into
> the callee so that we reduce the text size of the kernel by having one
> message instead of hundreds/thousands about "failure to get something".
> As far as I can tell this API is designed to skip printing anything when
> EPROBE_DEFER is returned, and only print something when it isn't that
> particular error code. The other benefit of this API is it sets the
> deferred reason in debugfs which is nice to know why some device failed
> to probe. Of course now with fw_devlink that almost never triggers so
> the feature is becoming useless.

I guess we need to split this apart into two issues. One (1) is
whether we should be printing errors like this in probe() and the
other (2) is the use of dev_err_probe() for cases where err could
never be -EPROBE_DEFER.

So the argument about reducing the text size for thousands of slightly
different errors is all about (1), right? In other words, you'd be
equally opposed to a change that added a normal error print with
dev_err(), right? IMO, this is a fair debate to have and it comes down
to a choice that has pros and cons. Yes the error messages are not
needed in the normal case and yes they bloat the kernel size, but when
something inevitably goes wrong then you have a way to track it down
instead of trying to guess or having to recompile the code to add
prints everywhere. Often this can give you a quick clue about a
missing Kconfig or a wrongly coded device tree file without tons of
time adding prints and recompiling code. That seems like it's worth
something...

One could also make the argument that if you don't care about all
these similar errors bloating the text segment that it would be pretty
easy to create a new Kconfig: "CONFIG_I_THINK_PROBE_ERRORS_ARE_BLOAT".
If that config is set then it could throw away the strings for every
dev_err_probe() that you compile in.

I'm not so convinced about the argument (2) that dev_err_probe()
should only be used if the error code could be -EPROBE_DEFER. Compare
these two:

Old:
  ret = do_something_that_cant_defer();
  if (ret < 0) {
dev_err(dev, "The foo failed to bar (%pe)\n", ERR_PTR(ret));
return ret;
  }

New:
  ret = do_something_that_cant_defer();
  if (ret < 0)
return dev_err_probe(dev, ret, "The foo failed to bar\n");

It seems clear to me that the "New" case is better. The error code is
printed in a consistent fashion compared to all other error prints and
the fact that it returns the error code makes it cleaner. It's fine
that the error could never be -EPROBE_DEFER. Certainly we could add a
new function called dev_err_with_code() that worked exactly like
dev_err_probe() except that it didn't have special logic for
-EPROBE_DEFER but why?

Also note that the current function is dev_err_probe(), not
dev_err_might_defer(). By the name, it should be useful / OK to use
for any errors that come up in the probe path.

> > Is there some bad thing about dev_err_probe() that makes it
> > problematic to use? If not then the above advantages should be a net
> > win, right?
> >
>
> I view it as an anti-pattern. We should strive for driver probe to be
> fairly simple so that it's basically getting resources and registering
> with frameworks. The error messages in probe may help when you're trying
> to get the driver to work and the resource APIs don't make any

[PATCH v8 16/16] nouveau: fold multiple DRM_DEBUG_DRIVERs together

2021-09-15 Thread Jim Cromie

With DRM_USE_DYNAMIC_DEBUG, each callsite record requires 56 bytes.
We can combine 12 into one here and save ~620 bytes.

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/nouveau/nouveau_drm.c | 36 +--
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c 
b/drivers/gpu/drm/nouveau/nouveau_drm.c
index ba4cd5f83725..0f45399535bf 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -1245,19 +1245,29 @@ nouveau_drm_pci_table[] = {
 
 static void nouveau_display_options(void)
 {
-   DRM_DEBUG_DRIVER("Loading Nouveau with parameters:\n");
-
-   DRM_DEBUG_DRIVER("... tv_disable   : %d\n", nouveau_tv_disable);
-   DRM_DEBUG_DRIVER("... ignorelid: %d\n", nouveau_ignorelid);
-   DRM_DEBUG_DRIVER("... duallink : %d\n", nouveau_duallink);
-   DRM_DEBUG_DRIVER("... nofbaccel: %d\n", nouveau_nofbaccel);
-   DRM_DEBUG_DRIVER("... config   : %s\n", nouveau_config);
-   DRM_DEBUG_DRIVER("... debug: %s\n", nouveau_debug);
-   DRM_DEBUG_DRIVER("... noaccel  : %d\n", nouveau_noaccel);
-   DRM_DEBUG_DRIVER("... modeset  : %d\n", nouveau_modeset);
-   DRM_DEBUG_DRIVER("... runpm: %d\n", nouveau_runtime_pm);
-   DRM_DEBUG_DRIVER("... vram_pushbuf : %d\n", nouveau_vram_pushbuf);
-   DRM_DEBUG_DRIVER("... hdmimhz  : %d\n", nouveau_hdmimhz);
+   DRM_DEBUG_DRIVER("Loading Nouveau with parameters:\n"
+"... tv_disable   : %d\n"
+"... ignorelid: %d\n"
+"... duallink : %d\n"
+"... nofbaccel: %d\n"
+"... config   : %s\n"
+"... debug: %s\n"
+"... noaccel  : %d\n"
+"... modeset  : %d\n"
+"... runpm: %d\n"
+"... vram_pushbuf : %d\n"
+"... hdmimhz  : %d\n"
+, nouveau_tv_disable
+, nouveau_ignorelid
+, nouveau_duallink
+, nouveau_nofbaccel
+, nouveau_config
+, nouveau_debug
+, nouveau_noaccel
+, nouveau_modeset
+, nouveau_runtime_pm
+, nouveau_vram_pushbuf
+, nouveau_hdmimhz);
 }
 
 static const struct dev_pm_ops nouveau_pm_ops = {
-- 
2.31.1

[PATCH v8 15/16] amdgpu_ucode: reduce number of pr_debug calls

2021-09-15 Thread Jim Cromie

There are blocks of DRM_DEBUG calls, consolidate their args into
single calls.  With dynamic-debug in use, each callsite consumes 56
bytes of callsite data, and this patch removes about 65 calls, so
it saves ~3.5kb.

no functional changes.

RFC: this creates multi-line log messages, does that break any syslog
conventions ?

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 293 --
 1 file changed, 158 insertions(+), 135 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 2834981f8c08..14a9fef1f4c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -30,17 +30,26 @@
 
 static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header 
*hdr)
 {
-   DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes));
-   DRM_DEBUG("header_size_bytes: %u\n", 
le32_to_cpu(hdr->header_size_bytes));
-   DRM_DEBUG("header_version_major: %u\n", 
le16_to_cpu(hdr->header_version_major));
-   DRM_DEBUG("header_version_minor: %u\n", 
le16_to_cpu(hdr->header_version_minor));
-   DRM_DEBUG("ip_version_major: %u\n", le16_to_cpu(hdr->ip_version_major));
-   DRM_DEBUG("ip_version_minor: %u\n", le16_to_cpu(hdr->ip_version_minor));
-   DRM_DEBUG("ucode_version: 0x%08x\n", le32_to_cpu(hdr->ucode_version));
-   DRM_DEBUG("ucode_size_bytes: %u\n", le32_to_cpu(hdr->ucode_size_bytes));
-   DRM_DEBUG("ucode_array_offset_bytes: %u\n",
- le32_to_cpu(hdr->ucode_array_offset_bytes));
-   DRM_DEBUG("crc32: 0x%08x\n", le32_to_cpu(hdr->crc32));
+   DRM_DEBUG("size_bytes: %u\n"
+ "header_size_bytes: %u\n"
+ "header_version_major: %u\n"
+ "header_version_minor: %u\n"
+ "ip_version_major: %u\n"
+ "ip_version_minor: %u\n"
+ "ucode_version: 0x%08x\n"
+ "ucode_size_bytes: %u\n"
+ "ucode_array_offset_bytes: %u\n"
+ "crc32: 0x%08x\n",
+ le32_to_cpu(hdr->size_bytes),
+ le32_to_cpu(hdr->header_size_bytes),
+ le16_to_cpu(hdr->header_version_major),
+ le16_to_cpu(hdr->header_version_minor),
+ le16_to_cpu(hdr->ip_version_major),
+ le16_to_cpu(hdr->ip_version_minor),
+ le32_to_cpu(hdr->ucode_version),
+ le32_to_cpu(hdr->ucode_size_bytes),
+ le32_to_cpu(hdr->ucode_array_offset_bytes),
+ le32_to_cpu(hdr->crc32));
 }
 
 void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr)
@@ -55,9 +64,9 @@ void amdgpu_ucode_print_mc_hdr(const struct 
common_firmware_header *hdr)
const struct mc_firmware_header_v1_0 *mc_hdr =
container_of(hdr, struct mc_firmware_header_v1_0, 
header);
 
-   DRM_DEBUG("io_debug_size_bytes: %u\n",
- le32_to_cpu(mc_hdr->io_debug_size_bytes));
-   DRM_DEBUG("io_debug_array_offset_bytes: %u\n",
+   DRM_DEBUG("io_debug_size_bytes: %u\n"
+ "io_debug_array_offset_bytes: %u\n",
+ le32_to_cpu(mc_hdr->io_debug_size_bytes),
  le32_to_cpu(mc_hdr->io_debug_array_offset_bytes));
} else {
DRM_ERROR("Unknown MC ucode version: %u.%u\n", version_major, 
version_minor);
@@ -82,13 +91,17 @@ void amdgpu_ucode_print_smc_hdr(const struct 
common_firmware_header *hdr)
switch (version_minor) {
case 0:
v2_0_hdr = container_of(hdr, struct 
smc_firmware_header_v2_0, v1_0.header);
-   DRM_DEBUG("ppt_offset_bytes: %u\n", 
le32_to_cpu(v2_0_hdr->ppt_offset_bytes));
-   DRM_DEBUG("ppt_size_bytes: %u\n", 
le32_to_cpu(v2_0_hdr->ppt_size_bytes));
+   DRM_DEBUG("ppt_offset_bytes: %u\n"
+ "ppt_size_bytes: %u\n",
+ le32_to_cpu(v2_0_hdr->ppt_offset_bytes),
+ le32_to_cpu(v2_0_hdr->ppt_size_bytes));
break;
case 1:
v2_1_hdr = container_of(hdr, struct 
smc_firmware_header_v2_1, v1_0.header);
-   DRM_DEBUG("pptable_count: %u\n", 
le32_to_cpu(v2_1_hdr->pptable_count));
-   DRM_DEBUG("pptable_entry_offset: %u\n", 
le32_to_cpu(v2_1_hdr->pptable_entry_offset));
+   DRM_DEBUG("pptable_count: %u\n"
+ "pptable_entry_offset: %u\n",
+ le32_to_cpu(v2_1_hdr->pptable_count),
+ le32_to_cpu(v2_1_hdr->pptable_entry_offset));
break;
default:
break;
@@ -111,10 +124,12 @@ void am

[PATCH v8 14/16] drm_print: instrument drm_debug_enabled

2021-09-15 Thread Jim Cromie

Duplicate drm_debug_enabled() code into both "basic" and "dyndbg"
ifdef branches.  Then add a pr_debug("todo: ...") into the "dyndbg"
branch.

Then convert the "dyndbg" branch's code to a macro, so that the
pr_debug() gets its callsite info from the invoking function, instead
of from drm_debug_enabled() itself.

This gives us unique callsite info for the 8 remaining users of
drm_debug_enabled(), and lets us enable them individually to see how
much logging traffic they generate.  The oft-visited callsites can
then be reviewed for runtime cost and possible optimizations.

Here's what we get:

bash-5.1# modprobe drm
dyndbg: 384 debug prints in module drm
bash-5.1# grep todo: /proc/dynamic_debug/control
drivers/gpu/drm/drm_edid.c:1843 [drm]connector_bad_edid =_ "todo: maybe avoid 
via dyndbg\012"
drivers/gpu/drm/drm_print.c:309 [drm]___drm_dbg =p "todo: maybe avoid via 
dyndbg\012"
drivers/gpu/drm/drm_print.c:286 [drm]__drm_dev_dbg =p "todo: maybe avoid via 
dyndbg\012"
drivers/gpu/drm/drm_vblank.c:1491 [drm]drm_vblank_restore =_ "todo: maybe avoid 
via dyndbg\012"
drivers/gpu/drm/drm_vblank.c:787 
[drm]drm_crtc_vblank_helper_get_vblank_timestamp_internal =_ "todo: maybe avoid 
via dyndbg\012"
drivers/gpu/drm/drm_vblank.c:410 [drm]drm_crtc_accurate_vblank_count =_ "todo: 
maybe avoid via dyndbg\012"
drivers/gpu/drm/drm_atomic_uapi.c:1457 [drm]drm_mode_atomic_ioctl =_ "todo: 
maybe avoid via dyndbg\012"
drivers/gpu/drm/drm_edid_load.c:178 [drm]edid_load =_ "todo: maybe avoid via 
dyndbg\012"

At quick glance, edid won't qualify, drm_print might, drm_vblank is
strongest chance, maybe atomic-ioctl too.

Signed-off-by: Jim Cromie 
---
---
 include/drm/drm_print.h | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 4a38591a424b..c9cabc8db672 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -381,6 +381,11 @@ enum drm_debug_category {
 #define DRM_DBG_CAT_DP DRM_UT_DP
 #define DRM_DBG_CAT_DRMRES DRM_UT_DRMRES
 
+static inline bool drm_debug_enabled(enum drm_debug_category category)
+{
+   return unlikely(__drm_debug & category);
+}
+
 #else /* CONFIG_DRM_USE_DYNAMIC_DEBUG */
 
 /* join prefix + format in cpp so dyndbg can see it */
@@ -414,12 +419,13 @@ enum drm_debug_category {
 #define DRM_DBG_CAT_DP "drm:dp:"
 #define DRM_DBG_CAT_DRMRES "drm:res:"
 
-#endif /* CONFIG_DRM_USE_DYNAMIC_DEBUG */
+#define drm_debug_enabled(category)\
+   ({  \
+   pr_debug("todo: maybe avoid via dyndbg\n"); \
+   unlikely(__drm_debug & (category)); \
+   })
 
-static inline bool drm_debug_enabled(enum drm_debug_category category)
-{
-   return unlikely(__drm_debug & category);
-}
+#endif /* CONFIG_DRM_USE_DYNAMIC_DEBUG */
 
 /*
  * struct device based logging
@@ -569,7 +575,6 @@ void __drm_dev_dbg(const struct device *dev, enum 
drm_debug_category category,
 #define drm_dbg_drmres(drm, fmt, ...)  \
drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_DBG_CAT_DRMRES, fmt, 
##__VA_ARGS__)
 
-
 /*
  * printk based logging
  *
-- 
2.31.1

[PATCH v8 13/16] drm_print: add choice to use dynamic debug in drm-debug

2021-09-15 Thread Jim Cromie

drm's debug system writes 10 distinct categories of messages to syslog
using a small API[1]: drm_dbg*(10 names), DRM_DEV_DEBUG*(3 names),
DRM_DEBUG*(8 names).  There are thousands of these callsites, each
categorized in this systematized way.

These callsites can be enabled at runtime by their category, each
controlled by a bit in drm.debug (/sys/module/drm/parameters/debug).
In the current "basic" implementation, drm_debug_enabled() tests these
bits in __drm_debug each time an API[1] call is executed; while cheap
individually, the costs accumulate with uptime.

This patch uses dynamic-debug with (required) jump-label to patch
enabled calls onto their respective NOOP slots, avoiding all runtime
bit-checks of __drm_debug by drm_debug_enabled().

Dynamic debug has no concept of category, but we can emulate one by
replacing enum categories with a set of prefix-strings; "drm:core:",
"drm:kms:" "drm:driver:" etc, and prepend them (at compile time) to
the given formats.

Then we can use:
  `echo module drm format "^drm:core: " +p > control`

to enable the whole category with one query.

This conversion yields many new prdbg callsites:

  dyndbg: 207 debug prints in module drm_kms_helper
  dyndbg: 376 debug prints in module drm
  dyndbg: 1811 debug prints in module i915
  dyndbg: 3917 debug prints in module amdgpu

Each site costs 56 bytes of .data, which is a big increase for
drm modules, so CONFIG_DRM_USE_DYNAMIC_DEBUG makes it optional.

CONFIG_JUMP_LABEL is also required, to get the promised optimizations.

The "basic" -> "dyndbg" switchover is layered into the macro scheme:

A. A "prefix" version of DRM_UT_ map, named DRM_DBG_CAT_

"basic":  DRM_DBG_CAT_  <===  DRM_UT_.  Identity map.
"dyndbg":
   #define DRM_DBG_CAT_KMS"drm:kms: "
   #define DRM_DBG_CAT_PRIME  "drm:prime: "
   #define DRM_DBG_CAT_ATOMIC "drm:atomic: "

In v3, the older name, DRM_DBG_CLASS_, was countered.  I had agreed,
but DRM_DBG_CAT_ seems better still; CATEGORY is already DRM's
term-of-art, and adding a near-synonym 'CLASS' only adds ambiguity.

DRM_UT_* are preserved, since they're used elsewhere.  Since the
callback maintains its state in __drm_debug, drm_debug_enabled() will
stay synchronized, and continue to work.  We can address them
separately if they are called enough to be worth fixing.

B. drm_dev_dbg() & drm_debug() are interposed with macros

basic:forward to renamed fn, with args preserved
enabled:  redirect to pr_debug, dev_dbg, with CATEGORY format catenated

This is where drm_debug_enabled() is avoided.  The prefix is prepended
at compile-time, no category at runtime.

C. API[1] uses DRM_DBG_CAT_s

These already use (B), now they use (A) too instead of DRM_UT_,
to get the correct token type for "basic" and "dyndbg" configs.

D. use DEFINE_DYNAMIC_DEBUG_CATEGORIES()

This defines the map using DRM_CAT_s, and creates the /sysfs
bitmap to control those categories.

CONFIG_DRM_USE_DYNAMIC_DEBUG is also used to adjust amdgpu, i915
makefiles to add -DDYNAMIC_DEBUG_MODULE; it includes the current
CONFIG_DYNAMIC_DEBUG_CORE and is enabled by the user.

NOTES:

Because the dyndbg callback is keeping state in __drm_debug, it
synchronizes with drm_debug_enabled() and its remaining users; the
switchover should be transparent.

Code Review is expected to catch the lack of correspondence between
bit=>prefix definitions (the selector) and the prefixes used in the
API[1] layer above pr_debug()

I've coded the categories using the _DD_cat_() macro, which adds the
^anchor and trailing space.  This excludes any sub-categories added
later.  This convention protects any "drm:atomic:fail:" callsites from
getting stomped on by `echo 0 > debug`.  Other categories could
differ, but we need some default.

Dyndbg requires that the prefix be in the compiled-in format string;
run-time prefixing evades callsite selection by category.

pr_debug("%s: ...", __func__, ...) // not ideal

Unfortunately __func__ is not a macro, and cannot be catenated at
preprocess/compile time.

If you want that, you might consider +mfl flags instead.

Signed-off-by: Jim Cromie 
---
v5:
. use DEFINE_DYNAMIC_DEBUG_CATEGORIES in drm_print.c
. s/DRM_DBG_CLASS_/DRM_DBG_CAT_/ - dont need another term
. default=y in Kconfig entry - per @DanVet
. move some commit-log prose to dyndbg commit
. add prototypes to (param_get/set)_dyndbg
. more wrinkles found by 
. relocate ratelimit chunk from elsewhere
v6:
. add kernel doc
. fix cpp paste, drop '#'
v7:
. change __drm_debug to long, to fit with DEFINE_DYNAMIC_DEBUG_CATEGORIES
. add -DDYNAMIC_DEBUG_MODULE to ccflags if DRM_USE_DYNAMIC_DEBUG
v8:
. adapt to altered ^ insertion
. add mem cost numbers to kconfig
. kdoc improvements (I hope)
---
 drivers/gpu/drm/Kconfig |  26 
 drivers/gpu/drm/Makefile|   3 +
 drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
 drivers/gpu/drm/drm_print.c |  53 ++---
 drivers/gpu/drm/i915/Makefile   |   2 +-
 include/drm/drm_print.h | 177 ++

[PATCH v8 12/16] amdgpu: use DEFINE_DYNAMIC_DEBUG_CATEGORIES on existing prdbgs

2021-09-15 Thread Jim Cromie

logger_types.h defines many DC_LOG_*() categorized debug wrappers.
Most of these already use DRM debug API, so are controllable using
drm.debug, but others use a bare pr_debug("$prefix: .."), with 1 of 13
different class-prefixes matching ~/^\[[_A-Z]+\]:/

Use DEFINE_DYNAMIC_DEBUG_CATEGORIES to create a sysfs location which
maps from bits to these 13 sets of categorized pr_debugs to en/disable.

Makefile adds -DDYNAMIC_DEBUG_MODULE for CONFIG_DYNAMIC_DEBUG_CORE,
otherwise BUILD_BUG_ON triggers (obvious misuses are better than
mysterious ones).

Anyway, here's a baseline of existing prdbg use.
Each callsite costs 56 bytes of kernel .data
amdgpu has "extra" prdbgs due to macro expansion.
(see repeating linenos in control)
(it also has substantial use of drm.debug)
- tedious fix but clear size payoff, as a separate patch, later.

bash-5.1# for m in i915 amdgpu nouveau; do modprobe $m; done
dyndbg:   1 debug prints in module drm
dyndbg:   2 debug prints in module ttm
dyndbg:   8 debug prints in module video
dyndbg: 167 debug prints in module i915
dyndbg: 2339 debug prints in module amdgpu
dyndbg:   3 debug prints in module wmi
dyndbg:   3 debug prints in module nouveau

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |  2 +
 .../gpu/drm/amd/display/dc/core/dc_debug.c| 43 ++-
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index c56320e78c0e..1f084919294c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -38,6 +38,8 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
-I$(FULL_AMD_PATH)/amdkfd
 
+ccflags-$(CONFIG_DYNAMIC_DEBUG_CORE) += -DDYNAMIC_DEBUG_MODULE
+
 amdgpu-y := amdgpu_drv.o
 
 # add KMS driver
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
index 21be2a684393..ae462e5d42c6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
@@ -36,8 +36,49 @@
 
 #include "resource.h"
 
-#define DC_LOGGER_INIT(logger)
+#ifdef CONFIG_DRM_USE_DYNAMIC_DEBUG
+/* define a drm.debug style dyndbg pr-debug control point */
+#include 
+
+unsigned long __debug_dc;
+EXPORT_SYMBOL(__debug_dc);
+
+#define help_(_N, _cat)"\t  Bit-" #_N "\t" _cat "\n"
+
+#define DC_DYNDBG_BITMAP_DESC(name)\
+   "Control pr_debugs via /sys/module/amdgpu/parameters/" #name\
+   ", where each bit controls a debug category.\n" \
+   help_(0, "[SURFACE]:")  \
+   help_(1, "[CURSOR]:")   \
+   help_(2, "[PFLIP]:")\
+   help_(3, "[VBLANK]:")   \
+   help_(4, "[HW_LINK_TRAINING]:") \
+   help_(5, "[HW_AUDIO]:") \
+   help_(6, "[SCALER]:")   \
+   help_(7, "[BIOS]:") \
+   help_(8, "[BANDWIDTH_CALCS]:")  \
+   help_(9, "[DML]:")  \
+   help_(10, "[IF_TRACE]:")\
+   help_(11, "[GAMMA]:")   \
+   help_(12, "[SMU_MSG]:")
+
+DEFINE_DYNAMIC_DEBUG_CATEGORIES(debug_dc, __debug_dc,
+   DC_DYNDBG_BITMAP_DESC(debug_dc),
+   _DD_cat_(0, "[CURSOR]:"),
+   _DD_cat_(1, "[PFLIP]:"),
+   _DD_cat_(2, "[VBLANK]:"),
+   _DD_cat_(3, "[HW_LINK_TRAINING]:"),
+   _DD_cat_(4, "[HW_AUDIO]:"),
+   _DD_cat_(5, "[SCALER]:"),
+   _DD_cat_(6, "[BIOS]:"),
+   _DD_cat_(7, "[BANDWIDTH_CALCS]:"),
+   _DD_cat_(8, "[DML]:"),
+   _DD_cat_(9, "[IF_TRACE]:"),
+   _DD_cat_(10, "[GAMMA]:"),
+   _DD_cat_(11, "[SMU_MSG]:"));
+#endif
 
+#define DC_LOGGER_INIT(logger)
 
 #define SURFACE_TRACE(...) do {\
if (dc->debug.surface_trace) \
-- 
2.31.1

[PATCH v8 11/16] i915/gvt: use DEFINE_DYNAMIC_DEBUG_CATEGORIES for existing prdbgs

2021-09-15 Thread Jim Cromie

The gvt component of this driver has ~120 pr_debugs, in 9 categories
quite similar to those in DRM.  Following the interface model of
drm.debug, add a parameter to map bits to these categorizations.

DEFINE_DYNAMIC_DEBUG_CATEGORIES(debug_gvt, __gvt_debug,
"dyndbg bitmap desc",
_DD_cat_(0, "gvt:cmd:"),
_DD_cat_(1, "gvt:core:"),
_DD_cat_(2, "gvt:dpy:"),
_DD_cat_(3, "gvt:el:"),
_DD_cat_(4, "gvt:irq:"),
_DD_cat_(5, "gvt:mm:"),
_DD_cat_(6, "gvt:mmio:"),
_DD_cat_(7, "gvt:render:"),
_DD_cat_(8, "gvt:sched:"));

If CONFIG_DYNAMIC_DEBUG_CORE=y, then gvt/Makefile adds
-DDYNAMIC_DEBUG_MODULE to cflags, which CONFIG_DYNAMIC_DEBUG=n
(CORE-only) builds need.  This is redone more comprehensively soon.

Signed-off-by: Jim Cromie 
---
v5:
. static decl of vector of bit->class descriptors - Emil.V
. relocate gvt-makefile chunk from elsewhere

v7:
. move ccflags addition up to i915/Makefile from i915/gvt

v8:
. relocate DDD_CAT decl code into intel_gvt.c - Tvrtko
  cflags back to i915/Makefile
. add -DDYNAMIC_DEBUG_MODULE to support DYNAMIC_DEBUG_CORE-only builds

Here's the (unchanged?) footprint:

bash-5.1# for m in i915 amdgpu nouveau; do modprobe $m; done
dyndbg:   1 debug prints in module drm
dyndbg:   2 debug prints in module ttm
dyndbg:   8 debug prints in module video
dyndbg: 167 debug prints in module i915
dyndbg: 2339 debug prints in module amdgpu
[drm] amdgpu kernel modesetting enabled.
amdgpu: CRAT table disabled by module option
amdgpu: Virtual CRAT table created for CPU
amdgpu: Topology: Add CPU node
dyndbg:   3 debug prints in module wmi
dyndbg:   3 debug prints in module nouveau
---
 drivers/gpu/drm/i915/Makefile|  2 ++
 drivers/gpu/drm/i915/intel_gvt.c | 34 
 2 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 4f22cac1c49b..038fd29c89d4 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -295,6 +295,8 @@ i915-y += intel_gvt.o
 include $(src)/gvt/Makefile
 endif
 
+ccflags-$(CONFIG_DYNAMIC_DEBUG_CORE) += -DDYNAMIC_DEBUG_MODULE
+
 obj-$(CONFIG_DRM_I915) += i915.o
 obj-$(CONFIG_DRM_I915_GVT_KVMGT) += gvt/kvmgt.o
 
diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c
index 4e70c1a9ef2e..eb0da9173b23 100644
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -162,3 +162,37 @@ void intel_gvt_resume(struct drm_i915_private *dev_priv)
if (intel_gvt_active(dev_priv))
intel_gvt_pm_resume(dev_priv->gvt);
 }
+
+#if defined(CONFIG_DRM_USE_DYNAMIC_DEBUG)
+
+unsigned long __gvt_debug;
+EXPORT_SYMBOL(__gvt_debug);
+
+#define help_(_N, _cat)"\t  Bit-" #_N ":\t" _cat "\n"
+
+#define I915_GVT_CATEGORIES(name) \
+   " Enable debug output via /sys/module/i915/parameters/" #name   \
+   ", where each bit enables a debug category.\n"  \
+   help_(0, "gvt:cmd:")\
+   help_(1, "gvt:core:")   \
+   help_(2, "gvt:dpy:")\
+   help_(3, "gvt:el:") \
+   help_(4, "gvt:irq:")\
+   help_(5, "gvt:mm:") \
+   help_(6, "gvt:mmio:")   \
+   help_(7, "gvt:render:") \
+   help_(8, "gvt:sched:")
+
+DEFINE_DYNAMIC_DEBUG_CATEGORIES(debug_gvt, __gvt_debug,
+   I915_GVT_CATEGORIES(debug_gvt),
+   _DD_cat_(0, "gvt:cmd:"),
+   _DD_cat_(1, "gvt:core:"),
+   _DD_cat_(2, "gvt:dpy:"),
+   _DD_cat_(3, "gvt:el:"),
+   _DD_cat_(4, "gvt:irq:"),
+   _DD_cat_(5, "gvt:mm:"),
+   _DD_cat_(6, "gvt:mmio:"),
+   _DD_cat_(7, "gvt:render:"),
+   _DD_cat_(8, "gvt:sched:"));
+
+#endif
-- 
2.31.1

[PATCH v8 09/16] drm: fix doc grammar error

2021-09-15 Thread Jim Cromie

no code changes, good for rc

Signed-off-by: Jim Cromie 
---
 include/drm/drm_drv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index b439ae1921b8..ebb22166ace1 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -522,7 +522,7 @@ void *__devm_drm_dev_alloc(struct device *parent,
  * @type: the type of the struct which contains struct &drm_device
  * @member: the name of the &drm_device within @type.
  *
- * This allocates and initialize a new DRM device. No device registration is 
done.
+ * This allocates and initializes a new DRM device. No device registration is 
done.
  * Call drm_dev_register() to advertice the device to user space and register 
it
  * with other core subsystems. This should be done last in the device
  * initialization sequence to make sure userspace can't access an inconsistent
-- 
2.31.1

[PATCH v8 10/16] i915/gvt: remove spaces in pr_debug "gvt: core:" etc prefixes

2021-09-15 Thread Jim Cromie

Taking embedded spaces out of existing prefixes makes them better
class-prefixes; simplifying the extra quoting needed otherwise:

  $> echo format "^gvt: core:" +p >control

Dropping the internal spaces means any trailing space in a query will
more clearly terminate the prefix being searched for.

Consider a generic drm-debug example:

  # turn off ATOMIC reports
  echo format "^drm:atomic: " -p > control

  # turn off all ATOMIC:* reports, including any sub-categories
  echo format "^drm:atomic:" -p > control

  # turn on ATOMIC:FAIL: reports
  echo format "^drm:atomic:fail: " +p > control

Removing embedded spaces in the class-prefixes simplifies the
corresponding match-prefix.  This means that "quoted" match-prefixes
are only needed when the trailing space is desired, in order to
exclude explicitly sub-categorized pr-debugs; in this example,
"drm:atomic:fail:".

RFC: maybe the prefix catenation should paste in the " " class-prefix
terminator explicitly.  A pr_debug_() flavor could exclude the " ",
allowing ad-hoc sub-categorization by appending for example, "fail:"
to "drm:atomic:" without the default " " insertion.

Signed-off-by: Jim Cromie 
---
v8:
. fix patchwork CI warning
---
 drivers/gpu/drm/i915/gvt/debug.h | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/debug.h b/drivers/gpu/drm/i915/gvt/debug.h
index c6027125c1ec..bbecc279e077 100644
--- a/drivers/gpu/drm/i915/gvt/debug.h
+++ b/drivers/gpu/drm/i915/gvt/debug.h
@@ -36,30 +36,30 @@ do {
\
 } while (0)
 
 #define gvt_dbg_core(fmt, args...) \
-   pr_debug("gvt: core: "fmt, ##args)
+   pr_debug("gvt:core: " fmt, ##args)
 
 #define gvt_dbg_irq(fmt, args...) \
-   pr_debug("gvt: irq: "fmt, ##args)
+   pr_debug("gvt:irq: " fmt, ##args)
 
 #define gvt_dbg_mm(fmt, args...) \
-   pr_debug("gvt: mm: "fmt, ##args)
+   pr_debug("gvt:mm: " fmt, ##args)
 
 #define gvt_dbg_mmio(fmt, args...) \
-   pr_debug("gvt: mmio: "fmt, ##args)
+   pr_debug("gvt:mmio: " fmt, ##args)
 
 #define gvt_dbg_dpy(fmt, args...) \
-   pr_debug("gvt: dpy: "fmt, ##args)
+   pr_debug("gvt:dpy: " fmt, ##args)
 
 #define gvt_dbg_el(fmt, args...) \
-   pr_debug("gvt: el: "fmt, ##args)
+   pr_debug("gvt:el: " fmt, ##args)
 
 #define gvt_dbg_sched(fmt, args...) \
-   pr_debug("gvt: sched: "fmt, ##args)
+   pr_debug("gvt:sched: " fmt, ##args)
 
 #define gvt_dbg_render(fmt, args...) \
-   pr_debug("gvt: render: "fmt, ##args)
+   pr_debug("gvt:render: " fmt, ##args)
 
 #define gvt_dbg_cmd(fmt, args...) \
-   pr_debug("gvt: cmd: "fmt, ##args)
+   pr_debug("gvt:cmd: " fmt, ##args)
 
 #endif
-- 
2.31.1

1 2 >

1 - 100 of 148 matches

Mail list logo