Re: [PATCH] drm/radeon: Avoid open coded arithmetic in memory allocation

2022-02-07 Thread Christian König

Am 05.02.22 um 18:38 schrieb Christophe JAILLET:

kmalloc_array()/kcalloc() should be used to avoid potential overflow when
a multiplication is needed to compute the size of the requested memory.

So turn a kzalloc()+explicit size computation into an equivalent kcalloc().

Signed-off-by: Christophe JAILLET 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/radeon/radeon_atombios.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c 
b/drivers/gpu/drm/radeon/radeon_atombios.c
index 28c4413f4dc8..7b9cc7a9f42f 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -897,13 +897,13 @@ bool 
radeon_get_atom_connector_info_from_supported_devices_table(struct
union atom_supported_devices *supported_devices;
int i, j, max_device;
struct bios_connector *bios_connectors;
-   size_t bc_size = sizeof(*bios_connectors) * ATOM_MAX_SUPPORTED_DEVICE;
struct radeon_router router;
  
  	router.ddc_valid = false;

router.cd_valid = false;
  
-	bios_connectors = kzalloc(bc_size, GFP_KERNEL);

+   bios_connectors = kcalloc(ATOM_MAX_SUPPORTED_DEVICE,
+ sizeof(*bios_connectors), GFP_KERNEL);
if (!bios_connectors)
return false;
  




[PATCH] drm/amdgpu: reserve the pd while cleaning up PRTs

2022-02-07 Thread Christian König
We want to have lockdep annotation here, so make sure that we reserve
the PD while removing PRTs even if it isn't strictly necessary since the
VM object is about to be destroyed anyway.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index efd13898c83e..9f985bd463be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1194,8 +1194,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
amdgpu_vce_free_handles(adev, file_priv);
 
-   amdgpu_vm_bo_del(adev, fpriv->prt_va);
-
if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
/* TODO: how to handle reserve failure */
BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
@@ -1206,6 +1204,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 
pasid = fpriv->vm.pasid;
pd = amdgpu_bo_ref(fpriv->vm.root.bo);
+   if (!WARN_ON(amdgpu_bo_reserve(pd, true))) {
+   amdgpu_vm_bo_del(adev, fpriv->prt_va);
+   amdgpu_bo_unreserve(pd);
+   }
 
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
amdgpu_vm_fini(adev, &fpriv->vm);
-- 
2.25.1



[PATCH] drm/amd/pm: fix error handling

2022-02-07 Thread trix
From: Tom Rix 

clang static analysis reports this error
amdgpu_smu.c:2289:9: warning: Called function pointer
  is null (null dereference)
return smu->ppt_funcs->emit_clk_levels(
   ^~~~

There is a logic error in the earlier check of
emit_clk_levels.  The error value is set to
the ret variable but ret is never used.  Return
directly and remove the unneeded ret variable.

Fixes: 5d64f9bbb628 ("amdgpu/pm: Implement new API function "emit" that accepts buffer base and write offset")
Signed-off-by: Tom Rix 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index af368aa1fd0ae..5f3b3745a9b7a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2274,7 +2274,6 @@ static int smu_emit_ppclk_levels(void *handle, enum 
pp_clock_type type, char *bu
 {
struct smu_context *smu = handle;
enum smu_clk_type clk_type;
-   int ret = 0;
 
clk_type = smu_convert_to_smuclk(type);
if (clk_type == SMU_CLK_COUNT)
@@ -2284,7 +2283,7 @@ static int smu_emit_ppclk_levels(void *handle, enum 
pp_clock_type type, char *bu
return -EOPNOTSUPP;
 
if (!smu->ppt_funcs->emit_clk_levels)
-   ret = -ENOENT;
+   return -ENOENT;
 
return smu->ppt_funcs->emit_clk_levels(smu, clk_type, buf, offset);
 
-- 
2.26.3



[PATCH v2 3/3] tools: add hmm gup test for long term pinned device pages

2022-02-07 Thread Alistair Popple
From: Alex Sierra 

The intention is to test device coherent type pages that have been
pinned through get_user_pages() with the PIN_LONGTERM flag set. These
pages should get migrated back to normal system memory.

Signed-off-by: Alex Sierra 
Signed-off-by: Alistair Popple 
Reviewed-by: Felix Kuehling  
---

Changes for v2:
 - Added Felix's Reviewed-by (thanks!)

 tools/testing/selftests/vm/Makefile|  2 +-
 tools/testing/selftests/vm/hmm-tests.c | 81 +++-
 2 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/vm/Makefile 
b/tools/testing/selftests/vm/Makefile
index 96714d2..32032c7 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -143,7 +143,7 @@ $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS 
+= -lcap
 
 $(OUTPUT)/gup_test: ../../../../mm/gup_test.h
 
-$(OUTPUT)/hmm-tests: local_config.h
+$(OUTPUT)/hmm-tests: local_config.h ../../../../mm/gup_test.h
 
 # HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
 $(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
diff --git a/tools/testing/selftests/vm/hmm-tests.c 
b/tools/testing/selftests/vm/hmm-tests.c
index 84ec8c4..11b83a8 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -36,6 +36,7 @@
  * in the usual include/uapi/... directory.
  */
 #include "../../../../lib/test_hmm_uapi.h"
+#include "../../../../mm/gup_test.h"
 
 struct hmm_buffer {
void*ptr;
@@ -60,6 +61,8 @@ enum {
 #define NTIMES 10
 
 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+/* Just the flags we need, copied from mm.h: */
+#define FOLL_WRITE 0x01/* check pte is writable */
 
 FIXTURE(hmm)
 {
@@ -1766,4 +1769,82 @@ TEST_F(hmm, exclusive_cow)
hmm_buffer_free(buffer);
 }
 
+/*
+ * Test get user device pages through gup_test. Setting PIN_LONGTERM flag.
+ * This should trigger a migration back to system memory for both, private
+ * and coherent type pages.
+ * This test makes use of the gup_test module. Make sure CONFIG_GUP_TEST is
+ * enabled in your kernel configuration before you run it.
+ */
+TEST_F(hmm, hmm_gup_test)
+{
+   struct hmm_buffer *buffer;
+   struct gup_test gup;
+   int gup_fd;
+   unsigned long npages;
+   unsigned long size;
+   unsigned long i;
+   int *ptr;
+   int ret;
+   unsigned char *m;
+
+   gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+   if (gup_fd == -1)
+   SKIP(return, "Skipping test, could not find gup_test driver");
+
+   npages = 4;
+   ASSERT_NE(npages, 0);
+   size = npages << self->page_shift;
+
+   buffer = malloc(sizeof(*buffer));
+   ASSERT_NE(buffer, NULL);
+
+   buffer->fd = -1;
+   buffer->size = size;
+   buffer->mirror = malloc(size);
+   ASSERT_NE(buffer->mirror, NULL);
+
+   buffer->ptr = mmap(NULL, size,
+  PROT_READ | PROT_WRITE,
+  MAP_PRIVATE | MAP_ANONYMOUS,
+  buffer->fd, 0);
+   ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+   /* Initialize buffer in system memory. */
+   for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+   ptr[i] = i;
+
+   /* Migrate memory to device. */
+   ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+   ASSERT_EQ(ret, 0);
+   ASSERT_EQ(buffer->cpages, npages);
+   /* Check what the device read. */
+   for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+   ASSERT_EQ(ptr[i], i);
+
+   gup.nr_pages_per_call = npages;
+   gup.addr = (unsigned long)buffer->ptr;
+   gup.gup_flags = FOLL_WRITE;
+   gup.size = size;
+   /*
+* Calling gup_test ioctl. It will try to PIN_LONGTERM these device pages
+* causing a migration back to system memory for both, private and coherent
+* type pages.
+*/
+   if (ioctl(gup_fd, PIN_LONGTERM_BENCHMARK, &gup)) {
+   perror("ioctl on PIN_LONGTERM_BENCHMARK\n");
+   goto out_test;
+   }
+
+   /* Take snapshot to make sure pages have been migrated to sys memory */
+   ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+   ASSERT_EQ(ret, 0);
+   ASSERT_EQ(buffer->cpages, npages);
+   m = buffer->mirror;
+   for (i = 0; i < npages; i++)
+   ASSERT_EQ(m[i], HMM_DMIRROR_PROT_WRITE);
+out_test:
+   close(gup_fd);
+   hmm_buffer_free(buffer);
+}
 TEST_HARNESS_MAIN
-- 
git-series 0.9.1


Re: Waiting for fences timed out on MacBook Pro 2019

2022-02-07 Thread Tomasz Moń
On Mon, Jul 12, 2021 at 11:56 AM Tomasz Moń  wrote:
> I am having trouble getting Linux to run on MacBook Pro 2019 with
> Radeon Pro Vega 20 4 GB. Basically as soon as graphical user interface
> starts, the whole system freezes. This happens with every Linux kernel
> version I have tried over the last few months, including 5.13.

It is significantly better on 5.17-rc2. That is, the whole system is
not frozen; just the screen keeps blinking and visual artifacts show.
Graphical desktop is not usable, but switching between virtual
terminals works just fine.

dmesg | grep amdgpu shows:
[2.310680] [drm] amdgpu kernel modesetting enabled.
[2.310888] amdgpu: CRAT table not found
[2.310891] amdgpu: Virtual CRAT table created for CPU
[2.310902] amdgpu: Topology: Add CPU node
[2.310966] fb0: switching to amdgpu from EFI VGA
[2.311069] amdgpu :03:00.0: vgaarb: deactivate vga console
[2.311161] amdgpu :03:00.0: amdgpu: Trusted Memory Zone (TMZ)
feature not supported
[2.322968] amdgpu :03:00.0: amdgpu: Fetched VBIOS from VFCT
[2.322972] amdgpu: ATOM BIOS: 113-D20601MA0T-016
[2.325980] amdgpu :03:00.0: BAR 2: releasing [mem
0xc000-0xc01f 64bit pref]
[2.325983] amdgpu :03:00.0: BAR 0: releasing [mem
0xb000-0xbfff 64bit pref]
[2.326015] amdgpu :03:00.0: BAR 0: assigned [mem
0x41-0x41 64bit pref]
[2.326022] amdgpu :03:00.0: BAR 2: assigned [mem
0x408000-0x40801f 64bit pref]
[2.326075] amdgpu :03:00.0: amdgpu: VRAM: 4080M
0x00F4 - 0x00F4FEFF (4080M used)
[2.326078] amdgpu :03:00.0: amdgpu: GART: 512M
0x - 0x1FFF
[2.326080] amdgpu :03:00.0: amdgpu: AGP: 267419648M
0x00F8 - 0x
[2.326144] [drm] amdgpu: 4080M of VRAM memory ready
[2.326145] [drm] amdgpu: 4080M of GTT memory ready.
[2.330452] amdgpu :03:00.0: amdgpu: PSP runtime database doesn't exist
[2.330457] amdgpu: hwmgr_sw_init smu backed is vega12_smu
[3.169108] snd_hda_intel :03:00.1: bound :03:00.0 (ops
amdgpu_dm_audio_component_bind_ops [amdgpu])
[4.427470] kfd kfd: amdgpu: Allocated 3969056 bytes on gart
[4.462468] amdgpu: HMM registered 4080MB device memory
[4.462492] amdgpu: SRAT table not found
[4.462492] amdgpu: Virtual CRAT table created for GPU
[4.462567] amdgpu: Topology: Add dGPU node [0x69af:0x1002]
[4.462572] kfd kfd: amdgpu: added device 1002:69af
[4.462587] amdgpu :03:00.0: amdgpu: SE 4, SH per SE 1, CU per
SH 5, active_cu_number 20
[4.462674] amdgpu :03:00.0: amdgpu: ring gfx uses VM inv eng 0 on hub 0
[4.462676] amdgpu :03:00.0: amdgpu: ring comp_1.0.0 uses VM
inv eng 1 on hub 0
[4.462678] amdgpu :03:00.0: amdgpu: ring comp_1.1.0 uses VM
inv eng 4 on hub 0
[4.462679] amdgpu :03:00.0: amdgpu: ring comp_1.2.0 uses VM
inv eng 5 on hub 0
[4.462680] amdgpu :03:00.0: amdgpu: ring comp_1.3.0 uses VM
inv eng 6 on hub 0
[4.462682] amdgpu :03:00.0: amdgpu: ring comp_1.0.1 uses VM
inv eng 7 on hub 0
[4.462683] amdgpu :03:00.0: amdgpu: ring comp_1.1.1 uses VM
inv eng 8 on hub 0
[4.462684] amdgpu :03:00.0: amdgpu: ring comp_1.2.1 uses VM
inv eng 9 on hub 0
[4.462685] amdgpu :03:00.0: amdgpu: ring comp_1.3.1 uses VM
inv eng 10 on hub 0
[4.462686] amdgpu :03:00.0: amdgpu: ring kiq_2.1.0 uses VM inv
eng 11 on hub 0
[4.462688] amdgpu :03:00.0: amdgpu: ring sdma0 uses VM inv eng
0 on hub 1
[4.462689] amdgpu :03:00.0: amdgpu: ring sdma1 uses VM inv eng
1 on hub 1
[4.462690] amdgpu :03:00.0: amdgpu: ring uvd_0 uses VM inv eng
4 on hub 1
[4.462691] amdgpu :03:00.0: amdgpu: ring uvd_enc_0.0 uses VM
inv eng 5 on hub 1
[4.462693] amdgpu :03:00.0: amdgpu: ring uvd_enc_0.1 uses VM
inv eng 6 on hub 1
[4.462694] amdgpu :03:00.0: amdgpu: ring vce0 uses VM inv eng 7 on hub 1
[4.462695] amdgpu :03:00.0: amdgpu: ring vce1 uses VM inv eng 8 on hub 1
[4.462696] amdgpu :03:00.0: amdgpu: ring vce2 uses VM inv eng 9 on hub 1
[4.469544] [drm] Initialized amdgpu 3.44.0 20150101 for
:03:00.0 on minor 0
[4.474424] fbcon: amdgpudrmfb (fb0) is primary device
[5.547836] amdgpu :03:00.0: [drm] fb0: amdgpudrmfb frame buffer device
[5.636489] audit: type=1130 audit(1644133454.781:45): pid=1 uid=0
auid=4294967295 ses=4294967295
msg='unit=systemd-backlight@backlight:amdgpu_bl0 comm="systemd"
exe="/usr/lib/systemd/systemd" hostname=? addr=? terminal=?
res=success'
[   24.927611] [drm:amdgpu_dm_atomic_commit_tail [amdgpu]] *ERROR*
Waiting for fences timed out!
[   24.927611] [drm:amdgpu_dm_atomic_commit_tail [amdgpu]] *ERROR*
Waiting for fences timed out!
[   30.057616] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring gfx
timeout, signaled seq=895, emitted seq=897
[   30.057933] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* Process
information: process Xorg pid 722

Re: [PATCH 2/3] mm/gup.c: Migrate device coherent pages when pinning instead of failing

2022-02-07 Thread Alistair Popple
On Wednesday, 2 February 2022 2:03:01 AM AEDT Felix Kuehling wrote:
> 
> Am 2022-02-01 um 02:05 schrieb Alistair Popple:
> > Currently any attempts to pin a device coherent page will fail. This is
> > because device coherent pages need to be managed by a device driver, and
> > pinning them would prevent a driver from migrating them off the device.
> >
> > However this is no reason to fail pinning of these pages. These are
> > coherent and accessible from the CPU so can be migrated just like
> > pinning ZONE_MOVABLE pages. So instead of failing all attempts to pin
> > them first try migrating them out of ZONE_DEVICE.
> >
> > Signed-off-by: Alistair Popple 
> 
> Thank you for working on this. I have two questions inline.
> 
> Other than that, patches 1 and 2 are
> 
> Acked-by: Felix Kuehling 
> 
> 
> > ---
> >   mm/gup.c | 105 ++--
> >   1 file changed, 95 insertions(+), 10 deletions(-)
> >
> > diff --git a/mm/gup.c b/mm/gup.c
> > index f596b93..2cbef54 100644
> > --- a/mm/gup.c
> > +++ b/mm/gup.c
> > @@ -1834,6 +1834,60 @@ struct page *get_dump_page(unsigned long addr)
> >   
> >   #ifdef CONFIG_MIGRATION
> >   /*
> > + * Migrates a device coherent page back to normal memory. Caller should 
> > have a
> > + * reference on page which will be copied to the new page if migration is
> > + * successful or dropped on failure.
> > + */
> > +static struct page *migrate_device_page(struct page *page,
> > +   unsigned int gup_flags)
> > +{
> > +   struct page *dpage;
> > +   struct migrate_vma args;
> > +   unsigned long src_pfn, dst_pfn = 0;
> > +
> > +   lock_page(page);
> > +   src_pfn = migrate_pfn(page_to_pfn(page)) | MIGRATE_PFN_MIGRATE;
> > +   args.src = &src_pfn;
> > +   args.dst = &dst_pfn;
> > +   args.cpages = 1;
> > +   args.npages = 1;
> > +   args.vma = NULL;
> > +   migrate_vma_setup(&args);
> > +   if (!(src_pfn & MIGRATE_PFN_MIGRATE))
> > +   return NULL;
> > +
> > +   dpage = alloc_pages(GFP_USER | __GFP_NOWARN, 0);
> 
> Don't you need to check dpage for NULL before the try_grab_page call below?

Yes, thanks for pointing that out. Will fix for v2.

> > +
> > +   /*
> > +* get/pin the new page now so we don't have to retry gup after
> > +* migrating. We already have a reference so this should never fail.
> > +*/
> > +   if (WARN_ON_ONCE(!try_grab_page(dpage, gup_flags))) {
> > +   __free_pages(dpage, 0);
> > +   dpage = NULL;
> > +   }
> > +
> > +   if (dpage) {
> > +   lock_page(dpage);
> > +   dst_pfn = migrate_pfn(page_to_pfn(dpage));
> > +   }
> > +
> > +   migrate_vma_pages(&args);
> > +   if (src_pfn & MIGRATE_PFN_MIGRATE)
> > +   copy_highpage(dpage, page);
> 
> > Can't dpage be NULL here as well?

No - migrate_vma_pages() will clear src_pfn & MIGRATE_PFN_MIGRATE if no
destination page is provided in dst_pfn.

> Regards,
>Felix
> 
> 
> > +   migrate_vma_finalize(&args);
> > +   if (dpage && !(src_pfn & MIGRATE_PFN_MIGRATE)) {
> > +   if (gup_flags & FOLL_PIN)
> > +   unpin_user_page(dpage);
> > +   else
> > +   put_page(dpage);
> > +   dpage = NULL;
> > +   }
> > +
> > +   return dpage;
> > +}
> > +
> > +/*
> >* Check whether all pages are pinnable, if so return number of pages.  
> > If some
> >* pages are not pinnable, migrate them, and unpin all pages. Return zero 
> > if
> >* pages were migrated, or if some pages were not successfully isolated.
> > @@ -1861,15 +1915,40 @@ static long 
> > check_and_migrate_movable_pages(unsigned long nr_pages,
> > continue;
> > prev_head = head;
> > /*
> > -* If we get a movable page, since we are going to be pinning
> > -* these entries, try to move them out if possible.
> > +* Device coherent pages are managed by a driver and should not
> > +* be pinned indefinitely as it prevents the driver moving the
> > +* page. So when trying to pin with FOLL_LONGTERM instead try
> > +* migrating page out of device memory.
> >  */
> > if (is_dev_private_or_coherent_page(head)) {
> > +   /*
> > +* device private pages will get faulted in during gup
> > +* so it shouldn't be possible to see one here.
> > +*/
> > WARN_ON_ONCE(is_device_private_page(head));
> > -   ret = -EFAULT;
> > -   goto unpin_pages;
> > +   WARN_ON_ONCE(PageCompound(head));
> > +
> > +   /*
> > +* migration will fail if the page is pinned, so convert
> > +* the pin on the source page to a normal reference.
> > +*/
> > +   if (gup_flags & FOLL_PIN) {
> > +   get_page(head);
> > + 

[PATCH v2 2/3] mm/gup.c: Migrate device coherent pages when pinning instead of failing

2022-02-07 Thread Alistair Popple
Currently any attempts to pin a device coherent page will fail. This is
because device coherent pages need to be managed by a device driver, and
pinning them would prevent a driver from migrating them off the device.

However this is no reason to fail pinning of these pages. These are
coherent and accessible from the CPU so can be migrated just like
pinning ZONE_MOVABLE pages. So instead of failing all attempts to pin
them first try migrating them out of ZONE_DEVICE.

Signed-off-by: Alistair Popple 
Acked-by: Felix Kuehling 
---

Changes for v2:

 - Added Felix's Acked-by
 - Fixed missing check for dpage == NULL

 mm/gup.c | 105 ++--
 1 file changed, 95 insertions(+), 10 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 56d9577..5e826db 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1861,6 +1861,60 @@ struct page *get_dump_page(unsigned long addr)
 
 #ifdef CONFIG_MIGRATION
 /*
+ * Migrates a device coherent page back to normal memory. Caller should have a
+ * reference on page which will be copied to the new page if migration is
+ * successful or dropped on failure.
+ */
+static struct page *migrate_device_page(struct page *page,
+   unsigned int gup_flags)
+{
+   struct page *dpage;
+   struct migrate_vma args;
+   unsigned long src_pfn, dst_pfn = 0;
+
+   lock_page(page);
+   src_pfn = migrate_pfn(page_to_pfn(page)) | MIGRATE_PFN_MIGRATE;
+   args.src = &src_pfn;
+   args.dst = &dst_pfn;
+   args.cpages = 1;
+   args.npages = 1;
+   args.vma = NULL;
+   migrate_vma_setup(&args);
+   if (!(src_pfn & MIGRATE_PFN_MIGRATE))
+   return NULL;
+
+   dpage = alloc_pages(GFP_USER | __GFP_NOWARN, 0);
+
+   /*
+* get/pin the new page now so we don't have to retry gup after
+* migrating. We already have a reference so this should never fail.
+*/
+   if (dpage && WARN_ON_ONCE(!try_grab_page(dpage, gup_flags))) {
+   __free_pages(dpage, 0);
+   dpage = NULL;
+   }
+
+   if (dpage) {
+   lock_page(dpage);
+   dst_pfn = migrate_pfn(page_to_pfn(dpage));
+   }
+
+   migrate_vma_pages(&args);
+   if (src_pfn & MIGRATE_PFN_MIGRATE)
+   copy_highpage(dpage, page);
+   migrate_vma_finalize(&args);
+   if (dpage && !(src_pfn & MIGRATE_PFN_MIGRATE)) {
+   if (gup_flags & FOLL_PIN)
+   unpin_user_page(dpage);
+   else
+   put_page(dpage);
+   dpage = NULL;
+   }
+
+   return dpage;
+}
+
+/*
  * Check whether all pages are pinnable, if so return number of pages.  If some
  * pages are not pinnable, migrate them, and unpin all pages. Return zero if
  * pages were migrated, or if some pages were not successfully isolated.
@@ -1888,15 +1942,40 @@ static long check_and_migrate_movable_pages(unsigned 
long nr_pages,
continue;
prev_head = head;
/*
-* If we get a movable page, since we are going to be pinning
-* these entries, try to move them out if possible.
+* Device coherent pages are managed by a driver and should not
+* be pinned indefinitely as it prevents the driver moving the
+* page. So when trying to pin with FOLL_LONGTERM instead try
+* migrating page out of device memory.
 */
if (is_dev_private_or_coherent_page(head)) {
+   /*
+* device private pages will get faulted in during gup
+* so it shouldn't be possible to see one here.
+*/
WARN_ON_ONCE(is_device_private_page(head));
-   ret = -EFAULT;
-   goto unpin_pages;
+   WARN_ON_ONCE(PageCompound(head));
+
+   /*
+* migration will fail if the page is pinned, so convert
+* the pin on the source page to a normal reference.
+*/
+   if (gup_flags & FOLL_PIN) {
+   get_page(head);
+   unpin_user_page(head);
+   }
+
+   pages[i] = migrate_device_page(head, gup_flags);
+   if (!pages[i]) {
+   ret = -EBUSY;
+   break;
+   }
+   continue;
}
 
+   /*
+* If we get a movable page, since we are going to be pinning
+* these entries, try to move them out if possible.
+*/
if (!is_pinnable_page(head)) {
if (PageHuge(head)) {
if 

[PATCH v2 0/3] Migrate device coherent pages on get_user_pages()

2022-02-07 Thread Alistair Popple
Device coherent pages represent memory on a coherently attached device such
as a GPU which is usually under the control of a driver. These pages should
not be pinned as the driver needs to be able to move pages as required.
Currently this is enforced by failing any attempt to pin a device coherent
page.

A similar problem exists for ZONE_MOVABLE pages. In that case though the
pages are migrated instead of causing failure. There is no reason the
kernel can't migrate device coherent pages so this series implements
migration for device coherent pages so the same strategy of migrate and pin
can be used.

This series depends on the series "Add MEMORY_DEVICE_COHERENT for coherent
device memory mapping"[1] which is in linux-next-20220204 and should apply
cleanly to that.

[1] - 
https://lore.kernel.org/linux-mm/20220128200825.8623-1-alex.sie...@amd.com/

Changes for v2:

 - Rebased on to linux-next-20220204

Alex Sierra (1):
  tools: add hmm gup test for long term pinned device pages

Alistair Popple (2):
  migrate.c: Remove vma check in migrate_vma_setup()
  mm/gup.c: Migrate device coherent pages when pinning instead of failing

 mm/gup.c   | 105 +++---
 mm/migrate.c   |  34 
 tools/testing/selftests/vm/Makefile|   2 +-
 tools/testing/selftests/vm/hmm-tests.c |  81 -
 4 files changed, 194 insertions(+), 28 deletions(-)

base-commit: ef6b35306dd8f15a7e5e5a2532e665917a43c5d9
-- 
git-series 0.9.1


[PATCH v2 1/3] migrate.c: Remove vma check in migrate_vma_setup()

2022-02-07 Thread Alistair Popple
migrate_vma_setup() checks that a valid vma is passed so that the page
tables can be walked to find the pfns associated with a given address
range. However in some cases the pfns are already known, such as when
migrating device coherent pages during pin_user_pages() meaning a valid
vma isn't required.

Signed-off-by: Alistair Popple 
Acked-by: Felix Kuehling 
---

Changes for v2:

 - Added Felix's Acked-by

 mm/migrate.c | 34 +-
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index a9aed12..0d6570d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2602,24 +2602,24 @@ int migrate_vma_setup(struct migrate_vma *args)
 
args->start &= PAGE_MASK;
args->end &= PAGE_MASK;
-   if (!args->vma || is_vm_hugetlb_page(args->vma) ||
-   (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
-   return -EINVAL;
-   if (nr_pages <= 0)
-   return -EINVAL;
-   if (args->start < args->vma->vm_start ||
-   args->start >= args->vma->vm_end)
-   return -EINVAL;
-   if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
-   return -EINVAL;
if (!args->src || !args->dst)
return -EINVAL;
-
-   memset(args->src, 0, sizeof(*args->src) * nr_pages);
-   args->cpages = 0;
-   args->npages = 0;
-
-   migrate_vma_collect(args);
+   if (args->vma) {
+   if (is_vm_hugetlb_page(args->vma) ||
+   (args->vma->vm_flags & VM_SPECIAL) || 
vma_is_dax(args->vma))
+   return -EINVAL;
+   if (args->start < args->vma->vm_start ||
+   args->start >= args->vma->vm_end)
+   return -EINVAL;
+   if (args->end <= args->vma->vm_start || args->end > 
args->vma->vm_end)
+   return -EINVAL;
+
+   memset(args->src, 0, sizeof(*args->src) * nr_pages);
+   args->cpages = 0;
+   args->npages = 0;
+
+   migrate_vma_collect(args);
+   }
 
if (args->cpages)
migrate_vma_unmap(args);
@@ -2804,7 +2804,7 @@ void migrate_vma_pages(struct migrate_vma *migrate)
continue;
}
 
-   if (!page) {
+   if (!page && migrate->vma) {
if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE))
continue;
if (!notified) {
-- 
git-series 0.9.1


[PATCH] drm/radeon: Avoid open coded arithmetic in memory allocation

2022-02-07 Thread Christophe JAILLET
kmalloc_array()/kcalloc() should be used to avoid potential overflow when
a multiplication is needed to compute the size of the requested memory.

So turn a kzalloc()+explicit size computation into an equivalent kcalloc().

Signed-off-by: Christophe JAILLET 
---
 drivers/gpu/drm/radeon/radeon_atombios.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c 
b/drivers/gpu/drm/radeon/radeon_atombios.c
index 28c4413f4dc8..7b9cc7a9f42f 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -897,13 +897,13 @@ bool 
radeon_get_atom_connector_info_from_supported_devices_table(struct
union atom_supported_devices *supported_devices;
int i, j, max_device;
struct bios_connector *bios_connectors;
-   size_t bc_size = sizeof(*bios_connectors) * ATOM_MAX_SUPPORTED_DEVICE;
struct radeon_router router;
 
router.ddc_valid = false;
router.cd_valid = false;
 
-   bios_connectors = kzalloc(bc_size, GFP_KERNEL);
+   bios_connectors = kcalloc(ATOM_MAX_SUPPORTED_DEVICE,
+ sizeof(*bios_connectors), GFP_KERNEL);
if (!bios_connectors)
return false;
 
-- 
2.32.0



RE: [PATCH] drm/amdkfd: enable heavy-weight TLB flush on Vega20

2022-02-07 Thread Chen, Guchun
[Public]

Hi Eric,

Are you sure that there is no FW requirement for this patch on Vega20? 
The KFDMemory test fails with this commit applied.

Regards,
Guchun

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Tuesday, January 25, 2022 4:08 AM
To: Huang, JinHuiEric 
Cc: amd-gfx list 
Subject: Re: [PATCH] drm/amdkfd: enable heavy-weight TLB flush on Vega20

On Fri, Jan 21, 2022 at 11:17 AM Eric Huang  wrote:
>
> It is to meet the requirement for memory allocation optimization on 
> MI50.
>
> Signed-off-by: Eric Huang 

Assuming there is no firmware version requirement, the patch is:
Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 5b8ae0795c0a..d708f1a502cf 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1582,7 +1582,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct 
> file *filep,  static bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) {
> return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
>(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) &&
> -   dev->adev->sdma.instance[0].fw_version >= 18);
> +   dev->adev->sdma.instance[0].fw_version >= 18) ||
> +   KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
>  }
>
>  static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
> --
> 2.25.1
>


Re: [PATCH 3/8] mm: remove pointless includes from <linux/hmm.h>

2022-02-07 Thread Jason Gunthorpe
On Mon, Feb 07, 2022 at 07:32:44AM +0100, Christoph Hellwig wrote:
> hmm.h pulls in the world for no good reason at all.  Remove the
> includes and push a few ones into the users instead.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 1 +
>  drivers/gpu/drm/nouveau/nouveau_dmem.c   | 1 +
>  include/linux/hmm.h  | 9 ++---
>  lib/test_hmm.c   | 2 ++
>  4 files changed, 6 insertions(+), 7 deletions(-)

Reviewed-by: Jason Gunthorpe 

Jason



Re: [PATCH 2/2] drm/amdkfd: use unmap all queues for poison consumption

2022-02-07 Thread Felix Kuehling



Am 2022-02-07 um 02:27 schrieb Tao Zhou:

Replace reset queue for specific PASID with unmap all queues, reset
queue could break CP scheduler.

Signed-off-by: Tao Zhou 


The series is

Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 7a2b6342a8f2..68ee923a440b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -109,8 +109,7 @@ static void event_interrupt_poison_consumption(struct 
kfd_dev *dev,
  
  	switch (source_id) {

case SOC15_INTSRC_SQ_INTERRUPT_MSG:
-   if (dev->dqm->ops.reset_queues)
-   ret = dev->dqm->ops.reset_queues(dev->dqm, pasid);
+   kfd_dqm_evict_pasid(dev->dqm, pasid);
break;
case SOC15_INTSRC_SDMA_ECC:
default:


Re: [PATCH] drm/amdgpu: reserve the pd while cleaning up PRTs

2022-02-07 Thread Felix Kuehling

Am 2022-02-07 um 03:07 schrieb Christian König:

We want to have lockdep annotation here, so make sure that we reserve
the PD while removing PRTs even if it isn't strictly necessary since the
VM object is about to be destroyed anyway.

Signed-off-by: Christian König 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 ++++--
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index efd13898c83e..9f985bd463be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1194,8 +1194,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
amdgpu_vce_free_handles(adev, file_priv);
  
-	amdgpu_vm_bo_del(adev, fpriv->prt_va);

-
if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
/* TODO: how to handle reserve failure */
BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
@@ -1206,6 +1204,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
  
  	pasid = fpriv->vm.pasid;

pd = amdgpu_bo_ref(fpriv->vm.root.bo);
+   if (!WARN_ON(amdgpu_bo_reserve(pd, true))) {
+   amdgpu_vm_bo_del(adev, fpriv->prt_va);
+   amdgpu_bo_unreserve(pd);
+   }
  
  	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);

amdgpu_vm_fini(adev, &fpriv->vm);


RE: [PATCH 00/14] DC Patchset, Feb 7 2022 v2

2022-02-07 Thread Wheeler, Daniel
[Public]

Hi all,
 
This week this patchset was tested on the following systems:
 
HP Envy 360, with Ryzen 5 4500U, with the following display types: eDP 1080p 
60hz, 4k 60hz  (via USB-C to DP/HDMI), 1440p 144hz (via USB-C to DP/HDMI), 
1680*1050 60hz (via USB-C to DP and then DP to DVI/VGA)
 
Lenovo Thinkpad T14s Gen2 with AMD Ryzen 5 5650U, with the following display 
types: eDP 1080p 60hz, 4k 60hz  (via USB-C to DP/HDMI), 1440p 144hz (via USB-C 
to DP/HDMI), 1680*1050 60hz (via USB-C to DP and then DP to DVI/VGA)
 
Sapphire Pulse RX5700XT with the following display types:
4k 60hz  (via DP/HDMI), 1440p 144hz (via DP/HDMI), 1680*1050 60hz (via DP to 
DVI/VGA)
 
Reference AMD RX6800 with the following display types:
4k 60hz  (via DP/HDMI and USB-C to DP/HDMI), 1440p 144hz (via USB-C to DP/HDMI 
and USB-C to DP/HDMI), 1680*1050 60hz (via DP to DVI/VGA)
 
Included testing using a Startech DP 1.4 MST hub at 2x 4k 60hz, and 3x 1080p 
60hz on all systems. Also tested DSC via USB-C to DP DSC Hub with 3x 4k 60hz on 
Ryzen 9 5900h and Ryzen 5 4500u.
 
Tested on Ubuntu 20.04.3 with Kernel Version 5.13 and ChromeOS
 
Tested-by: Daniel Wheeler 
 
 
Thank you,
 
Dan Wheeler
Technologist  |  AMD
SW Display
--
1 Commerce Valley Dr E, Thornhill, ON L3T 7X6
Facebook |  Twitter |  amd.com  

-Original Message-
From: amd-gfx  On Behalf Of Jasdeep 
Dhillon
Sent: February 4, 2022 8:50 PM
To: amd-gfx@lists.freedesktop.org
Cc: Wang, Chao-kai (Stylon) ; Li, Sun peng (Leo) 
; Wentland, Harry ; Zhuo, Qingqing 
(Lillian) ; Dhillon, Jasdeep ; 
Siqueira, Rodrigo ; Li, Roman ; 
Chiu, Solomon ; Pillai, Aurabindo 
; Lin, Wayne ; Lakha, Bhawanpreet 
; Gutierrez, Agustin ; 
Kotarac, Pavle 
Subject: [PATCH 00/14] DC Patchset, Feb 7 2022 v2

This DC patchset brings improvements in multiple areas. In summary, we have:
- Fix for build failure uninitialized error
- Bug fix for DP2 using uncertified cable
- Limit unbounded request to 5k
- Fix DP LT sequence on EQ fail
- Bug fixes for S3/S4


Anthony Koo (1):
  drm/amd/display: [FW Promotion] Release 0.0.103.0

Aric Cyr (1):
  drm/amd/display: 3.2.172

Dmytro Laktyushkin (2):
  drm/amd/display: limit unbounded requesting to 5k
  drm/amd/display: fix yellow carp wm clamping

Eric Bernstein (2):
  drm/amd/display: Fix for variable may be used uninitialized error
  drm/amd/display: remove static from optc31_set_drr

Guo, Bing (1):
  dc: do blocked MST topology discovery at resume from S3/S4

Ilya (1):
  drm/amd/display: Fix DP LT sequence on EQ fail

Martin Tsai (1):
  drm/amd/display: handle null link encoder

Nicholas Kazlauskas (1):
  drm/amd/display: Fix stream->link_enc unassigned during stream removal

Oliver Logush (2):
  drm/amd/display: Basic support with device ID
  SWDEV-321758 - dc: Code clean

Paul Hsieh (1):
  drm/amd/display: change fastboot timing validation

Zhan Liu (1):
  drm/amd/display: keep eDP Vdd on when eDP stream is already enabled

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 24 +++-  
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 +-  
.../gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c  |  2 +-
 drivers/gpu/drm/amd/display/dc/core/dc.c  |  2 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 27 ++--  
.../gpu/drm/amd/display/dc/core/dc_link_dp.c  |  4 +-  
.../gpu/drm/amd/display/dc/core/dc_resource.c |  8 +--
 drivers/gpu/drm/amd/display/dc/dc.h   |  4 +-
 drivers/gpu/drm/amd/display/dc/dc_link.h  |  1 +
 .../display/dc/dce110/dce110_hw_sequencer.c   | 27 +++-
 .../drm/amd/display/dc/dcn20/dcn20_resource.c | 11 +---
 .../drm/amd/display/dc/dcn31/dcn31_hubbub.c   | 61 ++-
 .../gpu/drm/amd/display/dc/dcn31/dcn31_optc.c |  2 +-  
.../gpu/drm/amd/display/dc/dcn31/dcn31_optc.h |  2 +  
.../drm/amd/display/dc/dcn31/dcn31_resource.c |  3 +-
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |  4 +-
 .../gpu/drm/amd/display/include/dal_asic_id.h |  3 +-
 17 files changed, 104 insertions(+), 83 deletions(-)

--
2.25.1


Re: [PATCH] drm/amdkfd: enable heavy-weight TLB flush on Vega20

2022-02-07 Thread Eric Huang

Hi Guchun,

The SDMA FW team confirms MI50/VG20 doesn't have the same bug as MI100, 
which causes an ASIC hang issue when running the RVS test. If this change makes 
KFDMemoryTest fail, please file a Jira and assign it to me.


Thanks,
Eric

On 2022-02-07 08:01, Chen, Guchun wrote:

[Public]

Hi Eric,

Are you sure that there is no FW requirement for this patch on Vega20? 
The KFDMemory test fails with this commit.

Regards,
Guchun

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Tuesday, January 25, 2022 4:08 AM
To: Huang, JinHuiEric 
Cc: amd-gfx list 
Subject: Re: [PATCH] drm/amdkfd: enable heavy-weight TLB flush on Vega20

On Fri, Jan 21, 2022 at 11:17 AM Eric Huang  wrote:

It is to meet the requirement for memory allocation optimization on
MI50.

Signed-off-by: Eric Huang 

Assuming there is no firmware version requirement, the patch is:
Acked-by: Alex Deucher 


---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 5b8ae0795c0a..d708f1a502cf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1582,7 +1582,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct
file *filep,  static bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) {
 return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) &&
-   dev->adev->sdma.instance[0].fw_version >= 18);
+   dev->adev->sdma.instance[0].fw_version >= 18) ||
+   KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
  }

  static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
--
2.25.1





Re: [PATCH 1/3] drm/amdkfd: Fix TLB flushing in KFD SVM with no HWS

2022-02-07 Thread philip yang

  


On 2022-02-04 6:45 p.m., Mukul Joshi
  wrote:


  With no HWS, TLB flushing will not work in SVM code.
Fix this by calling kfd_flush_tlb() which works for both
HWS and no HWS case.

Signed-off-by: Mukul Joshi 

With the change below to remove one extra call parameter, this
  patch is

Reviewed-by: Philip Yang 

  
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 16 ++--
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 41f03d165bad..b1315c97b952 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1229,15 +1229,14 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
 			if (r)
 break;
 		}
-		amdgpu_amdkfd_flush_gpu_tlb_pasid(pdd->dev->adev,
-	p->pasid, TLB_FLUSH_HEAVYWEIGHT);
+		kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
 	}
 
 	return r;
 }
 
 static int
-svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+svm_range_map_to_gpu(struct amdgpu_device *adev, struct kfd_process_device *pdd,

svm_range_map_to_gpu(struct kfd_process_device *pdd, struct
svm_range *prange,

  
 		 struct svm_range *prange, unsigned long offset,
 		 unsigned long npages, bool readonly, dma_addr_t *dma_addr,
 		 struct amdgpu_device *bo_adev, struct dma_fence **fence)
@@ -1248,6 +1247,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,

struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
struct amdgpu_device *adev = pdd->dev->adev;

  
 	int last_domain;
 	int r = 0;
 	int64_t i, j;
+	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
 
 	last_start = prange->start + offset;
 
@@ -1305,12 +1305,8 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (fence)
 		*fence = dma_fence_get(vm->last_update);
 
-	if (table_freed) {
-		struct kfd_process *p;
-
-		p = container_of(prange->svms, struct kfd_process, svms);
-		amdgpu_amdkfd_flush_gpu_tlb_pasid(adev, p->pasid, TLB_FLUSH_LEGACY);
-	}
+	if (table_freed)
+		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
 out:
 	return r;
 }
@@ -1351,7 +1347,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
 			continue;
 		}
 
-		r = svm_range_map_to_gpu(pdd->dev->adev, drm_priv_to_vm(pdd->drm_priv),
+		r = svm_range_map_to_gpu(pdd->dev->adev, pdd,

svm_range_map_to_gpu(pdd, prange, offset, npages

  
 	 prange, offset, npages, readonly,
 	 prange->dma_addr[gpuidx],
 	 bo_adev, wait ? &fence : NULL);


  



Re: [PATCH 2/3] drm/amdkfd: Remove unused old debugger implementation

2022-02-07 Thread Felix Kuehling



Am 2022-02-04 um 18:45 schrieb Mukul Joshi:

Cleanup the kfd code by removing the unused old debugger
implementation.
Only a small piece of resetting wavefronts is kept and
is moved to kfd_device_queue_manager.c

Signed-off-by: Mukul Joshi 


We should update include/uapi/linux/kfd_ioctl.h as well to mark the 
ioctls as deprecated. Just rename the AMDKFD_IOC_DBG_* macros to 
AMDKFD_IOC_DBG_*_DEPRECATED. A corresponding update to the Thunk would 
be good as a follow up.


I think there is also some more code you can remove in 
amdgpu_amdkfd_gfx_v*.c and kgd_kfd_interface.h:


    int (*address_watch_disable)(struct amdgpu_device *adev);
    int (*address_watch_execute)(struct amdgpu_device *adev,
    unsigned int watch_point_id,
    uint32_t cntl_val,
    uint32_t addr_hi,
    uint32_t addr_lo);
    uint32_t (*address_watch_get_offset)(struct amdgpu_device *adev,
    unsigned int watch_point_id,
    unsigned int reg_offset);

It seems wave_control_execute is still used in dbgdev_wave_reset_wavefronts.

Regards,
  Felix



---
  drivers/gpu/drm/amd/amdkfd/Makefile   |   2 -
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 282 +-
  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   | 845 --
  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h   | 230 -
  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c   | 158 
  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h   | 293 --
  drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   2 -
  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  59 ++
  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  35 +
  drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|  12 -
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   5 -
  drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  19 -
  12 files changed, 98 insertions(+), 1844 deletions(-)
  delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
  delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
  delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
  delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index c4f3aff11072..19cfbf9577b4 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -51,8 +51,6 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_events.o \
$(AMDKFD_PATH)/cik_event_interrupt.o \
$(AMDKFD_PATH)/kfd_int_process_v9.o \
-   $(AMDKFD_PATH)/kfd_dbgdev.o \
-   $(AMDKFD_PATH)/kfd_dbgmgr.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
$(AMDKFD_PATH)/kfd_crat.o
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

index 64e3b4e3a712..cfe12525165f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -39,7 +39,6 @@
  #include 
  #include "kfd_priv.h"
  #include "kfd_device_queue_manager.h"
-#include "kfd_dbgmgr.h"
  #include "kfd_svm.h"
  #include "amdgpu_amdkfd.h"
  #include "kfd_smi_events.h"
@@ -580,299 +579,26 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
  static int kfd_ioctl_dbg_register(struct file *filep,
struct kfd_process *p, void *data)
  {
-   struct kfd_ioctl_dbg_register_args *args = data;
-   struct kfd_dev *dev;
-   struct kfd_dbgmgr *dbgmgr_ptr;
-   struct kfd_process_device *pdd;
-   bool create_ok;
-   long status = 0;
-
-   mutex_lock(&p->mutex);
-   pdd = kfd_process_device_data_by_id(p, args->gpu_id);
-   if (!pdd) {
-   status = -EINVAL;
-   goto err_pdd;
-   }
-   dev = pdd->dev;
-
-   if (dev->adev->asic_type == CHIP_CARRIZO) {
-   pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
-   status = -EINVAL;
-   goto err_chip_unsupp;
-   }
-
-   mutex_lock(kfd_get_dbgmgr_mutex());
-
-   /*
-* make sure that we have pdd, if this the first queue created for
-* this process
-*/
-   pdd = kfd_bind_process_to_device(dev, p);
-   if (IS_ERR(pdd)) {
-   status = PTR_ERR(pdd);
-   goto out;
-   }
-
-   if (!dev->dbgmgr) {
-   /* In case of a legal call, we have no dbgmgr yet */
-   create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
-   if (create_ok) {
-   status = kfd_dbgmgr_register(dbgmgr_ptr, p);
-   if (status != 0)
-   kfd_dbgmgr_destroy(dbgmgr_ptr);
-   else
-   dev->dbgmgr = dbgmgr_ptr;
-   }
-   } else {
-  

Re: [PATCH 3/3] drm/amdkfd: Consolidate MQD manager functions

2022-02-07 Thread Felix Kuehling



Am 2022-02-04 um 18:45 schrieb Mukul Joshi:

A few MQD manager functions are duplicated for all versions of
MQD manager. Remove this duplication by moving the common
functions into kfd_mqd_manager.c file.

Signed-off-by: Mukul Joshi 
---
  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  | 63 +
  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  | 27 
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  | 54 ---
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 61 -
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 68 ---
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   | 53 ---
  6 files changed, 90 insertions(+), 236 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index e2825ad4d699..f4a6af98db2d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -173,3 +173,66 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
}
}
  }
+
+int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+uint32_t pipe_id, uint32_t queue_id,
+struct queue_properties *p, struct mm_struct *mms)


Since these functions are no longer static, they should get an 
appropriate name prefix to avoid future namespace collisions. Just a 
kfd_ prefix will do.


I think there are existing functions in this file that could use the 
same treatment (in a separate patch).




+{
+   return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
+ queue_id, p->doorbell_off);
+}
+
+int destroy_mqd(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type, unsigned int timeout,
+   uint32_t pipe_id,uint32_t queue_id)


This function is only applicable to CP queues. Therefore I'd give it a 
more specific name, e.g. kfd_destroy_cp_mqd. Similar for the other 
non-SDMA functions below.


Regards,
  Felix



+{
+   return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout,
+   pipe_id, queue_id);
+}
+
+void free_mqd(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+   if (mqd_mem_obj->gtt_mem) {
+   amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem);
+   kfree(mqd_mem_obj);
+   } else {
+   kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+   }
+}
+
+bool is_occupied(struct mqd_manager *mm, void *mqd,
+uint64_t queue_address, uint32_t pipe_id,
+uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address,
+   pipe_id, queue_id);
+}
+
+int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
+   (uint32_t __user *)p->write_ptr,
+   mms);
+}
+
+/*
+ * preempt type here is ignored because there is only one way
+ * to preempt sdma queue
+ */
+int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+enum kfd_preempt_type type,
+unsigned int timeout, uint32_t pipe_id,
+uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+}
+
+bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 23486a23df84..76f20637b938 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -136,4 +136,31 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask);
  
+int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,

+   uint32_t pipe_id, uint32_t queue_id,
+   struct queue_properties *p, struct mm_struct *mms);
+
+int destroy_mqd(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type, unsigned int timeout,
+   uint32_t pipe_id,uint32_t queue_id);
+
+void free_mqd(struct mqd_manager *mm, void *mqd,
+   struct kfd_mem_obj *mqd_mem_obj);
+
+bool is_occupied(struct mqd_manager *mm, void *mqd,
+uint64_t queue_address, uint32_t pipe_id,
+uint32_t queue_id);
+
+int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+   uint32_t pipe_id, 

Re: [PATCH 13/13] drm/amd/display: Basic support with device ID

2022-02-07 Thread Alex Deucher
On Fri, Feb 4, 2022 at 11:33 PM Jasdeep Dhillon  wrote:
>
> From: Oliver Logush 
>
> [why]
> To get the the cyan_skillfish check working

NAK.  This is still not correct.

>
> Reviewed-by: Charlene Liu 
> Reviewed-by: Charlene Liu 
> Acked-by: Jasdeep Dhillon 
> Signed-off-by: Oliver Logush 
> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 24 +--
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 +-
>  .../gpu/drm/amd/display/dc/core/dc_resource.c |  2 +-
>  .../gpu/drm/amd/display/include/dal_asic_id.h |  3 ++-
>  4 files changed, 26 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 8f53c9f6b267..f5941e59e5ad 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -1014,6 +1014,14 @@ static  void amdgpu_dm_audio_eld_notify(struct 
> amdgpu_device *adev, int pin)
> }
>  }
>
> +bool is_skillfish_series(struct amdgpu_device *adev)
> +{
> +   if (adev->asic_type == CHIP_CYAN_SKILLFISH || adev->pdev->revision == 
> 0x143F) {
> +   return true;
> +   }
> +   return false;
> +}

I don't see why we need this.

> +
>  static int dm_dmub_hw_init(struct amdgpu_device *adev)
>  {
> const struct dmcub_firmware_header_v1_0 *hdr;
> @@ -1049,7 +1057,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
> return -EINVAL;
> }
>
> -   if (!has_hw_support) {
> +   if (is_skillfish_series(adev)) {

Why this change?  won't this break other asics with no hw support?

> DRM_INFO("DMUB unsupported on ASIC\n");
> return 0;
> }
> @@ -1471,6 +1479,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
> default:
> break;
> }
> +   if (is_skillfish_series(adev)) {
> +   init_data.flags.disable_dmcu = true;
> +   break;
> +   }

Should not be necessary.

> break;
> }
>
> @@ -1777,7 +1789,6 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
> case CHIP_VEGA10:
> case CHIP_VEGA12:
> case CHIP_VEGA20:
> -   return 0;

This change seems unrelated and may break other asics.

> case CHIP_NAVI12:
> fw_name_dmcu = FIRMWARE_NAVI12_DMCU;
> break;
> @@ -1805,6 +1816,9 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
> default:
> break;
> }
> +   if (is_skillfish_series(adev)) {
> +   return 0;
> +   }

Why do we need this?

> DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type);
> return -EINVAL;
> }
> @@ -4515,6 +4529,12 @@ static int dm_early_init(void *handle)
> adev->mode_info.num_dig = 6;
> break;
> default:
> +   if (is_skillfish_series(adev)) {
> +   adev->mode_info.num_crtc = 2;
> +   adev->mode_info.num_hpd = 2;
> +   adev->mode_info.num_dig = 2;
> +   break;
> +   }

Same here.

>  #if defined(CONFIG_DRM_AMD_DC_DCN)
> switch (adev->ip_versions[DCE_HWIP][0]) {
> case IP_VERSION(2, 0, 2):
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> index e35977fda5c1..13875d669acd 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> @@ -82,7 +82,7 @@ struct common_irq_params {
> enum dc_irq_source irq_src;
> atomic64_t previous_timestamp;
>  };
> -
> +bool is_skillfish_series(struct amdgpu_device *adev);
>  /**
>   * struct dm_compressor_info - Buffer info used by frame buffer compression
>   * @cpu_addr: MMIO cpu addr
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
> b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
> index b36bae4b5bc9..318d381e2910 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
> @@ -135,7 +135,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id 
> asic_id)
>
> case FAMILY_NV:
> dc_version = DCN_VERSION_2_0;
> -   if (asic_id.chip_id == DEVICE_ID_NV_13FE) {
> +   if (asic_id.chip_id == DEVICE_ID_NV_NAVI10_LITE_P_13FE || 
> asic_id.chip_id == DEVICE_ID_NV_NAVI10_LITE_P_143F) {

I think these last two hunks are the only ones you need.  The rest
should be unnecessary.

> dc_version = DCN_VERSION_2_01;
> break;
> }
> diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h 
> b/drivers/gpu/drm/amd/displ

[PATCH 0/1] Revert Patch from Feb 7 2022 Patchset

2022-02-07 Thread Jasdeep Dhillon
Reverting patch

Jasdeep Dhillon (1):
  Revert "drm/amd/display: Basic support with device ID"

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 24 ++-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 +-
 .../gpu/drm/amd/display/dc/core/dc_resource.c |  2 +-
 .../gpu/drm/amd/display/include/dal_asic_id.h |  3 +--
 4 files changed, 5 insertions(+), 26 deletions(-)

-- 
2.25.1



Re: [PATCH 13/13] drm/amd/display: Basic support with device ID

2022-02-07 Thread Dhillon, Jasdeep
[AMD Official Use Only]

Hi Alex,

I already merged the branch but I have sent you the revert patch.

Regards,
Jasdeep

From: Alex Deucher 
Sent: February 7, 2022 10:58 AM
To: Dhillon, Jasdeep 
Cc: amd-gfx list ; Wang, Chao-kai (Stylon) 
; Liu, Charlene ; Logush, Oliver 
; Li, Sun peng (Leo) ; Wentland, 
Harry ; Zhuo, Qingqing (Lillian) 
; Siqueira, Rodrigo ; Li, 
Roman ; Chiu, Solomon ; Pillai, 
Aurabindo ; Lin, Wayne ; Lakha, 
Bhawanpreet ; Gutierrez, Agustin 
; Kotarac, Pavle 
Subject: Re: [PATCH 13/13] drm/amd/display: Basic support with device ID

On Fri, Feb 4, 2022 at 11:33 PM Jasdeep Dhillon  wrote:
>
> From: Oliver Logush 
>
> [why]
> To get the the cyan_skillfish check working

NAK.  This is still not correct.

>
> Reviewed-by: Charlene Liu 
> Reviewed-by: Charlene Liu 
> Acked-by: Jasdeep Dhillon 
> Signed-off-by: Oliver Logush 
> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 24 +--
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 +-
>  .../gpu/drm/amd/display/dc/core/dc_resource.c |  2 +-
>  .../gpu/drm/amd/display/include/dal_asic_id.h |  3 ++-
>  4 files changed, 26 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 8f53c9f6b267..f5941e59e5ad 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -1014,6 +1014,14 @@ static  void amdgpu_dm_audio_eld_notify(struct 
> amdgpu_device *adev, int pin)
> }
>  }
>
> +bool is_skillfish_series(struct amdgpu_device *adev)
> +{
> +   if (adev->asic_type == CHIP_CYAN_SKILLFISH || adev->pdev->revision == 
> 0x143F) {
> +   return true;
> +   }
> +   return false;
> +}

I don't see why we need this.

> +
>  static int dm_dmub_hw_init(struct amdgpu_device *adev)
>  {
> const struct dmcub_firmware_header_v1_0 *hdr;
> @@ -1049,7 +1057,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
> return -EINVAL;
> }
>
> -   if (!has_hw_support) {
> +   if (is_skillfish_series(adev)) {

Why this change?  won't this break other asics with no hw support?

> DRM_INFO("DMUB unsupported on ASIC\n");
> return 0;
> }
> @@ -1471,6 +1479,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
> default:
> break;
> }
> +   if (is_skillfish_series(adev)) {
> +   init_data.flags.disable_dmcu = true;
> +   break;
> +   }

Should not be necessary.

> break;
> }
>
> @@ -1777,7 +1789,6 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
> case CHIP_VEGA10:
> case CHIP_VEGA12:
> case CHIP_VEGA20:
> -   return 0;

This change seems unrelated and may break other asics.

> case CHIP_NAVI12:
> fw_name_dmcu = FIRMWARE_NAVI12_DMCU;
> break;
> @@ -1805,6 +1816,9 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
> default:
> break;
> }
> +   if (is_skillfish_series(adev)) {
> +   return 0;
> +   }

Why do we need this?

> DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type);
> return -EINVAL;
> }
> @@ -4515,6 +4529,12 @@ static int dm_early_init(void *handle)
> adev->mode_info.num_dig = 6;
> break;
> default:
> +   if (is_skillfish_series(adev)) {
> +   adev->mode_info.num_crtc = 2;
> +   adev->mode_info.num_hpd = 2;
> +   adev->mode_info.num_dig = 2;
> +   break;
> +   }

Same here.

>  #if defined(CONFIG_DRM_AMD_DC_DCN)
> switch (adev->ip_versions[DCE_HWIP][0]) {
> case IP_VERSION(2, 0, 2):
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> index e35977fda5c1..13875d669acd 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> @@ -82,7 +82,7 @@ struct common_irq_params {
> enum dc_irq_source irq_src;
> atomic64_t previous_timestamp;
>  };
> -
> +bool is_skillfish_series(struct amdgpu_device *adev);
>  /**
>   * struct dm_compressor_info - Buffer info used by frame buffer compression
>   * @cpu_addr: MMIO cpu addr
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
> b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
> index b36bae4b5bc9..318d381e2910 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
> @@ -135,7 +135,7 @@ enum dce_version resource_parse_asic_id(struct 

Re: Minimal GPU setup

2022-02-07 Thread Deucher, Alexander
[AMD Official Use Only]

Most of the register programming in evergreen_gpu_init is required.  That code 
handles things like harvesting (e.g., disabling bad hardware resources) and 
setting sane asic specific settings in some registers.  If you don't do it, 
work may get scheduled to bad or incorrectly configured hardware blocks which 
will lead to hangs or corrupted results.  You can probably skip some of them, 
but I don't remember what is minimally required off hand.  It's generally a 
good idea to re-initialize those registers anyway in case someone has 
previously messed with them (e.g., manual register munging or GPU passed 
through to a VM etc.).

Posting the bios is enough to get you a working memory controller and enough 
asic setup to light up displays (basically what you need for pre-OS console).  
As Christian mentioned, loading the ucodes will get the associated engines 
working so that you can start feeding commands to the GPU, but without proper 
configuration of the various hardware blocks on the GPU, you may not have 
success in feeding data to the GPU.

Alex



From: amd-gfx  on behalf of Amol 

Sent: Saturday, February 5, 2022 4:47 AM
To: amd-gfx@lists.freedesktop.org 
Subject: Minimal GPU setup

Hello,

I am learning to program Radeon HD 7350 by reading the radeon
driver source in Linux, and the guides/manuals from AMD.

I understand the general flow of initialization the driver performs. I
have also been able to understand and re-implement the ATOM
BIOS virtual machine.

I am trying to program the device up from scratch (i.e. bare-metal).
Do I need to perform all those steps that the driver does? Reading
the evergreen_gpu_init function is demotivating; it initializes many
fields and registers which I suspect may not be required for a minimal
setup.

Is posting the BIOS and loading the microcode enough to get me started
with running basic tasks (DMA transfers, simple packet processing, etc.)?

Thanks,
Amol


Re: [PATCH 0/1] Revert Patch from Feb 7 2022 Patchset

2022-02-07 Thread Deucher, Alexander
[AMD Official Use Only]

Acked-by: Alex Deucher 

From: amd-gfx  on behalf of Jasdeep 
Dhillon 
Sent: Monday, February 7, 2022 11:23 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Wang, Chao-kai (Stylon) ; Li, Sun peng (Leo) 
; Wentland, Harry ; Zhuo, Qingqing 
(Lillian) ; Dhillon, Jasdeep ; 
Siqueira, Rodrigo ; Li, Roman ; 
Chiu, Solomon ; Pillai, Aurabindo 
; Lin, Wayne ; Lakha, Bhawanpreet 
; Gutierrez, Agustin ; 
Kotarac, Pavle 
Subject: [PATCH 0/1] Revert Patch from Feb 7 2022 Patchset

Reverting patch

Jasdeep Dhillon (1):
  Revert "drm/amd/display: Basic support with device ID"

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 24 ++-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 +-
 .../gpu/drm/amd/display/dc/core/dc_resource.c |  2 +-
 .../gpu/drm/amd/display/include/dal_asic_id.h |  3 +--
 4 files changed, 5 insertions(+), 26 deletions(-)

--
2.25.1



[PATCH 0/1] Revert Patch from Feb 7 2022 Patchset v2

2022-02-07 Thread Jasdeep Dhillon
Reverting patch

Jasdeep Dhillon (1):
  Revert "drm/amd/display: Basic support with device ID"

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 24 ++-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 +-
 .../gpu/drm/amd/display/dc/core/dc_resource.c |  2 +-
 .../gpu/drm/amd/display/include/dal_asic_id.h |  3 +--
 4 files changed, 5 insertions(+), 26 deletions(-)

-- 
2.25.1



[PATCH 1/1] Revert "drm/amd/display: Basic support with device ID"

2022-02-07 Thread Jasdeep Dhillon
This reverts commit 733a212f20dfa14fa20814f21526fb180f25fdd8.
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 24 ++-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 +-
 .../gpu/drm/amd/display/dc/core/dc_resource.c |  2 +-
 .../gpu/drm/amd/display/include/dal_asic_id.h |  3 +--
 4 files changed, 5 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f5941e59e5ad..8f53c9f6b267 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1014,14 +1014,6 @@ static  void amdgpu_dm_audio_eld_notify(struct 
amdgpu_device *adev, int pin)
}
 }
 
-bool is_skillfish_series(struct amdgpu_device *adev)
-{
-   if (adev->asic_type == CHIP_CYAN_SKILLFISH || adev->pdev->revision == 
0x143F) {
-   return true;
-   }
-   return false;
-}
-
 static int dm_dmub_hw_init(struct amdgpu_device *adev)
 {
const struct dmcub_firmware_header_v1_0 *hdr;
@@ -1057,7 +1049,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
return -EINVAL;
}
 
-   if (is_skillfish_series(adev)) {
+   if (!has_hw_support) {
DRM_INFO("DMUB unsupported on ASIC\n");
return 0;
}
@@ -1479,10 +1471,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
default:
break;
}
-   if (is_skillfish_series(adev)) {
-   init_data.flags.disable_dmcu = true;
-   break;
-   }
break;
}
 
@@ -1789,6 +1777,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
+   return 0;
case CHIP_NAVI12:
fw_name_dmcu = FIRMWARE_NAVI12_DMCU;
break;
@@ -1816,9 +1805,6 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
default:
break;
}
-   if (is_skillfish_series(adev)) {
-   return 0;
-   }
DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type);
return -EINVAL;
}
@@ -4529,12 +4515,6 @@ static int dm_early_init(void *handle)
adev->mode_info.num_dig = 6;
break;
default:
-   if (is_skillfish_series(adev)) {
-   adev->mode_info.num_crtc = 2;
-   adev->mode_info.num_hpd = 2;
-   adev->mode_info.num_dig = 2;
-   break;
-   }
 #if defined(CONFIG_DRM_AMD_DC_DCN)
switch (adev->ip_versions[DCE_HWIP][0]) {
case IP_VERSION(2, 0, 2):
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 13875d669acd..e35977fda5c1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -82,7 +82,7 @@ struct common_irq_params {
enum dc_irq_source irq_src;
atomic64_t previous_timestamp;
 };
-bool is_skillfish_series(struct amdgpu_device *adev);
+
 /**
  * struct dm_compressor_info - Buffer info used by frame buffer compression
  * @cpu_addr: MMIO cpu addr
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 318d381e2910..b36bae4b5bc9 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -135,7 +135,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id 
asic_id)
 
case FAMILY_NV:
dc_version = DCN_VERSION_2_0;
-   if (asic_id.chip_id == DEVICE_ID_NV_NAVI10_LITE_P_13FE || 
asic_id.chip_id == DEVICE_ID_NV_NAVI10_LITE_P_143F) {
+   if (asic_id.chip_id == DEVICE_ID_NV_13FE) {
dc_version = DCN_VERSION_2_01;
break;
}
diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h 
b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
index 37ec6343dbd6..e4a2dfacab4c 100644
--- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h
+++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
@@ -211,8 +211,7 @@ enum {
 #ifndef ASICREV_IS_GREEN_SARDINE
 #define ASICREV_IS_GREEN_SARDINE(eChipRev) ((eChipRev >= GREEN_SARDINE_A0) && 
(eChipRev < 0xFF))
 #endif
-#define DEVICE_ID_NV_NAVI10_LITE_P_13FE  0x13FE  // CYAN_SKILLFISH
-#define DEVICE_ID_NV_NAVI10_LITE_P_143F0x143F 
+#define DEVICE_ID_NV_13FE 0x13FE  // CYAN_SKILLFISH
 #define FAMILY_VGH 144
 #define DEVICE_ID_VGH_163F 0x163F
 #define VANGOGH_A0 0x01
-- 
2.25.1



RE: [PATCH 2/3] drm/amdkfd: Remove unused old debugger implementation

2022-02-07 Thread Joshi, Mukul
[AMD Official Use Only]



> -Original Message-
> From: Kuehling, Felix 
> Sent: Monday, February 7, 2022 10:33 AM
> To: Joshi, Mukul ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/3] drm/amdkfd: Remove unused old debugger
> implementation
> 
> 
> Am 2022-02-04 um 18:45 schrieb Mukul Joshi:
> > Cleanup the kfd code by removing the unused old debugger
> > implementation.
> > Only a small piece of resetting wavefronts is kept and
> > is moved to kfd_device_queue_manager.c
> >
> > Signed-off-by: Mukul Joshi 
> 
> We should update include/uapi/linux/kfd_ioctl.h as well to mark the
> ioctls as deprecated. Just rename the AMDKFD_IOC_DBG_* macros to
> AMDKFD_IOC_DBG_*_DEPRECATED. A corresponding update to the Thunk
> would
> be good as a follow up.
> 
Would it require an update to the KFD ioctl minor version?
Wouldn't it be better to just increment the minor version for ioctl and say 
these aren't
supported after this version instead of changing the name?

Regards,
Mukul

> I think there is also some more code you can remove in
> amdgpu_amdkfd_gfx_v*.c and kgd_kfd_interface.h:
> 
>      int (*address_watch_disable)(struct amdgpu_device *adev);
>      int (*address_watch_execute)(struct amdgpu_device *adev,
>      unsigned int watch_point_id,
>      uint32_t cntl_val,
>      uint32_t addr_hi,
>      uint32_t addr_lo);
>      uint32_t (*address_watch_get_offset)(struct amdgpu_device *adev,
>      unsigned int watch_point_id,
>      unsigned int reg_offset);
> 
> It seems wave_control_execute is still used in dbgdev_wave_reset_wavefronts.
> 
> Regards,
>    Felix
> 
> 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/Makefile   |   2 -
> >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 282 +-
> >   drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   | 845 --
> >   drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h   | 230 -
> >   drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c   | 158 
> >   drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h   | 293 --
> >   drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   2 -
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  59 ++
> >   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  35 +
> >   drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|  12 -
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   5 -
> >   drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  19 -
> >   12 files changed, 98 insertions(+), 1844 deletions(-)
> >   delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
> >   delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
> >   delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
> >   delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile
> b/drivers/gpu/drm/amd/amdkfd/Makefile
> > index c4f3aff11072..19cfbf9577b4 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/Makefile
> > +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
> > @@ -51,8 +51,6 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
> > $(AMDKFD_PATH)/kfd_events.o \
> > $(AMDKFD_PATH)/cik_event_interrupt.o \
> > $(AMDKFD_PATH)/kfd_int_process_v9.o \
> > -   $(AMDKFD_PATH)/kfd_dbgdev.o \
> > -   $(AMDKFD_PATH)/kfd_dbgmgr.o \
> > $(AMDKFD_PATH)/kfd_smi_events.o \
> > $(AMDKFD_PATH)/kfd_crat.o
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index 64e3b4e3a712..cfe12525165f 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -39,7 +39,6 @@
> >   #include 
> >   #include "kfd_priv.h"
> >   #include "kfd_device_queue_manager.h"
> > -#include "kfd_dbgmgr.h"
> >   #include "kfd_svm.h"
> >   #include "amdgpu_amdkfd.h"
> >   #include "kfd_smi_events.h"
> > @@ -580,299 +579,26 @@ static int kfd_ioctl_set_trap_handler(struct file
> *filep,
> >   static int kfd_ioctl_dbg_register(struct file *filep,
> > struct kfd_process *p, void *data)
> >   {
> > -   struct kfd_ioctl_dbg_register_args *args = data;
> > -   struct kfd_dev *dev;
> > -   struct kfd_dbgmgr *dbgmgr_ptr;
> > -   struct kfd_process_device *pdd;
> > -   bool create_ok;
> > -   long status = 0;
> > -
> > -   mutex_lock(&p->mutex);
> > -   pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> > -   if (!pdd) {
> > -   status = -EINVAL;
> > -   goto err_pdd;
> > -   }
> > -   dev = pdd->dev;
> > -
> > -   if (dev->adev->asic_type == CHIP_CARRIZO) {
> > -   pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
> > -   status = -EINVAL;
> > -   goto err_chip_unsupp;
> > -   }
> > -
> > -   mutex_lock(kfd_get_dbgmgr_mutex());
> > -
> > -   /*
> > -* make sure

Re: [PATCH 2/3] drm/amdkfd: Remove unused old debugger implementation

2022-02-07 Thread Felix Kuehling

Am 2022-02-07 um 12:10 schrieb Joshi, Mukul:

[AMD Official Use Only]




-Original Message-
From: Kuehling, Felix 
Sent: Monday, February 7, 2022 10:33 AM
To: Joshi, Mukul ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/3] drm/amdkfd: Remove unused old debugger
implementation


Am 2022-02-04 um 18:45 schrieb Mukul Joshi:

Cleanup the kfd code by removing the unused old debugger
implementation.
Only a small piece of resetting wavefronts is kept and
is moved to kfd_device_queue_manager.c

Signed-off-by: Mukul Joshi 

We should update include/uapi/linux/kfd_ioctl.h as well to mark the
ioctls as deprecated. Just rename the AMDKFD_IOC_DBG_* macros to
AMDKFD_IOC_DBG_*_DEPRECATED. A corresponding update to the Thunk
would
be good as a follow up.


Would it require an update to the KFD ioctl minor version?
Wouldn't it be better to just increment the minor version for ioctl and say 
these aren't
supported after this version instead of changing the name?


I don't think that would help anyone. If existing (really old) user mode 
was using it, they would not be checking the version number. All they 
get is -EPERM from the ioctls. And new user mode code will not be using 
these APIs and has no reason to check the version number.


Regards,
  Felix




Regards,
Mukul


I think there is also some more code you can remove in
amdgpu_amdkfd_gfx_v*.c and kgd_kfd_interface.h:

      int (*address_watch_disable)(struct amdgpu_device *adev);
      int (*address_watch_execute)(struct amdgpu_device *adev,
      unsigned int watch_point_id,
      uint32_t cntl_val,
      uint32_t addr_hi,
      uint32_t addr_lo);
      uint32_t (*address_watch_get_offset)(struct amdgpu_device *adev,
      unsigned int watch_point_id,
      unsigned int reg_offset);

It seems wave_control_execute is still used in dbgdev_wave_reset_wavefronts.

Regards,
    Felix



---
   drivers/gpu/drm/amd/amdkfd/Makefile   |   2 -
   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 282 +-
   drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   | 845 --
   drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h   | 230 -
   drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c   | 158 
   drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h   | 293 --
   drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   2 -
   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  59 ++
   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  35 +
   drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|  12 -
   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   5 -
   drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  19 -
   12 files changed, 98 insertions(+), 1844 deletions(-)
   delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
   delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
   delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
   delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile

b/drivers/gpu/drm/amd/amdkfd/Makefile

index c4f3aff11072..19cfbf9577b4 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -51,8 +51,6 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_events.o \
$(AMDKFD_PATH)/cik_event_interrupt.o \
$(AMDKFD_PATH)/kfd_int_process_v9.o \
-   $(AMDKFD_PATH)/kfd_dbgdev.o \
-   $(AMDKFD_PATH)/kfd_dbgmgr.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
$(AMDKFD_PATH)/kfd_crat.o

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

index 64e3b4e3a712..cfe12525165f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -39,7 +39,6 @@
   #include 
   #include "kfd_priv.h"
   #include "kfd_device_queue_manager.h"
-#include "kfd_dbgmgr.h"
   #include "kfd_svm.h"
   #include "amdgpu_amdkfd.h"
   #include "kfd_smi_events.h"
@@ -580,299 +579,26 @@ static int kfd_ioctl_set_trap_handler(struct file

*filep,

   static int kfd_ioctl_dbg_register(struct file *filep,
struct kfd_process *p, void *data)
   {
-   struct kfd_ioctl_dbg_register_args *args = data;
-   struct kfd_dev *dev;
-   struct kfd_dbgmgr *dbgmgr_ptr;
-   struct kfd_process_device *pdd;
-   bool create_ok;
-   long status = 0;
-
-   mutex_lock(&p->mutex);
-   pdd = kfd_process_device_data_by_id(p, args->gpu_id);
-   if (!pdd) {
-   status = -EINVAL;
-   goto err_pdd;
-   }
-   dev = pdd->dev;
-
-   if (dev->adev->asic_type == CHIP_CARRIZO) {
-   pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
-   status = -EINVAL;
-  

RE: [PATCH 3/3] drm/amdkfd: Consolidate MQD manager functions

2022-02-07 Thread Joshi, Mukul
[AMD Official Use Only]



> -Original Message-
> From: Kuehling, Felix 
> Sent: Monday, February 7, 2022 10:43 AM
> To: Joshi, Mukul ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 3/3] drm/amdkfd: Consolidate MQD manager functions
> 
> 
> Am 2022-02-04 um 18:45 schrieb Mukul Joshi:
> > A few MQD manager functions are duplicated for all versions of MQD
> > manager. Remove this duplication by moving the common functions into
> > kfd_mqd_manager.c file.
> >
> > Signed-off-by: Mukul Joshi 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  | 63
> +
> >   drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  | 27 
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  | 54 ---
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 61 -
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 68 ---
> >   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   | 53 ---
> >   6 files changed, 90 insertions(+), 236 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> > index e2825ad4d699..f4a6af98db2d 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> > @@ -173,3 +173,66 @@ void mqd_symmetrically_map_cu_mask(struct
> mqd_manager *mm,
> > }
> > }
> >   }
> > +
> > +int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
> > +uint32_t pipe_id, uint32_t queue_id,
> > +struct queue_properties *p, struct mm_struct *mms)
> 
> Since these functions are no longer static, they should get an appropriate 
> name
> prefix to avoid future namespace collisions. Just a kfd_ prefix will do.
> 
> I think there are existing functions in this file that could use the same 
> treatment
> (in a separate patch).
> 
> 
> > +{
> > +   return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd,
> pipe_id,
> > + queue_id, p->doorbell_off);
> > +}
> > +
> > +int destroy_mqd(struct mqd_manager *mm, void *mqd,
> > +   enum kfd_preempt_type type, unsigned int timeout,
> > +   uint32_t pipe_id,uint32_t queue_id)
> 
> This function is only applicable to CP queues. Therefore I'd give it a
> more specific name, e.g. kfd_destroy_cp_mqd. Similar for the other
> non-SDMA functions below.
> 
We define destroy_hqd for HIQ (and DIQ ) also. Same for free_mqd and other 
functions.
I guess that’s why we have _sdma for SDMA queues, and the rest use a generic 
name.
Maybe leaving it without '_cp' is better here. What do you think?

Regards,
Mukul

> Regards,
>    Felix
> 
> 
> > +{
> > +   return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type,
> timeout,
> > +   pipe_id, queue_id);
> > +}
> > +
> > +void free_mqd(struct mqd_manager *mm, void *mqd,
> > + struct kfd_mem_obj *mqd_mem_obj)
> > +{
> > +   if (mqd_mem_obj->gtt_mem) {
> > +   amdgpu_amdkfd_free_gtt_mem(mm->dev->adev,
> mqd_mem_obj->gtt_mem);
> > +   kfree(mqd_mem_obj);
> > +   } else {
> > +   kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
> > +   }
> > +}
> > +
> > +bool is_occupied(struct mqd_manager *mm, void *mqd,
> > +uint64_t queue_address, uint32_t pipe_id,
> > +uint32_t queue_id)
> > +{
> > +   return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev,
> queue_address,
> > +   pipe_id, queue_id);
> > +}
> > +
> > +int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
> > + uint32_t pipe_id, uint32_t queue_id,
> > + struct queue_properties *p, struct mm_struct *mms)
> > +{
> > +   return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
> > +   (uint32_t __user *)p-
> >write_ptr,
> > +   mms);
> > +}
> > +
> > +/*
> > + * preempt type here is ignored because there is only one way
> > + * to preempt sdma queue
> > + */
> > +int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
> > +enum kfd_preempt_type type,
> > +unsigned int timeout, uint32_t pipe_id,
> > +uint32_t queue_id)
> > +{
> > +   return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd,
> timeout);
> > +}
> > +
> > +bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
> > + uint64_t queue_address, uint32_t pipe_id,
> > + uint32_t queue_id)
> > +{
> > +   return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev,
> mqd);
> > +}
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
> > index 23486a23df84..76f20637b938 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
> > @@ -136,4 +136,31 @@ void mqd_symmetrically_map_cu_mask(struct
> mqd_manager *mm,
> >   

Re: [PATCH 3/3] drm/amdkfd: Consolidate MQD manager functions

2022-02-07 Thread Felix Kuehling



Am 2022-02-07 um 12:50 schrieb Joshi, Mukul:

[AMD Official Use Only]




-Original Message-
From: Kuehling, Felix 
Sent: Monday, February 7, 2022 10:43 AM
To: Joshi, Mukul ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/3] drm/amdkfd: Consolidate MQD manager functions


Am 2022-02-04 um 18:45 schrieb Mukul Joshi:

A few MQD manager functions are duplicated for all versions of MQD
manager. Remove this duplication by moving the common functions into
kfd_mqd_manager.c file.

Signed-off-by: Mukul Joshi 
---
   drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  | 63

+

   drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  | 27 
   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  | 54 ---
   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 61 -
   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 68 ---
   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   | 53 ---
   6 files changed, 90 insertions(+), 236 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index e2825ad4d699..f4a6af98db2d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -173,3 +173,66 @@ void mqd_symmetrically_map_cu_mask(struct

mqd_manager *mm,

}
}
   }
+
+int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+uint32_t pipe_id, uint32_t queue_id,
+struct queue_properties *p, struct mm_struct *mms)

Since these functions are no longer static, they should get an appropriate name
prefix to avoid future namespace collisions. Just a kfd_ prefix will do.

I think there are existing functions in this file that could use the same 
treatment
(in a separate patch).



+{
+   return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd,

pipe_id,

+ queue_id, p->doorbell_off);
+}
+
+int destroy_mqd(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type, unsigned int timeout,
+   uint32_t pipe_id,uint32_t queue_id)

This function is only applicable to CP queues. Therefore I'd give it a
more specific name, e.g. kfd_destroy_cp_mqd. Similar for the other
non-SDMA functions below.


We define destroy_hqd for HIQ (and DIQ ) also. Same for free_mqd and other 
functions.
I guess that’s why we have _sdma for SDMA queues, and the rest use a generic 
name.
Maybe leaving it without '_cp' is better here. What do you think?


I still think calling out the queue type in the function name makes 
sense, if the function is not applicable to all queue types. In some 
cases there is overlap. For example HIQ and DIQ would use 
destroy_hqd_cp. But HIQ uses free_mqd_hiq_sdma.


I guess it still leaves some ambiguity because CP is used as superset of 
CP, HIQ and DIQ in some cases, but only CP compute queues in others. If 
you want, you could use "CP" to mean the superset and "compute" for CP 
compute queues only.


Regards,
  Felix




Regards,
Mukul


Regards,
    Felix



+{
+   return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type,

timeout,

+   pipe_id, queue_id);
+}
+
+void free_mqd(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+   if (mqd_mem_obj->gtt_mem) {
+   amdgpu_amdkfd_free_gtt_mem(mm->dev->adev,

mqd_mem_obj->gtt_mem);

+   kfree(mqd_mem_obj);
+   } else {
+   kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+   }
+}
+
+bool is_occupied(struct mqd_manager *mm, void *mqd,
+uint64_t queue_address, uint32_t pipe_id,
+uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev,

queue_address,

+   pipe_id, queue_id);
+}
+
+int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
+   (uint32_t __user *)p-
write_ptr,
+   mms);
+}
+
+/*
+ * preempt type here is ignored because there is only one way
+ * to preempt sdma queue
+ */
+int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+enum kfd_preempt_type type,
+unsigned int timeout, uint32_t pipe_id,
+uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd,

timeout);

+}
+
+bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev,

mqd);

+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h

Re: [PATCH 1/8] mm: remove a pointless CONFIG_ZONE_DEVICE check in memremap_pages

2022-02-07 Thread Dan Williams
On Sun, Feb 6, 2022 at 10:33 PM Christoph Hellwig  wrote:
>
> memremap.c is only built when CONFIG_ZONE_DEVICE is set, so remove
> the superfluous extra check.

Looks good to me.

Reviewed-by: Dan Williams 



Re: [PATCH 2/8] mm: remove the __KERNEL__ guard from <linux/mm.h>

2022-02-07 Thread Dan Williams
On Sun, Feb 6, 2022 at 10:33 PM Christoph Hellwig  wrote:
>
> __KERNEL__ ifdefs don't make sense outside of include/uapi/.

Yes.

Reviewed-by: Dan Williams 



Re: [PATCH 6/8] mm: don't include <linux/memremap.h> in <linux/mm.h>

2022-02-07 Thread Logan Gunthorpe



On 2022-02-06 11:32 p.m., Christoph Hellwig wrote:
> Move the check for the actual pgmap types that need the free at refcount
> one behavior into the out of line helper, and thus avoid the need to
> pull memremap.h into mm.h.
> 
> Signed-off-by: Christoph Hellwig 

I've noticed mm/memcontrol.c uses is_device_private_page() and also
needs a memremap.h include added to compile with my configuration.

Logan



[PATCH 05/23] drm/amd/display: Fix color encoding mismatch

2022-02-07 Thread Maxime Ripard
The amdgpu KMS driver calls drm_plane_create_color_properties() with a
default encoding set to BT709.

However, the core will ignore it and the driver doesn't force it in its
plane state reset hook, so the initial value will be 0, which represents
BT601.

Fix the mismatch by using an initial value of BT601 in
drm_plane_create_color_properties().

Cc: amd-gfx@lists.freedesktop.org
Cc: Alex Deucher 
Cc: "Christian König" 
Cc: Harry Wentland 
Cc: Leo Li 
Cc: "Pan, Xinhui" 
Cc: Rodrigo Siqueira 
Signed-off-by: Maxime Ripard 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index feccf2b555d2..86b27a355e90 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -7914,7 +7914,7 @@ static int amdgpu_dm_plane_init(struct 
amdgpu_display_manager *dm,
BIT(DRM_COLOR_YCBCR_BT2020),
BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
BIT(DRM_COLOR_YCBCR_FULL_RANGE),
-   DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE);
+   DRM_COLOR_YCBCR_BT601, DRM_COLOR_YCBCR_LIMITED_RANGE);
}
 
supported_rotations =
-- 
2.34.1



Re: [PATCH 05/23] drm/amd/display: Fix color encoding mismatch

2022-02-07 Thread Harry Wentland
On 2022-02-07 11:34, Maxime Ripard wrote:
> The amdgpu KMS driver calls drm_plane_create_color_properties() with a
> default encoding set to BT709.
> 
> However, the core will ignore it and the driver doesn't force it in its
> plane state reset hook, so the initial value will be 0, which represents
> BT601.
> 

Isn't this a core issue? Should __drm_atomic_helper_plane_state_reset
reset all plane_state members to their properties' default values?

The amdgpu KMS driver currently doesn't respect the color_encoding
property but I would expect that a call to drm_plane_create_color_properties
with a default of BT709 means we're getting BT709 as color_encoding 
as part of atomic commits.

Harry

> Fix the mismatch by using an initial value of BT601 in
> drm_plane_create_color_properties().
> 
> Cc: amd-gfx@lists.freedesktop.org
> Cc: Alex Deucher 
> Cc: "Christian König" 
> Cc: Harry Wentland 
> Cc: Leo Li 
> Cc: "Pan, Xinhui" 
> Cc: Rodrigo Siqueira 
> Signed-off-by: Maxime Ripard 
> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index feccf2b555d2..86b27a355e90 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -7914,7 +7914,7 @@ static int amdgpu_dm_plane_init(struct 
> amdgpu_display_manager *dm,
>   BIT(DRM_COLOR_YCBCR_BT2020),
>   BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
>   BIT(DRM_COLOR_YCBCR_FULL_RANGE),
> - DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE);
> + DRM_COLOR_YCBCR_BT601, DRM_COLOR_YCBCR_LIMITED_RANGE);
>   }
>  
>   supported_rotations =



Re: [PATCH 05/23] drm/amd/display: Fix color encoding mismatch

2022-02-07 Thread Harry Wentland



On 2022-02-07 13:57, Harry Wentland wrote:
> On 2022-02-07 11:34, Maxime Ripard wrote:
>> The amdgpu KMS driver calls drm_plane_create_color_properties() with a
>> default encoding set to BT709.
>>
>> However, the core will ignore it and the driver doesn't force it in its
>> plane state reset hook, so the initial value will be 0, which represents
>> BT601.
>>
> 
> Isn't this a core issue? Should __drm_atomic_helper_plane_state_reset
> reset all plane_state members to their properties' default values?
> 

Ah, looks like that's exactly what you do in the later patches, which is
perfect. With that, I don't think you'll need this patch anymore.

Harry

> The amdgpu KMS driver currently doesn't respect the color_encoding
> property but I would expect that a call to drm_plane_create_color_properties
> with a default of BT709 means we're getting BT709 as color_encoding 
> as part of atomic commits.
> 
> Harry
> 
>> Fix the mismatch by using an initial value of BT601 in
>> drm_plane_create_color_properties().
>>
>> Cc: amd-gfx@lists.freedesktop.org
>> Cc: Alex Deucher 
>> Cc: "Christian König" 
>> Cc: Harry Wentland 
>> Cc: Leo Li 
>> Cc: "Pan, Xinhui" 
>> Cc: Rodrigo Siqueira 
>> Signed-off-by: Maxime Ripard 
>> ---
>>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> index feccf2b555d2..86b27a355e90 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> @@ -7914,7 +7914,7 @@ static int amdgpu_dm_plane_init(struct 
>> amdgpu_display_manager *dm,
>>  BIT(DRM_COLOR_YCBCR_BT2020),
>>  BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
>>  BIT(DRM_COLOR_YCBCR_FULL_RANGE),
>> -DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE);
>> +DRM_COLOR_YCBCR_BT601, DRM_COLOR_YCBCR_LIMITED_RANGE);
>>  }
>>  
>>  supported_rotations =
> 



Re: [PATCH 4/8] mm: move free_devmap_managed_page to memremap.c

2022-02-07 Thread Dan Williams
On Sun, Feb 6, 2022 at 10:33 PM Christoph Hellwig  wrote:
>
> free_devmap_managed_page has nothing to do with the code in swap.c,
> move it to live with the rest of the code for devmap handling.
>

Looks good.

Reviewed-by: Dan Williams 



Re: [PATCH 7/8] mm: remove the extra ZONE_DEVICE struct page refcount

2022-02-07 Thread Jason Gunthorpe
On Mon, Feb 07, 2022 at 07:32:48AM +0100, Christoph Hellwig wrote:
> ZONE_DEVICE struct pages have an extra reference count that complicates
> the code for put_page() and several places in the kernel that need to
> check the reference count to see that a page is not being used (gup,
> compaction, migration, etc.). Clean up the code so the reference count
> doesn't need to be treated specially for ZONE_DEVICE pages.
> 
> Note that this excludes the special idle page wakeup for fsdax pages,
> which still happens at refcount 1.  This is a separate issue and will
> be sorted out later.  Given that only fsdax pages require the
> notification when the refcount hits 1 now, the PAGEMAP_OPS Kconfig
> symbol can go away and be replaced with a FS_DAX check for this hook
> in the put_page fastpath.
> 
> Based on an earlier patch from Ralph Campbell .
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/powerpc/kvm/book3s_hv_uvmem.c   |  1 -
>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  1 -
>  drivers/gpu/drm/nouveau/nouveau_dmem.c   |  1 -
>  fs/Kconfig   |  1 -
>  include/linux/memremap.h | 12 +++--
>  include/linux/mm.h   |  6 +--
>  lib/test_hmm.c   |  1 -
>  mm/Kconfig   |  4 --
>  mm/internal.h|  2 +
>  mm/memcontrol.c  | 11 ++---
>  mm/memremap.c| 57 
>  mm/migrate.c |  6 ---
>  mm/swap.c| 16 ++-
>  13 files changed, 36 insertions(+), 83 deletions(-)

It looks like a good next step to me

Reviewed-by: Jason Gunthorpe 

>  struct dev_pagemap_ops {
>   /*
> -  * Called once the page refcount reaches 1.  (ZONE_DEVICE pages never
> -  * reach 0 refcount unless there is a refcount bug. This allows the
> -  * device driver to implement its own memory management.)
> +  * Called once the page refcount reaches 0.  The reference count will be
> +  * reset to one by the core code after the method is called to prepare
> +  * for handing out the page again.

I did prefer Ralph's version of this that kept the refcount at 0 while
the page was on the free-list. I hope we can get there again after
later series :)

Jason



Re: [PATCH 1/8] mm: remove a pointless CONFIG_ZONE_DEVICE check in memremap_pages

2022-02-07 Thread Jason Gunthorpe
On Mon, Feb 07, 2022 at 07:32:42AM +0100, Christoph Hellwig wrote:
> memremap.c is only built when CONFIG_ZONE_DEVICE is set, so remove
> the superfluous extra check.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  mm/memremap.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)

Reviewed-by: Jason Gunthorpe 

Jason



Re: [PATCH 2/8] mm: remove the __KERNEL__ guard from <linux/mm.h>

2022-02-07 Thread Jason Gunthorpe
On Mon, Feb 07, 2022 at 07:32:43AM +0100, Christoph Hellwig wrote:
> __KERNEL__ ifdefs don't make sense outside of include/uapi/.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  include/linux/mm.h | 4 
>  1 file changed, 4 deletions(-)

Reviewed-by: Jason Gunthorpe 

Jason



Re: [PATCH 4/8] mm: move free_devmap_managed_page to memremap.c

2022-02-07 Thread Jason Gunthorpe
On Mon, Feb 07, 2022 at 07:32:45AM +0100, Christoph Hellwig wrote:
> free_devmap_managed_page has nothing to do with the code in swap.c,
> move it to live with the rest of the code for devmap handling.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  include/linux/mm.h |  1 -
>  mm/memremap.c  | 21 +
>  mm/swap.c  | 23 ---
>  3 files changed, 21 insertions(+), 24 deletions(-)

Reviewed-by: Jason Gunthorpe 

Jason



Re: [PATCH 5/8] mm: simplify freeing of devmap managed pages

2022-02-07 Thread Jason Gunthorpe
On Mon, Feb 07, 2022 at 07:32:46AM +0100, Christoph Hellwig wrote:
> Make put_devmap_managed_page return if it took charge of the page
> or not and remove the separate page_is_devmap_managed helper.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  include/linux/mm.h | 34 ++
>  mm/memremap.c  | 20 +---
>  mm/swap.c  | 10 +-
>  3 files changed, 20 insertions(+), 44 deletions(-)

Reviewed-by: Jason Gunthorpe 

Jason



Re: [PATCH 6/8] mm: don't include <linux/memremap.h> in <linux/mm.h>

2022-02-07 Thread Jason Gunthorpe
On Mon, Feb 07, 2022 at 07:32:47AM +0100, Christoph Hellwig wrote:
> Move the check for the actual pgmap types that need the free at refcount
> one behavior into the out of line helper, and thus avoid the need to
> pull memremap.h into mm.h.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/arm64/mm/mmu.c|  1 +
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |  1 +
>  drivers/gpu/drm/drm_cache.c|  2 +-
>  drivers/gpu/drm/nouveau/nouveau_dmem.c |  1 +
>  drivers/gpu/drm/nouveau/nouveau_svm.c  |  1 +
>  drivers/infiniband/core/rw.c   |  1 +
>  drivers/nvdimm/pmem.h  |  1 +
>  drivers/nvme/host/pci.c|  1 +
>  drivers/nvme/target/io-cmd-bdev.c  |  1 +
>  fs/fuse/virtio_fs.c|  1 +
>  include/linux/memremap.h   | 18 ++
>  include/linux/mm.h | 20 
>  lib/test_hmm.c |  1 +
>  mm/memremap.c  |  6 +-
>  14 files changed, 34 insertions(+), 22 deletions(-)

Reviewed-by: Jason Gunthorpe 

Jason



Re: [PATCH 8/8] fsdax: depend on ZONE_DEVICE || FS_DAX_LIMITED

2022-02-07 Thread Jason Gunthorpe
On Mon, Feb 07, 2022 at 07:32:49AM +0100, Christoph Hellwig wrote:
> Add a depends on ZONE_DEVICE support or the s390-specific limited DAX
> support, as one of the two is required at runtime for fsdax code to
> actually work.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  fs/Kconfig | 1 +
>  1 file changed, 1 insertion(+)

Makes sense, but leaves me wondering why a kconfig randomizer didn't hit
this.. Or maybe it means some of the function stubs on !ZONE_DEVICE
are unnecessary now..

Reviewed-by: Jason Gunthorpe 

Jason



[PATCHv2 1/3] drm/amdkfd: Fix TLB flushing in KFD SVM with no HWS

2022-02-07 Thread Mukul Joshi
With no HWS, TLB flushing will not work in SVM code.
Fix this by calling kfd_flush_tlb() which works for both
HWS and no HWS case.

Signed-off-by: Mukul Joshi 
Reviewed-by: Philip Yang 
---
v1->v2:
- Don't pass adev to svm_range_map_to_gpu().
 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 ++--
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 41f03d165bad..058f85b432b0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1229,18 +1229,17 @@ svm_range_unmap_from_gpus(struct svm_range *prange, 
unsigned long start,
if (r)
break;
}
-   amdgpu_amdkfd_flush_gpu_tlb_pasid(pdd->dev->adev,
-   p->pasid, TLB_FLUSH_HEAVYWEIGHT);
+   kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
}
 
return r;
 }
 
 static int
-svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-struct svm_range *prange, unsigned long offset,
-unsigned long npages, bool readonly, dma_addr_t *dma_addr,
-struct amdgpu_device *bo_adev, struct dma_fence **fence)
+svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
+unsigned long offset, unsigned long npages, bool readonly,
+dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
+struct dma_fence **fence)
 {
bool table_freed = false;
uint64_t pte_flags;
@@ -1248,6 +1247,8 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
int last_domain;
int r = 0;
int64_t i, j;
+   struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+   struct amdgpu_device *adev = pdd->dev->adev;
 
last_start = prange->start + offset;
 
@@ -1305,12 +1306,8 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
if (fence)
*fence = dma_fence_get(vm->last_update);
 
-   if (table_freed) {
-   struct kfd_process *p;
-
-   p = container_of(prange->svms, struct kfd_process, svms);
-   amdgpu_amdkfd_flush_gpu_tlb_pasid(adev, p->pasid, 
TLB_FLUSH_LEGACY);
-   }
+   if (table_freed)
+   kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
 out:
return r;
 }
@@ -1351,8 +1348,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned 
long offset,
continue;
}
 
-   r = svm_range_map_to_gpu(pdd->dev->adev, 
drm_priv_to_vm(pdd->drm_priv),
-prange, offset, npages, readonly,
+   r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
 prange->dma_addr[gpuidx],
 bo_adev, wait ? &fence : NULL);
if (r)
-- 
2.33.1



[PATCHv2 2/3] drm/amdkfd: Remove unused old debugger implementation

2022-02-07 Thread Mukul Joshi
Cleanup the kfd code by removing the unused old debugger
implementation.
Only a small piece of resetting wavefronts is kept and
is moved to kfd_device_queue_manager.c

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Rename AMDKFD_IOC_DBG_* to AMDKFD_IOC_DBG_*_DEPRECATED.
- Cleanup address_watch_disable(), address_watch_execute(),
  and address_watch_get_offset() from amdgpu_amdkfd_gfx_* files.

 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |   3 -
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |   3 -
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  24 -
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  25 -
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |  96 --
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  24 -
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  24 -
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  10 -
 drivers/gpu/drm/amd/amdkfd/Makefile   |   2 -
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 290 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   | 845 --
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h   | 230 -
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c   | 158 
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h   | 293 --
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   2 -
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  59 ++
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  35 +
 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|  12 -
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   5 -
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  19 -
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |   9 -
 include/uapi/linux/kfd_ioctl.h|   8 +-
 22 files changed, 106 insertions(+), 2070 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 46cd4ee6bafb..c8935d718207 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -37,10 +37,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
.hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
-   .address_watch_disable = kgd_gfx_v9_address_watch_disable,
-   .address_watch_execute = kgd_gfx_v9_address_watch_execute,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
-   .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index abe93b3ff765..4191af5a3f13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -289,10 +289,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
.hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
-   .address_watch_disable = kgd_gfx_v9_address_watch_disable,
-   .address_watch_execute = kgd_gfx_v9_address_watch_execute,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
-   .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base =
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 7b7f4b2764c1..9378fc79e9ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -671,20 +671,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct 
amdgpu_device *adev,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
-static int kgd_address_watch_disable(struct amdgpu_device *adev)
-{
-   return 0;
-}
-
-static int kgd_address_watch_execute(struct amdgpu_device *adev,
-   unsigned int watch_point_id,
-   uint32_t cntl_val,
-   uint32_t addr_hi,
-   uint32_t addr_lo)
-{
-   return 0;
-}
-
 static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd)
@@ -709,13 +695,6 @@ static int kgd_wave_control_execute(struct a

[PATCHv2 3/3] drm/amdkfd: Consolidate MQD manager functions

2022-02-07 Thread Mukul Joshi
A few MQD manager functions are duplicated for all versions of
MQD manager. Remove this duplication by moving the common
functions into kfd_mqd_manager.c file.

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Add "kfd_" prefix to functions moved to kfd_mqd_manager.c.
- Also, suffix "_cp" to function names shared by CP, HIQ and DIQ.

 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  | 63 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  | 27 ++
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  | 76 +++
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 85 +++--
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 92 +++
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   | 75 +++
 6 files changed, 136 insertions(+), 282 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index e2825ad4d699..dd99f23e24f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -173,3 +173,66 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
}
}
 }
+
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+uint32_t pipe_id, uint32_t queue_id,
+struct queue_properties *p, struct mm_struct *mms)
+{
+   return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
+ queue_id, p->doorbell_off);
+}
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type, unsigned int timeout,
+   uint32_t pipe_id,uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout,
+   pipe_id, queue_id);
+}
+
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+   if (mqd_mem_obj->gtt_mem) {
+   amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem);
+   kfree(mqd_mem_obj);
+   } else {
+   kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+   }
+}
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+uint64_t queue_address, uint32_t pipe_id,
+uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address,
+   pipe_id, queue_id);
+}
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
+   (uint32_t __user *)p->write_ptr,
+   mms);
+}
+
+/*
+ * preempt type here is ignored because there is only one way
+ * to preempt sdma queue
+ */
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+enum kfd_preempt_type type,
+unsigned int timeout, uint32_t pipe_id,
+uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+}
+
+bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 23486a23df84..21851110f9eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -136,4 +136,31 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask);
 
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+   uint32_t pipe_id, uint32_t queue_id,
+   struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type, unsigned int timeout,
+   uint32_t pipe_id,uint32_t queue_id);
+
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+   struct kfd_mem_obj *mqd_mem_obj);
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+uint64_t queue_address, uint32_t pipe_id,
+uint32_t queue_id);
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+   uint32_t pipe_id, uint32_t queue_id,
+   struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type,unsigned int timeout,
+   uint32_t pipe_id, uint32_t queue_id);
+
+bool kfd_is_occupied_sdma(struct mqd_manager *mm, 

Re: [PATCH] drm/amd/pm: fix error handling

2022-02-07 Thread Alex Deucher
Applied.  Thanks!

Alex

On Sun, Feb 6, 2022 at 10:04 PM Quan, Evan  wrote:
>
> [AMD Official Use Only]
>
> Reviewed-by: Evan Quan 
>
> > -Original Message-
> > From: t...@redhat.com 
> > Sent: Saturday, February 5, 2022 11:00 PM
> > To: Quan, Evan ; Deucher, Alexander
> > ; Koenig, Christian
> > ; Pan, Xinhui ;
> > airl...@linux.ie; dan...@ffwll.ch; nat...@kernel.org;
> > ndesaulni...@google.com; Lazar, Lijo ; Powell, Darren
> > ; Chen, Guchun ;
> > Grodzovsky, Andrey 
> > Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; linux-
> > ker...@vger.kernel.org; l...@lists.linux.dev; Tom Rix 
> > Subject: [PATCH] drm/amd/pm: fix error handling
> >
> > From: Tom Rix 
> >
> > clang static analysis reports this error
> > amdgpu_smu.c:2289:9: warning: Called function pointer
> >   is null (null dereference)
> > return smu->ppt_funcs->emit_clk_levels(
> >^~~~
> >
> > There is a logic error in the earlier check of
> > emit_clk_levels.  The error value is set to
> > the ret variable but ret is never used.  Return
> > directly and remove the unneeded ret variable.
> >
> > Fixes: 5d64f9bbb628 ("amdgpu/pm: Implement new API function "emit" that
> > accepts buffer base and write offset")
> > Signed-off-by: Tom Rix 
> > ---
> >  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 3 +--
> >  1 file changed, 1 insertion(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > index af368aa1fd0ae..5f3b3745a9b7a 100644
> > --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > @@ -2274,7 +2274,6 @@ static int smu_emit_ppclk_levels(void *handle,
> > enum pp_clock_type type, char *bu
> >  {
> >   struct smu_context *smu = handle;
> >   enum smu_clk_type clk_type;
> > - int ret = 0;
> >
> >   clk_type = smu_convert_to_smuclk(type);
> >   if (clk_type == SMU_CLK_COUNT)
> > @@ -2284,7 +2283,7 @@ static int smu_emit_ppclk_levels(void *handle,
> > enum pp_clock_type type, char *bu
> >   return -EOPNOTSUPP;
> >
> >   if (!smu->ppt_funcs->emit_clk_levels)
> > - ret = -ENOENT;
> > + return -ENOENT;
> >
> >   return smu->ppt_funcs->emit_clk_levels(smu, clk_type, buf, offset);
> >
> > --
> > 2.26.3


Re: [PATCH] drm/amd/pm: add missing prototypes to amdgpu_dpm_internal

2022-02-07 Thread Alex Deucher
Applied.  Thanks!

On Sun, Feb 6, 2022 at 10:30 PM Quan, Evan  wrote:
>
> [AMD Official Use Only]
>
> Thanks for the fix!
> Reviewed-by: Evan Quan 
>
> > -Original Message-
> > From: Maíra Canal 
> > Sent: Thursday, February 3, 2022 8:40 AM
> > To: Quan, Evan ; Deucher, Alexander
> > ; Koenig, Christian
> > ; Pan, Xinhui ;
> > airl...@linux.ie; dan...@ffwll.ch; nat...@kernel.org;
> > ndesaulni...@google.com; Lazar, Lijo ; Tuikov, Luben
> > ; Chen, Guchun ;
> > Zhang, Hawking ;
> > jiapeng.ch...@linux.alibaba.com
> > Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; linux-
> > ker...@vger.kernel.org
> > Subject: [PATCH] drm/amd/pm: add missing prototypes to
> > amdgpu_dpm_internal
> >
> > Include the header with the prototype to silence the following clang
> > warnings:
> >
> > drivers/gpu/drm/amd/amdgpu/../pm/amdgpu_dpm_internal.c:29:6:
> > warning: no
> > previous prototype for function 'amdgpu_dpm_get_active_displays'
> > [-Wmissing-prototypes]
> > void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev)
> >  ^
> > drivers/gpu/drm/amd/amdgpu/../pm/amdgpu_dpm_internal.c:29:1: note:
> > declare
> > 'static' if the function is not intended to be used outside of this
> > translation unit
> > void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev)
> > ^
> > static
> > drivers/gpu/drm/amd/amdgpu/../pm/amdgpu_dpm_internal.c:76:5:
> > warning: no
> > previous prototype for function 'amdgpu_dpm_get_vrefresh'
> > [-Wmissing-prototypes]
> > u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev)
> > ^
> > drivers/gpu/drm/amd/amdgpu/../pm/amdgpu_dpm_internal.c:76:1: note:
> > declare
> > 'static' if the function is not intended to be used outside of this
> > translation unit
> > u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev)
> > ^
> > static
> > 2 warnings generated.
> >
> > Besides that, remove the duplicated prototype of the function
> > amdgpu_dpm_get_vblank_time in order to keep the consistency of the
> > headers.
> >
> > fixes: 6ddbd37f ("drm/amd/pm: optimize the amdgpu_pm_compute_clocks()
> > implementations")
> >
> > Signed-off-by: Maíra Canal 
> > ---
> >  drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c | 1 +
> >  drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h  | 1 -
> >  drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c   | 1 +
> >  3 files changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c
> > b/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c
> > index ba5f6413412d..42efe838fa85 100644
> > --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c
> > +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c
> > @@ -25,6 +25,7 @@
> >  #include "amdgpu_display.h"
> >  #include "hwmgr.h"
> >  #include "amdgpu_smu.h"
> > +#include "amdgpu_dpm_internal.h"
> >
> >  void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev)
> >  {
> > diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> > b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> > index 5cc05110cdae..09790413cbc4 100644
> > --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> > +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> > @@ -343,7 +343,6 @@ struct amdgpu_pm {
> >   struct amdgpu_ctx   *stable_pstate_ctx;
> >  };
> >
> > -u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev);
> >  int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum
> > amd_pp_sensors sensor,
> >  void *data, uint32_t *size);
> >
> > diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > index 7427c50409d4..caae54487f9c 100644
> > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > @@ -28,6 +28,7 @@
> >  #include "amdgpu_pm.h"
> >  #include "amdgpu_dpm.h"
> >  #include "amdgpu_atombios.h"
> > +#include "amdgpu_dpm_internal.h"
> >  #include "amd_pcie.h"
> >  #include "sid.h"
> >  #include "r600_dpm.h"
> > --
> > 2.34.1


Re: [PATCH 6/8] mm: don't include <linux/memremap.h> in <linux/mm.h>

2022-02-07 Thread Felix Kuehling



Am 2022-02-07 um 01:32 schrieb Christoph Hellwig:

Move the check for the actual pgmap types that need the free at refcount
one behavior into the out of line helper, and thus avoid the need to
pull memremap.h into mm.h.

Signed-off-by: Christoph Hellwig 


The amdkfd part looks good to me.

It looks like this patch is not based on Alex Sierra's coherent memory 
series. He added two new helpers is_device_coherent_page and 
is_dev_private_or_coherent_page that would need to be moved along with 
is_device_private_page and is_pci_p2pdma_page.


Acked-by: Felix Kuehling 



---
  arch/arm64/mm/mmu.c|  1 +
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |  1 +
  drivers/gpu/drm/drm_cache.c|  2 +-
  drivers/gpu/drm/nouveau/nouveau_dmem.c |  1 +
  drivers/gpu/drm/nouveau/nouveau_svm.c  |  1 +
  drivers/infiniband/core/rw.c   |  1 +
  drivers/nvdimm/pmem.h  |  1 +
  drivers/nvme/host/pci.c|  1 +
  drivers/nvme/target/io-cmd-bdev.c  |  1 +
  fs/fuse/virtio_fs.c|  1 +
  include/linux/memremap.h   | 18 ++
  include/linux/mm.h | 20 
  lib/test_hmm.c |  1 +
  mm/memremap.c  |  6 +-
  14 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index acfae9b41cc8c9..580abae6c0b93f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -17,6 +17,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ea68f3b3a4e9cb..6d643b4b791d87 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -25,6 +25,7 @@
  
  #include 

  #include 
+#include 
  #include 
  #include 
  #include 
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index f19d9acbe95936..50b8a088f763a6 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -27,11 +27,11 @@
  /*
   * Authors: Thomas Hellström 
   */
-
  #include 
  #include 
  #include 
  #include 
+#include 
  #include 
  
  #include 

diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c 
b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index e886a3b9e08c7d..a5cdfbe32b5e54 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -39,6 +39,7 @@
  
  #include 

  #include 
+#include 
  #include 
  
  /*

diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c 
b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 266809e511e2c1..090b9b47708cca 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -35,6 +35,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  
  struct nouveau_svm {

diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 5a3bd41b331c93..4d98f931a13ddd 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -2,6 +2,7 @@
  /*
   * Copyright (c) 2016 HGST, a Western Digital Company.
   */
+#include 
  #include 
  #include 
  #include 
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 59cfe13ea8a85c..1f51a23614299b 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -3,6 +3,7 @@
  #define __NVDIMM_PMEM_H__
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6a99ed68091589..ab15bc72710dbe 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -15,6 +15,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
diff --git a/drivers/nvme/target/io-cmd-bdev.c 
b/drivers/nvme/target/io-cmd-bdev.c
index 70ca9dfc1771a9..a141446db1bea3 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -6,6 +6,7 @@
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  #include 
  #include 
+#include 
  #include 
  #include "nvmet.h"
  
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c

index 9d737904d07c0b..86b7dbb6a0d43e 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -8,6 +8,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 1fafcc38acbad6..514ab46f597e5c 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -1,6 +1,8 @@
  /* SPDX-License-Identifier: GPL-2.0 */
  #ifndef _LINUX_MEMREMAP_H_
  #define _LINUX_MEMREMAP_H_
+
+#include 
  #include 
  #include 
  #include 
@@ -129,6 +131,22 @@ static inline unsigned long pgmap_vmemmap_nr(struct 
dev_pagemap *pgmap)
return 1 << pgmap->vmemmap_shift;
  }
  
+static inline bool is_device_private_page(const struct page *page)

+{
+   return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
+   IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
+

Re: [PATCHv2 2/3] drm/amdkfd: Remove unused old debugger implementation

2022-02-07 Thread Felix Kuehling

Am 2022-02-07 um 14:52 schrieb Mukul Joshi:

Cleanup the kfd code by removing the unused old debugger
implementation.


You can add here that address watch was only ever implemented in the 
upstream driver for GFXv7 (Kaveri). The user mode tools runtime using 
this API was never open-sourced. Work on the old debugger prototype that 
used this API was discontinued years ago.


With that, the patch is

Reviewed-by: Felix Kuehling 



Only a small piece of resetting wavefronts is kept and
is moved to kfd_device_queue_manager.c

Signed-off-by: Mukul Joshi 
---
v1->v2:
- Rename AMDKFD_IOC_DBG_* to AMDKFD_IOC_DBG_*_DEPRECATED.
- Cleanup address_watch_disable(), address_watch_execute(),
   and address_watch_get_offset() from amdgpu_amdkfd_gfx_* files.

  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |   3 -
  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |   3 -
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  24 -
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  25 -
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |  96 --
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  24 -
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  24 -
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  10 -
  drivers/gpu/drm/amd/amdkfd/Makefile   |   2 -
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 290 +-
  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   | 845 --
  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h   | 230 -
  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c   | 158 
  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h   | 293 --
  drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   2 -
  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  59 ++
  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  35 +
  drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|  12 -
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   5 -
  drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  19 -
  .../gpu/drm/amd/include/kgd_kfd_interface.h   |   9 -
  include/uapi/linux/kfd_ioctl.h|   8 +-
  22 files changed, 106 insertions(+), 2070 deletions(-)
  delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
  delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
  delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
  delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 46cd4ee6bafb..c8935d718207 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -37,10 +37,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
.hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
-   .address_watch_disable = kgd_gfx_v9_address_watch_disable,
-   .address_watch_execute = kgd_gfx_v9_address_watch_execute,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
-   .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index abe93b3ff765..4191af5a3f13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -289,10 +289,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
.hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
.hqd_destroy = kgd_gfx_v9_hqd_destroy,
.hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
-   .address_watch_disable = kgd_gfx_v9_address_watch_disable,
-   .address_watch_execute = kgd_gfx_v9_address_watch_execute,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
-   .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.set_vm_context_page_table_base =
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 7b7f4b2764c1..9378fc79e9ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -671,20 +671,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct 
amdgpu_device *adev,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
  }
  
-static int kgd_address_watch_disable(struct amdgpu_device *adev)

-{
-   return 0;
-}
-
-static int kgd_address_watch_execute(struct amdgpu_device *adev,
-   unsigned int watch_point_id,
-   uint32_t cn

Re: [PATCHv2 1/3] drm/amdkfd: Fix TLB flushing in KFD SVM with no HWS

2022-02-07 Thread Felix Kuehling

Am 2022-02-07 um 14:52 schrieb Mukul Joshi:

With no HWS, TLB flushing will not work in SVM code.
Fix this by calling kfd_flush_tlb() which works for both
HWS and no HWS case.

Signed-off-by: Mukul Joshi 
Reviewed-by: Philip Yang 
---
v1->v2:
- Don't pass adev to svm_range_map_to_gpu().
  
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 ++--

  1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 41f03d165bad..058f85b432b0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1229,18 +1229,17 @@ svm_range_unmap_from_gpus(struct svm_range *prange, 
unsigned long start,
if (r)
break;
}
-   amdgpu_amdkfd_flush_gpu_tlb_pasid(pdd->dev->adev,
-   p->pasid, TLB_FLUSH_HEAVYWEIGHT);
+   kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
}
  
  	return r;

  }
  
  static int

-svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-struct svm_range *prange, unsigned long offset,
-unsigned long npages, bool readonly, dma_addr_t *dma_addr,
-struct amdgpu_device *bo_adev, struct dma_fence **fence)
+svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
+unsigned long offset, unsigned long npages, bool readonly,
+dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
+struct dma_fence **fence)
  {
bool table_freed = false;
uint64_t pte_flags;
@@ -1248,6 +1247,8 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
int last_domain;
int r = 0;
int64_t i, j;
+   struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+   struct amdgpu_device *adev = pdd->dev->adev;


Minor style nit-pick. It's more readable when longer variable 
declarations are at the top. Other than that, the patch is


Reviewed-by: Felix Kuehling 


  
  	last_start = prange->start + offset;
  
@@ -1305,12 +1306,8 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,

if (fence)
*fence = dma_fence_get(vm->last_update);
  
-	if (table_freed) {

-   struct kfd_process *p;
-
-   p = container_of(prange->svms, struct kfd_process, svms);
-   amdgpu_amdkfd_flush_gpu_tlb_pasid(adev, p->pasid, 
TLB_FLUSH_LEGACY);
-   }
+   if (table_freed)
+   kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
  out:
return r;
  }
@@ -1351,8 +1348,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned 
long offset,
continue;
}
  
-		r = svm_range_map_to_gpu(pdd->dev->adev, drm_priv_to_vm(pdd->drm_priv),

-prange, offset, npages, readonly,
+   r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
 prange->dma_addr[gpuidx],
 bo_adev, wait ? &fence : NULL);
if (r)


Re: [PATCHv2 3/3] drm/amdkfd: Consolidate MQD manager functions

2022-02-07 Thread Felix Kuehling



Am 2022-02-07 um 14:53 schrieb Mukul Joshi:

A few MQD manager functions are duplicated for all versions of
MQD manager. Remove this duplication by moving the common
functions into kfd_mqd_manager.c file.

Signed-off-by: Mukul Joshi 


Reviewed-by: Felix Kuehling 



---
v1->v2:
- Add "kfd_" prefix to functions moved to kfd_mqd_manager.c.
- Also, suffix "_cp" to function names shared by CP, HIQ and DIQ.

  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  | 63 +
  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  | 27 ++
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  | 76 +++
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 85 +++--
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 92 +++
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   | 75 +++
  6 files changed, 136 insertions(+), 282 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index e2825ad4d699..dd99f23e24f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -173,3 +173,66 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
}
}
  }
+
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+uint32_t pipe_id, uint32_t queue_id,
+struct queue_properties *p, struct mm_struct *mms)
+{
+   return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
+ queue_id, p->doorbell_off);
+}
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type, unsigned int timeout,
+   uint32_t pipe_id,uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout,
+   pipe_id, queue_id);
+}
+
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+   if (mqd_mem_obj->gtt_mem) {
+   amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem);
+   kfree(mqd_mem_obj);
+   } else {
+   kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+   }
+}
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+uint64_t queue_address, uint32_t pipe_id,
+uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address,
+   pipe_id, queue_id);
+}
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
+   (uint32_t __user *)p->write_ptr,
+   mms);
+}
+
+/*
+ * preempt type here is ignored because there is only one way
+ * to preempt sdma queue
+ */
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+enum kfd_preempt_type type,
+unsigned int timeout, uint32_t pipe_id,
+uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+}
+
+bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+   return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 23486a23df84..21851110f9eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -136,4 +136,31 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask);
  
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,

+   uint32_t pipe_id, uint32_t queue_id,
+   struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type, unsigned int timeout,
+   uint32_t pipe_id,uint32_t queue_id);
+
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+   struct kfd_mem_obj *mqd_mem_obj);
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+uint64_t queue_address, uint32_t pipe_id,
+uint32_t queue_id);
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+   uint32_t pipe_id, uint32_t queue_id,
+   struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+   enum kfd_preempt_type type,unsigned int timeout,
+  

Re: [PATCH 5/8] mm: simplify freeing of devmap managed pages

2022-02-07 Thread Dan Williams
On Sun, Feb 6, 2022 at 10:33 PM Christoph Hellwig  wrote:
>
> Make put_devmap_managed_page return if it took charge of the page
> or not and remove the separate page_is_devmap_managed helper.

Looks good to me:

Reviewed-by: Dan Williams 



Re: [PATCH 6/8] mm: don't include <linux/memremap.h> in <linux/mm.h>

2022-02-07 Thread Dan Williams
On Sun, Feb 6, 2022 at 10:33 PM Christoph Hellwig  wrote:
>
> Move the check for the actual pgmap types that need the free at refcount
> one behavior into the out of line helper, and thus avoid the need to
> pull memremap.h into mm.h.

Looks good to me assuming the compile bots agree.

Reviewed-by: Dan Williams 

>
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/arm64/mm/mmu.c|  1 +
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |  1 +
>  drivers/gpu/drm/drm_cache.c|  2 +-
>  drivers/gpu/drm/nouveau/nouveau_dmem.c |  1 +
>  drivers/gpu/drm/nouveau/nouveau_svm.c  |  1 +
>  drivers/infiniband/core/rw.c   |  1 +
>  drivers/nvdimm/pmem.h  |  1 +
>  drivers/nvme/host/pci.c|  1 +
>  drivers/nvme/target/io-cmd-bdev.c  |  1 +
>  fs/fuse/virtio_fs.c|  1 +
>  include/linux/memremap.h   | 18 ++
>  include/linux/mm.h | 20 
>  lib/test_hmm.c |  1 +
>  mm/memremap.c  |  6 +-
>  14 files changed, 34 insertions(+), 22 deletions(-)
>
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index acfae9b41cc8c9..580abae6c0b93f 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -17,6 +17,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index ea68f3b3a4e9cb..6d643b4b791d87 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -25,6 +25,7 @@
>
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
> index f19d9acbe95936..50b8a088f763a6 100644
> --- a/drivers/gpu/drm/drm_cache.c
> +++ b/drivers/gpu/drm/drm_cache.c
> @@ -27,11 +27,11 @@
>  /*
>   * Authors: Thomas Hellström 
>   */
> -
>  #include 
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>
>  #include 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c 
> b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> index e886a3b9e08c7d..a5cdfbe32b5e54 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> @@ -39,6 +39,7 @@
>
>  #include 
>  #include 
> +#include 
>  #include 
>
>  /*
> diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c 
> b/drivers/gpu/drm/nouveau/nouveau_svm.c
> index 266809e511e2c1..090b9b47708cca 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_svm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
> @@ -35,6 +35,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>
>  struct nouveau_svm {
> diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
> index 5a3bd41b331c93..4d98f931a13ddd 100644
> --- a/drivers/infiniband/core/rw.c
> +++ b/drivers/infiniband/core/rw.c
> @@ -2,6 +2,7 @@
>  /*
>   * Copyright (c) 2016 HGST, a Western Digital Company.
>   */
> +#include 
>  #include 
>  #include 
>  #include 
> diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
> index 59cfe13ea8a85c..1f51a23614299b 100644
> --- a/drivers/nvdimm/pmem.h
> +++ b/drivers/nvdimm/pmem.h
> @@ -3,6 +3,7 @@
>  #define __NVDIMM_PMEM_H__
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 6a99ed68091589..ab15bc72710dbe 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -15,6 +15,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> diff --git a/drivers/nvme/target/io-cmd-bdev.c 
> b/drivers/nvme/target/io-cmd-bdev.c
> index 70ca9dfc1771a9..a141446db1bea3 100644
> --- a/drivers/nvme/target/io-cmd-bdev.c
> +++ b/drivers/nvme/target/io-cmd-bdev.c
> @@ -6,6 +6,7 @@
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>  #include 
>  #include 
> +#include 
>  #include 
>  #include "nvmet.h"
>
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index 9d737904d07c0b..86b7dbb6a0d43e 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
> @@ -8,6 +8,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> index 1fafcc38acbad6..514ab46f597e5c 100644
> --- a/include/linux/memremap.h
> +++ b/include/linux/memremap.h
> @@ -1,6 +1,8 @@
>  /* SPDX-License-Identifier: GPL-2.0 */
>  #ifndef _LINUX_MEMREMAP_H_
>  #define _LINUX_MEMREMAP_H_
> +
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -129,6 +131,22 @@ static inline unsigned long pgmap_vmemmap_nr(struct 
> dev_pagemap *pgmap)
> return 1 << pgmap->vmemmap_shift;
>  }
>
> +static inline bool is_device_private_page(const struct page *page)
> +{
> +   return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
> +   IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&

Re: start sorting out the ZONE_DEVICE refcount mess

2022-02-07 Thread Logan Gunthorpe



On 2022-02-06 11:32 p.m., Christoph Hellwig wrote:
> Hi all,
> 
> this series removes the offset by one refcount for ZONE_DEVICE pages
> that are freed back to the driver owning them, which is just device
> private ones for now, but also the planned device coherent pages
> and the enhanced p2p ones pending.
> 
> It does not address the fsdax pages yet, which will be attacked in a
> follow on series.
> 
> Diffstat:
>  arch/arm64/mm/mmu.c  |1 
>  arch/powerpc/kvm/book3s_hv_uvmem.c   |1 
>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |2 
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h|1 
>  drivers/gpu/drm/drm_cache.c  |2 
>  drivers/gpu/drm/nouveau/nouveau_dmem.c   |3 -
>  drivers/gpu/drm/nouveau/nouveau_svm.c|1 
>  drivers/infiniband/core/rw.c |1 
>  drivers/nvdimm/pmem.h|1 
>  drivers/nvme/host/pci.c  |1 
>  drivers/nvme/target/io-cmd-bdev.c|1 
>  fs/Kconfig   |2 
>  fs/fuse/virtio_fs.c  |1 
>  include/linux/hmm.h  |9 
>  include/linux/memremap.h |   22 +-
>  include/linux/mm.h   |   59 -
>  lib/test_hmm.c   |4 +
>  mm/Kconfig   |4 -
>  mm/internal.h|2 
>  mm/memcontrol.c  |   11 +
>  mm/memremap.c|   63 
> ---
>  mm/migrate.c |6 --
>  mm/swap.c|   49 ++--
>  23 files changed, 90 insertions(+), 157 deletions(-)

Looks good to me. I was wondering about the location of some of this
code, so it's nice to see it cleaned up. Except for the one minor issue
I noted on patch 6, it all looks good to me. I've reviewed all the
patches and tested the series under my p2pdma series.

Reviewed-by: Logan Gunthorpe 

Logan



Re: [PATCH 7/8] mm: remove the extra ZONE_DEVICE struct page refcount

2022-02-07 Thread Ralph Campbell

On 2/6/22 22:32, Christoph Hellwig wrote:

ZONE_DEVICE struct pages have an extra reference count that complicates
the code for put_page() and several places in the kernel that need to
check the reference count to see that a page is not being used (gup,
compaction, migration, etc.). Clean up the code so the reference count
doesn't need to be treated specially for ZONE_DEVICE pages.

Note that this excludes the special idle page wakeup for fsdax pages,
which still happens at refcount 1.  This is a separate issue and will
be sorted out later.  Given that only fsdax pages require the
notification when the refcount hits 1 now, the PAGEMAP_OPS Kconfig
symbol can go away and be replaced with a FS_DAX check for this hook
in the put_page fastpath.

Based on an earlier patch from Ralph Campbell .

Signed-off-by: Christoph Hellwig 


Thanks for working on this, definite step forward.

Reviewed-by: Ralph Campbell 



[PATCH 1/7] drm/amd/pm: correct UMD pstate clocks for Dimgrey Cavefish and Beige Goby

2022-02-07 Thread Evan Quan
Correct the UMD pstate profiling clocks for Dimgrey Cavefish and Beige
Goby.

Signed-off-by: Evan Quan 
Change-Id: I74fdbcf2cfa11f97ae16e4921449ab7cdb7e43c9
---
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 26 +++
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.h   |  8 ++
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 5bd48c922385..c335f398ba4c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -1231,21 +1231,37 @@ static int sienna_cichlid_populate_umd_state_clk(struct 
smu_context *smu)
&dpm_context->dpm_tables.soc_table;
struct smu_umd_pstate_table *pstate_table =
&smu->pstate_table;
+   struct amdgpu_device *adev = smu->adev;
 
pstate_table->gfxclk_pstate.min = gfx_table->min;
pstate_table->gfxclk_pstate.peak = gfx_table->max;
-   if (gfx_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK)
-   pstate_table->gfxclk_pstate.standard = 
SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK;
 
pstate_table->uclk_pstate.min = mem_table->min;
pstate_table->uclk_pstate.peak = mem_table->max;
-   if (mem_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK)
-   pstate_table->uclk_pstate.standard = 
SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK;
 
pstate_table->socclk_pstate.min = soc_table->min;
pstate_table->socclk_pstate.peak = soc_table->max;
-   if (soc_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK)
+
+   switch (adev->asic_type) {
+   case CHIP_SIENNA_CICHLID:
+   case CHIP_NAVY_FLOUNDER:
+   pstate_table->gfxclk_pstate.standard = 
SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK;
+   pstate_table->uclk_pstate.standard = 
SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK;
pstate_table->socclk_pstate.standard = 
SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK;
+   break;
+   case CHIP_DIMGREY_CAVEFISH:
+   pstate_table->gfxclk_pstate.standard = 
DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_GFXCLK;
+   pstate_table->uclk_pstate.standard = 
DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_MEMCLK;
+   pstate_table->socclk_pstate.standard = 
DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_SOCCLK;
+   break;
+   case CHIP_BEIGE_GOBY:
+   pstate_table->gfxclk_pstate.standard = 
BEIGE_GOBY_UMD_PSTATE_PROFILING_GFXCLK;
+   pstate_table->uclk_pstate.standard = 
BEIGE_GOBY_UMD_PSTATE_PROFILING_MEMCLK;
+   pstate_table->socclk_pstate.standard = 
BEIGE_GOBY_UMD_PSTATE_PROFILING_SOCCLK;
+   break;
+   default:
+   break;
+   }
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h
index 38cd0ece24f6..42f705c7a36f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h
@@ -33,6 +33,14 @@ typedef enum {
 #define SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK960
 #define SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK1000
 
+#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_GFXCLK 1950
+#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_SOCCLK 960
+#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_MEMCLK 676
+
+#define BEIGE_GOBY_UMD_PSTATE_PROFILING_GFXCLK 2200
+#define BEIGE_GOBY_UMD_PSTATE_PROFILING_SOCCLK 960
+#define BEIGE_GOBY_UMD_PSTATE_PROFILING_MEMCLK 1000
+
 extern void sienna_cichlid_set_ppt_funcs(struct smu_context *smu);
 
 #endif
-- 
2.29.0



[PATCH 2/7] drm/amd/pm: fulfill the support for DriverSmuConfig table

2022-02-07 Thread Evan Quan
Enable the support for DriverSmuConfig table on Navi1x and
Sienna_Cichlid.

Signed-off-by: Evan Quan 
Change-Id: Ie510f8b06b7a4910b1574b6e9affa875805ef868
---
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h   |  1 +
 drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c |  9 +
 .../gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 13 -
 drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c  |  2 ++
 4 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index fd38f628da3f..51a32ac39990 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -337,6 +337,7 @@ struct smu_table_context
struct smu_bios_boot_up_values  boot_values;
void*driver_pptable;
void*ecc_table;
+   void*driver_smu_config_table;
struct smu_tabletables[SMU_TABLE_COUNT];
/*
 * The driver table is just a staging buffer for
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 467477974962..4aff185f0cea 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -510,6 +510,8 @@ static int navi10_tables_init(struct smu_context *smu)
SMU_TABLE_INIT(tables, SMU_TABLE_ACTIVITY_MONITOR_COEFF,
   sizeof(DpmActivityMonitorCoeffInt_t), PAGE_SIZE,
   AMDGPU_GEM_DOMAIN_VRAM);
+   SMU_TABLE_INIT(tables, SMU_TABLE_DRIVER_SMU_CONFIG, 
sizeof(DriverSmuConfig_t),
+  PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
 
smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_NV1X_t),
   GFP_KERNEL);
@@ -526,8 +528,15 @@ static int navi10_tables_init(struct smu_context *smu)
if (!smu_table->watermarks_table)
goto err2_out;
 
+   smu_table->driver_smu_config_table =
+   kzalloc(tables[SMU_TABLE_DRIVER_SMU_CONFIG].size, GFP_KERNEL);
+   if (!smu_table->driver_smu_config_table)
+   goto err3_out;
+
return 0;
 
+err3_out:
+   kfree(smu_table->watermarks_table);
 err2_out:
kfree(smu_table->gpu_metrics_table);
 err1_out:
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index c335f398ba4c..978c0ebe9d19 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -475,6 +475,8 @@ static int sienna_cichlid_tables_init(struct smu_context 
*smu)
   AMDGPU_GEM_DOMAIN_VRAM);
SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+   SMU_TABLE_INIT(tables, SMU_TABLE_DRIVER_SMU_CONFIG, 
sizeof(DriverSmuConfigExternal_t),
+  PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
 
smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), 
GFP_KERNEL);
if (!smu_table->metrics_table)
@@ -492,10 +494,19 @@ static int sienna_cichlid_tables_init(struct smu_context 
*smu)
 
smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, 
GFP_KERNEL);
if (!smu_table->ecc_table)
-   return -ENOMEM;
+   goto err3_out;
+
+   smu_table->driver_smu_config_table =
+   kzalloc(tables[SMU_TABLE_DRIVER_SMU_CONFIG].size, GFP_KERNEL);
+   if (!smu_table->driver_smu_config_table)
+   goto err4_out;
 
return 0;
 
+err4_out:
+   kfree(smu_table->ecc_table);
+err3_out:
+   kfree(smu_table->watermarks_table);
 err2_out:
kfree(smu_table->gpu_metrics_table);
 err1_out:
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index d71155a66f97..b87f550af26b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -473,9 +473,11 @@ int smu_v11_0_fini_smc_tables(struct smu_context *smu)
kfree(smu_table->hardcode_pptable);
smu_table->hardcode_pptable = NULL;
 
+   kfree(smu_table->driver_smu_config_table);
kfree(smu_table->ecc_table);
kfree(smu_table->metrics_table);
kfree(smu_table->watermarks_table);
+   smu_table->driver_smu_config_table = NULL;
smu_table->ecc_table = NULL;
smu_table->metrics_table = NULL;
smu_table->watermarks_table = NULL;
-- 
2.29.0



[PATCH 3/7] drm/amd/pm: correct the default DriverSmuConfig table settings

2022-02-07 Thread Evan Quan
For some ASICs, with the PMFW default settings, the power consumption
reported via the metrics table can be very erratic. Setting the socket
power alpha filter to 10/100ms corrects that issue.

Signed-off-by: Evan Quan 
Change-Id: Ia352579e1cc7a531cb1de5c835fe5bf132d5dd20
---
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h   | 14 ++
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 18 ++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 10 ++
 drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |  2 ++
 4 files changed, 44 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index ef44c8c3d616..d3424cc586aa 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -293,6 +293,18 @@ struct amdgpu_smu_i2c_bus {
struct mutex mutex;
 };
 
+struct config_table_setting
+{
+   uint16_t gfxclk_average_tau;
+   uint16_t socclk_average_tau;
+   uint16_t uclk_average_tau;
+   uint16_t gfx_activity_average_tau;
+   uint16_t mem_activity_average_tau;
+   uint16_t socket_power_average_tau;
+   uint16_t apu_socket_power_average_tau;
+   uint16_t fclk_average_tau;
+};
+
 struct amdgpu_pm {
struct mutexmutex;
u32 current_sclk;
@@ -341,6 +353,8 @@ struct amdgpu_pm {
 
struct mutexstable_pstate_ctx_lock;
struct amdgpu_ctx   *stable_pstate_ctx;
+
+   struct config_table_setting config_table;
 };
 
 int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors 
sensor,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index c6a42ac8ba78..9e1ea9d54f50 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -601,6 +601,18 @@ static int smu_set_default_dpm_table(struct smu_context 
*smu)
return ret;
 }
 
+static int smu_apply_default_config_table_settings(struct smu_context *smu)
+{
+   struct amdgpu_device *adev = smu->adev;
+   int ret = 0;
+
+   ret = smu_get_default_config_table_settings(smu,
+   &adev->pm.config_table);
+   if (ret)
+   return ret;
+
+   return smu_set_config_table(smu, &adev->pm.config_table);
+}
 
 static int smu_late_init(void *handle)
 {
@@ -655,6 +667,12 @@ static int smu_late_init(void *handle)
smu->smu_dpm.dpm_level,
AMD_PP_TASK_COMPLETE_INIT);
 
+   ret = smu_apply_default_config_table_settings(smu);
+   if (ret && (ret != -EOPNOTSUPP)) {
+   dev_err(adev->dev, "Failed to apply default DriverSmuConfig 
settings!\n");
+   return ret;
+   }
+
smu_restore_dpm_user_profile(smu);
 
return 0;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 51a32ac39990..f0894676b20e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1275,6 +1275,16 @@ struct pptable_funcs {
 * @stb_collect_info: Collects Smart Trace Buffers data.
 */
int (*stb_collect_info)(struct smu_context *smu, void *buf, uint32_t 
size);
+
+   /**
+* @get_default_config_table_settings: Get the ASIC default 
DriverSmuConfig table settings.
+*/
+   int (*get_default_config_table_settings)(struct smu_context *smu, 
struct config_table_setting *table);
+
+   /**
+* @set_config_table: Apply the input DriverSmuConfig table settings.
+*/
+   int (*set_config_table)(struct smu_context *smu, struct 
config_table_setting *table);
 };
 
 typedef enum {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h 
b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
index 48e80ec9b258..2d18b39d7c2a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
@@ -92,6 +92,8 @@
 #define smu_gpo_control(smu, enablement)   
smu_ppt_funcs(gpo_control, 0, smu, enablement)
 #define smu_set_fine_grain_gfx_freq_parameters(smu)
smu_ppt_funcs(set_fine_grain_gfx_freq_parameters, 0, smu)
 #define smu_bump_power_profile_mode(smu, param, param_size)
smu_ppt_funcs(set_power_profile_mode, -EOPNOTSUPP, smu, param, param_size)
+#define smu_get_default_config_table_settings(smu, config_table)   
smu_ppt_funcs(get_default_config_table_settings, -EOPNOTSUPP, smu, config_table)
+#define smu_set_config_table(smu, config_table)
smu_ppt_funcs(set_config_table, -EOPNOTSUPP, smu, config_table)
 
 #endif
 #endif
-- 
2.29.0



[PATCH 5/7] drm/amd/pm: fulfill Sienna_Cichlid implementations for DriverSmuConfig setting

2022-02-07 Thread Evan Quan
Fulfill the implementations for DriverSmuConfig setting on Sienna_Cichlid.

Signed-off-by: Evan Quan 
Change-Id: Ic519c8d4fcfeefdda79ba9ed01b235824d76e40f
---
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 54 +++
 1 file changed, 54 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 978c0ebe9d19..a7bb5358d4a4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -3922,6 +3922,58 @@ static void sienna_cichlid_stb_init(struct smu_context 
*smu)
 
 }
 
+static int sienna_cichlid_get_default_config_table_settings(struct smu_context 
*smu,
+   struct 
config_table_setting *table)
+{
+   struct amdgpu_device *adev = smu->adev;
+
+   if (!table)
+   return -EINVAL;
+
+   table->gfxclk_average_tau = 10;
+   table->socclk_average_tau = 10;
+   table->fclk_average_tau = 10;
+   table->uclk_average_tau = 10;
+   table->gfx_activity_average_tau = 10;
+   table->mem_activity_average_tau = 10;
+   table->socket_power_average_tau = 100;
+   if (adev->asic_type != CHIP_SIENNA_CICHLID)
+   table->apu_socket_power_average_tau = 100;
+
+   return 0;
+}
+
+static int sienna_cichlid_set_config_table(struct smu_context *smu,
+  struct config_table_setting *table)
+{
+   DriverSmuConfigExternal_t driver_smu_config_table;
+
+   if (!table)
+   return -EINVAL;
+
+   memset(&driver_smu_config_table,
+  0,
+  sizeof(driver_smu_config_table));
+   driver_smu_config_table.DriverSmuConfig.GfxclkAverageLpfTau =
+   table->gfxclk_average_tau;
+   driver_smu_config_table.DriverSmuConfig.FclkAverageLpfTau =
+   table->fclk_average_tau;
+   driver_smu_config_table.DriverSmuConfig.UclkAverageLpfTau =
+   table->uclk_average_tau;
+   driver_smu_config_table.DriverSmuConfig.GfxActivityLpfTau =
+   table->gfx_activity_average_tau;
+   driver_smu_config_table.DriverSmuConfig.UclkActivityLpfTau =
+   table->mem_activity_average_tau;
+   driver_smu_config_table.DriverSmuConfig.SocketPowerLpfTau =
+   table->socket_power_average_tau;
+
+   return smu_cmn_update_table(smu,
+   SMU_TABLE_DRIVER_SMU_CONFIG,
+   0,
+   (void *)&driver_smu_config_table,
+   true);
+}
+
 static int sienna_cichlid_stb_get_data_direct(struct smu_context *smu,
  void *buf,
  uint32_t size)
@@ -4036,6 +4088,8 @@ static const struct pptable_funcs 
sienna_cichlid_ppt_funcs = {
.set_mp1_state = sienna_cichlid_set_mp1_state,
.stb_collect_info = sienna_cichlid_stb_get_data_direct,
.get_ecc_info = sienna_cichlid_get_ecc_info,
+   .get_default_config_table_settings = 
sienna_cichlid_get_default_config_table_settings,
+   .set_config_table = sienna_cichlid_set_config_table,
 };
 
 void sienna_cichlid_set_ppt_funcs(struct smu_context *smu)
-- 
2.29.0



[PATCH 7/7] drm/amd/pm: fix some OEM SKU specific stability issues

2022-02-07 Thread Evan Quan
Add a quirk in sienna_cichlid_ppt.c to fix some OEM SKU
specific stability issues.

Signed-off-by: Evan Quan 
Change-Id: I172c6429c54253788dbf28f7acf877375f2bfc5b
---
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 32 ++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index a7bb5358d4a4..f964af05f376 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -428,6 +428,36 @@ static int sienna_cichlid_store_powerplay_table(struct 
smu_context *smu)
return 0;
 }
 
+static int sienna_cichlid_patch_pptable_quirk(struct smu_context *smu)
+{
+   struct amdgpu_device *adev = smu->adev;
+   uint32_t *board_reserved;
+   uint16_t *freq_table_gfx;
+   uint32_t i;
+
+   /* Fix some OEM SKU specific stability issues */
+   GET_PPTABLE_MEMBER(BoardReserved, &board_reserved);
+   if ((adev->pdev->device == 0x73DF) &&
+   (adev->pdev->revision == 0XC3) &&
+   (adev->pdev->subsystem_device == 0x16C2) &&
+   (adev->pdev->subsystem_vendor == 0x1043))
+   board_reserved[0] = 1387;
+
+   GET_PPTABLE_MEMBER(FreqTableGfx, &freq_table_gfx);
+   if ((adev->pdev->device == 0x73DF) &&
+   (adev->pdev->revision == 0XC3) &&
+   ((adev->pdev->subsystem_device == 0x16C2) ||
+   (adev->pdev->subsystem_device == 0x133C)) &&
+   (adev->pdev->subsystem_vendor == 0x1043)) {
+   for (i = 0; i < NUM_GFXCLK_DPM_LEVELS; i++) {
+   if (freq_table_gfx[i] > 2500)
+   freq_table_gfx[i] = 2500;
+   }
+   }
+
+   return 0;
+}
+
 static int sienna_cichlid_setup_pptable(struct smu_context *smu)
 {
int ret = 0;
@@ -448,7 +478,7 @@ static int sienna_cichlid_setup_pptable(struct smu_context 
*smu)
if (ret)
return ret;
 
-   return ret;
+   return sienna_cichlid_patch_pptable_quirk(smu);
 }
 
 static int sienna_cichlid_tables_init(struct smu_context *smu)
-- 
2.29.0



[PATCH 6/7] drm/amdgpu: disable MMHUB PG for Picasso

2022-02-07 Thread Evan Quan
MMHUB PG needs to be disabled for Picasso for stability reasons.

Signed-off-by: Evan Quan 
Change-Id: Iea0ec757582a764ab5a000d7cc411fb814ffb19f
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index e07a5fd09d06..15ee56406bc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1081,8 +1081,11 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_VCN_MGCG;
 
+   /*
+* MMHUB PG needs to be disabled for Picasso for
+* stability reasons.
+*/
adev->pg_flags = AMD_PG_SUPPORT_SDMA |
-   AMD_PG_SUPPORT_MMHUB |
AMD_PG_SUPPORT_VCN;
} else {
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
-- 
2.29.0



[PATCH 4/7] drm/amd/pm: fulfill Navi1x implementations for DriverSmuConfig setting

2022-02-07 Thread Evan Quan
Fulfill the implementations for DriverSmuConfig setting on Navi1x.

Signed-off-by: Evan Quan 
Change-Id: I244766a893b4070dfdf171451f6338d33572ec1d
---
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 50 +++
 1 file changed, 50 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 4aff185f0cea..0c8ac2d4307b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -3218,6 +3218,54 @@ static int navi10_post_smu_init(struct smu_context *smu)
return ret;
 }
 
+static int navi10_get_default_config_table_settings(struct smu_context *smu,
+   struct config_table_setting 
*table)
+{
+   if (!table)
+   return -EINVAL;
+
+   table->gfxclk_average_tau = 10;
+   table->socclk_average_tau = 10;
+   table->uclk_average_tau = 10;
+   table->gfx_activity_average_tau = 10;
+   table->mem_activity_average_tau = 10;
+   table->socket_power_average_tau = 10;
+
+   return 0;
+}
+
+static int navi10_set_config_table(struct smu_context *smu,
+  struct config_table_setting *table)
+{
+   DriverSmuConfig_t driver_smu_config_table;
+
+   if (!table)
+   return -EINVAL;
+
+   memset(&driver_smu_config_table,
+  0,
+  sizeof(driver_smu_config_table));
+
+   driver_smu_config_table.GfxclkAverageLpfTau =
+   table->gfxclk_average_tau;
+   driver_smu_config_table.SocclkAverageLpfTau =
+   table->socclk_average_tau;
+   driver_smu_config_table.UclkAverageLpfTau =
+   table->uclk_average_tau;
+   driver_smu_config_table.GfxActivityLpfTau =
+   table->gfx_activity_average_tau;
+   driver_smu_config_table.UclkActivityLpfTau =
+   table->mem_activity_average_tau;
+   driver_smu_config_table.SocketPowerLpfTau =
+   table->socket_power_average_tau;
+
+   return smu_cmn_update_table(smu,
+   SMU_TABLE_DRIVER_SMU_CONFIG,
+   0,
+   (void *)&driver_smu_config_table,
+   true);
+}
+
 static const struct pptable_funcs navi10_ppt_funcs = {
.get_allowed_feature_mask = navi10_get_allowed_feature_mask,
.set_default_dpm_table = navi10_set_default_dpm_table,
@@ -3306,6 +3354,8 @@ static const struct pptable_funcs navi10_ppt_funcs = {
.post_init = navi10_post_smu_init,
.interrupt_work = smu_v11_0_interrupt_work,
.set_mp1_state = smu_cmn_set_mp1_state,
+   .get_default_config_table_settings = 
navi10_get_default_config_table_settings,
+   .set_config_table = navi10_set_config_table,
 };
 
 void navi10_set_ppt_funcs(struct smu_context *smu)
-- 
2.29.0



[PATCH] drm/amd/pm: disable GetPptLimit message in sriov mode

2022-02-07 Thread Yang Wang
The PMFW does not allow the GetPptLimit message in virtualization mode.

Fixes: 3e4a01689daa ("drm/amd/pm: Enable sysfs required by rocm-smi toolfor One 
VF mode")

Signed-off-by: Yang Wang 
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 804e1c98238d..2a7da2bad96a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -140,7 +140,7 @@ static struct cmn2asic_msg_mapping 
sienna_cichlid_message_map[SMU_MSG_MAX_COUNT]
MSG_MAP(PrepareMp1ForUnload,PPSMC_MSG_PrepareMp1ForUnload,  
   1),
MSG_MAP(AllowGfxOff,PPSMC_MSG_AllowGfxOff,  
   0),
MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff,   
   0),
-   MSG_MAP(GetPptLimit,PPSMC_MSG_GetPptLimit,  
   1),
+   MSG_MAP(GetPptLimit,PPSMC_MSG_GetPptLimit,  
   0),
MSG_MAP(GetDcModeMaxDpmFreq,PPSMC_MSG_GetDcModeMaxDpmFreq,  
   1),
MSG_MAP(ExitBaco,   PPSMC_MSG_ExitBaco, 
   0),
MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn,   
   0),
-- 
2.25.1



Re: [PATCH] drm/amd/pm: disable GetPptLimit message in sriov mode

2022-02-07 Thread Lazar, Lijo




On 2/8/2022 10:00 AM, Yang Wang wrote:

the pmfw is not allowed GetPptLimit message in virtualization mode.



Maybe rephrase as - "PPT limit cannot be queried from VF".

Reviewed-by: Lijo Lazar 

Thanks,
Lijo


Fixes: 3e4a01689daa ("drm/amd/pm: Enable sysfs required by rocm-smi toolfor One VF 
mode")

Signed-off-by: Yang Wang 
---
  drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 804e1c98238d..2a7da2bad96a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -140,7 +140,7 @@ static struct cmn2asic_msg_mapping 
sienna_cichlid_message_map[SMU_MSG_MAX_COUNT]
MSG_MAP(PrepareMp1ForUnload,PPSMC_MSG_PrepareMp1ForUnload,  
   1),
MSG_MAP(AllowGfxOff,PPSMC_MSG_AllowGfxOff,  
   0),
MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff,   
   0),
-   MSG_MAP(GetPptLimit,PPSMC_MSG_GetPptLimit,  
   1),
+   MSG_MAP(GetPptLimit,PPSMC_MSG_GetPptLimit,  
   0),
MSG_MAP(GetDcModeMaxDpmFreq,PPSMC_MSG_GetDcModeMaxDpmFreq,  
   1),
MSG_MAP(ExitBaco,   PPSMC_MSG_ExitBaco, 
   0),
MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn,   
   0),



RE: [PATCH] drm/amd/pm: disable GetPptLimit message in sriov mode

2022-02-07 Thread Feng, Kenneth
Reviewed-by: Kenneth Feng 


-Original Message-
From: Wang, Yang(Kevin)  
Sent: Tuesday, February 8, 2022 12:30 PM
To: amd-gfx@lists.freedesktop.org
Cc: Feng, Kenneth ; Lazar, Lijo ; 
Nikolic, Marina ; Wang, Yang(Kevin) 

Subject: [PATCH] drm/amd/pm: disable GetPptLimit message in sriov mode

the pmfw is not allowed GetPptLimit message in virtualization mode.

Fixes: 3e4a01689daa ("drm/amd/pm: Enable sysfs required by rocm-smi toolfor One 
VF mode")

Signed-off-by: Yang Wang 
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 804e1c98238d..2a7da2bad96a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -140,7 +140,7 @@ static struct cmn2asic_msg_mapping 
sienna_cichlid_message_map[SMU_MSG_MAX_COUNT]
MSG_MAP(PrepareMp1ForUnload,PPSMC_MSG_PrepareMp1ForUnload,  
   1),
MSG_MAP(AllowGfxOff,PPSMC_MSG_AllowGfxOff,  
   0),
MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff,   
   0),
-   MSG_MAP(GetPptLimit,PPSMC_MSG_GetPptLimit,  
   1),
+   MSG_MAP(GetPptLimit,PPSMC_MSG_GetPptLimit,  
   0),
MSG_MAP(GetDcModeMaxDpmFreq,PPSMC_MSG_GetDcModeMaxDpmFreq,  
   1),
MSG_MAP(ExitBaco,   PPSMC_MSG_ExitBaco, 
   0),
MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn,   
   0),
-- 
2.25.1



Re: [RFC v3 06/12] drm/amdgpu: Drop hive->in_reset

2022-02-07 Thread Lazar, Lijo




On 1/26/2022 4:07 AM, Andrey Grodzovsky wrote:

Since we serialize all resets no need to protect from concurrent
resets.

Signed-off-by: Andrey Grodzovsky 
Reviewed-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   |  1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  1 -
  3 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 258ec3c0b2af..107a393ebbfd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5013,25 +5013,9 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device 
*adev,
dev_info(adev->dev, "GPU %s begin!\n",
need_emergency_restart ? "jobs stop":"reset");
  
-	/*

-* Here we trylock to avoid chain of resets executing from
-* either trigger by jobs on different adevs in XGMI hive or jobs on
-* different schedulers for same device while this TO handler is 
running.
-* We always reset all schedulers for device and all devices for XGMI
-* hive so that should take care of them too.
-*/
hive = amdgpu_get_xgmi_hive(adev);
-   if (hive) {
-   if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
-   DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as 
another already in progress",
-   job ? job->base.id : -1, hive->hive_id);
-   amdgpu_put_xgmi_hive(hive);
-   if (job && job->vm)
-   drm_sched_increase_karma(&job->base);
-   return 0;
-   }


This function in general will reset all devices in a hive.

In a situation like GPU0 in hive0 gets to this function first and GPU1 
in hive0 also hangs shortly (before GPU0 recovery process starts 
resetting other devices in hive), we don't want to execute work queued as 
part of GPU1's recovery also. Both GPU0 and GPU1 recovery process will 
try to reset all the devices in hive.


In short - if a reset domain is already active, probably we don't need 
to queue another work to the domain since all devices in the domain are 
expected to get reset shortly.


Thanks,
Lijo


+   if (hive)
mutex_lock(&hive->hive_lock);
-   }
  
  	reset_context.method = AMD_RESET_METHOD_NONE;

reset_context.reset_req_dev = adev;
@@ -5227,7 +5211,6 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device 
*adev,
  
  skip_recovery:

if (hive) {
-   atomic_set(&hive->in_reset, 0);
mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index a858e3457c5c..9ad742039ac9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -404,7 +404,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct 
amdgpu_device *adev)
INIT_LIST_HEAD(&hive->device_list);
INIT_LIST_HEAD(&hive->node);
mutex_init(&hive->hive_lock);
-   atomic_set(&hive->in_reset, 0);
atomic_set(&hive->number_devices, 0);
task_barrier_init(&hive->tb);
hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 6121aaa292cb..2f2ce53645a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -33,7 +33,6 @@ struct amdgpu_hive_info {
struct list_head node;
atomic_t number_devices;
struct mutex hive_lock;
-   atomic_t in_reset;
int hi_req_count;
struct amdgpu_device *hi_req_gpu;
struct task_barrier tb;



[PATCH] drm/amd/pm: fix hwmon node of power1_label create issue

2022-02-07 Thread Yang Wang
Fix a typo involving "power1_label";
it causes the hwmon node of power1_label not to be created.

Fixes: ae07970a06 ("drm/amd/pm: add support for hwmon control of slow and fast 
PPT limit on vangogh")

Signed-off-by: Yang Wang 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index d68e7132da2c..d6c01c59f32e 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -3288,7 +3288,7 @@ static umode_t hwmon_attributes_visible(struct kobject 
*kobj,
 attr == &sensor_dev_attr_power2_cap.dev_attr.attr ||
 attr == &sensor_dev_attr_power2_cap_default.dev_attr.attr ||
 attr == &sensor_dev_attr_power2_label.dev_attr.attr ||
-attr == &sensor_dev_attr_power1_label.dev_attr.attr))
+attr == &sensor_dev_attr_power2_label.dev_attr.attr))
return 0;
 
return effective_mode;
-- 
2.25.1



Re: [PATCH 6/8] mm: don't include in

2022-02-07 Thread Christoph Hellwig
On Mon, Feb 07, 2022 at 04:19:29PM -0500, Felix Kuehling wrote:
>
> Am 2022-02-07 um 01:32 schrieb Christoph Hellwig:
>> Move the check for the actual pgmap types that need the free at refcount
>> one behavior into the out of line helper, and thus avoid the need to
>> pull memremap.h into mm.h.
>>
>> Signed-off-by: Christoph Hellwig 
>
> The amdkfd part looks good to me.
>
> It looks like this patch is not based on Alex Sierra's coherent memory 
> series. He added two new helpers is_device_coherent_page and 
> is_dev_private_or_coherent_page that would need to be moved along with 
> is_device_private_page and is_pci_p2pdma_page.

Yes.  I NAKed that series because it spreads the mess with the refcount
further in this latest version.  My intent is that it gets rebased
on top of this to avoid that spread.  Same for the p2p series from Logan.



RE: [PATCH] drm/amd/pm: fix hwmon node of power1_label create issue

2022-02-07 Thread Feng, Kenneth
Reviewed-by: Kenneth Feng 


-Original Message-
From: Wang, Yang(Kevin)  
Sent: Tuesday, February 8, 2022 2:35 PM
To: amd-gfx@lists.freedesktop.org
Cc: Feng, Kenneth ; Lazar, Lijo ; 
Hou, Xiaomeng (Matthew) ; Wang, Yang(Kevin) 

Subject: [PATCH] drm/amd/pm: fix hwmon node of power1_label create issue

Fix a typo involving "power1_label";
it causes the hwmon node of power1_label not to be created.

Fixes: ae07970a06 ("drm/amd/pm: add support for hwmon control of slow and fast 
PPT limit on vangogh")

Signed-off-by: Yang Wang 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index d68e7132da2c..d6c01c59f32e 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -3288,7 +3288,7 @@ static umode_t hwmon_attributes_visible(struct kobject 
*kobj,
 attr == &sensor_dev_attr_power2_cap.dev_attr.attr ||
 attr == &sensor_dev_attr_power2_cap_default.dev_attr.attr ||
 attr == &sensor_dev_attr_power2_label.dev_attr.attr ||
-attr == &sensor_dev_attr_power1_label.dev_attr.attr))
+attr == &sensor_dev_attr_power2_label.dev_attr.attr))
return 0;
 
return effective_mode;
-- 
2.25.1



[PATCH v2] drm/amd/pm: fix hwmon node of power1_label create issue

2022-02-07 Thread Yang Wang
it will cause hwmon node of power1_label is not created.

v2:
the hwmon node of "power1_label" is always needed for all ASICs.
and the patch will remove ASIC type check for "power1_label".

Fixes: ae07970a06 ("drm/amd/pm: add support for hwmon control of slow and fast 
PPT limit on vangogh")

Signed-off-by: Yang Wang 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index d68e7132da2c..d6c01c59f32e 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -3288,7 +3288,7 @@ static umode_t hwmon_attributes_visible(struct kobject 
*kobj,
 attr == &sensor_dev_attr_power2_cap.dev_attr.attr ||
 attr == &sensor_dev_attr_power2_cap_default.dev_attr.attr ||
 attr == &sensor_dev_attr_power2_label.dev_attr.attr ||
-attr == &sensor_dev_attr_power1_label.dev_attr.attr))
+attr == &sensor_dev_attr_power2_label.dev_attr.attr))
return 0;
 
return effective_mode;
-- 
2.25.1



Re: [PATCH 1/8] mm: remove a pointless CONFIG_ZONE_DEVICE check in memremap_pages

2022-02-07 Thread Chaitanya Kulkarni
On 2/6/22 10:32 PM, Christoph Hellwig wrote:
> memremap.c is only built when CONFIG_ZONE_DEVICE is set, so remove
> the superfluous extra check.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>   mm/memremap.c | 3 +--
>   1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/mm/memremap.c b/mm/memremap.c
> index 6aa5f0c2d11fda..5f04a0709e436e 100644
> --- a/mm/memremap.c
> +++ b/mm/memremap.c
> @@ -328,8 +328,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
>   }
>   break;
>   case MEMORY_DEVICE_FS_DAX:
> - if (!IS_ENABLED(CONFIG_ZONE_DEVICE) ||
> - IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
> + if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
>   WARN(1, "File system DAX not supported\n");
>   return ERR_PTR(-EINVAL);
>   }
> 

Indeed it does have it in the makefile:-

root@dev mm (for-next) # grep memremap.o Makefile
obj-$(CONFIG_ZONE_DEVICE) += memremap.o


Looks good.

Reviewed-by: Chaitanya Kulkarni 


Re: [PATCH 2/8] mm: remove the __KERNEL__ guard from

2022-02-07 Thread Chaitanya Kulkarni
On 2/6/22 10:32 PM, Christoph Hellwig wrote:
> __KERNEL__ ifdefs don't make sense outside of include/uapi/.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>   include/linux/mm.h | 4 
>   1 file changed, 4 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 213cc569b19223..7b46174989b086 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3,9 +3,6 @@
>   #define _LINUX_MM_H
>   
>   #include 
> -
> -#ifdef __KERNEL__
> -
>   #include 
>   #include 
>   #include 
> @@ -3381,5 +3378,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned 
> long start,
>   }
>   #endif
>   
> -#endif /* __KERNEL__ */
>   #endif /* _LINUX_MM_H */
> 

Looks good.

Reviewed-by: Chaitanya Kulkarni 



Re: [PATCH v2 2/2] drm/radeon/uvd: Fix forgotten unmap buffer objects

2022-02-07 Thread Christian König

Am 08.02.22 um 04:14 schrieb zhanglianjie:

after the buffer object is successfully mapped, call radeon_bo_kunmap before 
the function returns.

Signed-off-by: zhanglianjie 


Reviewed-by: Christian König 



diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c 
b/drivers/gpu/drm/radeon/radeon_uvd.c
index 377f9cdb5b53..0558d928d98d 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -497,6 +497,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, 
struct radeon_bo *bo,
handle = msg[2];

if (handle == 0) {
+   radeon_bo_kunmap(bo);
DRM_ERROR("Invalid UVD handle!\n");
return -EINVAL;
}
@@ -559,12 +560,10 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, 
struct radeon_bo *bo,
return 0;

default:
-
DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
-   return -EINVAL;
}

-   BUG();
+   radeon_bo_kunmap(bo);
return -EINVAL;
  }

--
2.20.1







Re: [PATCH 3/8] mm: remove pointless includes from

2022-02-07 Thread Chaitanya Kulkarni
On 2/6/22 10:32 PM, Christoph Hellwig wrote:
> hmm.h pulls in the world for no good reason at all.  Remove the
> includes and push a few ones into the users instead.
> 
> Signed-off-by: Christoph Hellwig

Looks good.

Reviewed-by: Chaitanya Kulkarni 



Re: [PATCH 4/8] mm: move free_devmap_managed_page to memremap.c

2022-02-07 Thread Chaitanya Kulkarni
On 2/6/22 10:32 PM, Christoph Hellwig wrote:
> free_devmap_managed_page has nothing to do with the code in swap.c,
> move it to live with the rest of the code for devmap handling.
> 
> Signed-off-by: Christoph Hellwig

True, the only devmap code present in swap.c is a couple of
calls in my tree.

Looks good.

Reviewed-by: Chaitanya Kulkarni 



[PATCH] drm/amd/pm: correct hwmon power lable name

2022-02-07 Thread Yang Wang
only vangogh has 2 types of hwmon power node: "fastPPT" and "slowPPT",
the other asic only has 1 type of hwmon power node: "PPT".

Signed-off-by: Yang Wang 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 426e00112c91..ad5da252228b 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2842,10 +2842,14 @@ static ssize_t amdgpu_hwmon_show_power_label(struct 
device *dev,
 struct device_attribute *attr,
 char *buf)
 {
-   int limit_type = to_sensor_dev_attr(attr)->index;
+   struct amdgpu_device *adev = dev_get_drvdata(dev);
 
-   return sysfs_emit(buf, "%s\n",
-   limit_type == PP_PWR_TYPE_FAST ? "fastPPT" : "slowPPT");
+   if (adev->asic_type == CHIP_VANGOGH)
+   return sysfs_emit(buf, "%s\n",
+ to_sensor_dev_attr(attr)->index == 
PP_PWR_TYPE_FAST ?
+ "fastPPT" : "slowPPT");
+   else
+   return sysfs_emit(buf, "PPT\n");
 }
 
 static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
-- 
2.25.1



Re: [PATCH 5/8] mm: simplify freeing of devmap managed pages

2022-02-07 Thread Chaitanya Kulkarni
> -static inline bool page_is_devmap_managed(struct page *page)
> +bool __put_devmap_managed_page(struct page *page);
> +static inline bool put_devmap_managed_page(struct page *page)
>   {
>   if (!static_branch_unlikely(&devmap_managed_key))
>   return false;
>   if (!is_zone_device_page(page))
>   return false;
> - switch (page->pgmap->type) {
> - case MEMORY_DEVICE_PRIVATE:
> - case MEMORY_DEVICE_FS_DAX:
> - return true;
> - default:
> - break;
> - }

nit:- how about some variant of the following, to make all cases evident
without having to look into memremap.h for other enum values?

 switch (page->pgmap->type) {
 case MEMORY_DEVICE_PRIVATE:
 case MEMORY_DEVICE_FS_DAX:
 return __put_devmap_managed_page(page);
 case MEMORY_DEVICE_GENERIC:
 case MEMORY_DEVICE_PCI_P2PDMA:
 return false;
 default:
 WARN_ON_ONCE(1);
 return false;
 }


> - return false;
> + if (page->pgmap->type != MEMORY_DEVICE_PRIVATE &&
> + page->pgmap->type != MEMORY_DEVICE_FS_DAX)
> + return false;
> + return __put_devmap_managed_page(page);

nit:- we are only returning a true value from __put_devmap_managed_page()
in this patch. Perhaps make __put_devmap_managed_page()
return void and return true from above?

or maybe someone can send a cleanup once this is merged.

>   }
>   

Irrespective of above comment(s), looks good.

Reviewed-by: Chaitanya Kulkarni 



Re: [Intel-gfx] [PATCH v7 1/3] gpu: drm: separate panel orientation property creating and value setting

2022-02-07 Thread Ville Syrjälä
On Tue, Feb 08, 2022 at 03:37:12PM +0800, Hsin-Yi Wang wrote:
> +int drm_connector_init_panel_orientation_property(
> + struct drm_connector *connector)
> +{
> + struct drm_device *dev = connector->dev;
> + struct drm_property *prop;
> +
> + prop = drm_property_create_enum(dev, DRM_MODE_PROP_IMMUTABLE,
> + "panel orientation",
> + drm_panel_orientation_enum_list,
> + ARRAY_SIZE(drm_panel_orientation_enum_list));
> + if (!prop)
> + return -ENOMEM;
> +
> + dev->mode_config.panel_orientation_property = prop;

Leak when called multiple times. I guess you could just put
this into drm_connector_create_standard_properties() instead
and avoid that issue entirely.

-- 
Ville Syrjälä
Intel