Re: [PATCH RESEND 1/1] drm/amd/display: add DCN support for ARM64

2022-10-26 Thread Christian König

Am 27.10.22 um 02:25 schrieb Ao Zhong:

After moving all FPU code to the DML folder, we can enable DCN support
for the ARM64 platform. Remove the -mgeneral-regs-only CFLAG from the
code in the DML folder that needs to use hardware FPU, and add a control
mechanism for ARM Neon.


It's nice to see that the FPU isolation work is so fruitful :)


Signed-off-by: Ao Zhong 


Acked-by: Christian König 


---
  drivers/gpu/drm/amd/display/Kconfig   |  2 +-
  .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c|  6 ++
  drivers/gpu/drm/amd/display/dc/dml/Makefile   | 64 ---
  3 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/Kconfig 
b/drivers/gpu/drm/amd/display/Kconfig
index 0142affcdaa3..a7f1c4e51719 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,7 @@ config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
select SND_HDA_COMPONENT if SND_HDA_CORE
-   select DRM_AMD_DC_DCN if (X86 || PPC64)
+   select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON))
help
  Choose this option if you want to use the new display engine
  support for AMDGPU. This adds required support for Vega and
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index ab0c6d191038..1743ca0a3641 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -31,6 +31,8 @@
  #elif defined(CONFIG_PPC64)
  #include 
  #include 
+#elif defined(CONFIG_ARM64)
+#include 
  #endif
  
  /**

@@ -99,6 +101,8 @@ void dc_fpu_begin(const char *function_name, const int line)
preempt_disable();
enable_kernel_fp();
}
+#elif defined(CONFIG_ARM64)
+   kernel_neon_begin();
  #endif
}
  
@@ -136,6 +140,8 @@ void dc_fpu_end(const char *function_name, const int line)

disable_kernel_fp();
preempt_enable();
}
+#elif defined(CONFIG_ARM64)
+   kernel_neon_end();
  #endif
}
  
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile

index d0c6cf61c676..3cdd109189e0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -33,6 +33,12 @@ ifdef CONFIG_PPC64
  dml_ccflags := -mhard-float -maltivec
  endif
  
+ifdef CONFIG_ARM64

+ifdef CONFIG_DRM_AMD_DC_DCN
+dml_rcflags_arm64 := -mgeneral-regs-only
+endif
+endif
+
  ifdef CONFIG_CC_IS_GCC
  ifeq ($(call cc-ifversion, -lt, 0701, y), y)
  IS_OLD_GCC = 1
@@ -87,32 +93,46 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := 
$(dml_ccflags)
  CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
  CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
  CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) 
-Wno-tautological-compare
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags) 
$(dml_rcflags_arm64)
  CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o  := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := 
$(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := 
$(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := 
$(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_r

Re: [PATCH] drm/amdkfd: Cleanup kfd_dev struct

2022-10-26 Thread Christian König

Am 26.10.22 um 21:36 schrieb Alex Deucher:

From: Mukul Joshi 

Cleanup kfd_dev struct by removing ddev and pdev as both
drm_device and pci_dev can be fetched from amdgpu_device.

Signed-off-by: Mukul Joshi 
Tested-by: Amber Lin 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 


Acked-by: Christian König 

Should we go even a step further and make the kfd dev a member of the 
adev structure so that we can upcast?


Regards,
Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  3 +--
  drivers/gpu/drm/amd/amdkfd/kfd_crat.c  | 12 -
  drivers/gpu/drm/amd/amdkfd/kfd_device.c| 16 +---
  drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 29 +++---
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |  4 +--
  drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 12 -
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c   |  2 +-
  drivers/gpu/drm/amd/amdkfd/kfd_topology.c  | 25 +--
  9 files changed, 49 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 0561812aa0a4..e2b0f8049b9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -195,7 +195,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
}
  
  		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,

-   adev_to_drm(adev), 
&gpu_resources);
+   &gpu_resources);
  
  		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index 647220a8762d..f50e3ba4d7a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -353,7 +353,6 @@ int kgd2kfd_init(void);
  void kgd2kfd_exit(void);
  struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf);
  bool kgd2kfd_device_init(struct kfd_dev *kfd,
-struct drm_device *ddev,
 const struct kgd2kfd_shared_resources *gpu_resources);
  void kgd2kfd_device_exit(struct kfd_dev *kfd);
  void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
@@ -381,7 +380,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
  }
  
  static inline

-bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev,
+bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources 
*gpu_resources)
  {
return false;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 3c771d580098..acb8bc29218b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -2115,8 +2115,8 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev 
*kdev)
struct acpi_table_header *table_header = NULL;
struct acpi_subtable_header *sub_header = NULL;
unsigned long table_end, subtable_len;
-   u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
-   pci_dev_id(kdev->pdev);
+   u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
+   pci_dev_id(kdev->adev->pdev);
u32 bdf;
acpi_status status;
struct acpi_srat_cpu_affinity *cpu;
@@ -2191,7 +2191,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev 
*kdev)
numa_node = 0;
  
  	if (numa_node != NUMA_NO_NODE)

-   set_dev_node(&kdev->pdev->dev, numa_node);
+   set_dev_node(&kdev->adev->pdev->dev, numa_node);
  }
  #endif
  
@@ -2252,14 +2252,14 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,

sub_type_hdr->proximity_domain_from = proximity_domain;
  
  #ifdef CONFIG_ACPI_NUMA

-   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+   if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
kfd_find_numa_node_in_srat(kdev);
  #endif
  #ifdef CONFIG_NUMA
-   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+   if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
sub_type_hdr->proximity_domain_to = 0;
else
-   sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
+   sub_type_hdr->proximity_domain_to = 
kdev->adev->pdev->dev.numa_node;
  #else
sub_type_hdr->proximity_domain_to = 0;
  #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index be7a0b5a2dbc..909397fdb7d8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -228,7 +228,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
  {
struct kfd_dev *kfd = NULL;
const struct kfd2kgd_calls *f2g = NULL;
-   struct pci_dev *pdev = adev->pde

Re: [PATCH] drm/amd: Fail the suspend if resources can't be evicted

2022-10-26 Thread Christian König

Am 26.10.22 um 21:03 schrieb Mario Limonciello:

If a system does not have swap and memory is under 100% usage,
amdgpu will fail to evict resources.  Currently the suspend
carries on proceeding to reset the GPU:

```
[drm] evicting device resources failed
[drm:amdgpu_device_ip_suspend_phase2 [amdgpu]] *ERROR* suspend of IP block 
 failed -12
[drm] free PSP TMR buffer
[TTM] Failed allocating page table
[drm] evicting device resources failed
amdgpu :03:00.0: amdgpu: MODE1 reset
amdgpu :03:00.0: amdgpu: GPU mode1 reset
amdgpu :03:00.0: amdgpu: GPU smu mode1 reset
```

At this point if the suspend actually succeeded I think that amdgpu
would have recovered because the GPU would have power cut off and
restored.  However the kernel fails to continue the suspend from the
memory pressure and amdgpu fails to run the "resume" from the aborted
suspend.

```
ACPI: PM: Preparing to enter system sleep state S3
SLUB: Unable to allocate memory on node -1, gfp=0xdc0(GFP_KERNEL|__GFP_ZERO)
   cache: Acpi-State, object size: 80, buffer size: 80, default order: 0, min 
order: 0
   node 0: slabs: 22, objs: 1122, free: 0
ACPI Error: AE_NO_MEMORY, Could not update object reference count 
(20210730/utdelete-651)

[drm:psp_hw_start [amdgpu]] *ERROR* PSP load kdb failed!
[drm:psp_resume [amdgpu]] *ERROR* PSP resume failed
[drm:amdgpu_device_fw_loading [amdgpu]] *ERROR* resume of IP block  failed 
-62
amdgpu :03:00.0: amdgpu: amdgpu_device_ip_resume failed (-62).
PM: dpm_run_callback(): pci_pm_resume+0x0/0x100 returns -62
amdgpu :03:00.0: PM: failed to resume async: error -62
```

To avoid this series of unfortunate events, fail amdgpu's suspend
when the memory eviction fails.  This will let the system gracefully
recover and the user can try suspend again when the memory pressure
is relieved.


Yeah, I've been thinking about that handling for a while now as well.

Failing to suspend when we are OOM is certainly the right thing to do 
from a technical perspective.


But it also means that when users close their laptop it can happen that 
it keeps running and draining the battery.


On the other hand when you don't have swap configured it's your fault 
and not the drivers.


It's a trade off and I'm not sure what's better. Alex any comment here?

Thanks,
Christian.



Reported-by: p...@davidak.de
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2223
Signed-off-by: Mario Limonciello 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 ++-
  1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6f958603c8cc2..ae10acede495e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4060,15 +4060,18 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
   * at suspend time.
   *
   */
-static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
+static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
  {
+   int ret;
+
/* No need to evict vram on APUs for suspend to ram or s2idle */
if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
-   return;
+   return 0;
  
-	if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))

+   ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
+   if (ret)
DRM_WARN("evicting device resources failed\n");
-
+   return ret;
  }
  
  /*

@@ -4118,7 +4121,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
if (!adev->in_s0ix)
amdgpu_amdkfd_suspend(adev, adev->in_runpm);
  
-	amdgpu_device_evict_resources(adev);

+   r = amdgpu_device_evict_resources(adev);
+   if (r)
+   return r;
  
  	amdgpu_fence_driver_hw_fini(adev);
  




[PATCH] MAINTAINERS: Update GPU Scheduler email

2022-10-26 Thread Luben Tuikov
Update GPU Scheduler maintainer email.

Cc: Alex Deucher 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Dave Airlie 
Cc: AMD Graphics 
Cc: Direct Rendering Infrastructure - Development 

Signed-off-by: Luben Tuikov 
Acked-by: Christian König 
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index b1f3d0893393bb..1997006d1e25a6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7109,7 +7109,7 @@ F:drivers/gpu/drm/ttm/
 F: include/drm/ttm/
 
 DRM GPU SCHEDULER
-M: Andrey Grodzovsky 
+M: Luben Tuikov 
 L: dri-de...@lists.freedesktop.org
 S: Maintained
 T: git git://anongit.freedesktop.org/drm/drm-misc

base-commit: e1e7bc481d49c3e3ada11029ce0d9b85a0a539d7
-- 
2.38.1



Re: [PATCH 2/5] drm/amdgpu: stop resubmitting jobs for bare metal reset

2022-10-26 Thread Christian König

Hi Shaoyun,

yes, absolutely. If you say that this is ok then I'm fine with that as well.

Thanks,
Christian.

Am 26.10.22 um 20:13 schrieb Liu, Shaoyun:

[AMD Official Use Only - General]

SRIOV already has its own reset routine, amdgpu_device_reset_sriov, and we try 
to put the SRIOV-specific sequence inside this function. For the rest 
(re-submitting etc.) we should try to have the same behavior as bare metal.
Can we just not do the re-submission for all kinds of reset, since the kernel 
already signals the reset event to user level (at least for the compute stack)?

Regards
Shaoyun.liu

-Original Message-
From: Koenig, Christian 
Sent: Wednesday, October 26, 2022 1:27 PM
To: Liu, Shaoyun ; Tuikov, Luben ; Prosyak, 
Vitaly ; Deucher, Alexander ; 
daniel.vet...@ffwll.ch; amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 2/5] drm/amdgpu: stop resubmitting jobs for bare metal reset

The problem is that this re-submitting is currently an integral part of how 
SRIOV works.

The host can send a function level reset request to the clients when it sees 
that some schedule switching didn't worked as expected and in this case (and 
only this case) the hardware has actually never started to even work on the 
IBs. So the re-submission is actually save from this side.

But in general you are right, the sw side is just completely broken because we 
came up with a bunch of rather strict rules for the dma_fence implementation 
(and those rules are perfectly valid and necessary).

Regards,
Christian.

Am 26.10.22 um 18:10 schrieb Liu, Shaoyun:

[AMD Official Use Only - General]

User space shouldn't care whether it is SRIOV or not; I don't think we need to 
keep the re-submission for SRIOV either. The reset from SRIOV could trigger 
the host to do a whole-GPU reset, which will have the same issue as bare metal.

Regards
Shaoyun.liu

-Original Message-
From: amd-gfx  On Behalf Of
Christian König
Sent: Wednesday, October 26, 2022 11:36 AM
To: Tuikov, Luben ; Prosyak, Vitaly
; Deucher, Alexander
; daniel.vet...@ffwll.ch;
amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
Cc: Koenig, Christian 
Subject: [PATCH 2/5] drm/amdgpu: stop resubmitting jobs for bare metal
reset

Re-submitting IBs by the kernel has many problems because prerequisite state 
is not automatically re-created as well. In other words, neither binary 
semaphores nor things like ring buffer pointers are in the state they should be 
when the hardware starts to work on the IBs again.

In addition to that, even after more than 5 years of developing this feature it 
is still not stable, and we have massive problems getting the reference counts 
right.

As discussed with user space developers this behavior is not helpful in the 
first place. For graphics and multimedia workloads it makes much more sense to 
either completely re-create the context or at least re-submitting the IBs from 
userspace.

For compute use cases re-submitting is also not very helpful since userspace 
must rely on the accuracy of the result.

Because of this we stop this practice and instead just properly note that the 
fence submission was canceled. The only use case we keep the re-submission for 
now is SRIOV and function level resets.

Signed-off-by: Christian König 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++-
   1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d4584e577b51..39e94feba1ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5288,7 +5288,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
  continue;

  /* No point to resubmit jobs if we didn't HW reset*/
-   if (!tmp_adev->asic_reset_res && !job_signaled)
+   if (!tmp_adev->asic_reset_res && !job_signaled &&
+   amdgpu_sriov_vf(tmp_adev))

drm_sched_resubmit_jobs(&ring->sched);

  drm_sched_start(&ring->sched,
!tmp_adev->asic_reset_res);
--
2.25.1





RE: [pull] amdgpu, amdkfd drm-fixes-6.1

2022-10-26 Thread Chen, Guchun
Hello Alex,

Regarding the patch below, I guess we need to pick up "8eb402f16d5b drm/amdgpu: Fix 
uninitialized warning in mmhub_v2_0_get_clockgating()" together with it; otherwise, 
the build will possibly fail. Is that right?

 " Lijo Lazar (1): 
  drm/amdgpu: Remove ATC L2 access for MMHUB 2.1.x"

Regards,
Guchun

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Thursday, October 27, 2022 10:41 AM
To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; 
airl...@gmail.com; daniel.vet...@ffwll.ch
Cc: Deucher, Alexander 
Subject: [pull] amdgpu, amdkfd drm-fixes-6.1

Hi Dave, Daniel,

Fixes for 6.1.  Fixes for new IPs and misc other fixes.

The following changes since commit cbc543c59e8e7c8bc8604d6ac3e18a029e3d5118:

  Merge tag 'drm-misc-fixes-2022-10-20' of 
git://anongit.freedesktop.org/drm/drm-misc into drm-fixes (2022-10-21 09:56:14 
+1000)

are available in the Git repository at:

  
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.freedesktop.org%2Fagd5f%2Flinux.git&data=05%7C01%7Cguchun.chen%40amd.com%7C6bbe7e42eb3d43bf622208dab7c4c906%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638024353059986195%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=Y%2BU1OrPyhCaS44nGQMTrtqBpdkcJwFdFJEAaqWGiaqo%3D&reserved=0
 tags/amd-drm-fixes-6.1-2022-10-26-1

for you to fetch changes up to d61e1d1d5225a9baeb995bcbdb904f66f70ed87e:

  drm/amdgpu: disallow gfxoff until GC IP blocks complete s2idle resume 
(2022-10-26 17:48:43 -0400)


amd-drm-fixes-6.1-2022-10-26-1:

amdgpu:
- Stable pstate fix
- SMU 13.x updates
- SR-IOV fixes
- PCI AER fix
- GC 11.x fixes
- Display fixes
- Expose IMU firmware version for debugging
- Plane modifier fix
- S0i3 fix

amdkfd:
- Fix possible memory leak
- Fix GC 10.x cache info reporting

UAPI:
- Expose IMU firmware version via existing INFO firmware query


Alvin Lee (1):
  drm/amd/display: Don't return false if no stream

Chengming Gui (1):
  drm/amdgpu: fix pstate setting issue

David Francis (1):
  drm/amd: Add IMU fw version to fw version queries

Jesse Zhang (1):
  drm/amdkfd: correct the cache info for gfx1036

Joaquín Ignacio Aramendía (1):
  drm/amd/display: Revert logic for plane modifiers

Kenneth Feng (2):
  drm/amd/pm: update driver-if header for smu_v13_0_10
  drm/amd/pm: allow gfxoff on gc_11_0_3

Lijo Lazar (1):
  drm/amdgpu: Remove ATC L2 access for MMHUB 2.1.x

Prike Liang (2):
  drm/amdkfd: update gfx1037 Lx cache setting
  drm/amdgpu: disallow gfxoff until GC IP blocks complete s2idle resume

Rafael Mendonca (1):
  drm/amdkfd: Fix memory leak in kfd_mem_dmamap_userptr()

Rodrigo Siqueira (1):
  drm/amd/display: Remove wrong pipe control lock

Yiqing Yao (1):
  drm/amdgpu: Adjust MES polling timeout for sriov

YuBiao Wang (1):
  drm/amdgpu: skip mes self test for gc 11.0.3 in recover

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c   |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c|   5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  18 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  13 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c  |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h|   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c |   1 +
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c |   9 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c|  28 ++
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c  | 106 +++-
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c|  50 ++
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c |  12 +--
 .../amd/display/dc/dcn32/dcn32_resource_helpers.c  |   2 +-
 .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 111 +++--
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h   |   2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c |   7 +-
 include/uapi/drm/amdgpu_drm.h  |   2 +
 18 files changed, 259 insertions(+), 119 deletions(-)


Re: [PATCH v2 1/2] drm/amdkfd: Init the base cu processor id

2022-10-26 Thread Ma, Jun
Hi Felix,

On 10/27/2022 3:07 AM, Felix Kuehling wrote:
> On 2022-10-26 05:03, Ma Jun wrote:
>> Init and save the base cu processor id for later use
>>
>> Signed-off-by: Ma Jun 
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 20 +---
>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 +++
>>   2 files changed, 16 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> index d25ac9cbe5b2..35d09e29aafb 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> @@ -41,13 +41,18 @@ static uint32_t gpu_processor_id_low = 0x80001000;
>>* @total_cu_count - Total CUs present in the GPU including ones
>>*   masked off
>>*/
>> -static inline unsigned int get_and_inc_gpu_processor_id(
>> -unsigned int total_cu_count)
>> +static inline void assign_gpu_processor_id(struct kfd_dev *kfd)
>>   {
>> -int current_id = gpu_processor_id_low;
>> +struct amdgpu_device *adev = kfd->adev;
>> +unsigned int array_count = 0;
>> +unsigned int total_cu_count = 0;
>> +
>> +kfd->processor_id_low = gpu_processor_id_low;
> 
> This still seems backwards. And I don't think you need it. You can 
> access this after the topology has been parsed from the 
> kfd_topology_device (dev->node_props.simd_id_base). I see you're using 
> it in the next patch in kfd_fill_cache_non_crat_info, which has a 
> kfd_topology_device parameter. This should run after parsing the CRAT 
> tables, so the info should be available at that time.

Thanks. I will use dev->node_props.simd_id_base in the second patch and drop
this one.

Regards
Ma Jun
> 
> Regards,
>    Felix
> 
> 
>> +
>> +array_count = adev->gfx.config.max_sh_per_se * 
>> adev->gfx.config.max_shader_engines;
>> +total_cu_count = array_count * adev->gfx.config.max_cu_per_sh;
>>   
>>  gpu_processor_id_low += total_cu_count;
>> -return current_id;
>>   }
>>   
>>   /* Static table to describe GPU Cache information */
>> @@ -2223,7 +2228,6 @@ static int kfd_create_vcrat_image_gpu(void 
>> *pcrat_image,
>>  struct crat_subtype_computeunit *cu;
>>  struct kfd_cu_info cu_info;
>>  int avail_size = *size;
>> -uint32_t total_num_of_cu;
>>  int num_of_cache_entries = 0;
>>  int cache_mem_filled = 0;
>>  uint32_t nid = 0;
>> @@ -2275,8 +2279,10 @@ static int kfd_create_vcrat_image_gpu(void 
>> *pcrat_image,
>>  cu->wave_front_size = cu_info.wave_front_size;
>>  cu->array_count = cu_info.num_shader_arrays_per_engine *
>>  cu_info.num_shader_engines;
>> -total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
>> -cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
>> +
>> +assign_gpu_processor_id(kdev);
>> +cu->processor_id_low = kdev->processor_id_low;
>> +
>>  cu->num_cu_per_array = cu_info.num_cu_per_sh;
>>  cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
>>  cu->num_banks = cu_info.num_shader_engines;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 182eb67edbc5..4c06b233472f 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -370,6 +370,9 @@ struct kfd_dev {
>>   
>>  /* Track per device allocated watch points. */
>>  uint32_t alloc_watch_ids;
>> +
>> +/* cu processor id base */
>> +unsigned intprocessor_id_low;
>>   };
>>   
>>   struct kfd_ipc_obj;


[pull] amdgpu, amdkfd drm-fixes-6.1

2022-10-26 Thread Alex Deucher
Hi Dave, Daniel,

Fixes for 6.1.  Fixes for new IPs and misc other fixes.

The following changes since commit cbc543c59e8e7c8bc8604d6ac3e18a029e3d5118:

  Merge tag 'drm-misc-fixes-2022-10-20' of 
git://anongit.freedesktop.org/drm/drm-misc into drm-fixes (2022-10-21 09:56:14 
+1000)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-fixes-6.1-2022-10-26-1

for you to fetch changes up to d61e1d1d5225a9baeb995bcbdb904f66f70ed87e:

  drm/amdgpu: disallow gfxoff until GC IP blocks complete s2idle resume 
(2022-10-26 17:48:43 -0400)


amd-drm-fixes-6.1-2022-10-26-1:

amdgpu:
- Stable pstate fix
- SMU 13.x updates
- SR-IOV fixes
- PCI AER fix
- GC 11.x fixes
- Display fixes
- Expose IMU firmware version for debugging
- Plane modifier fix
- S0i3 fix

amdkfd:
- Fix possible memory leak
- Fix GC 10.x cache info reporting

UAPI:
- Expose IMU firmware version via existing INFO firmware query


Alvin Lee (1):
  drm/amd/display: Don't return false if no stream

Chengming Gui (1):
  drm/amdgpu: fix pstate setting issue

David Francis (1):
  drm/amd: Add IMU fw version to fw version queries

Jesse Zhang (1):
  drm/amdkfd: correct the cache info for gfx1036

Joaquín Ignacio Aramendía (1):
  drm/amd/display: Revert logic for plane modifiers

Kenneth Feng (2):
  drm/amd/pm: update driver-if header for smu_v13_0_10
  drm/amd/pm: allow gfxoff on gc_11_0_3

Lijo Lazar (1):
  drm/amdgpu: Remove ATC L2 access for MMHUB 2.1.x

Prike Liang (2):
  drm/amdkfd: update gfx1037 Lx cache setting
  drm/amdgpu: disallow gfxoff until GC IP blocks complete s2idle resume

Rafael Mendonca (1):
  drm/amdkfd: Fix memory leak in kfd_mem_dmamap_userptr()

Rodrigo Siqueira (1):
  drm/amd/display: Remove wrong pipe control lock

Yiqing Yao (1):
  drm/amdgpu: Adjust MES polling timeout for sriov

YuBiao Wang (1):
  drm/amdgpu: skip mes self test for gc 11.0.3 in recover

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c   |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c|   5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  18 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  13 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c  |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h|   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c |   1 +
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c |   9 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c|  28 ++
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c  | 106 +++-
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c|  50 ++
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c |  12 +--
 .../amd/display/dc/dcn32/dcn32_resource_helpers.c  |   2 +-
 .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 111 +++--
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h   |   2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c |   7 +-
 include/uapi/drm/amdgpu_drm.h  |   2 +
 18 files changed, 259 insertions(+), 119 deletions(-)


Re: [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder

2022-10-26 Thread Ao Zhong
Hi Rodrigo,

Thanks for your review! This is my first time submitting a patch to the kernel.

I'm not very good at using these tools yet. 😂

Recently I got a Huawei Qingyun W510 (擎云 W510) ARM workstation

from the second-hand market in China. It's SBSA and has a Kunpeng 920 (3211k) 
SoC

with 24 Huawei-customized TSV110 cores. Since it's SFF form factor, and my 
machine

supports PCIe 4.0 (looks like some W510 have it disabled), I installed an RX 
6400 on it

as my daily drive machine. It has decent performance. I uploaded a benchmark 
result on Geekbench.

Link: https://browser.geekbench.com/v5/cpu/18237269

Ao

Am 26.10.22 um 18:12 schrieb Rodrigo Siqueira:
>
>
> On 10/26/22 07:13, Ao Zhong wrote:
>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>> these two operations in dcn32/dcn32_resource.c still need to use FPU,
>> This will cause compilation to fail on ARM64 platforms because
>> -mgeneral-regs-only is enabled by default to disable the hardware FPU.
>> Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
>> dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
>> in dcn32_fpu.c, and move above two operations into this function.
>>
>> Acked-by: Christian König 
>> Signed-off-by: Ao Zhong 
>> ---
>>   drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
>>   drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 8 
>>   drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  | 3 +++
>>   3 files changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
>> b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>> index a88dd7b3d1c1..287b7fa9bf41 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>> @@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
>>   timing = &pipe->stream->timing;
>>     pipes[pipe_cnt].pipe.src.gpuvm = true;
>> -    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>> -    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>> +    DC_FP_START();
>> +    dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
>> +    DC_FP_END();
>>   pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
>>   pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // 
>> according to spreadsheet
>>   pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
>> b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>> index 819de0f11012..58772fce6437 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>> @@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, 
>> struct clk_bw_params *bw_pa
>>   }
>>   }
>>   +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>> +  int pipe_cnt)
>> +{
>> +    dc_assert_fp_enabled();
>> +
>> +    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>> +    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>> +}
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h 
>> b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>> index 3a3dc2ce4c73..ab010e7e840b 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>> @@ -73,4 +73,7 @@ int 
>> dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
>>     void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
>>   +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>> +  int pipe_cnt);
>> +
>>   #endif
>
> Hi Ao,
>
> First of all, thanks a lot for your patchset.
>
> For both patches:
>
> Reviewed-by: Rodrigo Siqueira 
>
> And I also applied them to amd-staging-drm-next.
>
> Btw, if you are using git-send-email for sending patches, I recommend the 
> following options:
>
> git send-email --annotate --cover-letter --thread --no-chain-reply-to 
> --to="EMAILS" --cc="mail...@list.com" 
>
> Always add a cover letter, it makes it easier to follow the patchset, and you 
> can also describe each change in the cover letter.
>
> When you send that other patch enabling ARM64, please add as many details as 
> possible in the cover letter. Keep in mind that we have been working for 
> isolating those FPU codes in a way that we do not regress any of our ASICs, 
> which means that every change was well-tested on multiple devices. Anyway, 
> maybe you can refer to this cover letter to write down the commit message:
>
> https://patchwork.freedesktop.org/series/93042/
>
> Finally, do you have a use case for this change? I mean, ARM64 + AMD dGPU.
>
> Thanks again!
> Siqueira
>


[PATCH RESEND 1/1] drm/amd/display: add DCN support for ARM64

2022-10-26 Thread Ao Zhong
After moving all FPU code to the DML folder, we can enable DCN support
for the ARM64 platform. Remove the -mgeneral-regs-only CFLAG from the
code in the DML folder that needs to use hardware FPU, and add a control
mechanism for ARM Neon.

Signed-off-by: Ao Zhong 
---
 drivers/gpu/drm/amd/display/Kconfig   |  2 +-
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c|  6 ++
 drivers/gpu/drm/amd/display/dc/dml/Makefile   | 64 ---
 3 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/Kconfig 
b/drivers/gpu/drm/amd/display/Kconfig
index 0142affcdaa3..a7f1c4e51719 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,7 @@ config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
select SND_HDA_COMPONENT if SND_HDA_CORE
-   select DRM_AMD_DC_DCN if (X86 || PPC64)
+   select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON))
help
  Choose this option if you want to use the new display engine
  support for AMDGPU. This adds required support for Vega and
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index ab0c6d191038..1743ca0a3641 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -31,6 +31,8 @@
 #elif defined(CONFIG_PPC64)
 #include 
 #include 
+#elif defined(CONFIG_ARM64)
+#include 
 #endif
 
 /**
@@ -99,6 +101,8 @@ void dc_fpu_begin(const char *function_name, const int line)
preempt_disable();
enable_kernel_fp();
}
+#elif defined(CONFIG_ARM64)
+   kernel_neon_begin();
 #endif
}
 
@@ -136,6 +140,8 @@ void dc_fpu_end(const char *function_name, const int line)
disable_kernel_fp();
preempt_enable();
}
+#elif defined(CONFIG_ARM64)
+   kernel_neon_end();
 #endif
}
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile 
b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index d0c6cf61c676..3cdd109189e0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -33,6 +33,12 @@ ifdef CONFIG_PPC64
 dml_ccflags := -mhard-float -maltivec
 endif
 
+ifdef CONFIG_ARM64
+ifdef CONFIG_DRM_AMD_DC_DCN
+dml_rcflags_arm64 := -mgeneral-regs-only
+endif
+endif
+
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
@@ -87,32 +93,46 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := 
$(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) 
-Wno-tautological-compare
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags) 
$(dml_rcflags_arm64)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := 
$(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o  := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := 
$(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := 
$(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := 
$(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := 
$(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := 
$(dml_rcflags) $(dml_rcflags_arm64)
+CF

[PATCH RESEND 0/1] drm/amd/display: add DCN support for ARM64

2022-10-26 Thread Ao Zhong
Hello everyone,

Recently I got a SBSA ARM64 workstation, and try to use it as my daily
drive machine after installing an AMD RX6400 graphics card.

Although the newer AMD GPUs require DCN (Display Core Next) support to work
properly, DCN is not supported on ARM64 platforms, because some code in
DCN needs to use the FPU and aarch64 has no soft floating-point toolchain.

Display Core developers have started isolating the code that needs to use
the FPU, and most of it has been isolated. In the process of trying to
enable DCN support for the ARM64 platform, I found that there are two
remaining code blocks that are not isolated. Due to -mgeneral-regs-only
is enabled by default to compile the kernel on the ARM64 platform, these
two code blocks will cause the compilation to fail after enabling DCN
support.

I have submitted two patches to isolate the remaining two code blocks
that need to use the FPU.
Links: https://patchwork.freedesktop.org/patch/508813/
Links: https://patchwork.freedesktop.org/patch/508816/
After merging these two patches, we can enable DCN support for ARM64
platform.

I removed the -mgeneral-regs-only CFLAG in the dml/Makefile for the code
that needs to use the hardware FPU, and added a control mechanism in
amdgpu_dm/dc_fpu.c for ARM Neon.

Ao Zhong (1):
  drm/amd/display: add DCN support for ARM64

 drivers/gpu/drm/amd/display/Kconfig   |  2 +-
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c|  6 ++
 drivers/gpu/drm/amd/display/dc/dml/Makefile   | 64 ---
 3 files changed, 49 insertions(+), 23 deletions(-)

-- 
2.37.4



[PATCH] drm/amdkfd: update GFX11 CWSR trap handler

2022-10-26 Thread Graham Sider
From: Jay Cornwall 

With corresponding FW change fixes issue where triggering CWSR on a
workgroup with waves in s_barrier wouldn't lead to a back-off and
therefore cause a hang.

Signed-off-by: Jay Cornwall 
Tested-by: Graham Sider 
---
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h| 764 +-
 .../amd/amdkfd/cwsr_trap_handler_gfx10.asm|   6 +
 2 files changed, 389 insertions(+), 381 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h 
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index c7118843db05..0c4c5499bb5c 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -2495,442 +2495,444 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf9f, 0x,
 };
 static const uint32_t cwsr_trap_gfx11_hex[] = {
-   0xbfa1, 0xbfa0021e,
+   0xbfa1, 0xbfa00221,
0xb0804006, 0xb8f8f802,
0x9178ff78, 0x00020006,
-   0xb8fbf803, 0xbf0d9f6d,
-   0xbfa20006, 0x8b6eff78,
-   0x2000, 0xbfa10009,
-   0x8b6eff6d, 0x00ff,
-   0xbfa2001e, 0x8b6eff7b,
-   0x0400, 0xbfa20041,
-   0xbf830010, 0xb8fbf803,
-   0xbfa0fffa, 0x8b6eff7b,
-   0x0900, 0xbfa20015,
-   0x8b6eff7b, 0x71ff,
-   0xbfa10008, 0x8b6fff7b,
-   0x7080, 0xbfa10001,
-   0xbeee1287, 0xb8eff801,
-   0x846e8c6e, 0x8b6e6f6e,
-   0xbfa2000a, 0x8b6eff6d,
-   0x00ff, 0xbfa20007,
-   0xb8eef801, 0x8b6eff6e,
-   0x0800, 0xbfa20003,
+   0xb8fbf803, 0xbf0d9e6d,
+   0xbfa10001, 0xbfbd,
+   0xbf0d9f6d, 0xbfa20006,
+   0x8b6eff78, 0x2000,
+   0xbfa10009, 0x8b6eff6d,
+   0x00ff, 0xbfa2001e,
0x8b6eff7b, 0x0400,
-   0xbfa20026, 0xbefa4d82,
-   0xbf89fc07, 0x84fa887a,
-   0xf4005bbd, 0xf810,
-   0xbf89fc07, 0x846e976e,
-   0x9177ff77, 0x0080,
-   0x8c776e77, 0xf4045bbd,
-   0xf800, 0xbf89fc07,
-   0xf4045ebd, 0xf808,
-   0xbf89fc07, 0x8bee6e6e,
-   0xbfa10001, 0xbe80486e,
-   0x8b6eff6d, 0x01ff,
-   0xbfa20005, 0x8c78ff78,
-   0x2000, 0x80ec886c,
-   0x82ed806d, 0xbfa5,
-   0x8b6eff6d, 0x0100,
-   0xbfa20002, 0x806c846c,
-   0x826d806d, 0x8b6dff6d,
-   0x, 0x8bfe7e7e,
-   0x8bea6a6a, 0xb978f802,
-   0xbe804a6c, 0x8b6dff6d,
-   0x, 0xbefa0080,
-   0xb97a0283, 0xbeee007e,
-   0xbeef007f, 0xbefe0180,
-   0xbefe4d84, 0xbf89fc07,
-   0x8b7aff7f, 0x0400,
-   0x847a857a, 0x8c6d7a6d,
-   0xbefa007e, 0x8b7bff7f,
-   0x, 0xbefe00c1,
-   0xbeff00c1, 0xdca6c000,
-   0x007a, 0x7e000280,
-   0xbefe007a, 0xbeff007b,
-   0xb8fb02dc, 0x847b997b,
-   0xb8fa3b05, 0x807a817a,
-   0xbf0d997b, 0xbfa20002,
-   0x847a897a, 0xbfa1,
-   0x847a8a7a, 0xb8fb1e06,
-   0x847b8a7b, 0x807a7b7a,
+   0xbfa20041, 0xbf830010,
+   0xb8fbf803, 0xbfa0fffa,
+   0x8b6eff7b, 0x0900,
+   0xbfa20015, 0x8b6eff7b,
+   0x71ff, 0xbfa10008,
+   0x8b6fff7b, 0x7080,
+   0xbfa10001, 0xbeee1287,
+   0xb8eff801, 0x846e8c6e,
+   0x8b6e6f6e, 0xbfa2000a,
+   0x8b6eff6d, 0x00ff,
+   0xbfa20007, 0xb8eef801,
+   0x8b6eff6e, 0x0800,
+   0xbfa20003, 0x8b6eff7b,
+   0x0400, 0xbfa20026,
+   0xbefa4d82, 0xbf89fc07,
+   0x84fa887a, 0xf4005bbd,
+   0xf810, 0xbf89fc07,
+   0x846e976e, 0x9177ff77,
+   0x0080, 0x8c776e77,
+   0xf4045bbd, 0xf800,
+   0xbf89fc07, 0xf4045ebd,
+   0xf808, 0xbf89fc07,
+   0x8bee6e6e, 0xbfa10001,
+   0xbe80486e, 0x8b6eff6d,
+   0x01ff, 0xbfa20005,
+   0x8c78ff78, 0x2000,
+   0x80ec886c, 0x82ed806d,
+   0xbfa5, 0x8b6eff6d,
+   0x0100, 0xbfa20002,
+   0x806c846c, 0x826d806d,
+   0x8b6dff6d, 0x,
+   0x8bfe7e7e, 0x8bea6a6a,
+   0xb978f802, 0xbe804a6c,
+   0x8b6dff6d, 0x,
+   0xbefa0080, 0xb97a0283,
+   0xbeee007e, 0xbeef007f,
+   0xbefe0180, 0xbefe4d84,
+   0xbf89fc07, 0x8b7aff7f,
+   0x0400, 0x847a857a,
+   0x8c6d7a6d, 0xbefa007e,
0x8b7bff7f, 0x,
-   0x807aff7a, 0x0200,
-   0x807a7e7a, 0x827b807b,
-   0xd761, 0x00010870,
-   0xd761, 0x00010a71,
-   0xd761, 0x00010c72,
-   0xd761, 0x00010e73,
-   0xd761, 0x00011074,
-   0xd761, 0x00011275,
-   0xd761, 0x00011476,
-   0xd761, 0x00011677,
-   0xd761, 0x00011a79,
-   0xd761, 0x00011c7e,
-   0xd761, 0x00011e7f,
-   0xbefe00ff, 0x3fff,
-   0xbeff0080, 0xdca6c040,
-   0x007a, 0xd760007a,
-   0x00011d00, 0xd760007b,
-   0x00011f00, 0xbefe007a,
-   0xbeff007b, 0xbef4007e,
-   0x8b75ff7f, 0x,
-   0x8c75ff75, 0x0004,
-   0xbef60080, 0xbef700ff,
-   0x10807fac, 0xbef1007d,
-   

Re: [PATCH v5 02/31] drm/i915: Don't register backlight when another backlight should be used (v2)

2022-10-26 Thread Matthew Garrett
On Wed, Oct 26, 2022 at 11:59:28AM +0200, Hans de Goede wrote:

> Ok, so this is a local customization to what is already a custom BIOS
> for a custom motherboard. There is a lot of custom in that sentence and
> TBH at some point things might become too custom for them to be expected
> to work OOTB.

But it *did* work OOTB before. You broke it. I accept that I'm a 
ludicrously weird corner case here, but there are going to be other 
systems that are also affected by this.

> I'm afraid things are not that simple. I assume that with
> "if ACPI backlight control is expected to work" you mean don't
> use ACPI backlight control when (acpi_osi_is_win8() && native_available)
> evaluates to true because it is known to be broken on some of
> those systems because Windows 8 stopped using it ?

Correct.

> Unfortunately something similar applies to vendor interfaces,
> When Windows XP started using (and mandating for certification
> IIRC) ACPI backlight control, vendors still kept their own
> vendor specific EC/smbios/ACPI/WMI backlight interfaces around for
> a long long time, except they were often no longer tested.

The current situation (both before your patchset and with its current 
implementation) is that vendor is preferred to native, so if the vendor 
interface is present then we're already using it.

> > The 
> > problem you're dealing with is that the knowledge of whether or not 
> > there's a vendor interface isn't something the core kernel code knows 
> > about. What you're proposing here is effectively for us to expose 
> > additional information about whether or not there's a vendor interface 
> > in the system firmware, but since we're talking in some cases about 
> > hardware that's almost 20 years old, we're not realistically going to 
> > get those old machines fixed.
> 
> I don't understand why you keep talking about the old vendor interfaces,
> at least for the chromebook part of this thread the issue is that
> the i915 driver no longer registers the intel_backlight device which
> is a native device type, which is caused by the patch this email
> thread is about (and old vendor interfaces do not come into play
> at all here). So AFAICT this is a native vs acpi backlight control
> issue ?

I'm referring to your proposed patch that changed the default from 
backlight_vendor to backlight_native, which would fix my machine and 
Chromebooks but break anything that relies on the vendor interfaces.

> I really want to resolve your bug, but I still lack a lot of info,
> like what backlight interface you were actually using in 6.0 ?

Native.

> {
>  .callback = video_detect_force_video,
>  /* ThinkPad X201s */
>  .matches = {
> DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
> DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X201s"),
> },
> },
> 
> will trigger.

In this case you'd break anyone else running the system who isn't using 
the hacked EC and different ACPI tables - obviously there's ways round 
this, but realistically since I'm (as far as I know) the only person in 
this situation it makes more sense for me to add a kernel parameter than 
carry around an exceedingly niche DMI quirk. I'm fine with that. But the 
point I'm trying to make is that the machines *are* telling you whether 
they'd prefer vendor or native, and you're not taking that into account 
in the video_detect code.


Re: [PATCH -next] drm/amdkfd: Fix NULL pointer dereference in svm_migrate_to_ram()

2022-10-26 Thread Felix Kuehling

On 2022-10-25 22:00, Yang Li wrote:

./drivers/gpu/drm/amd/amdkfd/kfd_migrate.c:985:58-62: ERROR: p is NULL but 
dereferenced.

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2549
Reported-by: Abaci Robot 
Signed-off-by: Yang Li 


The patch is

Reviewed-by: Felix Kuehling 

I applied to our amd-staging-drm-next branch.

Thanks,
  Felix



---

change in v2:
According to Felix's suggestion, move the pr_debug up before the 
kfd_unref_process
call.

  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 4 +---
  1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 20d6b2578927..b9c8d29d95aa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -978,12 +978,10 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
  out_unlock_svms:
mutex_unlock(&p->svms.lock);
  out_unref_process:
+   pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
kfd_unref_process(p);
  out_mmput:
mmput(mm);
-
-   pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
-
return r ? VM_FAULT_SIGBUS : 0;
  }
  


[PATCH] drm/amdgpu: disable GFXOFF during compute for GFX11

2022-10-26 Thread Graham Sider
Temporary workaround to fix issues observed in some compute applications
when GFXOFF is enabled on GFX11.

Signed-off-by: Graham Sider 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 37db39ba8718..0d764c15f6bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -706,6 +706,13 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
 
 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
 {
+   /* Temporary workaround to fix issues observed in some
+* compute applications when GFXOFF is enabled on GFX11.
+*/
+   if (IP_VERSION_MAJ(adev->ip_versions[GC_HWIP][0]) == 11) {
+   pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
+   amdgpu_gfx_off_ctrl(adev, idle);
+   }
amdgpu_dpm_switch_power_profile(adev,
PP_SMC_POWER_PROFILE_COMPUTE,
!idle);
-- 
2.25.1



[PATCH] drm/amdkfd: Cleanup kfd_dev struct

2022-10-26 Thread Alex Deucher
From: Mukul Joshi 

Cleanup kfd_dev struct by removing ddev and pdev as both
drm_device and pci_dev can be fetched from amdgpu_device.

Signed-off-by: Mukul Joshi 
Tested-by: Amber Lin 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  3 +--
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c  | 12 -
 drivers/gpu/drm/amd/amdkfd/kfd_device.c| 16 +---
 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 29 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |  4 +--
 drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 12 -
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c   |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c  | 25 +--
 9 files changed, 49 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 0561812aa0a4..e2b0f8049b9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -195,7 +195,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
}
 
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
-   adev_to_drm(adev), 
&gpu_resources);
+   &gpu_resources);
 
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 647220a8762d..f50e3ba4d7a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -353,7 +353,6 @@ int kgd2kfd_init(void);
 void kgd2kfd_exit(void);
 struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf);
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
-struct drm_device *ddev,
 const struct kgd2kfd_shared_resources *gpu_resources);
 void kgd2kfd_device_exit(struct kfd_dev *kfd);
 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
@@ -381,7 +380,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
 }
 
 static inline
-bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev,
+bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources 
*gpu_resources)
 {
return false;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 3c771d580098..acb8bc29218b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -2115,8 +2115,8 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev 
*kdev)
struct acpi_table_header *table_header = NULL;
struct acpi_subtable_header *sub_header = NULL;
unsigned long table_end, subtable_len;
-   u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
-   pci_dev_id(kdev->pdev);
+   u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
+   pci_dev_id(kdev->adev->pdev);
u32 bdf;
acpi_status status;
struct acpi_srat_cpu_affinity *cpu;
@@ -2191,7 +2191,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev 
*kdev)
numa_node = 0;
 
if (numa_node != NUMA_NO_NODE)
-   set_dev_node(&kdev->pdev->dev, numa_node);
+   set_dev_node(&kdev->adev->pdev->dev, numa_node);
 }
 #endif
 
@@ -2252,14 +2252,14 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,
sub_type_hdr->proximity_domain_from = proximity_domain;
 
 #ifdef CONFIG_ACPI_NUMA
-   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+   if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
kfd_find_numa_node_in_srat(kdev);
 #endif
 #ifdef CONFIG_NUMA
-   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+   if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
sub_type_hdr->proximity_domain_to = 0;
else
-   sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
+   sub_type_hdr->proximity_domain_to = 
kdev->adev->pdev->dev.numa_node;
 #else
sub_type_hdr->proximity_domain_to = 0;
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index be7a0b5a2dbc..909397fdb7d8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -228,7 +228,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
 {
struct kfd_dev *kfd = NULL;
const struct kfd2kgd_calls *f2g = NULL;
-   struct pci_dev *pdev = adev->pdev;
uint32_t gfx_target_version = 0;
 
switch (adev->asic_type) {
@@ -430,7 +429,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
 
kfd->adev = adev;
kfd_device_

Re: [PATCH v2 1/2] drm/amdkfd: Init the base cu processor id

2022-10-26 Thread Felix Kuehling

On 2022-10-26 05:03, Ma Jun wrote:

Init and save the base cu processor id for later use

Signed-off-by: Ma Jun 
---
  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 20 +---
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 +++
  2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index d25ac9cbe5b2..35d09e29aafb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -41,13 +41,18 @@ static uint32_t gpu_processor_id_low = 0x80001000;
   *@total_cu_count - Total CUs present in the GPU including ones
   *  masked off
   */
-static inline unsigned int get_and_inc_gpu_processor_id(
-   unsigned int total_cu_count)
+static inline void assign_gpu_processor_id(struct kfd_dev *kfd)
  {
-   int current_id = gpu_processor_id_low;
+   struct amdgpu_device *adev = kfd->adev;
+   unsigned int array_count = 0;
+   unsigned int total_cu_count = 0;
+
+   kfd->processor_id_low = gpu_processor_id_low;


This still seems backwards. And I don't think you need it. You can 
access this after the topology has been parsed from the 
kfd_topology_device (dev->node_props.simd_id_base). I see you're using 
it in the next patch in kfd_fill_cache_non_crat_info, which has a 
kfd_topology_device parameter. This should run after parsing the CRAT 
tables, so the info should be available at that time.


Regards,
  Felix



+
+   array_count = adev->gfx.config.max_sh_per_se * 
adev->gfx.config.max_shader_engines;
+   total_cu_count = array_count * adev->gfx.config.max_cu_per_sh;
  
  	gpu_processor_id_low += total_cu_count;

-   return current_id;
  }
  
  /* Static table to describe GPU Cache information */

@@ -2223,7 +2228,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
struct crat_subtype_computeunit *cu;
struct kfd_cu_info cu_info;
int avail_size = *size;
-   uint32_t total_num_of_cu;
int num_of_cache_entries = 0;
int cache_mem_filled = 0;
uint32_t nid = 0;
@@ -2275,8 +2279,10 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
cu->wave_front_size = cu_info.wave_front_size;
cu->array_count = cu_info.num_shader_arrays_per_engine *
cu_info.num_shader_engines;
-   total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
-   cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
+
+   assign_gpu_processor_id(kdev);
+   cu->processor_id_low = kdev->processor_id_low;
+
cu->num_cu_per_array = cu_info.num_cu_per_sh;
cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
cu->num_banks = cu_info.num_shader_engines;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 182eb67edbc5..4c06b233472f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -370,6 +370,9 @@ struct kfd_dev {
  
  	/* Track per device allocated watch points. */

uint32_t alloc_watch_ids;
+
+   /* cu processor id base */
+   unsigned intprocessor_id_low;
  };
  
  struct kfd_ipc_obj;


[PATCH] drm/amd: Fail the suspend if resources can't be evicted

2022-10-26 Thread Mario Limonciello
If a system does not have swap and memory is under 100% usage,
amdgpu will fail to evict resources.  Currently the suspend
carries on proceeding to reset the GPU:

```
[drm] evicting device resources failed
[drm:amdgpu_device_ip_suspend_phase2 [amdgpu]] *ERROR* suspend of IP block 
 failed -12
[drm] free PSP TMR buffer
[TTM] Failed allocating page table
[drm] evicting device resources failed
amdgpu :03:00.0: amdgpu: MODE1 reset
amdgpu :03:00.0: amdgpu: GPU mode1 reset
amdgpu :03:00.0: amdgpu: GPU smu mode1 reset
```

At this point if the suspend actually succeeded I think that amdgpu
would have recovered because the GPU would have power cut off and
restored.  However the kernel fails to continue the suspend from the
memory pressure and amdgpu fails to run the "resume" from the aborted
suspend.

```
ACPI: PM: Preparing to enter system sleep state S3
SLUB: Unable to allocate memory on node -1, gfp=0xdc0(GFP_KERNEL|__GFP_ZERO)
  cache: Acpi-State, object size: 80, buffer size: 80, default order: 0, min 
order: 0
  node 0: slabs: 22, objs: 1122, free: 0
ACPI Error: AE_NO_MEMORY, Could not update object reference count 
(20210730/utdelete-651)

[drm:psp_hw_start [amdgpu]] *ERROR* PSP load kdb failed!
[drm:psp_resume [amdgpu]] *ERROR* PSP resume failed
[drm:amdgpu_device_fw_loading [amdgpu]] *ERROR* resume of IP block  failed 
-62
amdgpu :03:00.0: amdgpu: amdgpu_device_ip_resume failed (-62).
PM: dpm_run_callback(): pci_pm_resume+0x0/0x100 returns -62
amdgpu :03:00.0: PM: failed to resume async: error -62
```

To avoid this series of unfortunate events, fail amdgpu's suspend
when the memory eviction fails.  This will let the system gracefully
recover and the user can try suspend again when the memory pressure
is relieved.

Reported-by: p...@davidak.de
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2223
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6f958603c8cc2..ae10acede495e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4060,15 +4060,18 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
  * at suspend time.
  *
  */
-static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
+static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
 {
+   int ret;
+
/* No need to evict vram on APUs for suspend to ram or s2idle */
if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
-   return;
+   return 0;
 
-   if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))
+   ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
+   if (ret)
DRM_WARN("evicting device resources failed\n");
-
+   return ret;
 }
 
 /*
@@ -4118,7 +4121,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
if (!adev->in_s0ix)
amdgpu_amdkfd_suspend(adev, adev->in_runpm);
 
-   amdgpu_device_evict_resources(adev);
+   r = amdgpu_device_evict_resources(adev);
+   if (r)
+   return r;
 
amdgpu_fence_driver_hw_fini(adev);
 
-- 
2.25.1



RE: [PATCH 2/5] drm/amdgpu: stop resubmitting jobs for bare metal reset

2022-10-26 Thread Liu, Shaoyun
[AMD Official Use Only - General]

SRIOV already has its own reset routine, amdgpu_device_reset_sriov; we try
to put the SRIOV-specific sequence inside this function. For the rest
(re-submitting, etc.) we should try to have the same behavior as bare metal.
Can we just not do the re-submission for all kinds of reset, since the kernel
already signals the reset event to user level (at least for the compute stack)?

Regard
Sshaoyun.liu

-Original Message-
From: Koenig, Christian 
Sent: Wednesday, October 26, 2022 1:27 PM
To: Liu, Shaoyun ; Tuikov, Luben ; 
Prosyak, Vitaly ; Deucher, Alexander 
; daniel.vet...@ffwll.ch; 
amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
Subject: Re: [PATCH 2/5] drm/amdgpu: stop resubmitting jobs for bare metal reset

The problem is that this re-submitting is currently an integral part of how 
SRIOV works.

The host can send a function level reset request to the clients when it sees 
that some schedule switching didn't worked as expected and in this case (and 
only this case) the hardware has actually never started to even work on the 
IBs. So the re-submission is actually save from this side.

But in general you are right, the sw side is just completely broken because we 
came up with a bunch of rather strict rules for the dma_fence implementation 
(and those rules are perfectly valid and necessary).

Regards,
Christian.

Am 26.10.22 um 18:10 schrieb Liu, Shaoyun:
> [AMD Official Use Only - General]
>
> The  user space  shouldn't care about  SRIOV or not ,  I don't think we need 
> to keep the re-submission for SRIOV as well.  The reset from SRIOV could 
> trigger the  host do a whole GPU reset which will have the same issue as bare 
> metal.
>
> Regards
> Shaoyun.liu
>
> -Original Message-
> From: amd-gfx  On Behalf Of
> Christian König
> Sent: Wednesday, October 26, 2022 11:36 AM
> To: Tuikov, Luben ; Prosyak, Vitaly
> ; Deucher, Alexander
> ; daniel.vet...@ffwll.ch;
> amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
> Cc: Koenig, Christian 
> Subject: [PATCH 2/5] drm/amdgpu: stop resubmitting jobs for bare metal
> reset
>
> Re-submitting IBs by the kernel has many problems because prerequisite 
> state is not automatically re-created as well. In other words neither binary 
> semaphores nor things like ring buffer pointers are in the state they should 
> be when the hardware starts to work on the IBs again.
>
> Additional to that even after more than 5 years of developing this feature it 
> is still not stable and we have massively problems getting the reference 
> counts right.
>
> As discussed with user space developers this behavior is not helpful in the 
> first place. For graphics and multimedia workloads it makes much more sense 
> to either completely re-create the context or at least re-submitting the IBs 
> from userspace.
>
> For compute use cases re-submitting is also not very helpful since userspace 
> must rely on the accuracy of the result.
>
> Because of this we stop this practice and instead just properly note that the 
> fence submission was canceled. The only use case we keep the re-submission 
> for now is SRIOV and function level resets.
>
> Signed-off-by: Christian König 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++-
>   1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index d4584e577b51..39e94feba1ac 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -5288,7 +5288,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
>  continue;
>
>  /* No point to resubmit jobs if we didn't HW reset*/
> -   if (!tmp_adev->asic_reset_res && !job_signaled)
> +   if (!tmp_adev->asic_reset_res && !job_signaled &&
> +   amdgpu_sriov_vf(tmp_adev))
>
> drm_sched_resubmit_jobs(&ring->sched);
>
>  drm_sched_start(&ring->sched,
> !tmp_adev->asic_reset_res);
> --
> 2.25.1
>



Re: [RESEND PATCH] drm/amd/display: prevent memory leak

2022-10-26 Thread Rodrigo Siqueira




On 10/26/22 06:01, gehao...@163.com wrote:

From: gehao 

In dce6(0,1,4)_create_resource_pool and dce8(0,1)_create_resource_pool
the allocated memory should be released if construct pool fails.

Signed-off-by: gehao 
---
  drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c | 3 +++
  drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c | 2 ++
  2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
index fc6aa098bda0..8db9f7514466 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
@@ -1128,6 +1128,7 @@ struct resource_pool *dce60_create_resource_pool(
if (dce60_construct(num_virtual_links, dc, pool))
return &pool->base;
  
+	kfree(pool);

BREAK_TO_DEBUGGER();
return NULL;
  }
@@ -1325,6 +1326,7 @@ struct resource_pool *dce61_create_resource_pool(
if (dce61_construct(num_virtual_links, dc, pool))
return &pool->base;
  
+	kfree(pool);

BREAK_TO_DEBUGGER();
return NULL;
  }
@@ -1518,6 +1520,7 @@ struct resource_pool *dce64_create_resource_pool(
if (dce64_construct(num_virtual_links, dc, pool))
return &pool->base;
  
+	kfree(pool);

BREAK_TO_DEBUGGER();
return NULL;
  }
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index b28025960050..5825e6f412bd 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -1137,6 +1137,7 @@ struct resource_pool *dce80_create_resource_pool(
if (dce80_construct(num_virtual_links, dc, pool))
return &pool->base;
  
+	kfree(pool);

BREAK_TO_DEBUGGER();
return NULL;
  }
@@ -1336,6 +1337,7 @@ struct resource_pool *dce81_create_resource_pool(
if (dce81_construct(num_virtual_links, dc, pool))
return &pool->base;
  
+	kfree(pool);

BREAK_TO_DEBUGGER();
return NULL;
  }


LGTM,

Reviewed-by: Rodrigo Siqueira 

Applied to amd-staging-drm-next.

Thanks
Siqueira


RE: [PATCH] ACPI: x86: s2idle: Revmoe unused variable hid

2022-10-26 Thread Limonciello, Mario
[Public]



> -Original Message-
> From: Hung, Alex 
> Sent: Wednesday, October 26, 2022 12:34
> To: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> ; Limonciello, Mario
> 
> Cc: Siqueira, Rodrigo ; Hung, Alex
> 
> Subject: [PATCH] ACPI: x86: s2idle: Revmoe unused variable hid
^ 
Typo here

> This unused variable can cause build failures with allmodconfig, and
> therefore it should be removed.
> 
> Note: this does not applied to mainline (included in 100a57379380) but
> to amd-staging-drm-next only.
> 
> Fixes: 6648f8587530 ("ACPI: x86: s2idle: Move _HID handling for AMD
> systems into structures")
> Signed-off-by: Alex Hung 

For the purpose of amd-staging-drm-next, should we perhaps revert the
commit and sync back to cherry-picks that landed from upstream?

The reason for these commits to be backported in the first place was for
s2idle testing on amd-staging-drm-next, they're dependencies for some
of the newer programs.

> ---
>  drivers/acpi/x86/s2idle.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
> index bfe611dc08cc..3d0cf0ace4a9 100644
> --- a/drivers/acpi/x86/s2idle.c
> +++ b/drivers/acpi/x86/s2idle.c
> @@ -475,7 +475,6 @@ static int lps0_device_attach(struct acpi_device
> *adev,
> 
> ACPI_LPS0_DSM_UUID_MICROSOFT, 0,
> 
> &lps0_dsm_guid_microsoft);
>   if (acpi_s2idle_vendor_amd()) {
> - const char *hid = acpi_device_hid(adev);
>   static const struct acpi_device_id *dev_id;
>   const struct amd_lps0_hid_device_data *data;
> 
> --
> 2.38.1


[PATCH] ACPI: x86: s2idle: Revmoe unused variable hid

2022-10-26 Thread Alex Hung
This unused variable can cause build failures with allmodconfig, and
therefore it should be removed.

Note: this does not applied to mainline (included in 100a57379380) but
to amd-staging-drm-next only.

Fixes: 6648f8587530 ("ACPI: x86: s2idle: Move _HID handling for AMD systems 
into structures")
Signed-off-by: Alex Hung 
---
 drivers/acpi/x86/s2idle.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index bfe611dc08cc..3d0cf0ace4a9 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -475,7 +475,6 @@ static int lps0_device_attach(struct acpi_device *adev,

ACPI_LPS0_DSM_UUID_MICROSOFT, 0,
&lps0_dsm_guid_microsoft);
if (acpi_s2idle_vendor_amd()) {
-   const char *hid = acpi_device_hid(adev);
static const struct acpi_device_id *dev_id;
const struct amd_lps0_hid_device_data *data;
 
-- 
2.38.1



Re: [PATCH 2/5] drm/amdgpu: stop resubmitting jobs for bare metal reset

2022-10-26 Thread Christian König
The problem is that this re-submitting is currently an integral part of 
how SRIOV works.


The host can send a function level reset request to the clients when it 
sees that some schedule switching didn't work as expected and in this 
case (and only this case) the hardware has actually never started to 
even work on the IBs. So the re-submission is actually safe from this side.


But in general you are right, the sw side is just completely broken 
because we came up with a bunch of rather strict rules for the dma_fence 
implementation (and those rules are perfectly valid and necessary).


Regards,
Christian.

Am 26.10.22 um 18:10 schrieb Liu, Shaoyun:

[AMD Official Use Only - General]

The user space shouldn't care about SRIOV or not; I don't think we need to 
keep the re-submission for SRIOV as well.  The reset from SRIOV could trigger 
the host to do a whole GPU reset, which will have the same issue as bare metal.

Regards
Shaoyun.liu

-Original Message-
From: amd-gfx  On Behalf Of Christian 
König
Sent: Wednesday, October 26, 2022 11:36 AM
To: Tuikov, Luben ; Prosyak, Vitaly ; 
Deucher, Alexander ; daniel.vet...@ffwll.ch; 
amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
Cc: Koenig, Christian 
Subject: [PATCH 2/5] drm/amdgpu: stop resubmitting jobs for bare metal reset

Re-submitting IBs by the kernel has many problems because pre- requisite state 
is not automatically re-created as well. In other words neither binary 
semaphores nor things like ring buffer pointers are in the state they should be 
when the hardware starts to work on the IBs again.

In addition to that, even after more than 5 years of developing this feature it 
is still not stable and we have massive problems getting the reference counts 
right.

As discussed with user space developers this behavior is not helpful in the 
first place. For graphics and multimedia workloads it makes much more sense to 
either completely re-create the context or at least re-submitting the IBs from 
userspace.

For compute use cases re-submitting is also not very helpful since userspace 
must rely on the accuracy of the result.

Because of this we stop this practice and instead just properly note that the 
fence submission was canceled. The only use case we keep the re-submission for 
now is SRIOV and function level resets.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d4584e577b51..39e94feba1ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5288,7 +5288,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 continue;

 /* No point to resubmit jobs if we didn't HW reset*/
-   if (!tmp_adev->asic_reset_res && !job_signaled)
+   if (!tmp_adev->asic_reset_res && !job_signaled &&
+   amdgpu_sriov_vf(tmp_adev))
 drm_sched_resubmit_jobs(&ring->sched);

 drm_sched_start(&ring->sched, 
!tmp_adev->asic_reset_res);
--
2.25.1





Re: [PATCH] drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm for vega10 properly"

2022-10-26 Thread Ernst Sjöstrand
This stuff I assume:

https://github.com/RadeonOpenCompute/rocm_smi_lib/tree/master/tests/rocm_smi_test

https://docs.amd.com/bundle/ROCm-System-Management-Interface-Guide/page/ROCm-System-Managment.html#building-tests

Regards

Den ons 26 okt. 2022 kl 17:43 skrev Yury Zhuravlev :

>
>
> On Wed, Oct 26, 2022 at 11:38 PM Alex Deucher 
> wrote:
>
>> On Wed, Oct 26, 2022 at 5:07 AM Yury Zhuravlev 
>> wrote:
>> >
>> > Hello Asher,
>> >
>> > Thanks for the information, is it open-source tests? Can I reproduce it?
>> >
>> > Also, it seems like Radeon Instinct MI25 was released before Radeon RX
>> Vega, is it possible that they have different PowerPlay subsystems?
>>
>> Same silicon, same powerplay implementation.
>>
> Okay, what it means? Can I know what exactly you are tested by
> "rsmitstReadWrite.FanReadWrite"?
> I am working now with my patch, and I definitely can read and write fan
> speed by PWM.
> How can I help to solve this problem?
>
> Regards,
>
>


[linux-next:master] BUILD SUCCESS WITH WARNING 60eac8672b5b6061ec07499c0f1b79f6d94311ce

2022-10-26 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 60eac8672b5b6061ec07499c0f1b79f6d94311ce  Add linux-next specific 
files for 20221026

Warning reports:

https://lore.kernel.org/linux-mm/202210090954.ptr6m6rj-...@intel.com
https://lore.kernel.org/linux-mm/202210110857.9s0txvnn-...@intel.com
https://lore.kernel.org/linux-mm/202210111318.mbufyhps-...@intel.com
https://lore.kernel.org/linux-mm/202210240729.zs46cfzo-...@intel.com
https://lore.kernel.org/linux-mm/202210251946.et92yahg-...@intel.com
https://lore.kernel.org/linux-mm/202210261404.b6ulzg7h-...@intel.com
https://lore.kernel.org/llvm/202210060148.uxbijocs-...@intel.com
https://lore.kernel.org/llvm/202210261759.mq8vlhnw-...@intel.com

Warning: (recently discovered and may have been fixed)

lib/zstd/compress/huf_compress.c:460 HUF_getIndex() warn: the 
'RANK_POSITION_LOG_BUCKETS_BEGIN' macro might need parens
security/apparmor/policy_unpack_test.c:149 
policy_unpack_test_unpack_array_with_null_name() error: uninitialized symbol 
'array_size'.
security/apparmor/policy_unpack_test.c:164 
policy_unpack_test_unpack_array_with_name() error: uninitialized symbol 
'array_size'.

Unverified Warning (likely false positive, please contact us if interested):

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:5230:16: warning: no 
previous prototype for function 'wake_up_aux_channel' [-Wmissing-prototypes]
drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c:207:24: sparse: sparse: 
symbol 'test_callbacks' was not declared. Should it be static?
drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c:219:21: sparse: sparse: 
symbol 'test_vctrl' was not declared. Should it be static?
drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c:245:23: sparse: sparse: 
Using plain integer as NULL pointer
lib/zstd/decompress/zstd_decompress_block.c:1009 ZSTD_execSequence() warn: 
inconsistent indenting
lib/zstd/decompress/zstd_decompress_block.c:894 ZSTD_execSequenceEnd() warn: 
inconsistent indenting
lib/zstd/decompress/zstd_decompress_block.c:942 
ZSTD_execSequenceEndSplitLitBuffer() warn: inconsistent indenting
lib/zstd/decompress/zstd_decompress_internal.h:206 ZSTD_DCtx_get_bmi2() warn: 
inconsistent indenting
mm/khugepaged.c:1729:3: warning: variable 'index' is used uninitialized 
whenever 'if' condition is true [-Wsometimes-uninitialized]
mm/khugepaged.c:1729:3: warning: variable 'nr' is used uninitialized whenever 
'if' condition is true [-Wsometimes-uninitialized]
mm/khugepaged.c:1729:7: warning: variable 'index' is used uninitialized 
whenever 'if' condition is true [-Wsometimes-uninitialized]
mm/khugepaged.c:1729:7: warning: variable 'nr' is used uninitialized whenever 
'if' condition is true [-Wsometimes-uninitialized]

Warning ids grouped by kconfigs:

gcc_recent_errors
|-- arc-randconfig-s051-20221023
|   |-- 
fs-ext4-fast_commit.c:sparse:sparse:incorrect-type-in-argument-(different-base-types)-expected-int-priv1-got-restricted-__le16-addressable-usertype-fc_len
|   |-- 
fs-ext4-fast_commit.c:sparse:sparse:incorrect-type-in-argument-(different-base-types)-expected-int-tag-got-restricted-__le16-addressable-usertype-fc_tag
|   |-- 
fs-ext4-fast_commit.c:sparse:sparse:incorrect-type-in-argument-(different-base-types)-expected-unsigned-short-usertype-tag-got-restricted-__le16-addressable-usertype-fc_tag
|   |-- 
fs-ext4-fast_commit.c:sparse:sparse:incorrect-type-in-assignment-(different-base-types)-expected-restricted-__le16-usertype-fc_len-got-unsigned-short-usertype
|   |-- 
fs-ext4-fast_commit.c:sparse:sparse:incorrect-type-in-assignment-(different-base-types)-expected-restricted-__le16-usertype-fc_tag-got-unsigned-short-usertype
|   |-- 
fs-ext4-fast_commit.c:sparse:sparse:incorrect-type-in-initializer-(different-base-types)-expected-int-tag-got-restricted-__le16-usertype-fc_tag
|   `-- 
fs-ext4-fast_commit.c:sparse:sparse:restricted-__le16-degrades-to-integer
|-- i386-randconfig-m021
|   |-- 
arch-x86-boot-compressed-..-..-..-..-lib-zstd-decompress-zstd_decompress_block.c-ZSTD_execSequence()-warn:inconsistent-indenting
|   |-- 
arch-x86-boot-compressed-..-..-..-..-lib-zstd-decompress-zstd_decompress_block.c-ZSTD_execSequenceEnd()-warn:inconsistent-indenting
|   |-- 
arch-x86-boot-compressed-..-..-..-..-lib-zstd-decompress-zstd_decompress_block.c-ZSTD_execSequenceEndSplitLitBuffer()-warn:inconsistent-indenting
|   |-- 
arch-x86-boot-compressed-..-..-..-..-lib-zstd-decompress-zstd_decompress_internal.h-ZSTD_DCtx_get_bmi2()-warn:inconsistent-indenting
|   |-- 
lib-zstd-compress-huf_compress.c-HUF_getIndex()-warn:the-RANK_POSITION_LOG_BUCKETS_BEGIN-macro-might-need-parens
|   |-- 
lib-zstd-decompress-zstd_decompress_block.c-ZSTD_execSequence()-warn:inconsistent-indenting
|   |-- 
lib-zstd-decompress-zstd_decompress_block.c-ZSTD_execSequenceEnd()-warn:inconsistent-inden

Re: [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder

2022-10-26 Thread Rodrigo Siqueira




On 10/26/22 07:13, Ao Zhong wrote:

pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
these two operations in dcn32/dcn32_resource.c still need to use FPU,
This will cause compilation to fail on ARM64 platforms because
-mgeneral-regs-only is enabled by default to disable the hardware FPU.
Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
in dcn32_fpu.c, and move above two operations into this function.

Acked-by: Christian König 
Signed-off-by: Ao Zhong 
---
  drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
  drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 8 
  drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  | 3 +++
  3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
timing = &pipe->stream->timing;
  
  		pipes[pipe_cnt].pipe.src.gpuvm = true;

-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+   DC_FP_START();
+   dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+   DC_FP_END();
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // 
according to spreadsheet
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, 
struct clk_bw_params *bw_pa
}
  }
  
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,

+ int pipe_cnt)
+{
+   dc_assert_fp_enabled();
+
+   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int 
dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
  
  void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
  
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,

+ int pipe_cnt);
+
  #endif


Hi Ao,

First of all, thanks a lot for your patchset.

For both patches:

Reviewed-by: Rodrigo Siqueira 

And I also applied them to amd-staging-drm-next.

Btw, if you are using git-send-email for sending patches, I recommend 
the following options:


git send-email --annotate --cover-letter --thread --no-chain-reply-to 
--to="EMAILS" --cc="mail...@list.com" 


Always add a cover letter, it makes it easier to follow the patchset, 
and you can also describe each change in the cover letter.


When you send that other patch enabling ARM64, please add as many 
details as possible in the cover letter. Keep in mind that we have been 
working for isolating those FPU codes in a way that we do not regress 
any of our ASICs, which means that every change was well-tested on 
multiple devices. Anyway, maybe you can refer to this cover letter to 
write down the commit message:


https://patchwork.freedesktop.org/series/93042/

Finally, do you have a use case for this change? I mean, ARM64 + AMD dGPU.

Thanks again!
Siqueira



RE: [PATCH 2/5] drm/amdgpu: stop resubmitting jobs for bare metal reset

2022-10-26 Thread Liu, Shaoyun
[AMD Official Use Only - General]

The user space shouldn't care about SRIOV or not; I don't think we need to 
keep the re-submission for SRIOV as well.  The reset from SRIOV could trigger 
the host to do a whole GPU reset, which will have the same issue as bare metal.

Regards
Shaoyun.liu

-Original Message-
From: amd-gfx  On Behalf Of Christian 
König
Sent: Wednesday, October 26, 2022 11:36 AM
To: Tuikov, Luben ; Prosyak, Vitaly 
; Deucher, Alexander ; 
daniel.vet...@ffwll.ch; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Cc: Koenig, Christian 
Subject: [PATCH 2/5] drm/amdgpu: stop resubmitting jobs for bare metal reset

Re-submitting IBs by the kernel has many problems because pre- requisite state 
is not automatically re-created as well. In other words neither binary 
semaphores nor things like ring buffer pointers are in the state they should be 
when the hardware starts to work on the IBs again.

In addition to that, even after more than 5 years of developing this feature it 
is still not stable and we have massive problems getting the reference counts 
right.

As discussed with user space developers this behavior is not helpful in the 
first place. For graphics and multimedia workloads it makes much more sense to 
either completely re-create the context or at least re-submitting the IBs from 
userspace.

For compute use cases re-submitting is also not very helpful since userspace 
must rely on the accuracy of the result.

Because of this we stop this practice and instead just properly note that the 
fence submission was canceled. The only use case we keep the re-submission for 
now is SRIOV and function level resets.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d4584e577b51..39e94feba1ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5288,7 +5288,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
continue;

/* No point to resubmit jobs if we didn't HW reset*/
-   if (!tmp_adev->asic_reset_res && !job_signaled)
+   if (!tmp_adev->asic_reset_res && !job_signaled &&
+   amdgpu_sriov_vf(tmp_adev))
drm_sched_resubmit_jobs(&ring->sched);

drm_sched_start(&ring->sched, 
!tmp_adev->asic_reset_res);
--
2.25.1



Re: [PATCH] drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm for vega10 properly"

2022-10-26 Thread Yury Zhuravlev
On Wed, Oct 26, 2022 at 11:38 PM Alex Deucher  wrote:

> On Wed, Oct 26, 2022 at 5:07 AM Yury Zhuravlev  wrote:
> >
> > Hello Asher,
> >
> > Thanks for the information, is it open-source tests? Can I reproduce it?
> >
> > Also, it seems like Radeon Instinct MI25 was released before Radeon RX
> Vega, is it possible that they have different PowerPlay subsystems?
>
> Same silicon, same powerplay implementation.
>
Okay, what it means? Can I know what exactly you are tested by
"rsmitstReadWrite.FanReadWrite"?
I am working now with my patch, and I definitely can read and write fan
speed by PWM.
How can I help to solve this problem?

Regards,


[PATCH 2/5] drm/amdgpu: stop resubmitting jobs for bare metal reset

2022-10-26 Thread Christian König
Re-submitting IBs by the kernel has many problems because pre-
requisite state is not automatically re-created as well. In
other words neither binary semaphores nor things like ring
buffer pointers are in the state they should be when the
hardware starts to work on the IBs again.

In addition to that, even after more than 5 years of
developing this feature it is still not stable and we have
massive problems getting the reference counts right.

As discussed with user space developers this behavior is not
helpful in the first place. For graphics and multimedia
workloads it makes much more sense to either completely
re-create the context or at least re-submitting the IBs
from userspace.

For compute use cases re-submitting is also not very
helpful since userspace must rely on the accuracy of
the result.

Because of this we stop this practice and instead just
properly note that the fence submission was canceled. The
only use case we keep the re-submission for now is SRIOV
and function level resets.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d4584e577b51..39e94feba1ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5288,7 +5288,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
continue;
 
/* No point to resubmit jobs if we didn't HW reset*/
-   if (!tmp_adev->asic_reset_res && !job_signaled)
+   if (!tmp_adev->asic_reset_res && !job_signaled &&
+   amdgpu_sriov_vf(tmp_adev))
drm_sched_resubmit_jobs(&ring->sched);
 
drm_sched_start(&ring->sched, 
!tmp_adev->asic_reset_res);
-- 
2.25.1



[PATCH 3/5] drm/amdgpu: stop resubmitting jobs in amdgpu_pci_resume

2022-10-26 Thread Christian König
As far as I can see this is not really recoverable since a PCI reset
clears VRAM.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 39e94feba1ac..b1827e804363 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5745,8 +5745,6 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
if (!ring || !ring->sched.thread)
continue;
 
-
-   drm_sched_resubmit_jobs(&ring->sched);
drm_sched_start(&ring->sched, true);
}
 
-- 
2.25.1



[PATCH 5/5] drm/scheduler: deprecate drm_sched_resubmit_jobs

2022-10-26 Thread Christian König
This interface is not working as it should.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/scheduler/sched_main.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index bb28f31bff6f..ecd4afab4adb 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -489,10 +489,21 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, 
bool full_recovery)
 EXPORT_SYMBOL(drm_sched_start);
 
 /**
- * drm_sched_resubmit_jobs - helper to relaunch jobs from the pending list
+ * drm_sched_resubmit_jobs - Deprecated, don't use in new code!
  *
  * @sched: scheduler instance
  *
+ * Re-submitting jobs was a concept AMD came up as cheap way to implement
+ * recovery after a job timeout.
+ *
+ * This turned out to be not working very well. First of all there are many
+ * problem with the dma_fence implementation and requirements. Either the
+ * implementation is risking deadlocks with core memory management or violating
+ * documented implementation details of the dma_fence object.
+ *
+ * Drivers can still save and restore their state for recovery operations, but
+ * we shouldn't make this a general scheduler feature around the dma_fence
+ * interface.
  */
 void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
 {
-- 
2.25.1



[PATCH 4/5] drm/scheduler: cleanup define

2022-10-26 Thread Christian König
Remove some not implemented function define

Signed-off-by: Christian König 
---
 include/drm/gpu_scheduler.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index c564be29c5ae..d646ff2fd557 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -492,7 +492,6 @@ void drm_sched_increase_karma(struct drm_sched_job *bad);
 bool drm_sched_dependency_optimized(struct dma_fence* fence,
struct drm_sched_entity *entity);
 void drm_sched_fault(struct drm_gpu_scheduler *sched);
-void drm_sched_job_kickout(struct drm_sched_job *s_job);
 
 void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
 struct drm_sched_entity *entity);
-- 
2.25.1



[PATCH 1/5] drm/amd/amdgpu revert "implement tdr advanced mode"

2022-10-26 Thread Christian König
This reverts commit e6c6338f393b74ac0b303d567bb918b44ae7ad75.

This feature basically re-submits one job after another to
figure out which one was the one causing a hang.

This is obviously incompatible with gang-submit which requires
that multiple jobs run at the same time. It's also absolutely
not helpful to crash the hardware multiple times if a clean
recovery is desired.

For testing and debugging environments we should rather disable
recovery altogether to be able to inspect the state with a hw
debugger.

Additional to that the sw implementation is clearly buggy and causes
reference count issues for the hardware fence.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 103 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|   2 +-
 drivers/gpu/drm/scheduler/sched_main.c |  58 ++--
 include/drm/gpu_scheduler.h|   3 -
 4 files changed, 10 insertions(+), 156 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6f958603c8cc..d4584e577b51 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5070,94 +5070,6 @@ static int amdgpu_device_suspend_display_audio(struct 
amdgpu_device *adev)
return 0;
 }
 
-static void amdgpu_device_recheck_guilty_jobs(
-   struct amdgpu_device *adev, struct list_head *device_list_handle,
-   struct amdgpu_reset_context *reset_context)
-{
-   int i, r = 0;
-
-   for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-   struct amdgpu_ring *ring = adev->rings[i];
-   int ret = 0;
-   struct drm_sched_job *s_job;
-
-   if (!ring || !ring->sched.thread)
-   continue;
-
-   s_job = list_first_entry_or_null(&ring->sched.pending_list,
-   struct drm_sched_job, list);
-   if (s_job == NULL)
-   continue;
-
-   /* clear job's guilty and depend the folowing step to decide 
the real one */
-   drm_sched_reset_karma(s_job);
-   drm_sched_resubmit_jobs_ext(&ring->sched, 1);
-
-   if (!s_job->s_fence->parent) {
-   DRM_WARN("Failed to get a HW fence for job!");
-   continue;
-   }
-
-   ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, 
ring->sched.timeout);
-   if (ret == 0) { /* timeout */
-   DRM_ERROR("Found the real bad job! ring:%s, 
job_id:%llx\n",
-   ring->sched.name, s_job->id);
-
-
-   amdgpu_fence_driver_isr_toggle(adev, true);
-
-   /* Clear this failed job from fence array */
-   amdgpu_fence_driver_clear_job_fences(ring);
-
-   amdgpu_fence_driver_isr_toggle(adev, false);
-
-   /* Since the job won't signal and we go for
-* another resubmit drop this parent pointer
-*/
-   dma_fence_put(s_job->s_fence->parent);
-   s_job->s_fence->parent = NULL;
-
-   /* set guilty */
-   drm_sched_increase_karma(s_job);
-   amdgpu_reset_prepare_hwcontext(adev, reset_context);
-retry:
-   /* do hw reset */
-   if (amdgpu_sriov_vf(adev)) {
-   amdgpu_virt_fini_data_exchange(adev);
-   r = amdgpu_device_reset_sriov(adev, false);
-   if (r)
-   adev->asic_reset_res = r;
-   } else {
-   clear_bit(AMDGPU_SKIP_HW_RESET,
- &reset_context->flags);
-   r = amdgpu_do_asic_reset(device_list_handle,
-reset_context);
-   if (r && r == -EAGAIN)
-   goto retry;
-   }
-
-   /*
-* add reset counter so that the following
-* resubmitted job could flush vmid
-*/
-   atomic_inc(&adev->gpu_reset_counter);
-   continue;
-   }
-
-   /* got the hw fence, signal finished fence */
-   atomic_dec(ring->sched.score);
-   dma_fence_get(&s_job->s_fence->finished);
-   dma_fence_signal(&s_job->s_fence->finished);
-   dma_fence_put(&s_job->s_fence->finished);
-
-   /* remove node from list and free the job */
-   spin_lock(&ring->sched.job_list_lock);
-   list_del_init(&s_job->list);
-   s

Re: [PATCH] drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm for vega10 properly"

2022-10-26 Thread Alex Deucher
On Wed, Oct 26, 2022 at 5:07 AM Yury Zhuravlev  wrote:
>
> Hello Asher,
>
> Thanks for the information, is it open-source tests? Can I reproduce it?
>
> Also, it seems like Radeon Instinct MI25 was released before Radeon RX Vega, 
> is it possible that they have different PowerPlay subsystems?

Same silicon, same powerplay implementation.

Alex

> On my Vega56, all these registers from vega20 are working very well.
> Seems like we should split implementation somehow.
>
> Regards,
>
> On Wed, Oct 26, 2022 at 3:51 PM Song, Asher  wrote:
>>
>> [AMD Official Use Only - General]
>>
>>
>>
>> +@Meng, Xiaohu
>>
>>
>>
>> Hi Zhuravlev,
>>
>>
>>
>> The information of test card is as following:
>>
>> Lspci name: [AMD/ATI] Vega 10 [Radeon Instinct MI25 MxGPU] (rev 05)
>>
>> Firmware: ATOM BIOS: 113-D0531800-C04
>>
>>
>>
>> When run test scripts compute-rocm-*/utils/run_rsmitst.sh, the below test 
>> cases fail.
>>
>> [  FAILED  ] 4 tests, listed below:
>>
>> [  FAILED  ] rsmitstReadOnly.TestOverdriveRead
>>
>> [  FAILED  ] rsmitstReadWrite.FanReadWrite
>>
>> [  FAILED  ] rsmitstReadWrite.TestOverdriveReadWrite
>>
>> [  FAILED  ] rsmitstReadWrite.TestPowerCapReadWrite
>>
>>
>>
>> Best Regards,
>>
>> Asher
>>
>> From: Yury Zhuravlev 
>> Sent: Wednesday, October 26, 2022 9:31 AM
>> To: Song, Asher 
>> Cc: Deucher, Alexander ; Chen, Guchun 
>> ; Quan, Evan ; 
>> amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm 
>> for vega10 properly"
>>
>>
>>
>> Hello,
>> Can you write here your card name and firmware version? Without this patch, 
>> my fan sensors are broken, and it's sensitive to my pc case with water 
>> cooling.
>> My card is:  Sapphire Pulse Radeon RX Vega 56  vd6546 SA
>> lspci name: [AMD/ATI] Vega 10 XL/XT [Radeon RX Vega 56/64] (rev c3)
>> Firmware: ATOM BIOS: 113-376XL-UO2
>> This patch is 100% working correct on my machine, and I tested it last 2 
>> months.
>>
>> Regards,
>>
>>
>>
>> On Fri, Oct 14, 2022 at 1:15 PM Asher Song  wrote:
>>
>> This reverts commit fe01cb24b81c0091d7e5668874d51ce913e44a97.
>>
>> Unfortunately, that commit causes fan monitors can't be read and written
>> properly.
>>
>> Signed-off-by: Asher Song 
>> ---
>>  .../amd/pm/powerplay/hwmgr/vega10_thermal.c   | 25 ++-
>>  1 file changed, 13 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c 
>> b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
>> index 190af79f3236..dad3e3741a4e 100644
>> --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
>> +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
>> @@ -67,21 +67,22 @@ int vega10_fan_ctrl_get_fan_speed_info(struct pp_hwmgr 
>> *hwmgr,
>>  int vega10_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr,
>> uint32_t *speed)
>>  {
>> -   struct amdgpu_device *adev = hwmgr->adev;
>> -   uint32_t duty100, duty;
>> -   uint64_t tmp64;
>> +   uint32_t current_rpm;
>> +   uint32_t percent = 0;
>>
>> -   duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1),
>> -   CG_FDO_CTRL1, FMAX_DUTY100);
>> -   duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS),
>> -   CG_THERMAL_STATUS, FDO_PWM_DUTY);
>> +   if (hwmgr->thermal_controller.fanInfo.bNoFan)
>> +   return 0;
>>
>> -   if (!duty100)
>> -   return -EINVAL;
>> +   if (vega10_get_current_rpm(hwmgr, ¤t_rpm))
>> +   return -1;
>> +
>> +   if (hwmgr->thermal_controller.
>> +   advanceFanControlParameters.usMaxFanRPM != 0)
>> +   percent = current_rpm * 255 /
>> +   hwmgr->thermal_controller.
>> +   advanceFanControlParameters.usMaxFanRPM;
>>
>> -   tmp64 = (uint64_t)duty * 255;
>> -   do_div(tmp64, duty100);
>> -   *speed = MIN((uint32_t)tmp64, 255);
>> +   *speed = MIN(percent, 255);
>>
>> return 0;
>>  }
>> --
>> 2.25.1


Re: [PATCH v5 02/31] drm/i915: Don't register backlight when another backlight should be used (v2)

2022-10-26 Thread Hans de Goede
Hi,

On 10/26/22 01:40, Matthew Garrett wrote:
> On Wed, Oct 26, 2022 at 01:27:25AM +0200, Hans de Goede wrote:
> 
>> this code should actually set the ACPI_VIDEO_BACKLIGHT flag:
>> drivers/acpi/scan.c:
>>
>> static acpi_status
>> acpi_backlight_cap_match(acpi_handle handle, u32 level, void *context,
>>   void **return_value)
>> {
>> long *cap = context;
>>
>> if (acpi_has_method(handle, "_BCM") &&
>> acpi_has_method(handle, "_BCL")) {
>> acpi_handle_debug(handle, "Found generic backlight 
>> support\n");
>> *cap |= ACPI_VIDEO_BACKLIGHT;
>> /* We have backlight support, no need to scan further */
>> return AE_CTRL_TERMINATE;
>> }
>> return 0;
>> }
> 
> Ah, yeah, my local tree no longer matches the upstream behaviour because 
> I've hacked the EC firmware to remove the backlight trigger because it 
> had an extremely poor brightness curve and also automatically changed it 
> on AC events - as a result I removed the backlight code from the DSDT 
> and just fell back to the native control. Like I said I'm a long way 
> from the normal setup, but this did previously work.

Ok, so this is a local customization to what is already a custom BIOS
for a custom motherboard. There is a lot of custom in that sentence and
TBH at some point things might become too custom for them to be expected
to work OOTB.

Note that you can always just override the choses made by the heuristisc/
quirks on the kernel commandline by adding:

acpi_backlight=native   (I think you want this one?)

or if you want the old thinkpad_acpi module vendor/EC interface:

acpi_backlight=vendor

Asking you to pass this on the commandline does not seem like a huge
stretch given the large amount of hw/firmware customization you have done ?

> The "right" logic here seems pretty simple: if ACPI backlight control is 
> expected to work, use it. If it isn't, but there's a vendor interface, 
> use it. If there's no vendor interface, use the native interface.

I'm afraid things are not that simple. I assume that with
"if ACPI backlight control is expected to work" you mean don't
use ACPI backlight control when (acpi_osi_is_win8() && native_available)
evaluates to true because it is known to be broken on some of
those systems because Windows 8 stopped using it ?

Unfortunately something similar applies to vendor interfaces.
When Windows XP started using (and mandating for certification
IIRC) ACPI backlight control, vendors still kept their own
vendor specific EC/smbios/ACPI/WMI backlight interfaces around for
a long long time, except they were often no longer tested.

So basically we have 3 major backlight control methods:

1. native GPU backlight control, which sometimes does not work
on older laptops because the backlight is connected to the EC
rather than the GPU there, yet often still enabled in the
video-bios-tables so the GPU drivers will still try to use it.

2. ACPI -> known to be always present on recent Windows laptops
because mandated by the hardware certification requirements
(even on Windows 8+), but regularly broken on Windows 8+ because
their backlight control was moved from the core-os to the GPU
drivers and those typically use the native method.

3. Vendor specific EC/smbios/ACPI/WMI interfaces which work
on older laptops, but are often present on newer laptops
despite them no longer working and to get working backlight
control either 1. or 2. should be used.

So basically none of the 3 main backlight control methods can
be trusted even if they are present, which is why we need to have
a combination of heuristics + quirks.

And I have been working on moving all this into a central
place in drivers/acpi/video_detect.c because having
the heuristics + quirks spread out all over the place does
not help.

> The 
> problem you're dealing with is that the knowledge of whether or not 
> there's a vendor interface isn't something the core kernel code knows 
> about. What you're proposing here is effectively for us to expose 
> additional information about whether or not there's a vendor interface 
> in the system firmware, but since we're talking in some cases about 
> hardware that's almost 20 years old, we're not realistically going to 
> get those old machines fixed.

I don't understand why you keep talking about the old vendor interfaces,
at least for the chromebook part of this thread the issue is that
the i915 driver no longer registers the intel_backlight device which
is a native device type, which is caused by the patch this email
thread is about (and old vendor interfaces do not come into play
at all here). So AFAICT this is a native vs acpi backlight control
issue ?

I really want to resolve your bug, but I still lack a lot of info,
like what backlight interface you were actually using in 6.0 ?

Can you please provide the following info for your laptop:

1. Output of "ls /sys/class/backlight" with 6.0  (working set

[RESEND PATCH] drm/amd/display: prevent memory leak

2022-10-26 Thread gehao618
From: gehao 

In dce6(0,1,4)_create_resource_pool and dce80_create_resource_pool
the allocated memory should be released if construct pool fails.

Signed-off-by: gehao 
---
 drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c | 3 +++
 drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
index fc6aa098bda0..8db9f7514466 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
@@ -1128,6 +1128,7 @@ struct resource_pool *dce60_create_resource_pool(
if (dce60_construct(num_virtual_links, dc, pool))
return &pool->base;
 
+   kfree(pool);
BREAK_TO_DEBUGGER();
return NULL;
 }
@@ -1325,6 +1326,7 @@ struct resource_pool *dce61_create_resource_pool(
if (dce61_construct(num_virtual_links, dc, pool))
return &pool->base;
 
+   kfree(pool);
BREAK_TO_DEBUGGER();
return NULL;
 }
@@ -1518,6 +1520,7 @@ struct resource_pool *dce64_create_resource_pool(
if (dce64_construct(num_virtual_links, dc, pool))
return &pool->base;
 
+   kfree(pool);
BREAK_TO_DEBUGGER();
return NULL;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index b28025960050..5825e6f412bd 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -1137,6 +1137,7 @@ struct resource_pool *dce80_create_resource_pool(
if (dce80_construct(num_virtual_links, dc, pool))
return &pool->base;
 
+   kfree(pool);
BREAK_TO_DEBUGGER();
return NULL;
 }
@@ -1336,6 +1337,7 @@ struct resource_pool *dce81_create_resource_pool(
if (dce81_construct(num_virtual_links, dc, pool))
return &pool->base;
 
+   kfree(pool);
BREAK_TO_DEBUGGER();
return NULL;
 }
-- 
2.25.1



[PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder

2022-10-26 Thread Ao Zhong
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
these two operations in dcn32/dcn32_resource.c still need to use FPU,
This will cause compilation to fail on ARM64 platforms because
-mgeneral-regs-only is enabled by default to disable the hardware FPU.
Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
in dcn32_fpu.c, and move above two operations into this function.

Acked-by: Christian König 
Signed-off-by: Ao Zhong 
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 8 
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  | 3 +++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
timing = &pipe->stream->timing;
 
pipes[pipe_cnt].pipe.src.gpuvm = true;
-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+   DC_FP_START();
+   dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+   DC_FP_END();
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // 
according to spreadsheet
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, 
struct clk_bw_params *bw_pa
}
 }
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{
+   dc_assert_fp_enabled();
+
+   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int 
dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
 
 void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
 #endif
-- 
2.37.4



[RESEND PATCH] drm/amd/display: prevent memory leak

2022-10-26 Thread gehao618
From: gehao 

In dce6(0,1,4)_create_resource_pool and dce8(0,1)_create_resource_pool
the allocated memory should be released if construct pool fails.

Signed-off-by: gehao 
---
 drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c | 3 +++
 drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
index fc6aa098bda0..8db9f7514466 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
@@ -1128,6 +1128,7 @@ struct resource_pool *dce60_create_resource_pool(
if (dce60_construct(num_virtual_links, dc, pool))
return &pool->base;
 
+   kfree(pool);
BREAK_TO_DEBUGGER();
return NULL;
 }
@@ -1325,6 +1326,7 @@ struct resource_pool *dce61_create_resource_pool(
if (dce61_construct(num_virtual_links, dc, pool))
return &pool->base;
 
+   kfree(pool);
BREAK_TO_DEBUGGER();
return NULL;
 }
@@ -1518,6 +1520,7 @@ struct resource_pool *dce64_create_resource_pool(
if (dce64_construct(num_virtual_links, dc, pool))
return &pool->base;
 
+   kfree(pool);
BREAK_TO_DEBUGGER();
return NULL;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index b28025960050..5825e6f412bd 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -1137,6 +1137,7 @@ struct resource_pool *dce80_create_resource_pool(
if (dce80_construct(num_virtual_links, dc, pool))
return &pool->base;
 
+   kfree(pool);
BREAK_TO_DEBUGGER();
return NULL;
 }
@@ -1336,6 +1337,7 @@ struct resource_pool *dce81_create_resource_pool(
if (dce81_construct(num_virtual_links, dc, pool))
return &pool->base;
 
+   kfree(pool);
BREAK_TO_DEBUGGER();
return NULL;
 }
-- 
2.25.1



[PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder

2022-10-26 Thread Ao Zhong
In the process of enabling DCN support for arm64, I found that the
dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
needs to use FPU. This will cause compilation to fail on ARM64 platforms
because -mgeneral-regs-only is enabled by default to disable the
hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
dml/dcn10 folder to enable hardware FPU for that function.

Acked-by: Christian König 
Signed-off-by: Ao Zhong 
---
 .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +--
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c  | 37 
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h  |  2 +
 3 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
return value;
 }
 
-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
-   struct dc *dc)
-{
-   if (dc->ctx->dce_version == DCN_VERSION_1_01) {
-   struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
-   struct dcn_ip_params *dcn_ip = dc->dcn_ip;
-   struct display_mode_lib *dml = &dc->dml;
-
-   dml->ip.max_num_dpp = 3;
-   /* TODO how to handle 23.84? */
-   dcn_soc->dram_clock_change_latency = 23;
-   dcn_ip->max_num_dpp = 3;
-   }
-   if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-   dc->dcn_soc->urgent_latency = 3;
-   dc->debug.disable_dmcu = true;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
-   }
-
-
-   dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / 
ddr4_dram_width;
-   ASSERT(dc->dcn_soc->number_of_channels < 3);
-   if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
-   dc->dcn_soc->number_of_channels = 2;
-
-   if (dc->dcn_soc->number_of_channels == 1) {
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
-   if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
-   }
-   }
-}
-
 static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
 {
int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
 
-   /* Other architectures we build for build this with soft-float */
+   DC_FP_START();
dcn10_resource_construct_fp(dc);
+   DC_FP_END();
 
if (!dc->config.is_vmin_only_asic)
if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..340636f1de9f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
 #include "dcn10/dcn10_resource.h"
 
 #include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
 
 /**
  * DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,38 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
.writeback_dram_clock_change_latency_us = 23.0,
.return_bus_width_bytes = 64,
 };
+
+void dcn10_resource_construct_fp(struct dc *dc)
+{
+   dc_assert_fp_enabled();
+   if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+   struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+   struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+   struct display_mode_lib *dml = &dc->dml;
+
+   dml->ip.max_num_dpp = 3;
+   /* TODO how to handle 23.84? */
+   dcn_soc->dram_clock_change_latency = 23;
+   dcn_ip->max_num_dpp = 3;
+   }
+   if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+   dc->dcn_soc->urgent_latency = 3;
+   dc->debug.disable_dmcu = true;
+   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+   }
+
+
+   dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / 
ddr4_dram_width;
+   ASSERT(dc->dcn_soc->number

Re: [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder

2022-10-26 Thread Ao Zhong
Hello Christian,

thank you for your review. I got a warning in checking the first patch with 
checkpatch.pl.

I'll fix it in the next version.

---
0001-drm-amd-display-move-remaining-FPU-code-to-dml-folde.patch
---
WARNING:braces {} are not necessary for single statement blocks
#131: FILE: drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c:157:
+   if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+   }

total: 0 errors, 1 warnings, 110 lines checked

NOTE: For some of the reported defects, checkpatch may be able to
 mechanically convert to the typical style using --fix or --fix-inplace.

0001-drm-amd-display-move-remaining-FPU-code-to-dml-folde.patch has style 
problems, please review.
---
0002-drm-amd-display-move-remaining-FPU-code-to-dml-folde.patch
---
total: 0 errors, 0 warnings, 29 lines checked

0002-drm-amd-display-move-remaining-FPU-code-to-dml-folde.patch has no obvious 
style problems and is ready for submission.

NOTE: If any of the errors are false positives, please report
 them to the maintainer, see CHECKPATCH in MAINTAINERS.

Am 26.10.22 um 09:19 schrieb Christian König:
> Am 25.10.22 um 23:17 schrieb Ao Zhong:
>> In the process of enabling DCN support for arm64, I found that the
>> dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
>> needs to use FPU. This will cause compilation to fail on ARM64 platforms
>> because -mgeneral-regs-only is enabled by default to disable the
>> hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
>> dml/dcn10 folder to enable hardware FPU for that function.
>
> Of hand that looks good to me, but our display team needs to take a look.
>
> Feel free to add an Acked-by: Christian König  for 
> the series.
>
> While at it could you make sure that checkpatch.pl doesn't has anything to 
> complain about the moved code?
>
> Thanks for the help,
> Christian.
>
>>
>> Signed-off-by: Ao Zhong 
>> ---
>>   .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +--
>>   .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c  | 38 
>>   .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h  |  2 +
>>   3 files changed, 42 insertions(+), 42 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c 
>> b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
>> index 56d30baf12df..6bfac8088ab0 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
>> +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
>> @@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context 
>> *ctx)
>>   return value;
>>   }
>>   -/*
>> - * Some architectures don't support soft-float (e.g. aarch64), on those
>> - * this function has to be called with hardfloat enabled, make sure not
>> - * to inline it so whatever fp stuff is done stays inside
>> - */
>> -static noinline void dcn10_resource_construct_fp(
>> -    struct dc *dc)
>> -{
>> -    if (dc->ctx->dce_version == DCN_VERSION_1_01) {
>> -    struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
>> -    struct dcn_ip_params *dcn_ip = dc->dcn_ip;
>> -    struct display_mode_lib *dml = &dc->dml;
>> -
>> -    dml->ip.max_num_dpp = 3;
>> -    /* TODO how to handle 23.84? */
>> -    dcn_soc->dram_clock_change_latency = 23;
>> -    dcn_ip->max_num_dpp = 3;
>> -    }
>> -    if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
>> -    dc->dcn_soc->urgent_latency = 3;
>> -    dc->debug.disable_dmcu = true;
>> -    dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
>> -    }
>> -
>> -
>> -    dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / 
>> ddr4_dram_width;
>> -    ASSERT(dc->dcn_soc->number_of_channels < 3);
>> -    if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
>> -    dc->dcn_soc->number_of_channels = 2;
>> -
>> -    if (dc->dcn_soc->number_of_channels == 1) {
>> -    dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
>> -    dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
>> -    dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
>> -    dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
>> -    if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
>> -    dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
>> -    }
>> -    }
>> -}
>> -
>>   static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage 
>> *clks)
>>   {
>>   int i;
>> @@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
>>   memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
>>   memcpy(dc->dcn_soc, &dcn10_soc_defaults

[PATCH v3 1/2] drm/amd/display: move remaining FPU code to dml folder

2022-10-26 Thread Ao Zhong
In the process of enabling DCN support for arm64, I found that the
dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
needs to use FPU. This will cause compilation to fail on ARM64 platforms
because -mgeneral-regs-only is enabled by default to disable the
hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
dml/dcn10 folder to enable hardware FPU for that function.

Acked-by: Christian König 
Signed-off-by: Ao Zhong 
---
 .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +--
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c  | 37 
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h  |  2 +
 3 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
return value;
 }
 
-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
-   struct dc *dc)
-{
-   if (dc->ctx->dce_version == DCN_VERSION_1_01) {
-   struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
-   struct dcn_ip_params *dcn_ip = dc->dcn_ip;
-   struct display_mode_lib *dml = &dc->dml;
-
-   dml->ip.max_num_dpp = 3;
-   /* TODO how to handle 23.84? */
-   dcn_soc->dram_clock_change_latency = 23;
-   dcn_ip->max_num_dpp = 3;
-   }
-   if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-   dc->dcn_soc->urgent_latency = 3;
-   dc->debug.disable_dmcu = true;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
-   }
-
-
-   dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / 
ddr4_dram_width;
-   ASSERT(dc->dcn_soc->number_of_channels < 3);
-   if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
-   dc->dcn_soc->number_of_channels = 2;
-
-   if (dc->dcn_soc->number_of_channels == 1) {
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
-   if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
-   }
-   }
-}
-
 static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
 {
int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
 
-   /* Other architectures we build for build this with soft-float */
+   DC_FP_START();
dcn10_resource_construct_fp(dc);
+   DC_FP_END();
 
if (!dc->config.is_vmin_only_asic)
if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..340636f1de9f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
 #include "dcn10/dcn10_resource.h"
 
 #include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
 
 /**
  * DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,38 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
.writeback_dram_clock_change_latency_us = 23.0,
.return_bus_width_bytes = 64,
 };
+
+void dcn10_resource_construct_fp(struct dc *dc)
+{
+   dc_assert_fp_enabled();
+   if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+   struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+   struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+   struct display_mode_lib *dml = &dc->dml;
+
+   dml->ip.max_num_dpp = 3;
+   /* TODO how to handle 23.84? */
+   dcn_soc->dram_clock_change_latency = 23;
+   dcn_ip->max_num_dpp = 3;
+   }
+   if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+   dc->dcn_soc->urgent_latency = 3;
+   dc->debug.disable_dmcu = true;
+   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+   }
+
+
+   dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / 
ddr4_dram_width;
+   ASSERT(dc->dcn_soc->number

[PATCH v2 2/2] drm/amd/display: move remaining FPU code to dml folder

2022-10-26 Thread Ao Zhong
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
these two operations in dcn32/dcn32_resource.c still need to use FPU,
This will cause compilation to fail on ARM64 platforms because
-mgeneral-regs-only is enabled by default to disable the hardware FPU.
Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
in dcn32_fpu.c, and move above two operations into this function.

Acked-by: Christian König 
Signed-off-by: Ao Zhong 
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 8 
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  | 3 +++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
timing = &pipe->stream->timing;
 
pipes[pipe_cnt].pipe.src.gpuvm = true;
-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+   DC_FP_START();
+   dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+   DC_FP_END();
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // 
according to spreadsheet
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, 
struct clk_bw_params *bw_pa
}
 }
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{
+   dc_assert_fp_enabled();
+
+   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int 
dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
 
 void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
 #endif
-- 
2.37.4



[PATCH] drm/amd/display: fix some coding style issues

2022-10-26 Thread Srinivasan Shanmugam
Fix the following checkpatch checks in amdgpu_dm.c

CHECK: Prefer kernel type 'u8' over 'uint8_t'
CHECK: Prefer kernel type 'u32' over 'uint32_t'
CHECK: Prefer kernel type 'u64' over 'uint64_t'
CHECK: Prefer kernel type 's32' over 'int32_t'

Signed-off-by: Srinivasan Shanmugam 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 90 +--
 1 file changed, 45 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e733042531a6..1ee31c7290ac 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -258,7 +258,7 @@ static u32 dm_vblank_get_counter(struct amdgpu_device 
*adev, int crtc)
 static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
  u32 *vbl, u32 *position)
 {
-   uint32_t v_blank_start, v_blank_end, h_position, v_position;
+   u32 v_blank_start, v_blank_end, h_position, v_position;
 
if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
return -EINVAL;
@@ -357,7 +357,7 @@ static void dm_pflip_high_irq(void *interrupt_params)
struct amdgpu_device *adev = irq_params->adev;
unsigned long flags;
struct drm_pending_vblank_event *e;
-   uint32_t vpos, hpos, v_blank_start, v_blank_end;
+   u32 vpos, hpos, v_blank_start, v_blank_end;
bool vrr_active;
 
amdgpu_crtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - 
IRQ_TYPE_PFLIP);
@@ -644,7 +644,7 @@ static void dmub_hpd_callback(struct amdgpu_device *adev,
struct drm_connector *connector;
struct drm_connector_list_iter iter;
struct dc_link *link;
-   uint8_t link_index = 0;
+   u8 link_index = 0;
struct drm_device *dev;
 
if (adev == NULL)
@@ -745,7 +745,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params)
struct amdgpu_device *adev = irq_params->adev;
struct amdgpu_display_manager *dm = &adev->dm;
struct dmcub_trace_buf_entry entry = { 0 };
-   uint32_t count = 0;
+   u32 count = 0;
struct dmub_hpd_work *dmub_hpd_wrk;
struct dc_link *plink = NULL;
 
@@ -1011,7 +1011,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
struct dmub_srv_hw_params hw_params;
enum dmub_status status;
const unsigned char *fw_inst_const, *fw_bss_data;
-   uint32_t i, fw_inst_const_size, fw_bss_data_size;
+   u32 i, fw_inst_const_size, fw_bss_data_size;
bool has_hw_support;
 
if (!dmub_srv)
@@ -1172,10 +1172,10 @@ static void dm_dmub_hw_resume(struct amdgpu_device 
*adev)
 
 static void mmhub_read_system_context(struct amdgpu_device *adev, struct 
dc_phy_addr_space_config *pa_config)
 {
-   uint64_t pt_base;
-   uint32_t logical_addr_low;
-   uint32_t logical_addr_high;
-   uint32_t agp_base, agp_bot, agp_top;
+   u64 pt_base;
+   u32 logical_addr_low;
+   u32 logical_addr_high;
+   u32 agp_base, agp_bot, agp_top;
PHYSICAL_ADDRESS_LOC page_table_start, page_table_end, page_table_base;
 
memset(pa_config, 0, sizeof(*pa_config));
@@ -2443,7 +2443,7 @@ struct amdgpu_dm_connector *
 amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
 struct drm_crtc *crtc)
 {
-   uint32_t i;
+   u32 i;
struct drm_connector_state *new_con_state;
struct drm_connector *connector;
struct drm_crtc *crtc_from_state;
@@ -3101,8 +3101,8 @@ static void handle_hpd_irq(void *param)
 
 static void dm_handle_mst_sideband_msg(struct amdgpu_dm_connector *aconnector)
 {
-   uint8_t esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 };
-   uint8_t dret;
+   u8 esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 };
+   u8 dret;
bool new_irq_handled = false;
int dpcd_addr;
int dpcd_bytes_to_read;
@@ -3130,7 +3130,7 @@ static void dm_handle_mst_sideband_msg(struct 
amdgpu_dm_connector *aconnector)
 
while (dret == dpcd_bytes_to_read &&
process_count < max_process_count) {
-   uint8_t retry;
+   u8 retry;
dret = 0;
 
process_count++;
@@ -3149,7 +3149,7 @@ static void dm_handle_mst_sideband_msg(struct 
amdgpu_dm_connector *aconnector)
dpcd_bytes_to_read - 1;
 
for (retry = 0; retry < 3; retry++) {
-   uint8_t wret;
+   u8 wret;
 
wret = drm_dp_dpcd_write(
&aconnector->dm_dp_aux.aux,
@@ -4158,12 +4158,12 @@ static void amdgpu_set_panel_orientation(struct 
drm_connector *connector);
 static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 {
struct amdgpu_display_manager *dm = &adev->dm;
-  

[bug report] drm/amdgpu/mes: use ring for kernel queue submission

2022-10-26 Thread Dan Carpenter
Hello Jack Xiao,

The patch d0c423b64765: "drm/amdgpu/mes: use ring for kernel queue
submission" from Mar 27, 2020, leads to the following Smatch static
checker warning:

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1056 amdgpu_mes_add_ring()
error: format string overflow. buf_size: 16 length: 38 [user data]

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
980 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
981 int queue_type, int idx,
982 struct amdgpu_mes_ctx_data *ctx_data,
983 struct amdgpu_ring **out)
984 {
985 struct amdgpu_ring *ring;
986 struct amdgpu_mes_gang *gang;
987 struct amdgpu_mes_queue_properties qprops = {0};
988 int r, queue_id, pasid;
989 
990 /*
991  * Avoid taking any other locks under MES lock to avoid circular
992  * lock dependencies.
993  */
994 amdgpu_mes_lock(&adev->mes);
995 gang = idr_find(&adev->mes.gang_id_idr, gang_id);
996 if (!gang) {
997 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
998 amdgpu_mes_unlock(&adev->mes);
999 return -EINVAL;
1000 }
1001 pasid = gang->process->pasid;
1002 
1003 ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
1004 if (!ring) {
1005 amdgpu_mes_unlock(&adev->mes);
1006 return -ENOMEM;
1007 }
1008 
1009 ring->ring_obj = NULL;
1010 ring->use_doorbell = true;
1011 ring->is_mes_queue = true;
1012 ring->mes_ctx = ctx_data;
1013 ring->idx = idx;
1014 ring->no_scheduler = true;
1015 
1016 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1017 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
1018   compute[ring->idx].mec_hpd);
1019 ring->eop_gpu_addr =
1020 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
1021 }
1022 
1023 switch (queue_type) {
1024 case AMDGPU_RING_TYPE_GFX:
1025 ring->funcs = adev->gfx.gfx_ring[0].funcs;
1026 break;
1027 case AMDGPU_RING_TYPE_COMPUTE:
1028 ring->funcs = adev->gfx.compute_ring[0].funcs;
1029 break;
1030 case AMDGPU_RING_TYPE_SDMA:
1031 ring->funcs = adev->sdma.instance[0].ring.funcs;
1032 break;
1033 default:
1034 BUG();
1035 }
1036 
1037 r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1038  AMDGPU_RING_PRIO_DEFAULT, NULL);
1039 if (r)
1040 goto clean_up_memory;
1041 
1042 amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
1043 
1044 dma_fence_wait(gang->process->vm->last_update, false);
1045 dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
1046 amdgpu_mes_unlock(&adev->mes);
1047 
1048 r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
1049 if (r)
1050 goto clean_up_ring;
1051 
1052 ring->hw_queue_id = queue_id;
1053 ring->doorbell_index = qprops.doorbell_off;
1054 
1055 if (queue_type == AMDGPU_RING_TYPE_GFX)
--> 1056 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, 
queue_id);

I'm not sure why this is warning now instead of in 2020.  But the bug is
definitely real.  "gang_id" is capped at INT_MAX so that can overflow
already even if the values of "pasid" and "queue_id" are zero.

Using snprintf() is safer but also probably the buffer should be larger.

1057 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
1058 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
1059 queue_id);
1060 else if (queue_type == AMDGPU_RING_TYPE_SDMA)
1061 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
1062 queue_id);
1063 else
1064 BUG();
1065 
1066 *out = ring;
1067 return 0;
1068 
1069 clean_up_ring:
1070 amdgpu_ring_fini(ring);
1071 clean_up_memory:
1072 kfree(ring);
1073 amdgpu_mes_unlock(&adev->mes);
1074 return r;
1075 }

regards,
dan carpenter


Re: [PATCH] drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm for vega10 properly"

2022-10-26 Thread Yury Zhuravlev
Hello Asher,

Thanks for the information, is it open-source tests? Can I reproduce it?

Also, it seems like Radeon Instinct MI25 was released before Radeon RX
Vega, is it possible that they have different PowerPlay subsystems?
On my Vega56, all these registers from vega20 are working very well.
Seems like we should split implementation somehow.

Regards,

On Wed, Oct 26, 2022 at 3:51 PM Song, Asher  wrote:

> [AMD Official Use Only - General]
>
>
>
> +@Meng, Xiaohu 
>
>
>
> Hi Zhuravlev,
>
>
>
> The information of test card is as following:
>
> Lspci name: [AMD/ATI] Vega 10 [Radeon Instinct MI25 MxGPU] (rev 05)
>
> Firmware: ATOM BIOS: 113-D0531800-C04
>
>
>
> When run test scripts compute-rocm-*/utils/run_rsmitst.sh, the below test
> cases fail.
>
> [  FAILED  ] 4 tests, listed below:
>
> [  FAILED  ] rsmitstReadOnly.TestOverdriveRead
>
> [  FAILED  ] rsmitstReadWrite.FanReadWrite
>
> [  FAILED  ] rsmitstReadWrite.TestOverdriveReadWrite
>
> [  FAILED  ] rsmitstReadWrite.TestPowerCapReadWrite
>
>
>
> Best Regards,
>
> Asher
>
> *From:* Yury Zhuravlev 
> *Sent:* Wednesday, October 26, 2022 9:31 AM
> *To:* Song, Asher 
> *Cc:* Deucher, Alexander ; Chen, Guchun <
> guchun.c...@amd.com>; Quan, Evan ;
> amd-gfx@lists.freedesktop.org
> *Subject:* Re: [PATCH] drm/amdgpu: Revert "drm/amdgpu: getting fan speed
> pwm for vega10 properly"
>
>
>
> Hello,
> Can you write here your card name and firmware version? Without this
> patch, my fan sensors are broken, and it's sensitive to my pc case with
> water cooling.
> My card is:  Sapphire Pulse Radeon RX Vega 56  vd6546 SA
> lspci name: [AMD/ATI] Vega 10 XL/XT [Radeon RX Vega 56/64] (rev c3)
> Firmware: ATOM BIOS: 113-376XL-UO2
> This patch is 100% working correct on my machine, and I tested it last 2
> months.
>
> Regards,
>
>
>
> On Fri, Oct 14, 2022 at 1:15 PM Asher Song  wrote:
>
> This reverts commit fe01cb24b81c0091d7e5668874d51ce913e44a97.
>
> Unfortunately, that commit causes fan monitors can't be read and written
> properly.
>
> Signed-off-by: Asher Song 
> ---
>  .../amd/pm/powerplay/hwmgr/vega10_thermal.c   | 25 ++-
>  1 file changed, 13 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
> b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
> index 190af79f3236..dad3e3741a4e 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
> +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
> @@ -67,21 +67,22 @@ int vega10_fan_ctrl_get_fan_speed_info(struct pp_hwmgr
> *hwmgr,
>  int vega10_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr,
> uint32_t *speed)
>  {
> -   struct amdgpu_device *adev = hwmgr->adev;
> -   uint32_t duty100, duty;
> -   uint64_t tmp64;
> +   uint32_t current_rpm;
> +   uint32_t percent = 0;
>
> -   duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1),
> -   CG_FDO_CTRL1, FMAX_DUTY100);
> -   duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS),
> -   CG_THERMAL_STATUS, FDO_PWM_DUTY);
> +   if (hwmgr->thermal_controller.fanInfo.bNoFan)
> +   return 0;
>
> -   if (!duty100)
> -   return -EINVAL;
> +   if (vega10_get_current_rpm(hwmgr, &current_rpm))
> +   return -1;
> +
> +   if (hwmgr->thermal_controller.
> +   advanceFanControlParameters.usMaxFanRPM != 0)
> +   percent = current_rpm * 255 /
> +   hwmgr->thermal_controller.
> +   advanceFanControlParameters.usMaxFanRPM;
>
> -   tmp64 = (uint64_t)duty * 255;
> -   do_div(tmp64, duty100);
> -   *speed = MIN((uint32_t)tmp64, 255);
> +   *speed = MIN(percent, 255);
>
> return 0;
>  }
> --
> 2.25.1
>
>


Re: [PATCH 2/2] drm/amdgpu: remove deprecated MES version vars

2022-10-26 Thread Xiao, Jack
[AMD Official Use Only - General]

The series is Reviewed-by: Jack Xiao 

Regards,
Jack

From: Sider, Graham 
Sent: Wednesday, 26 October 2022 03:20
To: amd-gfx@lists.freedesktop.org 
Cc: Xiao, Jack ; Zhang, Hawking ; 
Sider, Graham 
Subject: [PATCH 2/2] drm/amdgpu: remove deprecated MES version vars

MES scheduler and kiq versions are stored in mes.sched_version and
mes.kiq_version, respectively, which are read from a register after
their queues are initialized. Remove mes.ucode_fw_version and
mes.data_fw_version which tried to read this versioning info from the
firmware headers (which don't contain this information).

Signed-off-by: Graham Sider 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 2 --
 drivers/gpu/drm/amd/amdgpu/mes_v10_1.c  | 4 
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  | 4 
 3 files changed, 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index ad980f4b66e1..97c05d08a551 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -91,14 +91,12 @@ struct amdgpu_mes {
 struct amdgpu_bo*ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
 uint64_t
ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
 uint32_t*ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
-   uint32_tucode_fw_version[AMDGPU_MAX_MES_PIPES];
 uint64_tuc_start_addr[AMDGPU_MAX_MES_PIPES];

 /* mes ucode data */
 struct amdgpu_bo*data_fw_obj[AMDGPU_MAX_MES_PIPES];
 uint64_tdata_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
 uint32_t*data_fw_ptr[AMDGPU_MAX_MES_PIPES];
-   uint32_tdata_fw_version[AMDGPU_MAX_MES_PIPES];
 uint64_tdata_start_addr[AMDGPU_MAX_MES_PIPES];

 /* eop gpu obj */
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 067d10073a56..1abdf8b7ab50 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -415,10 +415,6 @@ static int mes_v10_1_init_microcode(struct amdgpu_device 
*adev,

 mes_hdr = (const struct mes_firmware_header_v1_0 *)
 adev->mes.fw[pipe]->data;
-   adev->mes.ucode_fw_version[pipe] =
-   le32_to_cpu(mes_hdr->mes_ucode_version);
-   adev->mes.ucode_fw_version[pipe] =
-   le32_to_cpu(mes_hdr->mes_ucode_data_version);
 adev->mes.uc_start_addr[pipe] =
 le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
 ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index e14f314902b1..27a330f51c7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -480,10 +480,6 @@ static int mes_v11_0_init_microcode(struct amdgpu_device 
*adev,

 mes_hdr = (const struct mes_firmware_header_v1_0 *)
 adev->mes.fw[pipe]->data;
-   adev->mes.ucode_fw_version[pipe] =
-   le32_to_cpu(mes_hdr->mes_ucode_version);
-   adev->mes.ucode_fw_version[pipe] =
-   le32_to_cpu(mes_hdr->mes_ucode_data_version);
 adev->mes.uc_start_addr[pipe] =
 le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
 ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
--
2.25.1



[PATCH v2 2/2] drm/amdkfd: Fix the warning of array-index-out-of-bounds

2022-10-26 Thread Ma Jun
For some GPUs with more CUs, the original sibling_map[32]

in struct crat_subtype_cache is not enough

to save the cache information when create the VCRAT table,

so skip filling the struct crat_subtype_cache info instead

fill struct kfd_cache_properties directly to fix this problem.

v2:
- Remove unnecessary sys interface "cache_ext"

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 307 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.h |  12 +
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 238 -
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h |   5 +-
 4 files changed, 278 insertions(+), 284 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 35d09e29aafb..561376d207c5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -55,16 +55,6 @@ static inline void assign_gpu_processor_id(struct kfd_dev 
*kfd)
gpu_processor_id_low += total_cu_count;
 }
 
-/* Static table to describe GPU Cache information */
-struct kfd_gpu_cache_info {
-   uint32_tcache_size;
-   uint32_tcache_level;
-   uint32_tflags;
-   /* Indicates how many Compute Units share this cache
-* within a SA. Value = 1 indicates the cache is not shared
-*/
-   uint32_tnum_cu_shared;
-};
 
 static struct kfd_gpu_cache_info kaveri_cache_info[] = {
{
@@ -1028,9 +1018,13 @@ static int kfd_parse_subtype_cache(struct 
crat_subtype_cache *cache,
props->cachelines_per_tag = cache->lines_per_tag;
props->cache_assoc = cache->associativity;
props->cache_latency = cache->cache_latency;
+
memcpy(props->sibling_map, cache->sibling_map,
sizeof(props->sibling_map));
 
+   /* set the sibling_map_size as 32 for CRAT from ACPI */
+   props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;
+
if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
props->cache_type |= HSA_CACHE_TYPE_DATA;
if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
@@ -1248,124 +1242,6 @@ int kfd_parse_crat_table(void *crat_image, struct 
list_head *device_list,
return ret;
 }
 
-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
-static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
-   struct kfd_gpu_cache_info *pcache_info,
-   struct kfd_cu_info *cu_info,
-   int mem_available,
-   int cu_bitmask,
-   int cache_type, unsigned int cu_processor_id,
-   int cu_block)
-{
-   unsigned int cu_sibling_map_mask;
-   int first_active_cu;
-
-   /* First check if enough memory is available */
-   if (sizeof(struct crat_subtype_cache) > mem_available)
-   return -ENOMEM;
-
-   cu_sibling_map_mask = cu_bitmask;
-   cu_sibling_map_mask >>= cu_block;
-   cu_sibling_map_mask &=
-   ((1 << pcache_info[cache_type].num_cu_shared) - 1);
-   first_active_cu = ffs(cu_sibling_map_mask);
-
-   /* CU could be inactive. In case of shared cache find the first active
-* CU. and incase of non-shared cache check if the CU is inactive. If
-* inactive active skip it
-*/
-   if (first_active_cu) {
-   memset(pcache, 0, sizeof(struct crat_subtype_cache));
-   pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
-   pcache->length = sizeof(struct crat_subtype_cache);
-   pcache->flags = pcache_info[cache_type].flags;
-   pcache->processor_id_low = cu_processor_id
-+ (first_active_cu - 1);
-   pcache->cache_level = pcache_info[cache_type].cache_level;
-   pcache->cache_size = pcache_info[cache_type].cache_size;
-
-   /* Sibling map is w.r.t processor_id_low, so shift out
-* inactive CU
-*/
-   cu_sibling_map_mask =
-   cu_sibling_map_mask >> (first_active_cu - 1);
-
-   pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
-   pcache->sibling_map[1] =
-   (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
-   pcache->sibling_map[2] =
-   (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
-   pcache->sibling_map[3] =
-   (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
-   return 0;
-   }
-   return 1;
-}
-
-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
-static int fill_in_l2_l3_pcache(struct crat_subtyp

[PATCH v2 1/2] drm/amdkfd: Init the base cu processor id

2022-10-26 Thread Ma Jun
Init and save the base cu processor id for later use

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 20 +---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  3 +++
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index d25ac9cbe5b2..35d09e29aafb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -41,13 +41,18 @@ static uint32_t gpu_processor_id_low = 0x80001000;
  * @total_cu_count - Total CUs present in the GPU including ones
  *   masked off
  */
-static inline unsigned int get_and_inc_gpu_processor_id(
-   unsigned int total_cu_count)
+static inline void assign_gpu_processor_id(struct kfd_dev *kfd)
 {
-   int current_id = gpu_processor_id_low;
+   struct amdgpu_device *adev = kfd->adev;
+   unsigned int array_count = 0;
+   unsigned int total_cu_count = 0;
+
+   kfd->processor_id_low = gpu_processor_id_low;
+
+   array_count = adev->gfx.config.max_sh_per_se * 
adev->gfx.config.max_shader_engines;
+   total_cu_count = array_count * adev->gfx.config.max_cu_per_sh;
 
gpu_processor_id_low += total_cu_count;
-   return current_id;
 }
 
 /* Static table to describe GPU Cache information */
@@ -2223,7 +2228,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
struct crat_subtype_computeunit *cu;
struct kfd_cu_info cu_info;
int avail_size = *size;
-   uint32_t total_num_of_cu;
int num_of_cache_entries = 0;
int cache_mem_filled = 0;
uint32_t nid = 0;
@@ -2275,8 +2279,10 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
cu->wave_front_size = cu_info.wave_front_size;
cu->array_count = cu_info.num_shader_arrays_per_engine *
cu_info.num_shader_engines;
-   total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
-   cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
+
+   assign_gpu_processor_id(kdev);
+   cu->processor_id_low = kdev->processor_id_low;
+
cu->num_cu_per_array = cu_info.num_cu_per_sh;
cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
cu->num_banks = cu_info.num_shader_engines;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 182eb67edbc5..4c06b233472f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -370,6 +370,9 @@ struct kfd_dev {
 
/* Track per device allocated watch points. */
uint32_t alloc_watch_ids;
+
+   /* cu processor id base */
+   unsigned intprocessor_id_low;
 };
 
 struct kfd_ipc_obj;
-- 
2.25.1



Re: [6.1][regression] after commit dd80d9c8eecac8c516da5b240d01a35660ba6cb6 some games (Cyberpunk 2077, Forza Horizon 4/5) hang at start

2022-10-26 Thread Christian König

Attached is the original test patch rebased on current amd-staging-drm-next.

Can you test if this is enough to make sure that the games start without 
crashing by fetching the userptrs?


Thanks in advance,
Christian.

Am 21.10.22 um 14:36 schrieb Mikhail Gavrilov:

On Fri, Oct 21, 2022 at 1:33 PM Christian König
 wrote:

Hi,

yes Bas already reported this issue, but I couldn't reproduce it. Need
to come up with a patch to narrow this down further.

Can I send you something to test?

I would appreciate to test any patches and ideas.

From 852c78656f083394296b3d3b96db33608ce0f272 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= 
Date: Wed, 26 Oct 2022 09:26:01 +0200
Subject: [PATCH] drm/amdgpu: partial revert "remove ctx->lock" v2""
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 6c052af778a61977c271632044c754dbbca4f892.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 26 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1bbd39b3b0fc..0b331e8bfba6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -57,6 +57,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
 	if (!p->ctx)
 		return -EINVAL;
 
+	mutex_lock(&p->ctx->lock);
+
 	if (atomic_read(&p->ctx->guilty)) {
 		amdgpu_ctx_put(p->ctx);
 		return -ECANCELED;
@@ -578,6 +580,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
 	unsigned int ce_preempt = 0, de_preempt = 0;
 	int i, r;
 
+	/* TODO: Investigate why we still need the context lock */
+	mutex_unlock(&p->ctx->lock);
+
 	for (i = 0; i < p->nchunks; ++i) {
 		struct amdgpu_cs_chunk *chunk;
 
@@ -587,38 +592,41 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
 		case AMDGPU_CHUNK_ID_IB:
 			r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
 			if (r)
-return r;
+goto out;
 			break;
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
 			r = amdgpu_cs_p2_dependencies(p, chunk);
 			if (r)
-return r;
+goto out;
 			break;
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 			r = amdgpu_cs_p2_syncobj_in(p, chunk);
 			if (r)
-return r;
+goto out;
 			break;
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 			r = amdgpu_cs_p2_syncobj_out(p, chunk);
 			if (r)
-return r;
+goto out;
 			break;
 		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
 			r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
 			if (r)
-return r;
+goto out;
 			break;
 		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
 			r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
 			if (r)
-return r;
+goto out;
 			break;
 		}
 	}
 
-	return 0;
+	r = 0;
+out:
+	mutex_lock(&p->ctx->lock);
+	return r;
 }
 
 /* Convert microseconds to bytes. */
@@ -1335,8 +1343,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
 
 	dma_fence_put(parser->fence);
 
-	if (parser->ctx)
+	if (parser->ctx) {
+		mutex_unlock(&parser->ctx->lock);
 		amdgpu_ctx_put(parser->ctx);
+	}
 	if (parser->bo_list)
 		amdgpu_bo_list_put(parser->bo_list);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 331aa191910c..3a23fa45bfed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -315,6 +315,7 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
 	kref_init(&ctx->refcount);
 	ctx->mgr = mgr;
 	spin_lock_init(&ctx->ring_lock);
+	mutex_init(&ctx->lock);
 
 	ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
 	ctx->reset_counter_query = ctx->reset_counter;
@@ -409,6 +410,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
 		drm_dev_exit(idx);
 	}
 
+	mutex_destroy(&ctx->lock);
 	kfree(ctx);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..cc7c8afff414 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -53,6 +53,7 @@ struct amdgpu_ctx {
 	boolpreamble_presented;
 	int32_tinit_priority;
 	int32_toverride_priority;
+	struct mutex			lock;
 	atomic_t			guilty;
 	unsigned long			ras_counter_ce;
 	unsigned long			ras_counter_ue;
-- 
2.25.1



Re: [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder

2022-10-26 Thread Christian König

Am 25.10.22 um 23:17 schrieb Ao Zhong:

In the process of enabling DCN support for arm64, I found that the
dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
needs to use FPU. This will cause compilation to fail on ARM64 platforms
because -mgeneral-regs-only is enabled by default to disable the
hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
dml/dcn10 folder to enable hardware FPU for that function.


Of hand that looks good to me, but our display team needs to take a look.

Feel free to add an Acked-by: Christian König  
for the series.


While at it could you make sure that checkpatch.pl doesn't has anything 
to complain about the moved code?


Thanks for the help,
Christian.



Signed-off-by: Ao Zhong 
---
  .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +--
  .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c  | 38 
  .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h  |  2 +
  3 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
return value;
  }
  
-/*

- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
-   struct dc *dc)
-{
-   if (dc->ctx->dce_version == DCN_VERSION_1_01) {
-   struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
-   struct dcn_ip_params *dcn_ip = dc->dcn_ip;
-   struct display_mode_lib *dml = &dc->dml;
-
-   dml->ip.max_num_dpp = 3;
-   /* TODO how to handle 23.84? */
-   dcn_soc->dram_clock_change_latency = 23;
-   dcn_ip->max_num_dpp = 3;
-   }
-   if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-   dc->dcn_soc->urgent_latency = 3;
-   dc->debug.disable_dmcu = true;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
-   }
-
-
-   dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / 
ddr4_dram_width;
-   ASSERT(dc->dcn_soc->number_of_channels < 3);
-   if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
-   dc->dcn_soc->number_of_channels = 2;
-
-   if (dc->dcn_soc->number_of_channels == 1) {
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
-   if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
-   }
-   }
-}
-
  static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
  {
int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
  
-	/* Other architectures we build for build this with soft-float */

+   DC_FP_START();
dcn10_resource_construct_fp(dc);
+   DC_FP_END();
  
  	if (!dc->config.is_vmin_only_asic)

if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..8b5e6fff5444 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
  #include "dcn10/dcn10_resource.h"
  
  #include "dcn10_fpu.h"

+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
  
  /**

   * DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,39 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
.writeback_dram_clock_change_latency_us = 23.0,
.return_bus_width_bytes = 64,
  };
+
+void dcn10_resource_construct_fp(struct dc *dc)
+{
+   dc_assert_fp_enabled();
+   if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+   struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+   struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+   struct display_mode_lib *dml = &dc->dml;
+
+   dml->ip.max_num_dpp = 3;
+   /* TODO how to handle 23.84? */
+   dcn_soc->dram_clock_change_latency = 23;
+   dcn_ip->max_num_dpp = 3;
+   }
+   if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev))

[PATCH v2 2/2] drm/amd/display: move remaining FPU code to dml folder

2022-10-26 Thread Ao Zhong
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
these two operations in dcn32/dcn32_resource.c still need to use FPU,
This will cause compilation to fail on ARM64 platforms because
-mgeneral-regs-only is enabled by default to disable the hardware FPU.
Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
in dcn32_fpu.c, and move above two operations into this function.

Signed-off-by: Ao Zhong 
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 8 
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  | 3 +++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
timing = &pipe->stream->timing;
 
pipes[pipe_cnt].pipe.src.gpuvm = true;
-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+   DC_FP_START();
+   dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+   DC_FP_END();
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // 
according to spreadsheet
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, 
struct clk_bw_params *bw_pa
}
 }
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{
+   dc_assert_fp_enabled();
+
+   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+   pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int 
dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
 
 void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
 #endif
-- 
2.37.4



[PATCH 2/2] drm/amd/display: Modify mismatched function name

2022-10-26 Thread Jiapeng Chong
No functional modification involved.

drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:615: warning: expecting 
prototype for setup_subvp_dmub_command(). Prototype was for 
populate_subvp_cmd_pipe_info() instead.

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2587
Reported-by: Abaci Robot 
Signed-off-by: Jiapeng Chong 
---
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c 
b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 774e06936661..67eef5beab95 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -595,7 +595,7 @@ static void update_subvp_prefetch_end_to_mall_start(struct 
dc *dc,
 }
 
 /**
- * setup_subvp_dmub_command - Helper to populate the SubVP pipe info for the 
DMUB subvp command
+ * populate_subvp_cmd_pipe_info - Helper to populate the SubVP pipe info for 
the DMUB subvp command
  *
  * @dc: [in] current dc state
  * @context: [in] new dc state
-- 
2.20.1.7.g153144c



[PATCH 1/2] drm/amd/display: make wake_up_aux_channel static

2022-10-26 Thread Jiapeng Chong
This symbol is not used outside of dc_link_dp.c, so marks it static.

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:5230:16: warning: no 
previous prototype for function 'wake_up_aux_channel'.

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2581
Reported-by: Abaci Robot 
Signed-off-by: Jiapeng Chong 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 420119efcf5f..e5ab751a5ca1 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -5227,7 +5227,7 @@ static void retrieve_cable_id(struct dc_link *link)
&link->dpcd_caps.cable_id, &usbc_cable_id);
 }
 
-enum dc_status wake_up_aux_channel(struct dc_link *link)
+static enum dc_status wake_up_aux_channel(struct dc_link *link)
 {
enum dc_status status = DC_ERROR_UNEXPECTED;
uint32_t aux_channel_retry_cnt = 0;
-- 
2.20.1.7.g153144c



Re: [PATCH v2 0/8] Fix several device private page reference counting issues

2022-10-26 Thread Alistair Popple


"Vlastimil Babka (SUSE)"  writes:

> On 9/28/22 14:01, Alistair Popple wrote:
>> This series aims to fix a number of page reference counting issues in
>> drivers dealing with device private ZONE_DEVICE pages. These result in
>> use-after-free type bugs, either from accessing a struct page which no
>> longer exists because it has been removed or accessing fields within the
>> struct page which are no longer valid because the page has been freed.
>>
>> During normal usage it is unlikely these will cause any problems. However
>> without these fixes it is possible to crash the kernel from userspace.
>> These crashes can be triggered either by unloading the kernel module or
>> unbinding the device from the driver prior to a userspace task exiting. In
>> modules such as Nouveau it is also possible to trigger some of these issues
>> by explicitly closing the device file-descriptor prior to the task exiting
>> and then accessing device private memory.
>
> Hi, as this series was noticed to create a CVE [1], do you think a stable
> backport is warranted? I think the "It is possible to launch the attack
> remotely." in [1] is incorrect though, right?

Right, I don't see how this could be exploited remotely. And I'm pretty
sure you need root as well because in practice the pgmap needs to be
freed, and for Nouveau at least that only happens on device removal.

> It looks to me that patch 1 would be needed since the CONFIG_DEVICE_PRIVATE
> introduction, while the following few only to kernels with 27674ef6c73f
> (probably not so critical as that includes no LTS)?

Patch 3 already has a fixes tag for 27674ef6c73f. Patch 1 would need to
go back to CONFIG_DEVICE_PRIVATE introduction. I think patches 4-8 would
also need to go back to introduction of CONFIG_DEVICE_PRIVATE, but there
isn't as much impact there and they would be harder to backport I think.
Without them device removal can loop indefinitely in kernel mode (if
patch 3 is present or the kernel is older than 27674ef6c73f).

 - Alistair

> Thanks,
> Vlastimil
>
> [1] https://nvd.nist.gov/vuln/detail/CVE-2022-3523
>
>> This involves some minor changes to both PowerPC and AMD GPU code.
>> Unfortunately I lack hardware to test either of those so any help there
>> would be appreciated. The changes mimic what is done in for both Nouveau
>> and hmm-tests though so I doubt they will cause problems.
>>
>> To: Andrew Morton 
>> To: linux...@kvack.org
>> Cc: linux-ker...@vger.kernel.org
>> Cc: amd-gfx@lists.freedesktop.org
>> Cc: nouv...@lists.freedesktop.org
>> Cc: dri-de...@lists.freedesktop.org
>>
>> Alistair Popple (8):
>>   mm/memory.c: Fix race when faulting a device private page
>>   mm: Free device private pages have zero refcount
>>   mm/memremap.c: Take a pgmap reference on page allocation
>>   mm/migrate_device.c: Refactor migrate_vma and 
>> migrate_deivce_coherent_page()
>>   mm/migrate_device.c: Add migrate_device_range()
>>   nouveau/dmem: Refactor nouveau_dmem_fault_copy_one()
>>   nouveau/dmem: Evict device private memory during release
>>   hmm-tests: Add test for migrate_device_range()
>>
>>  arch/powerpc/kvm/book3s_hv_uvmem.c   |  17 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  19 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   2 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  11 +-
>>  drivers/gpu/drm/nouveau/nouveau_dmem.c   | 108 +++
>>  include/linux/memremap.h |   1 +-
>>  include/linux/migrate.h  |  15 ++-
>>  lib/test_hmm.c   | 129 ++---
>>  lib/test_hmm_uapi.h  |   1 +-
>>  mm/memory.c  |  16 +-
>>  mm/memremap.c|  30 ++-
>>  mm/migrate.c |  34 +--
>>  mm/migrate_device.c  | 239 +---
>>  mm/page_alloc.c  |   8 +-
>>  tools/testing/selftests/vm/hmm-tests.c   |  49 +-
>>  15 files changed, 516 insertions(+), 163 deletions(-)
>>
>> base-commit: 088b8aa537c2c767765f1c19b555f21ffe555786


Re: [PATCH v5 02/31] drm/i915: Don't register backlight when another backlight should be used (v2)

2022-10-26 Thread Matthew Garrett
On Wed, Oct 26, 2022 at 01:27:25AM +0200, Hans de Goede wrote:

> this code should actually set the ACPI_VIDEO_BACKLIGHT flag:
> drivers/acpi/scan.c:
> 
> static acpi_status
> acpi_backlight_cap_match(acpi_handle handle, u32 level, void *context,
>   void **return_value)
> {
> long *cap = context;
> 
> if (acpi_has_method(handle, "_BCM") &&
> acpi_has_method(handle, "_BCL")) {
> acpi_handle_debug(handle, "Found generic backlight 
> support\n");
> *cap |= ACPI_VIDEO_BACKLIGHT;
> /* We have backlight support, no need to scan further */
> return AE_CTRL_TERMINATE;
> }
> return 0;
> }

Ah, yeah, my local tree no longer matches the upstream behaviour because 
I've hacked the EC firmware to remove the backlight trigger because it 
had an extremely poor brightness curve and also automatically changed it 
on AC events - as a result I removed the backlight code from the DSDT 
and just fell back to the native control. Like I said I'm a long way 
from the normal setup, but this did previously work.

The "right" logic here seems pretty simple: if ACPI backlight control is 
expected to work, use it. If it isn't, but there's a vendor interface, 
use it. If there's no vendor interface, use the native interface. The 
problem you're dealing with is that the knowledge of whether or not 
there's a vendor interface isn't something the core kernel code knows 
about. What you're proposing here is effectively for us to expose 
additional information about whether or not there's a vendor interface 
in the system firmware, but since we're talking in some cases about 
hardware that's almost 20 years old, we're not realistically going to 
get those old machines fixed. So, it feels like there's two choices:

1) Make a default policy decision, but then allow that decision to be 
altered later on (eg, when a vendor-specific platform driver has been 
loaded) - you've said this poses additional complexities.

2) Move the knowledge of whether or not there's a vendor interface into 
the core code. Basically take every platform driver that exposes a 
vendor interface, and move the detection code into the core.

I think any other approach is going to result in machines that 
previously worked no longer working (and you can't just make the 
vendor/native split dependent on the Coreboot DMI BIOS string, because 
there are some Coreboot platforms that implement the vendor interface 
for compatibility, and you also can't ask all Coreboot users to update 
their firmware to fix things)


[PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder

2022-10-26 Thread Ao Zhong
In the process of enabling DCN support for arm64, I found that the
dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
needs to use FPU. This will cause compilation to fail on ARM64 platforms
because -mgeneral-regs-only is enabled by default to disable the
hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
dml/dcn10 folder to enable hardware FPU for that function.

Signed-off-by: Ao Zhong 
---
 .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +--
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c  | 38 
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h  |  2 +
 3 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
return value;
 }
 
-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
-   struct dc *dc)
-{
-   if (dc->ctx->dce_version == DCN_VERSION_1_01) {
-   struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
-   struct dcn_ip_params *dcn_ip = dc->dcn_ip;
-   struct display_mode_lib *dml = &dc->dml;
-
-   dml->ip.max_num_dpp = 3;
-   /* TODO how to handle 23.84? */
-   dcn_soc->dram_clock_change_latency = 23;
-   dcn_ip->max_num_dpp = 3;
-   }
-   if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-   dc->dcn_soc->urgent_latency = 3;
-   dc->debug.disable_dmcu = true;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
-   }
-
-
-   dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / 
ddr4_dram_width;
-   ASSERT(dc->dcn_soc->number_of_channels < 3);
-   if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
-   dc->dcn_soc->number_of_channels = 2;
-
-   if (dc->dcn_soc->number_of_channels == 1) {
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
-   if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
-   }
-   }
-}
-
 static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
 {
int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
 
-   /* Other architectures we build for build this with soft-float */
+   DC_FP_START();
dcn10_resource_construct_fp(dc);
+   DC_FP_END();
 
if (!dc->config.is_vmin_only_asic)
if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..8b5e6fff5444 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
 #include "dcn10/dcn10_resource.h"
 
 #include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
 
 /**
  * DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,39 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
.writeback_dram_clock_change_latency_us = 23.0,
.return_bus_width_bytes = 64,
 };
+
+void dcn10_resource_construct_fp(struct dc *dc)
+{
+   dc_assert_fp_enabled();
+   if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+   struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+   struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+   struct display_mode_lib *dml = &dc->dml;
+
+   dml->ip.max_num_dpp = 3;
+   /* TODO how to handle 23.84? */
+   dcn_soc->dram_clock_change_latency = 23;
+   dcn_ip->max_num_dpp = 3;
+   }
+   if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+   dc->dcn_soc->urgent_latency = 3;
+   dc->debug.disable_dmcu = true;
+   dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+   }
+
+
+   dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / 
ddr4_dram_width;
+   ASSERT(dc->dcn_soc->number_of_channels < 3);
+   i

[PATCH -next] drm/amdkfd: Fix NULL pointer dereference in svm_migrate_to_ram()

2022-10-26 Thread Yang Li
./drivers/gpu/drm/amd/amdkfd/kfd_migrate.c:985:58-62: ERROR: p is NULL but 
dereferenced.

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2549
Reported-by: Abaci Robot 
Signed-off-by: Yang Li 
---

change in v2:
According to Felix's suggestion, move the pr_debug up before the 
kfd_unref_process 
call. 

 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 20d6b2578927..b9c8d29d95aa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -978,12 +978,10 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
 out_unlock_svms:
mutex_unlock(&p->svms.lock);
 out_unref_process:
+   pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
kfd_unref_process(p);
 out_mmput:
mmput(mm);
-
-   pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
-
return r ? VM_FAULT_SIGBUS : 0;
 }
 
-- 
2.20.1.7.g153144c



Re: [PATCH v5 02/31] drm/i915: Don't register backlight when another backlight should be used (v2)

2022-10-26 Thread Hans de Goede
Hi,

On 10/25/22 22:40, Matthew Garrett wrote:
> On Tue, Oct 25, 2022 at 10:25:33PM +0200, Hans de Goede wrote:
> 
>> Having the native driver come and then go and be replaced
>> with the vendor driver would also be quite inconvenient
>> for these planned changes.
> 
> I understand that it would be inconvenient, but you've broken existing 
> working setups.

I fully acknowledge that I have broken existing working setups
and I definitely want to see this fixed before say 6.1-rc6!

I'm not convinced (at all) that any solutions which re-introduce
acpi_video_get_backlight_type() return-s value changing
half way the boot, with some backlight interface getting
registered and then unregistered again later because
it turns out to be the wrong one is a good fix here.

The whole goal of the refactor was to leave these sorts
of shenanigans behind us.

>> Can you perhaps explain a bit in what way your laptop
>> is weird ?
> 
> It's a Chinese replacement motherboard for a Thinkpad X201, running my 
> own port of Coreboot. Its DMI strings look like an actual Thinkpad in 
order to ensure that thinkpad_acpi can bind for hotkey support, so it's 
> hard to quirk. It'll actually be fixed by your proposed patch to fall 
> back to native rather than vendor, but that patch will break any older 
> machines that offer a vendor interface and don't have the native control 
> hooked up (pretty sure at least the Thinkpad X40 falls into that 
> category).

So looking at:

https://review.coreboot.org/plugins/gitiles/coreboot/+/refs/heads/master/src/mainboard/51nb/x210/acpi/graphics.asl

this code should actually set the ACPI_VIDEO_BACKLIGHT flag:
drivers/acpi/scan.c:

static acpi_status
acpi_backlight_cap_match(acpi_handle handle, u32 level, void *context,
  void **return_value)
{
long *cap = context;

if (acpi_has_method(handle, "_BCM") &&
acpi_has_method(handle, "_BCL")) {
acpi_handle_debug(handle, "Found generic backlight support\n");
*cap |= ACPI_VIDEO_BACKLIGHT;
/* We have backlight support, no need to scan further */
return AE_CTRL_TERMINATE;
}
return 0;
}

What does seem to be missing compared to a "normal" DSDT
is a call to _OSI("Windows 2012") so I would expect this code
in acpi_video_get_backlight_type():

/* On systems with ACPI video use either native or ACPI video. */
if (video_caps & ACPI_VIDEO_BACKLIGHT) {
/*
 * Windows 8 and newer no longer use the ACPI video interface,
 * so it often does not work. If the ACPI tables are written
 * for win8 and native brightness ctl is available, use that.
 *
 * The native check deliberately is inside the if acpi-video
 * block on older devices without acpi-video support native
 * is usually not the best choice.
 */
if (acpi_osi_is_win8() && native_available)
return acpi_backlight_native;
else
return acpi_backlight_video;
}

To enter the "return acpi_backlight_video" path since acpi_osi_is_win8()
will return false.

And then the ACPI backlight methods from:
https://review.coreboot.org/plugins/gitiles/coreboot/+/refs/heads/master/src/mainboard/51nb/x210/acpi/graphics.asl

should get called when changing the backlight brightness,
so assuming that those methods work then things should work fine.

What does "ls /sys/class/backlight" output on the X210 / NB51 board
with a 6.0 kernel? And what does it output with the 6.1-rc? kernels?

IOW which backlight device / control method is being selected
and which one do you want / which one(s) do actually work?

I have been thinking about maybe doing something with 
a dmi_get_bios_year() check (see below), but that will cause
native to get prefered over vendor on old ThinkPads with
coreboot (and thus a new enough year in DMI_BIOS_DATE), which
will likely break backlight control there (if i915 offers
backlight control on those that is).

Also I wonder if it would be possible to set DMI_BIOS_VENDOR
to "Coreboot" so that we can use that? Note that thinkpad_acpi
does not care about the DMI_BIOS_VENDOR value, at least
not on models which start their DMI_PRODUCT_VERSION with
either "ThinkPad" or "Lenovo".

###

Looking more at this I notice that coreboot has a
drivers_intel_gma_displays_ssdt_generate() which seems to
at least always generate ACPI video bus ASL including
backlight control bits.

So the only reason why the current heuristics are not
returning native is the acpi_osi_is_win8() check.

So maybe that needs to become:

if ((acpi_osi_is_win8() || dmi_get_bios_year() >= 2018) && 
native_available)
return acpi_backlight_native;
else
return acpi_backlight_video;

Although I think that will result i